extractpatterns 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/extractpatterns.rb +10 -1
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 24ecd7395c9c79e1f035a2420c1e8d6053816d65
4
- data.tar.gz: 9d1f1b45a380ff2de4b5cf2b5e7150f943df29a8
3
+ metadata.gz: b4839a2718581a04fd0fba727ebe49a555d0e429
4
+ data.tar.gz: 6ff61dbdb39ca8db3d994a928ead04b90924706a
5
5
  SHA512:
6
- metadata.gz: 03b98db5070f0ada7452d5738d5c36eb8bdbb54a51b16e23bae28909fa017417ba62769fab87edee0be432f872b8f0fc5d106b105ced3f80bd6ffb271086f140
7
- data.tar.gz: a1e84c75ba367661a8ca80625de90d4b4ee5875a9c47e75cd5072ddfd2b79e6b2302fcebca217dd4bb845e545805a179fa0cf9128c99da4d769f643a552d28f2
6
+ metadata.gz: b227fcfdb2fbca4a5bb8c127d7b293d85ebcc738a1c0ad6b23fd86115b3d7439b8ba65459b1eac1835f3fe2358f6b9bd25ef74c42f4605c0ec233c21950c9ad4
7
+ data.tar.gz: 55f286d1d433963fd58c94527de287268e69e6e7f229eb9bb6ae5d13a5396c7a6daea2ab78ae9668b29e34696a820c143fa113e5843cb6589b2c9753941ad40e
data/lib/extractpatterns.rb CHANGED
@@ -43,10 +43,19 @@ class ExtractPatterns
43
43
  # Extract set terms
44
44
  def find_known_terms(item, field, extract_list)
45
45
  d = TermExtractor.new(JSON.pretty_generate([item]), [field], "extracted_codewords")
46
- d.extractSetTerms(File.read(extract_list), ["codeword"], "case_sensitive")
46
+ d.extractSetTerms(fixEncode(File.read(extract_list)), ["codeword"], "case_sensitive")
47
47
  return JSON.parse(d.getAllOutput).first["extracted_codewords"]
48
48
  end
49
49
 
50
+ # Fix encoding errors
51
+ def fixEncode(str)
52
+ if str.is_a?(String)
53
+ return str.unpack('C*').pack('U*')
54
+ else
55
+ return str
56
+ end
57
+ end
58
+
50
59
  # Normalize and match synonyms and deduplicate
51
60
  def normalize_results(extracted_raw, synonym_list)
52
61
  synonyms = JSON.parse(File.read(synonym_list))
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: extractpatterns
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-25 00:00:00.000000000 Z
11
+ date: 2015-12-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Extracts entities and terms from any JSON.
14
14
  email: shidash@shidash.com