extractpatterns 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/extractpatterns.rb +10 -1
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 24ecd7395c9c79e1f035a2420c1e8d6053816d65
4
- data.tar.gz: 9d1f1b45a380ff2de4b5cf2b5e7150f943df29a8
3
+ metadata.gz: b4839a2718581a04fd0fba727ebe49a555d0e429
4
+ data.tar.gz: 6ff61dbdb39ca8db3d994a928ead04b90924706a
5
5
  SHA512:
6
- metadata.gz: 03b98db5070f0ada7452d5738d5c36eb8bdbb54a51b16e23bae28909fa017417ba62769fab87edee0be432f872b8f0fc5d106b105ced3f80bd6ffb271086f140
7
- data.tar.gz: a1e84c75ba367661a8ca80625de90d4b4ee5875a9c47e75cd5072ddfd2b79e6b2302fcebca217dd4bb845e545805a179fa0cf9128c99da4d769f643a552d28f2
6
+ metadata.gz: b227fcfdb2fbca4a5bb8c127d7b293d85ebcc738a1c0ad6b23fd86115b3d7439b8ba65459b1eac1835f3fe2358f6b9bd25ef74c42f4605c0ec233c21950c9ad4
7
+ data.tar.gz: 55f286d1d433963fd58c94527de287268e69e6e7f229eb9bb6ae5d13a5396c7a6daea2ab78ae9668b29e34696a820c143fa113e5843cb6589b2c9753941ad40e
data/lib/extractpatterns.rb CHANGED
@@ -43,10 +43,19 @@ class ExtractPatterns
43
43
  # Extract set terms
44
44
  def find_known_terms(item, field, extract_list)
45
45
  d = TermExtractor.new(JSON.pretty_generate([item]), [field], "extracted_codewords")
46
- d.extractSetTerms(File.read(extract_list), ["codeword"], "case_sensitive")
46
+ d.extractSetTerms(fixEncode(File.read(extract_list)), ["codeword"], "case_sensitive")
47
47
  return JSON.parse(d.getAllOutput).first["extracted_codewords"]
48
48
  end
49
49
 
50
+ # Fix encoding errors
51
+ def fixEncode(str)
52
+ if str.is_a?(String)
53
+ return str.unpack('C*').pack('U*')
54
+ else
55
+ return str
56
+ end
57
+ end
58
+
50
59
  # Normalize and match synonyms and deduplicate
51
60
  def normalize_results(extracted_raw, synonym_list)
52
61
  synonyms = JSON.parse(File.read(synonym_list))
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: extractpatterns
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-25 00:00:00.000000000 Z
11
+ date: 2015-12-28 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Extracts entities and terms from any JSON.
14
14
  email: shidash@shidash.com