extractpatterns 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/extractpatterns.rb +5 -5
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e1d12367c3b10102a6e8e5869f916db672fffbe
|
4
|
+
data.tar.gz: a18f2d9de28356f0dccf631ee0494278fd01ef11
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 02e1683ffbed193381bd2244ec59b9eedf8f25fb51c7ac276a0afb72356ae4a67ce5f3181dcc02f546fabf42e164d58aa393332426d1cf707f4d3d3a371dded8
|
7
|
+
data.tar.gz: 6576f9eda00e58d8157ae98d32c623e92dda16eec2852105762e0b4fc45e116b8f12dea1b6db2f84b43461ab0378be6d957a4fd1c1835dcf8d31dad30fcec346
|
data/lib/extractpatterns.rb
CHANGED
@@ -42,7 +42,7 @@ class ExtractPatterns
|
|
42
42
|
|
43
43
|
# Extract set terms
|
44
44
|
def find_known_terms(item, field, extract_list)
|
45
|
-
d = TermExtractor.new(
|
45
|
+
d = TermExtractor.new(JSON.pretty_generate([item]), [field], "extracted_codewords")
|
46
46
|
d.extractSetTerms(fixEncode(File.read(extract_list)), ["codeword"], "case_sensitive")
|
47
47
|
return JSON.parse(d.getAllOutput).first["extracted_codewords"]
|
48
48
|
end
|
@@ -87,13 +87,13 @@ class ExtractPatterns
|
|
87
87
|
|
88
88
|
@fields.each do |field|
|
89
89
|
# Extract list results, allcaps, and known codewords from each field
|
90
|
-
list_results = comma_list_matches(
|
91
|
-
allcaps_results = get_allcaps(
|
90
|
+
list_results = comma_list_matches(item[field])
|
91
|
+
allcaps_results = get_allcaps(item[field], allcaps_length)
|
92
92
|
merge_results = item[merge_field] ? item[merge_field] : []
|
93
|
-
|
93
|
+
# known_terms_results = find_known_terms(fixEncode(item), field, extract_list)
|
94
94
|
|
95
95
|
# Merge results and post-process
|
96
|
-
item[@match_name] = item[@match_name] | normalize_results((allcaps_results | list_results | merge_results
|
96
|
+
item[@match_name] = item[@match_name] | normalize_results((allcaps_results | list_results | merge_results ),extract_list)
|
97
97
|
end
|
98
98
|
|
99
99
|
# Push updated item out
|