extractpatterns 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/extractpatterns.rb +5 -4
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 17d17e273a60c9543bb78b5c193d88f908c174c3
|
4
|
+
data.tar.gz: fcc30f5af578d856446c0090a5e466200c729a3e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bde3529e9d77807d00fbf0f23d2884d80ccd9e3d69fd57e8c2fde7dccbc1d140c4eff99c3e2e37509cc35706d2635b44159df74c0a11ce7e3221ac9803b922e8
|
7
|
+
data.tar.gz: ce78a9713e51829aae58cf551c9d718c45ffa21cda1f656dcf92e61cd7aff0abe3aa6248c109bdba25ed75eede7c4fab444534b7ad355fda6213bbd51afad055
|
data/lib/extractpatterns.rb
CHANGED
@@ -42,7 +42,7 @@ class ExtractPatterns
|
|
42
42
|
|
43
43
|
# Extract set terms
|
44
44
|
def find_known_terms(item, field, extract_list)
|
45
|
-
d = TermExtractor.new(JSON.pretty_generate([item]), [field], "extracted_codewords")
|
45
|
+
d = TermExtractor.new(fixEncode(JSON.pretty_generate([item])), [field], "extracted_codewords")
|
46
46
|
d.extractSetTerms(fixEncode(File.read(extract_list)), ["codeword"], "case_sensitive")
|
47
47
|
return JSON.parse(d.getAllOutput).first["extracted_codewords"]
|
48
48
|
end
|
@@ -90,10 +90,10 @@ class ExtractPatterns
|
|
90
90
|
list_results = comma_list_matches(item[field])
|
91
91
|
allcaps_results = get_allcaps(item[field], allcaps_length)
|
92
92
|
merge_results = item[merge_field] ? item[merge_field] : []
|
93
|
-
known_terms_results = find_known_terms(item, field, extract_list)
|
93
|
+
#known_terms_results = find_known_terms(item, field, extract_list)
|
94
94
|
|
95
95
|
# Merge results and post-process
|
96
|
-
item[@match_name] = item[@match_name] | normalize_results((allcaps_results | list_results | merge_results
|
96
|
+
item[@match_name] = item[@match_name] | normalize_results((allcaps_results | list_results | merge_results ),
|
97
97
|
extract_list)
|
98
98
|
end
|
99
99
|
|
@@ -139,5 +139,6 @@ end
|
|
139
139
|
# end
|
140
140
|
#end
|
141
141
|
|
142
|
-
#e = ExtractPatterns.new(File.read("
|
142
|
+
#e = ExtractPatterns.new(File.read("/home/shidash/Data/unknown_test/Never_Shake_A_Baby_SIGINT.json"), ["additional_info", "job_description", "skills", "summary"], "tools_mentioned")
|
143
|
+
#puts e.search_fields(6, "extract_list.json", nil)
|
143
144
|
#puts e.ranked_hash_output(overalloutput)
|