entityextractor 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/entityextractor.rb +7 -5
- metadata +2 -2
data/lib/entityextractor.rb
CHANGED
@@ -37,16 +37,17 @@ class EntityExtractor
|
|
37
37
|
def extractALLCAPS(minchar, ignoreterms)
|
38
38
|
@input.each do |i|
|
39
39
|
addlist = Array.new
|
40
|
-
|
40
|
+
savefield = i[@extractfield].to_s + " "
|
41
|
+
parseALLCAPS(i[@extractfield].to_s, i, minchar, addlist, ignoreterms, savefield)
|
41
42
|
end
|
42
43
|
end
|
43
44
|
|
44
45
|
# Parses terms in all caps
|
45
|
-
def parseALLCAPS(toParse, i, minchar, addlist, ignoreterms)
|
46
|
+
def parseALLCAPS(toParse, i, minchar, addlist, ignoreterms, savefield)
|
46
47
|
if toParse =~ (/[A-Z]{#{minchar}}/)
|
47
48
|
index = toParse =~ (/[A-Z]{#{minchar}}/)
|
48
49
|
charnum = 0
|
49
|
-
|
50
|
+
|
50
51
|
# Find word in all caps
|
51
52
|
toParse.each_char do |c|
|
52
53
|
if charnum >= index
|
@@ -71,14 +72,15 @@ class EntityExtractor
|
|
71
72
|
if !(ignoreterms.include? toParse[index..charnum])
|
72
73
|
addlist.push(toParse[index..charnum])
|
73
74
|
end
|
74
|
-
|
75
|
+
|
75
76
|
parsedstring = toParse[0..charnum]
|
76
77
|
toParse.slice! parsedstring
|
77
|
-
parseALLCAPS(toParse, i, minchar, addlist, ignoreterms)
|
78
|
+
parseALLCAPS(toParse, i, minchar, addlist, ignoreterms, savefield)
|
78
79
|
|
79
80
|
# If there are no (more) results, append addlist to JSON
|
80
81
|
else
|
81
82
|
i["extract"] = addlist
|
83
|
+
i[@extractfield] = savefield
|
82
84
|
@output.push(i)
|
83
85
|
end
|
84
86
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: entityextractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-02-
|
12
|
+
date: 2014-02-26 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: Extracts entities and terms from any JSON.
|
15
15
|
email: shidash@shidash.com
|