entityextractor 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/entityextractor.rb +7 -5
  2. metadata +2 -2
@@ -37,16 +37,17 @@ class EntityExtractor
37
37
  def extractALLCAPS(minchar, ignoreterms)
38
38
  @input.each do |i|
39
39
  addlist = Array.new
40
- parseALLCAPS(i[@extractfield].to_s, i, minchar, addlist, ignoreterms)
40
+ savefield = i[@extractfield].to_s + " "
41
+ parseALLCAPS(i[@extractfield].to_s, i, minchar, addlist, ignoreterms, savefield)
41
42
  end
42
43
  end
43
44
 
44
45
  # Parses terms in all caps
45
- def parseALLCAPS(toParse, i, minchar, addlist, ignoreterms)
46
+ def parseALLCAPS(toParse, i, minchar, addlist, ignoreterms, savefield)
46
47
  if toParse =~ (/[A-Z]{#{minchar}}/)
47
48
  index = toParse =~ (/[A-Z]{#{minchar}}/)
48
49
  charnum = 0
49
-
50
+
50
51
  # Find word in all caps
51
52
  toParse.each_char do |c|
52
53
  if charnum >= index
@@ -71,14 +72,15 @@ class EntityExtractor
71
72
  if !(ignoreterms.include? toParse[index..charnum])
72
73
  addlist.push(toParse[index..charnum])
73
74
  end
74
-
75
+
75
76
  parsedstring = toParse[0..charnum]
76
77
  toParse.slice! parsedstring
77
- parseALLCAPS(toParse, i, minchar, addlist, ignoreterms)
78
+ parseALLCAPS(toParse, i, minchar, addlist, ignoreterms, savefield)
78
79
 
79
80
  # If there are no (more) results, append addlist to JSON
80
81
  else
81
82
  i["extract"] = addlist
83
+ i[@extractfield] = savefield
82
84
  @output.push(i)
83
85
  end
84
86
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: entityextractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-02-23 00:00:00.000000000 Z
12
+ date: 2014-02-26 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Extracts entities and terms from any JSON.
15
15
  email: shidash@shidash.com