entityextractor 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2)
  1. data/lib/entityextractor.rb +49 -36
  2. metadata +2 -2
@@ -1,49 +1,27 @@
1
1
  require 'json'
2
2
 
3
3
  class EntityExtractor
4
- def initialize(input, extractfield)
4
+ def initialize(input, *extractfield)
5
5
  @input = JSON.parse(input)
6
- @extractfield = extractfield
6
+ @extractfield = *extractfield
7
7
  @output = Array.new
8
8
  end
9
9
 
10
10
  # Extract terms input from preset list
11
- def extractTerms(*terms)
12
- @input.each do |i|
13
- addlist = Array.new
14
- count = 0
11
+ def extractTerms(*terms, i, addlist, field)
12
+ count = 0
15
13
 
16
- # Check the item for each term
17
- terms.each do |t|
18
- count+=1
19
- if i[@extractfield].to_s.include? t
20
- addlist.push(t)
21
-
22
- # Add found terms to output on last term
23
- if count == terms.length
24
- i["extract"] = addlist
25
- @output.push(i)
26
- end
27
-
28
- elsif count == terms.length
29
- i["extract"] = addlist
30
- @output.push(i)
31
- end
14
+ # Check the item for each term
15
+ terms.each do |t|
16
+ count+=1
17
+ if i[field].to_s.include? t
18
+ addlist.push(t)
32
19
  end
33
20
  end
34
21
  end
35
22
 
36
- # Extract all terms in ALLCAPS (specifiy min num CAPS chars in row)
37
- def extractALLCAPS(minchar, ignoreterms)
38
- @input.each do |i|
39
- addlist = Array.new
40
- savefield = i[@extractfield].to_s + " "
41
- parseALLCAPS(i[@extractfield].to_s, i, minchar, addlist, ignoreterms, savefield)
42
- end
43
- end
44
-
45
23
  # Parses terms in all caps
46
- def parseALLCAPS(toParse, i, minchar, addlist, ignoreterms, savefield)
24
+ def parseALLCAPS(toParse, i, minchar, addlist, ignoreterms, savefield, extractfield)
47
25
  if toParse =~ (/[A-Z]{#{minchar}}/)
48
26
  index = toParse =~ (/[A-Z]{#{minchar}}/)
49
27
  charnum = 0
@@ -75,13 +53,11 @@ class EntityExtractor
75
53
 
76
54
  parsedstring = toParse[0..charnum]
77
55
  toParse.slice! parsedstring
78
- parseALLCAPS(toParse, i, minchar, addlist, ignoreterms, savefield)
56
+ parseALLCAPS(toParse, i, minchar, addlist, ignoreterms, savefield, extractfield)
79
57
 
80
58
  # If there are no (more) results, append addlist to JSON
81
59
  else
82
- i["extract"] = addlist
83
- i[@extractfield] = savefield
84
- @output.push(i)
60
+ i[extractfield] = savefield
85
61
  end
86
62
  end
87
63
 
@@ -108,4 +84,41 @@ class EntityExtractor
108
84
  def genJSON
109
85
  JSON.pretty_generate(@output)
110
86
  end
87
+
88
+ def extract(type, minchar, ignoreterms, *terms)
89
+ @input.each do |i|
90
+ addlist = Array.new
91
+
92
+ # Generate set terms list
93
+ if type == "set"
94
+ @extractfield.each do |f|
95
+ extractTerms(*terms, i, addlist, f)
96
+ end
97
+
98
+ i["extract"] = addlist
99
+ @output.push(i)
100
+
101
+ # Generate ALLCAPS terms list
102
+ elsif type == "ALLCAPS"
103
+ @extractfield.each do |f|
104
+ savefield = i[f].to_s + " "
105
+ parseALLCAPS(i[f].to_s, i, minchar, addlist, ignoreterms, savefield, f)
106
+ end
107
+
108
+ i["extract"] = addlist
109
+ @output.push(i)
110
+
111
+ # Extract both set terms and ALLCAPS
112
+ elsif type == "both"
113
+ @extractfield.each do |f|
114
+ savefield = i[f].to_s + " "
115
+ parseALLCAPS(i[f].to_s, i, minchar, addlist, ignoreterms, savefield, f)
116
+ extractTerms(*terms, i, addlist, f)
117
+ end
118
+
119
+ i["extract"] = addlist
120
+ @output.push(i)
121
+ end
122
+ end
123
+ end
111
124
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: entityextractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-02-26 00:00:00.000000000 Z
12
+ date: 2014-03-01 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: Extracts entities and terms from any JSON.
15
15
  email: shidash@shidash.com