entityextractor 0.0.10 → 0.0.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0f99308791264c4e567c495fe9964e68ad932ac5
4
- data.tar.gz: ec717ca535969ad20c646bb4559763c69a973aee
3
+ metadata.gz: 6d73c2f909fbe862625da5f0952032715d696de2
4
+ data.tar.gz: c33ee22a6a5cbb3eb37f649c6db2df26df366c75
5
5
  SHA512:
6
- metadata.gz: 2d7acd5e1645f5de5c1b52b2fb3cd14eb739c8444f962c34d54e914d1161aa1cfa0e17ba3cb10afdeb7100b9cbbb4d1a266b6078f2ebdde125d72a897a71e78c
7
- data.tar.gz: 97cfa5c16df408a6dfd23b4a51bda99ef50caa997042ffc4370a15549b01106eff089889a430284377833e68edb608091079bd40c0129dbc36e08ddaf394e554
6
+ metadata.gz: f98e67a81133a74c666d474b5d22bd0018e5d941ec7bc5f875a319ea1ee3677d93ff3917591f23fdbfdb0c0e2cc14c4f866c4b1289aa8a908acf83f6f8c07791
7
+ data.tar.gz: a5a40b53bcb12550bf096cea1b4f72f63be2b254d079df89c322619041f67dfb51f3d9da9d479f422c058acbe4257e6b17dd1e8eb0834d08ba2874d4cf31c2bd
@@ -1,24 +1,32 @@
1
1
  require 'json'
2
2
  load 'extractdates.rb'
3
+ load 'handleinput.rb'
3
4
  require 'uploadconvert'
4
5
 
5
6
  class EntityExtractor
6
- def initialize(input, *extractfield)
7
+ def initialize(input, fieldoutname, *extractfield)
7
8
  @input = JSON.parse(input)
9
+ @fieldoutname = fieldoutname
8
10
  @extractfield = *extractfield
9
11
  @output = Array.new
10
12
  end
11
13
 
12
14
  # Extract terms input from preset list
13
- def extractTerms(*terms, i, addlist, field)
15
+ def extractTerms(extractlist, i, addlist, field)
14
16
  count = 0
17
+ downcased = i[field].to_s.downcase
15
18
 
16
19
  # Check the item for each term
17
- terms.each do |t|
20
+ extractlist.each do |t, c|
18
21
  count+=1
19
-
20
- if i[field].to_s.include? t
21
- addlist.push(t.upcase)
22
+ if c == true
23
+ if i[field].to_s.include? t
24
+ addlist.push(t)
25
+ end
26
+ else
27
+ if downcased.include? t.downcase
28
+ addlist.push(t)
29
+ end
22
30
  end
23
31
  end
24
32
  end
@@ -70,7 +78,7 @@ class EntityExtractor
70
78
 
71
79
  # Generate hash of all extracted terms
72
80
  @output.each do |i|
73
- i["extract"].each do |e|
81
+ i[@fieldoutname].each do |e|
74
82
  if extracthash.has_key? e
75
83
  extracthash[e] += 1
76
84
  else
@@ -88,8 +96,12 @@ class EntityExtractor
88
96
  JSON.pretty_generate(@output)
89
97
  end
90
98
 
91
- def extract(type, minchar, ignoreterms, *terms)
99
+ def extract(type, minchar, ignoreterms, terms, ignorefields, caseinfo, mapto)
92
100
  flag = 0
101
+
102
+ h = HandleInput.new(terms, ignorefields, caseinfo)
103
+ extractlist = h.detecttype
104
+
93
105
  @input.each do |i|
94
106
  if i.length == 2
95
107
  i = @input
@@ -101,10 +113,14 @@ class EntityExtractor
101
113
  # Generate set terms list
102
114
  if type == "set"
103
115
  @extractfield.each do |f|
104
- extractTerms(*terms, i, addlist, f)
116
+ extractTerms(extractlist, i, addlist, f)
117
+ end
118
+
119
+ if mapto
120
+ i[@fieldoutname] = h.mapout(addlist, mapto)
121
+ else
122
+ i[@fieldoutname] = addlist
105
123
  end
106
-
107
- i["extract"] = addlist
108
124
  @output.push(i)
109
125
 
110
126
  # Generate ALLCAPS terms list
@@ -114,7 +130,7 @@ class EntityExtractor
114
130
  parseALLCAPS(i[f].to_s, i, minchar, addlist, ignoreterms, savefield, f)
115
131
  end
116
132
 
117
- i["extract"] = addlist
133
+ i[@fieldoutname] = addlist
118
134
  @output.push(i)
119
135
 
120
136
  # Extract dates
@@ -130,10 +146,15 @@ class EntityExtractor
130
146
  @extractfield.each do |f|
131
147
  savefield = i[f].to_s + " "
132
148
  parseALLCAPS(i[f].to_s, i, minchar, addlist, ignoreterms, savefield, f)
133
- extractTerms(*terms, i, addlist, f)
149
+ extractTerms(extractlist, i, addlist, f)
150
+ end
151
+
152
+ if mapto
153
+ i[@fieldoutname] = h.mapout(addlist, mapto)
154
+ else
155
+ i[@fieldoutname] = addlist
134
156
  end
135
157
 
136
- i["extract"] = addlist
137
158
  @output.push(i)
138
159
  end
139
160
 
@@ -143,3 +164,4 @@ class EntityExtractor
143
164
  end
144
165
  end
145
166
  end
167
+
@@ -0,0 +1,138 @@
1
+ require 'json'
2
+
3
# Turns a user-supplied term list into the lookup table consumed by
# EntityExtractor#extractTerms, and maps matched terms back to the entry
# they came from.
#
# Three input shapes are handled (see #detecttype):
# * Array                 - a flat list of terms
# * Hash of scalar values - a "dictionary"; keys and values are both terms
# * Hash of Hash values   - every inner value is a term, addressed by its
#                           inner key (the "field")
#
# The pseudo-field names "hashkey" and "hashval" stand for the outer hash
# keys and (for dictionaries) the outer hash values.
class HandleInput
  # Accepted whole-list case settings for Array input, mapped to the flag
  # stored per term (true = match case-sensitively).
  CASE_MODES = { "casesensitive" => true, "noncasesensitive" => false }.freeze

  # json         - the already-parsed term list (Array or Hash).
  # ignorefields - field names whose terms must not be extracted; nil means
  #                "ignore nothing".
  # caseinfo     - for Array input: "casesensitive" or "noncasesensitive".
  #                For Hash input: the field names to match
  #                case-insensitively; nil means every field is
  #                case-sensitive.
  def initialize(json, ignorefields, caseinfo)
    @json = json
    # Fall back to an empty list only when the caller passed nil. The check
    # must look at the argument: the instance variable is still unset here.
    @ignorefields = ignorefields || []
    @caseinfo = caseinfo
    @output = []
    @outhash = {}
  end

  # Map output to value
  #
  # addlist - the terms that were matched.
  # mapto   - "key" to map each term to the outer key of the entry that
  #           contains it, or an inner field name to map it to that field's
  #           value (the latter requires Hash-valued entries).
  #
  # Returns the mapped values in first-match order, without duplicates.
  #
  # NOTE(review): for Hash-valued entries only the inner values are compared
  # with the term, never the outer key itself - confirm this is intended.
  def mapout(addlist, mapto)
    outarr = []

    addlist.each do |term|
      @json.each do |key, val|
        if mapto == "key"
          matched =
            if val.is_a? Hash
              val.any? { |_field, inner| inner == term }
            else
              # Map for dictionaries: either side of the pair may be the term
              val == term || key == term
            end
          outarr.push(key) if matched && !outarr.include?(key)
        elsif val.any? { |_field, inner| inner == term }
          # Only map if not already matched
          target = val[mapto]
          outarr.push(target) unless outarr.include?(target)
        end
      end
    end

    outarr
  end

  # Figure out which type of input it is: array, hash, hash with hash values
  #
  # Returns a Hash of term => flag, where true means the term is matched
  # case-sensitively and false means case-insensitively. Input that is
  # neither an Array nor a Hash yields an empty table.
  def detecttype
    if @json.is_a? Array
      @output = @json
      checkCase
    elsif @json.is_a? Hash
      # The shape of the first value decides how the whole hash is parsed.
      _first_key, first_val = @json.first
      if first_val.is_a? Hash
        parseValHash
      else
        parseDictionary
      end
    end

    @outhash
  end

  # Adds case sensitive preferences
  #
  # Flags every term of an Array input according to @caseinfo. Any other
  # @caseinfo value leaves the table untouched.
  def checkCase
    return unless CASE_MODES.key?(@caseinfo)

    sensitive = CASE_MODES[@caseinfo]
    @output.each { |term| @outhash[term] = sensitive }
  end

  # Handle hashes where the values are a hash
  #
  # Registers each outer key under the pseudo-field "hashkey" and each inner
  # value under its own inner key.
  def parseValHash
    @json.each do |key, fields|
      record_term(key, "hashkey")
      fields.each { |field, term| record_term(term, field) }
    end
  end

  # Handle hashes
  #
  # Registers each key under the pseudo-field "hashkey" and each value under
  # the pseudo-field "hashval".
  def parseDictionary
    @json.each do |key, val|
      record_term(key, "hashkey")
      record_term(val, "hashval")
    end
  end

  private

  # Adds term to the lookup table unless its field is ignored.
  def record_term(term, field)
    return if @ignorefields.include? field

    @outhash[term] = case_sensitive?(field)
  end

  # A field is case-sensitive unless @caseinfo lists it; nil lists nothing.
  def case_sensitive?(field)
    !(@caseinfo && @caseinfo.include?(field))
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: entityextractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-11 00:00:00.000000000 Z
11
+ date: 2014-08-11 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Extracts entities and terms from any JSON.
14
14
  email: shidash@shidash.com
@@ -17,6 +17,7 @@ extensions: []
17
17
  extra_rdoc_files: []
18
18
  files:
19
19
  - lib/extractdates.rb
20
+ - lib/handleinput.rb
20
21
  - lib/entityextractor.rb
21
22
  homepage: https://github.com/Shidash/EntityExtractor
22
23
  licenses:
@@ -43,3 +44,4 @@ signing_key:
43
44
  specification_version: 4
44
45
  summary: Extracts entities and terms
45
46
  test_files: []
47
+ has_rdoc: