entityextractor 0.0.10 → 0.0.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0f99308791264c4e567c495fe9964e68ad932ac5
4
- data.tar.gz: ec717ca535969ad20c646bb4559763c69a973aee
3
+ metadata.gz: 6d73c2f909fbe862625da5f0952032715d696de2
4
+ data.tar.gz: c33ee22a6a5cbb3eb37f649c6db2df26df366c75
5
5
  SHA512:
6
- metadata.gz: 2d7acd5e1645f5de5c1b52b2fb3cd14eb739c8444f962c34d54e914d1161aa1cfa0e17ba3cb10afdeb7100b9cbbb4d1a266b6078f2ebdde125d72a897a71e78c
7
- data.tar.gz: 97cfa5c16df408a6dfd23b4a51bda99ef50caa997042ffc4370a15549b01106eff089889a430284377833e68edb608091079bd40c0129dbc36e08ddaf394e554
6
+ metadata.gz: f98e67a81133a74c666d474b5d22bd0018e5d941ec7bc5f875a319ea1ee3677d93ff3917591f23fdbfdb0c0e2cc14c4f866c4b1289aa8a908acf83f6f8c07791
7
+ data.tar.gz: a5a40b53bcb12550bf096cea1b4f72f63be2b254d079df89c322619041f67dfb51f3d9da9d479f422c058acbe4257e6b17dd1e8eb0834d08ba2874d4cf31c2bd
@@ -1,24 +1,32 @@
1
1
  require 'json'
2
2
  load 'extractdates.rb'
3
+ load 'handleinput.rb'
3
4
  require 'uploadconvert'
4
5
 
5
6
  class EntityExtractor
6
- def initialize(input, *extractfield)
7
+ def initialize(input, fieldoutname, *extractfield)
7
8
  @input = JSON.parse(input)
9
+ @fieldoutname = fieldoutname
8
10
  @extractfield = *extractfield
9
11
  @output = Array.new
10
12
  end
11
13
 
12
14
  # Extract terms input from preset list
13
- def extractTerms(*terms, i, addlist, field)
15
+ def extractTerms(extractlist, i, addlist, field)
14
16
  count = 0
17
+ downcased = i[field].to_s.downcase
15
18
 
16
19
  # Check the item for each term
17
- terms.each do |t|
20
+ extractlist.each do |t, c|
18
21
  count+=1
19
-
20
- if i[field].to_s.include? t
21
- addlist.push(t.upcase)
22
+ if c == true
23
+ if i[field].to_s.include? t
24
+ addlist.push(t)
25
+ end
26
+ else
27
+ if downcased.include? t.downcase
28
+ addlist.push(t)
29
+ end
22
30
  end
23
31
  end
24
32
  end
@@ -70,7 +78,7 @@ class EntityExtractor
70
78
 
71
79
  # Generate hash of all extracted terms
72
80
  @output.each do |i|
73
- i["extract"].each do |e|
81
+ i[@fieldoutname].each do |e|
74
82
  if extracthash.has_key? e
75
83
  extracthash[e] += 1
76
84
  else
@@ -88,8 +96,12 @@ class EntityExtractor
88
96
  JSON.pretty_generate(@output)
89
97
  end
90
98
 
91
- def extract(type, minchar, ignoreterms, *terms)
99
+ def extract(type, minchar, ignoreterms, terms, ignorefields, caseinfo, mapto)
92
100
  flag = 0
101
+
102
+ h = HandleInput.new(terms, ignorefields, caseinfo)
103
+ extractlist = h.detecttype
104
+
93
105
  @input.each do |i|
94
106
  if i.length == 2
95
107
  i = @input
@@ -101,10 +113,14 @@ class EntityExtractor
101
113
  # Generate set terms list
102
114
  if type == "set"
103
115
  @extractfield.each do |f|
104
- extractTerms(*terms, i, addlist, f)
116
+ extractTerms(extractlist, i, addlist, f)
117
+ end
118
+
119
+ if mapto
120
+ i[@fieldoutname] = h.mapout(addlist, mapto)
121
+ else
122
+ i[@fieldoutname] = addlist
105
123
  end
106
-
107
- i["extract"] = addlist
108
124
  @output.push(i)
109
125
 
110
126
  # Generate ALLCAPS terms list
@@ -114,7 +130,7 @@ class EntityExtractor
114
130
  parseALLCAPS(i[f].to_s, i, minchar, addlist, ignoreterms, savefield, f)
115
131
  end
116
132
 
117
- i["extract"] = addlist
133
+ i[@fieldoutname] = addlist
118
134
  @output.push(i)
119
135
 
120
136
  # Extract dates
@@ -130,10 +146,15 @@ class EntityExtractor
130
146
  @extractfield.each do |f|
131
147
  savefield = i[f].to_s + " "
132
148
  parseALLCAPS(i[f].to_s, i, minchar, addlist, ignoreterms, savefield, f)
133
- extractTerms(*terms, i, addlist, f)
149
+ extractTerms(extractlist, i, addlist, f)
150
+ end
151
+
152
+ if mapto
153
+ i[@fieldoutname] = h.mapout(addlist, mapto)
154
+ else
155
+ i[@fieldoutname] = addlist
134
156
  end
135
157
 
136
- i["extract"] = addlist
137
158
  @output.push(i)
138
159
  end
139
160
 
@@ -143,3 +164,4 @@ class EntityExtractor
143
164
  end
144
165
  end
145
166
  end
167
+
@@ -0,0 +1,138 @@
1
+ require 'json'
2
+
3
# Turns a user-supplied term list into the lookup table consumed by
# EntityExtractor#extractTerms, and maps extracted terms back onto the keys or
# fields of that term list.
#
# Three term-list shapes are recognised (see #detecttype):
#   * Array           - ["term", ...]
#   * Hash of scalars - { "key" => "value", ... }
#   * Hash of Hashes  - { "key" => { "field" => "value", ... }, ... }
class HandleInput
  # json         - the term list itself (an Array or Hash object; it is
  #                inspected with is_a?/each and never parsed here).
  # ignorefields - collection of names to leave out of the lookup table:
  #                "hashkey" skips the top-level keys, "hashval" skips the
  #                values of a flat dictionary, any other entry skips the
  #                nested field of that name. nil means "ignore nothing".
  # caseinfo     - for Array input: "casesensitive" or "noncasesensitive".
  #                For Hash input: collection of names ("hashkey", "hashval"
  #                or nested field names) whose terms are flagged false
  #                (case-insensitive); nil flags everything true.
  def initialize(json, ignorefields, caseinfo)
    @json = json
    # The original tested @ignorefields (always nil at this point) instead of
    # the argument, so the caller's ignore list was silently thrown away.
    @ignorefields = ignorefields || []
    @caseinfo = caseinfo
    @output = []
    @outhash = {}
  end

  # Map extracted terms back to an identifier from the term list.
  #
  # addlist - Array of terms that were found.
  # mapto   - "key" to return the top-level key owning each term, otherwise the
  #           name of a nested field whose value should be returned.
  #
  # Returns an Array of mapped values without duplicates, in first-match order.
  def mapout(addlist, mapto)
    outarr = []

    addlist.each do |term|
      @json.each do |k, v|
        if mapto == "key"
          next if outarr.include?(k)

          # Nested hash: match on any field value. Otherwise (flat dictionary
          # entry or plain array element): match on the value or the key.
          # NOTE(review): a term that matched the key of a nested entry is not
          # mapped here, unlike the flat case - confirm whether that is intended.
          matched = v.is_a?(Hash) ? v.value?(term) : (v == term || k == term)
          outarr.push(k) if matched
        else
          # Only nested entries have a field to map to; anything else used to
          # raise NoMethodError on v.each and now simply contributes nothing.
          next unless v.is_a?(Hash)

          target = v[mapto]
          outarr.push(target) if !outarr.include?(target) && v.value?(term)
        end
      end
    end

    outarr
  end

  # Figure out which type of input it is (array, hash, hash with hash values)
  # and build the lookup table accordingly.
  #
  # Returns a Hash of term => flag, where true asks for an exact-case match and
  # false for a case-insensitive one.
  def detecttype
    if @json.is_a?(Array)
      @output = @json
      checkCase
    elsif @json.is_a?(Hash) && !@json.empty?
      # The shape is decided from the first value only.
      if @json.values.first.is_a?(Hash)
        parseValHash
      else
        parseDictionary
      end
    end

    @outhash
  end

  # Adds case sensitive preferences for Array input. Any caseinfo other than
  # "casesensitive" / "noncasesensitive" leaves the lookup table untouched.
  def checkCase
    return unless ["casesensitive", "noncasesensitive"].include?(@caseinfo)

    sensitive = @caseinfo == "casesensitive"
    @output.each { |term| @outhash[term] = sensitive }
  end

  # Handle hashes where the values are a hash: registers every top-level key
  # (unless "hashkey" is ignored) and every nested field value (unless that
  # field name is ignored).
  def parseValHash
    @json.each do |k, v|
      @outhash[k] = case_sensitive?("hashkey") unless @ignorefields.include?("hashkey")

      v.each do |field, value|
        next if @ignorefields.include?(field)

        @outhash[value] = case_sensitive?(field)
      end
    end
  end

  # Handle flat hashes: registers keys (unless "hashkey" is ignored) and values
  # (unless "hashval" is ignored).
  def parseDictionary
    @json.each do |k, v|
      @outhash[k] = case_sensitive?("hashkey") unless @ignorefields.include?("hashkey")
      @outhash[v] = case_sensitive?("hashval") unless @ignorefields.include?("hashval")
    end
  end

  private

  # True unless caseinfo lists the given name. A nil caseinfo (or any object
  # without include?) counts as "nothing listed" instead of raising.
  def case_sensitive?(name)
    !(@caseinfo.respond_to?(:include?) && @caseinfo.include?(name))
  end
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: entityextractor
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.10
4
+ version: 0.0.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-11 00:00:00.000000000 Z
11
+ date: 2014-08-11 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Extracts entities and terms from any JSON.
14
14
  email: shidash@shidash.com
@@ -17,6 +17,7 @@ extensions: []
17
17
  extra_rdoc_files: []
18
18
  files:
19
19
  - lib/extractdates.rb
20
+ - lib/handleinput.rb
20
21
  - lib/entityextractor.rb
21
22
  homepage: https://github.com/Shidash/EntityExtractor
22
23
  licenses:
@@ -43,3 +44,4 @@ signing_key:
43
44
  specification_version: 4
44
45
  summary: Extracts entities and terms
45
46
  test_files: []
47
+ has_rdoc: