dover_to_calais 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- OWJmOWEyMGFjNDk2ZjZiODYyNjQ1NDM2YjM0YjMyNzQ1MmUzZjg3MA==
4
+ MmI4YTNmZGZkYTViNDVlMzVmNGRiMGM3YTFlYzQ5YTc1ZTdmMDU3ZA==
5
5
  data.tar.gz: !binary |-
6
- MTllMDRiOTNlNDg2Y2FiNmY1MmQyMjAyMzViNWJiZWFmN2ZjYTY3ZA==
6
+ M2U0YjI4ZTJhM2E4M2U4MGM2N2E2OWZkYjUwYzExM2YyOWExZDYxNg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- OGI3NDU4YWU1YzllMjBiNTVlMmU3NzNhMDUzYmNhNWYzODY0ZTE3MDQzZmMx
10
- NmZiMDMxZjYzMTI3ZTdkYWU5MGNiNDc3ZTE2ZTRjYThjYjc5ZDQxNjFlZjU0
11
- MGRmZWY5ZGM4NTAwYjAyZTEyZmY5M2I5MDdjNDA4NWQ1MDE4MDM=
9
+ ZjA0NDgwNjM4MzQxZDYxZmMwNjFmYzg2MTc4MjI1MDY0ZmU2ZGI2ZWZmNTY4
10
+ MjU5OTBhZWVkY2YyYzEyODhiOTI4NWI3NGQwZjI4NTg4ZWNiNmVlZWQwZjAx
11
+ NGJjYjc5YzdhMWJmN2UyODQ1YzM0MGUxY2VkOGU4YmQ5NjU3OTc=
12
12
  data.tar.gz: !binary |-
13
- ODUyZGFhN2JhYjdjZDAyNmMxMTNhZjY0MjJhNWQ5YjU2OTY0OTQyNmU4MDkz
14
- NjljZTU1NDUzYTRhN2I2MTA3MmQ3MTM3MDYxODUyMjgzOGVkYTYzNzY4MjA1
15
- YTgyZDNkNjE3YjI1NWJiYTJkMzNjN2RiYzEzN2M1MWNmYzFhMzU=
13
+ YjY3NDk2ODY1YjE4ZjM3NDMzZWQyMzc5Njc3YzRiODgyODYxNjJjMGY3YmUz
14
+ MDJiNzQzMTZhZjQxYWJjOWIxMjIyZjU3YjE3YmE5MDVmMjk2YjA3Y2QxZmQz
15
+ MmQ1YjY0ZTljNGQ0NGRkODU4NTg0ZWIzYzM4YWRmZGIxYTgxNDM=
data/Rakefile CHANGED
@@ -3,5 +3,10 @@ require 'cucumber'
3
3
  require 'cucumber/rake/task'
4
4
 
5
5
  Cucumber::Rake::Task.new(:features) do |t|
6
- t.cucumber_opts = "features --format pretty API_KEY=#{ENV['API_KEY']}"
6
+ if ENV['TAGS'].empty?
7
+ t.cucumber_opts = "features --format pretty API_KEY=#{ENV['API_KEY']}"
8
+ else
9
+ t.cucumber_opts = "features --format pretty API_KEY=#{ENV['API_KEY']} --tags #{ENV['TAGS']}"
10
+ end
11
+
7
12
  end
@@ -21,6 +21,10 @@ Gem::Specification.new do |spec|
21
21
  spec.add_runtime_dependency "eventmachine", "~> 1.0", ">= 1.0.3"
22
22
  spec.add_runtime_dependency "em-http-request", "~> 1.1"
23
23
  spec.add_runtime_dependency "yomu", "~> 0.1", ">= 0.1.9"
24
+ spec.add_runtime_dependency "json", "~> 1.5", ">= 1.5.5"
25
+ spec.add_runtime_dependency "ohm", "~> 2.0", ">= 2.0.0"
26
+ spec.add_runtime_dependency "ohm-contrib", "~> 2.0", ">= 2.0.0"
27
+ spec.add_runtime_dependency "em-throttled_queue", "~> 1.1", ">= 1.1.0"
24
28
 
25
29
 
26
30
  spec.files = `git ls-files`.split($/)
@@ -0,0 +1,10 @@
1
+ Feature: Ability to detect relationships between entities and events
2
+
3
+ Background:
4
+ Given the file 'test_file_1.txt' is successfully processed with the rich output
5
+
6
+ @rich_output
7
+ Scenario: Filter an entity with the rich output format
8
+ When I filter the response on {:entity => 'Event', :value => 'Meeting'}
9
+ Then The output should be an error
10
+
@@ -1,7 +1,11 @@
1
- Feature: Able to handle wide range of data formats as input
2
- Scenario Outline: Processing various data-source formats
1
+ Feature: Able to handle wide range of data formats as input
2
+
3
+
4
+ @simple_output
5
+ Scenario Outline: Processing various data-source formats (Simple Format)
3
6
  Given the file <input>
4
- When DoverToCalais processes this file
7
+ When the Output format is set to 'Text/Simple'
8
+ And DoverToCalais processes this file
5
9
  Then the output should have no errors
6
10
 
7
11
  Examples:
@@ -12,3 +16,19 @@ Feature: Able to handle wide range of data formats as input
12
16
  |test_file_1.pdf|
13
17
  |test_file_1.rtf|
14
18
  |test_file_1.txt|
19
+
20
+ @rich_output
21
+ Scenario Outline: Processing various data-source formats (Rich Format)
22
+ Given the file <input>
23
+ When the Output format is set to 'Application/JSON'
24
+ And DoverToCalais processes this file
25
+ Then the output should have no errors
26
+
27
+ Examples:
28
+ | input |
29
+ |test_file_1.doc |
30
+ |test_file_1.html|
31
+ |test_file_1.odt|
32
+ |test_file_1.pdf|
33
+ |test_file_1.rtf|
34
+ |test_file_1.txt|
@@ -1,24 +1,30 @@
1
+ @simple_output
1
2
  Feature: Ability to select certain OpenCalais entities based on certain conditions
2
3
 
3
4
  Background:
4
- Given the file 'test_file_1.txt' is successfully processed
5
+ Given the file 'test_file_1.txt' is successfully processed with the simple output
5
6
 
6
7
 
7
- Scenario: Select all entities with a specific name
8
+ Scenario: Filter all entities with a specific name
8
9
  When I filter on {:entity => 'EmailAddress'}
9
10
  Then the output should have 2 entries
10
11
  And All entries should be named 'EmailAddress'
11
12
 
12
- Scenario: Select an entity with a specific value
13
+ Scenario: Filter an entity with a specific value
13
14
  When I filter on {:entity => 'Event', :value => 'Meeting'}
14
15
  Then the output should have 1 entries
15
16
  And All entries should be named 'Event'
16
17
  And All entries should have the value 'Meeting'
17
18
 
18
19
 
19
- Scenario: Select an entity only if another entity with a specific value exists in the data source
20
+ Scenario: Filter an entity only if another entity with a specific value exists in the data source
20
21
  When I filter on {:entity => 'Person', :given => {:entity => 'Event', :value => 'Meeting'}}
21
22
  Then the output should have 2 entries
22
23
  And All entries should be named 'Person'
23
24
  And One entry should have the value 'Roger Kay'
24
- And One entry should have the value 'David Bailey'
25
+ And One entry should have the value 'David Bailey'
26
+
27
+ Scenario: Filter all entities with a specific name
28
+ When I filter on {:entity => 'EmailAddress'}
29
+ Then the output should have 2 entries
30
+ And All entries should be named 'EmailAddress'
@@ -0,0 +1,32 @@
1
+ require 'eventmachine'
2
+ require 'em-http-request'
3
+ require 'yomu'
4
+ require 'rspec'
5
+ require_relative '../../lib/dover_to_calais'
6
+ #require_relative './filtering_steps.rb'
7
+
8
+
9
+ # N.B Cucumber must be run with the Environment variable 'API_KEY' set
10
+ # to the OpenCalais API Key value.
11
+
12
+ Given(/^the file '(\w+\.\w{3,4})' is successfully processed with the rich output$/) do |file|
13
+
14
+ steps %{
15
+ Given the file #{file}
16
+ When the Output format is set to 'Application/JSON'
17
+ And DoverToCalais processes this file
18
+ Then the output should have no errors
19
+ }
20
+
21
+ end
22
+
23
+ When(/^I filter the response on ({.+})$/) do |f|
24
+ @filtered_output = @output.filter(eval(f))
25
+
26
+ end
27
+
28
+ Then(/^The output should be an error$/) do
29
+ @filtered_output.match(/^ERR:\s/).should_not be_nil
30
+ end
31
+
32
+
@@ -4,27 +4,28 @@ require 'eventmachine'
4
4
  require 'em-http-request'
5
5
  require 'yomu'
6
6
  require 'rspec'
7
- require File.expand_path('../../../lib/dover_to_calais', __FILE__)
8
-
7
+ require_relative '../../lib/dover_to_calais'
9
8
 
10
9
  # N.B Cucumber must be run with the Environment variable 'API_KEY' set
11
10
  # to the OpenCalais API Key value.
12
11
 
13
12
 
13
+
14
14
  Given(/^the file (\w+\.\w{3,4})$/) do |arg1|
15
15
  puts arg1
16
16
  @input = Dir.pwd + '/test/' + arg1
17
17
  @output = nil
18
+
18
19
  end
19
20
 
20
21
 
21
22
 
22
23
  When(/^DoverToCalais processes this file$/) do
23
24
  EM.run {
24
-
25
25
  DoverToCalais::API_KEY = ENV['API_KEY']
26
+
26
27
  d1 = DoverToCalais::Dover.new(@input)
27
- d1.analyse_this
28
+ d1.analyse_this(@output_format)
28
29
  d1.to_calais do |response|
29
30
  @output = response
30
31
  EM.stop
@@ -33,6 +34,15 @@ When(/^DoverToCalais processes this file$/) do
33
34
  }
34
35
  end
35
36
 
37
+ When(/^the Output format is set to 'Text\/Simple'$/) do
38
+ @output_format = nil
39
+ end
40
+
41
+ When(/^the Output format is set to 'Application\/JSON'$/) do
42
+ @output_format = :rich
43
+ end
44
+
45
+
36
46
 
37
47
 
38
48
  Then(/^the output should have no errors$/) do
@@ -3,27 +3,31 @@ require 'eventmachine'
3
3
  require 'em-http-request'
4
4
  require 'yomu'
5
5
  require 'rspec'
6
- require File.expand_path('../../../lib/dover_to_calais', __FILE__)
7
-
6
+ #require File.expand_path('../../../lib/dover_to_calais', __FILE__)
7
+ require_relative '../../lib/dover_to_calais'
8
8
 
9
9
  # N.B Cucumber must be run with the Environment variable 'API_KEY' set
10
10
  # to the OpenCalais API Key value.
11
11
 
12
12
 
13
13
 
14
- Given(/^the file '(\w+\.\w{3,4})' is successfully processed$/) do |file|
14
+ Given(/^the file '(\w+\.\w{3,4})' is successfully processed with the simple output$/) do |file|
15
15
 
16
16
  steps %{
17
17
  Given the file #{file}
18
- When DoverToCalais processes this file
18
+ When the Output format is set to 'Text/Simple'
19
+ And DoverToCalais processes this file
19
20
  Then the output should have no errors
20
21
  }
21
22
 
22
23
  end
23
24
 
24
25
 
25
- When(/^I filter on ({.+})/) do |filter|
26
- @filtered_output = @output.filter(eval(filter))
26
+
27
+
28
+
29
+ When(/^I filter on ({.+})$/) do |f|
30
+ @filtered_output = @output.filter(eval(f))
27
31
 
28
32
  end
29
33
 
@@ -5,12 +5,22 @@ require 'nokogiri'
5
5
  require 'eventmachine'
6
6
  require 'em-http-request'
7
7
  require 'yomu'
8
+ require 'json'
9
+ require "dover_to_calais/models" #gem lib file
10
+ require 'ohm'
8
11
 
9
12
 
10
13
  module DoverToCalais
11
14
 
12
15
 
13
16
  PROXY = nil
17
+ REDIS = "redis://127.0.0.1:6379/6"
18
+
19
+ def self.flushdb
20
+ Ohm.redis = Redic.new(REDIS)
21
+ Ohm.redis.call "FLUSHDB"
22
+ end
23
+
14
24
 
15
25
  # The ResponseItem structure holds all potential text and attribute values of an OpenCalais
16
26
  # XML Simple format element.
@@ -66,65 +76,361 @@ module DoverToCalais
66
76
  # nil if none occurred
67
77
  #
68
78
  class ResponseData
69
- attr_reader :error
70
79
 
80
+
81
+
82
+ class Entity< Struct.new(:type, :name, :ref)
83
+
84
+ def to_hash
85
+ a_hash = {}
86
+ self.each_pair do |attr, value|
87
+ a_hash[attr] = value
88
+ end
89
+ a_hash
90
+ end
91
+
92
+ end
93
+
94
+ class GenericRelation< Struct.new(:subject, :verb, :object, :detection)
95
+
96
+ end #class
97
+
98
+ class Event
99
+
100
+ attr_reader :entities
101
+
102
+ def initialize(events_hash)
103
+ # @entities = entity_hash
104
+ events_hash.each do |k,v|
105
+ unless k.eql?('_typeGroup') || k.eql?('instances') || k.eql?('_typeReference')
106
+ k = 'type' if k.eql?('_type') #don't like the underscore
107
+ ## create and initialize an instance variable for this key/value pair
108
+ self.instance_variable_set("@#{k}", v)
109
+ ## create the getter that returns the instance variable
110
+ self.class.send(:define_method, k, proc{self.instance_variable_get("@#{k}")})
111
+ ## create the setter that sets the instance variable
112
+ self.class.send(:define_method, "#{k}=", proc{|v| self.instance_variable_set("@#{k}", v)})
113
+ end
114
+ end #block
115
+ end #method
116
+
117
+ def each_pair
118
+ self.instance_variables.each do |a|
119
+ yield a, self.instance_variable_get(a)
120
+ end
121
+ end
122
+
123
+ def [](attrib_name)
124
+ self.instance_variables.each do |a|
125
+ if "@#{a}" == attrib_name
126
+ self.instance_variable_get(a)
127
+ end
128
+ end
129
+ end
130
+
131
+ public :each_pair
132
+
133
+ end #class
134
+
135
+
136
+
137
+
138
+ attr_reader :error, :entities_store, :events_store, :generic_relations_store, :freds
71
139
  # creates a new ResponseData object, passing the name of the data source to be processed
72
140
  #
73
- # @param xml_data [ Nokogiri::XML::NodeSet, nil] the xml data returned by OpenCalais
141
+ # @param response_data [ Nokogiri::XML::NodeSet, Hash, nil] the XML or JSON data returned by OpenCalais
74
142
  # @param error [ String, nil] an error description if the OpenCalais call has failed
75
- def initialize(xml_data = nil, error = nil)
76
- if xml_data
77
- @raw = xml_data
143
+ def initialize(response_data = nil, error = nil)
144
+ if response_data.class.to_s == "Nokogiri::XML::Document"
145
+ @xml_data = response_data
146
+ elsif response_data.class.to_s == "Hash"
147
+ @json_data = response_data
148
+ prepare_data(response_data)
149
+
150
+
78
151
  else
79
152
  @error = error
80
153
  end
154
+
81
155
  end
82
156
 
83
157
  # Returns the response data as an XML string or an error, if one has occurred.
84
158
  #
85
159
  # @return [String] an XML string
86
160
  def to_s
87
- @raw ? @raw.to_s : @error
161
+ if @xml_data
162
+ @xml_data.to_s
163
+ elsif @json_data
164
+ @json_data.to_s
165
+ else
166
+ @error
167
+ end
168
+ end
169
+
170
+
171
+
172
+
173
+ # The method will first create three Hash instance variables, where it will store the
174
+ # Entities, Generic Relations and Events -respectively- from the OpenCalais response.
175
+ # The key on each Hash instance variable will be the OpenCalais ID and the value will
176
+ # be the values_hash for that ID.
177
+ # Secondly, the method will iterate through each Entity, find all of its related
178
+ # Relations and Events and store them -in a relational manner- in Redis, via Ohm.
179
+ #
180
+ # Only applicable with the JSON (rich) output format
181
+ #
182
+ # @param Hash the OpenCalais JSON response, as a Hash
183
+ # @return the entities Hash that was iterated over; callers use this method only for its side effects
184
+
185
+ def prepare_data(results_hash)
186
+
187
+ @entities_store = {}
188
+ @generic_relations_store = {}
189
+ @events_store = {}
190
+ # find all Entities in response
191
+ @entities_store = results_hash.select{|key, hash| hash["_typeGroup"] == "entities"}
192
+ # find all GenericRelations in response
193
+ @generic_relations_store = results_hash.select{|key, hash| hash["_typeGroup"] == "relations" &&
194
+ hash["_type"] == "GenericRelations"}
195
+ # find all Events in response
196
+ @events_store = results_hash.select{|key, hash| hash["_typeGroup"] == "relations" &&
197
+ hash["_type"] != "GenericRelations"}
198
+
199
+ Ohm.redis = Redic.new(REDIS)
200
+
201
+
202
+ #for each Entity find all related Relations and Events and store them to Ohm/Redis
203
+ @entities_store.each_pair do |k, v|
204
+
205
+ entity_set = EntityModel.find(calais_id: k)
206
+
207
+ if entity_set.size > 0 #entity already exists in store
208
+ entity = entity_set.first
209
+ k = entity.calais_id
210
+ else #entity doesn't exist in store
211
+ entity = EntityModel.create(:name => v['name'], :type => v['_type'], :calais_id => k)
212
+ entity.save
213
+ end #if
214
+
215
+
216
+ #get all referenced relations
217
+ find_in_relations(k).each do |obj|
218
+
219
+ found_rel = get_relation(obj[0])
220
+ if found_rel
221
+
222
+ found_rel.subject = convert_to_hash(found_rel.subject)
223
+ found_rel.object = convert_to_hash(found_rel.object)
224
+
225
+ relation = EntityModel::RelationModel.create(:subject => found_rel.subject,
226
+ :object => found_rel.object,
227
+ :verb => found_rel.verb,
228
+ :detection => found_rel.detection,
229
+ :calais_id => obj[0])
230
+ entity.relations.add(relation)
231
+ end #if
232
+ end #each
233
+ #get all referenced events
234
+ find_in_events(k).each do |obj|
235
+ found_event = get_event(obj[0])
236
+ attribs = {}
237
+ if found_event
238
+
239
+ found_event.each_pair do |key, val|
240
+
241
+ key = key.to_s.slice(1, key.length-1)
242
+ attribs[key] = val
243
+
244
+ end #block
245
+
246
+ event = EntityModel::EventModel.create(:calais_id => obj[0], :info_hash => attribs)
247
+ entity.events.add(event)
248
+
249
+ end #if
250
+
251
+ end #each
252
+ end #each_pair
253
+ end #method
254
+
255
+
256
+
257
+ # Converts an attribute to an appropriate Hash
258
+ #
259
+ # Only applicable with the JSON (rich) output format
260
+ #
261
+ # @param [String, DoverToCalais::ResponseData::Entity, Hash] an object
262
+ # @return a Hash value
263
+ def convert_to_hash(an_attribute)
264
+ h = {}
265
+ if an_attribute.class.to_s.eql?('String')
266
+ h[:name] = an_attribute
267
+ end
268
+
269
+ if an_attribute.class.to_s.eql?('DoverToCalais::ResponseData::Entity')
270
+ h = an_attribute.to_hash
271
+ end
272
+
273
+ if an_attribute.class.to_s.eql?('Hash')
274
+ h = an_attribute
275
+ end
276
+
277
+ h
278
+ end #method
279
+
280
+
281
+
282
+ # Retrieves the entity with the specified key (OpenCalais ID)
283
+ #
284
+ # Only applicable with the JSON (rich) output format
285
+ #
286
+ # @param String the OpenCalais ID
287
+ # @return an Entity Struct if a match is found, nil otherwise
288
+ def get_entity(ref_key)
289
+ if @entities_store.has_key?(ref_key)
290
+ Entity.new(@entities_store[ref_key]['_type'], @entities_store[ref_key]['name'], ref_key)
291
+ else
292
+ nil
293
+ end
88
294
  end
89
295
 
90
296
 
91
- # Filters the response object to extract relevant data.
297
+ # Retrieves the relation with the specified key (OpenCalais ID). The method will also
298
+ # de-reference any of its attributes that refer to other entities via an OpenCalais ID
299
+ # and will replace the references with the appropriate Entity structure, if applicable
300
+ #
301
+ # Only applicable with the JSON (rich) output format
302
+ #
303
+ # @param String the OpenCalais ID
304
+ # @return a GenericRelation Struct if a match is found, nil otherwise
305
+ def get_relation(ref_key)
306
+ if @generic_relations_store.key?(ref_key)
307
+
308
+ if @generic_relations_store[ref_key]['relationsubject']
309
+ gr_subject = @generic_relations_store[ref_key]['relationsubject'].match('^http://d.opencalais.com') ?
310
+ get_entity(@generic_relations_store[ref_key]['relationsubject']) :
311
+ @generic_relations_store[ref_key]['relationsubject']
312
+ else
313
+ gr_subject = 'N/A'
314
+ end
315
+
316
+
317
+ if @generic_relations_store[ref_key]['relationobject']
318
+ gr_object = @generic_relations_store[ref_key]['relationobject'].match('^http://d.opencalais.com') ?
319
+ get_entity(@generic_relations_store[ref_key]['relationobject']) :
320
+ @generic_relations_store[ref_key]['relationobject']
321
+ else
322
+ gr_object = 'N/A'
323
+ end
324
+
325
+ GenericRelation.new(gr_subject,
326
+ @generic_relations_store[ref_key]['verb'],
327
+ gr_object,
328
+ @generic_relations_store[ref_key]['instances'][0]['exact'] ||= 'N/A')
329
+ else
330
+ nil
331
+ end
332
+ end
333
+
334
+ def get_event(ref_key)
335
+
336
+ dereferenced_events = {}
337
+
338
+ if @events_store.key?(ref_key)
339
+
340
+ @events_store[ref_key].each do |k, v|
341
+
342
+ if v.class.to_s.eql?("String") && v.match('^http://d.opencalais.com')
343
+ dereferenced_events[k] = get_entity(v).to_hash
344
+ elsif v.class.to_s.eql?("String") && !v.match('^http://d.opencalais.com')
345
+ h = {}
346
+ h['name'] = v
347
+ dereferenced_events[k] = h
348
+ elsif v.class.to_s.eql?("Array")
349
+ h = {}
350
+ h['name'] = v[0]['exact']
351
+ dereferenced_events[k] = h
352
+ end
353
+ end
354
+
355
+ Event.new(dereferenced_events)
356
+ else
357
+ nil
358
+ end
359
+ end
360
+
361
+
362
+ # Selects a Hash of generic relations, where the relations' subject or object attributes
363
+ # match the specified OpenCalais ID.
364
+ #
365
+ # Only applicable with the JSON (rich) output format
366
+ #
367
+ # @param String the OpenCalais ID
368
+ # @return a Hash with the selected matches
369
+ def find_in_relations(ref_key)
370
+ @generic_relations_store.select{|key, hash| (hash["relationsubject"] == ref_key) ||
371
+ (hash["relationobject"] == ref_key) }
372
+
373
+ end
374
+
375
+ # Selects a Hash of events, where any of an event's attribute values equals the
376
+ # specified OpenCalais ID.
377
+ #
378
+ # Only applicable with the JSON (rich) output format
379
+ #
380
+ # @param String the OpenCalais ID
381
+ # @return a Hash with the selected matches
382
+ def find_in_events(ref_key)
383
+ @events_store.select{|key, hash| hash.has_value?(ref_key) }
384
+ end
385
+
386
+
387
+
388
+ # Filters the xml response object to extract relevant data.
92
389
  #
93
390
  # @param params [Hash] a filter Hash (see code samples)
94
391
  # @return [Array[ResponseItem]] a list of relevant response items
95
392
  def filter(params)
393
+ unless @xml_data
394
+ return 'ERR: filter method only works with xml-based output!'
395
+
396
+ end
397
+
96
398
  result = Array.new
97
399
  begin
98
- if @raw
400
+ if @xml_data
99
401
 
100
402
  if params[:given]
101
- found = @raw.xpath("//#{params[:given][:entity]}[contains(text(), #{params[:given][:value].inspect})]")
403
+ found = @xml_data.xpath("//#{params[:given][:entity]}[contains(text(), #{params[:given][:value].inspect})]")
102
404
  if found.size > 0
103
- @raw.xpath("//#{params[:entity]}[contains(text(), #{params[:value].inspect})]").each do |node|
405
+ @xml_data.xpath("//#{params[:entity]}[contains(text(), #{params[:value].inspect})]").each do |node|
104
406
  result << create_response_item(node)
105
407
  end
106
408
  end
107
409
  else # no conditional
108
- @raw.xpath("//#{params[:entity]}[contains(text(), #{params[:value].inspect})]").each do |node|
410
+ @xml_data.xpath("//#{params[:entity]}[contains(text(), #{params[:value].inspect})]").each do |node|
109
411
  result << create_response_item(node)
110
412
  end
111
413
  end
112
414
 
113
415
  return result
114
416
  else # no xml data
115
- puts 'ERR: no valid xml data!'
417
+ return 'ERR: no valid xml data!'
116
418
 
117
419
  end #if
118
420
 
119
421
  rescue Exception=>e
120
- puts "ERR: #filter: #{e}"
422
+ return "ERR: #filter: #{e}"
121
423
 
122
424
  end
123
425
 
124
- #return result
426
+ return result
125
427
 
126
428
  end #method
127
429
 
430
+ # Creates a Response Item from an xml node.
431
+ #
432
+ # @param node [Nokogiri::XML::Node] an XML node
433
+ # @return [ResponseItem] a response item object
128
434
  def create_response_item(node)
129
435
  node_relevance = node.attribute('relevance').text.to_f if node.has_attribute?('relevance')
130
436
  node_count = node.attribute('count').text.to_i if node.has_attribute?('count')
@@ -133,17 +439,18 @@ module DoverToCalais
133
439
  node_orig_value = node.xpath('originalValue').text if node.name.eql?('SocialTag')
134
440
 
135
441
  ResponseItem.new(node.name,
136
- node.text,
137
- node_relevance,
138
- node_count,
139
- node_normalized,
140
- node_importance,
141
- node_orig_value )
442
+ node.text,
443
+ node_relevance,
444
+ node_count,
445
+ node_normalized,
446
+ node_importance,
447
+ node_orig_value )
142
448
 
143
449
  end
144
450
 
145
451
  public :filter
146
- private :create_response_item
452
+ private :create_response_item, :prepare_data, :convert_to_hash, :find_in_relations, :find_in_events,
453
+ :get_event, :get_entity
147
454
 
148
455
  end #class
149
456
 
@@ -211,13 +518,23 @@ module DoverToCalais
211
518
 
212
519
  end #method
213
520
 
521
+
522
+
523
+
524
+
214
525
  # Gets the source text parsed. If the parsing is successful, the data source is POSTed to OpenCalais
215
526
  # via an EventMachine request and a callback is set to manage the OpenCalais response.
216
- # All Dover object callbacks are then called with the request result yielded to them.
527
+ # All Dover object callbacks are then called with the request result yielded to them.
217
528
  #
218
529
  # @param N/A
219
530
  # @return a {Class ResponseData} object
220
- def analyse_this
531
+ def analyse_this(output_format=nil)
532
+
533
+ if output_format
534
+ @output_format = 'application/json'
535
+ else
536
+ @output_format = 'Text/Simple'
537
+ end
221
538
 
222
539
  @document = get_src_data(@data_src)
223
540
  begin
@@ -230,19 +547,19 @@ module DoverToCalais
230
547
 
231
548
 
232
549
  if DoverToCalais::PROXY &&
233
- DoverToCalais::PROXY.class.eql?('Hash') &&
234
- DoverToCalais::PROXY.keys[0].eql?(:proxy)
550
+ DoverToCalais::PROXY.class.eql?('Hash') &&
551
+ DoverToCalais::PROXY.keys[0].eql?(:proxy)
235
552
 
236
553
  connection_options = connection_options.merge(DoverToCalais::PROXY)
237
554
  end
238
555
 
239
556
  request_options = {
240
- :body => @document.to_s,
241
- :head => {
242
- 'x-calais-licenseID' => DoverToCalais::API_KEY,
243
- :content_type => 'TEXT/RAW',
244
- :enableMetadataType => 'GenericRelations,SocialTags',
245
- :outputFormat => 'Text/Simple'}
557
+ :body => @document.to_s,
558
+ :head => {
559
+ 'x-calais-licenseID' => DoverToCalais::API_KEY,
560
+ :content_type => 'TEXT/RAW',
561
+ :enableMetadataType => 'GenericRelations,SocialTags',
562
+ :outputFormat => @output_format}
246
563
  }
247
564
 
248
565
  http = EventMachine::HttpRequest.new(CALAIS_SERVICE, connection_options ).post request_options
@@ -251,20 +568,34 @@ module DoverToCalais
251
568
  http.callback do
252
569
 
253
570
  if http.response_header.status == 200
254
- http.response.match(/<OpenCalaisSimple>/) do |m|
255
- response = Nokogiri::XML('<OpenCalaisSimple>' + m.post_match) do |config|
256
- #strict xml parsing, disallow network connections
257
- config.strict.nonet
258
- end #block
259
- end #block
260
-
261
- result = response ?
262
- ResponseData.new(response, nil) :
263
- ResponseData.new(nil,'ERR: cannot find <OpenCalaisSimple> tag in response data - source invalid?')
264
- else #non-200 response header
265
- result = ResponseData.new nil,
266
- "ERR: OpenCalais service responded with #{http.response_header.status} - response body: '#{http.response}'"
267
- end
571
+ if @output_format == 'Text/Simple'
572
+ http.response.match(/<OpenCalaisSimple>/) do |m|
573
+ response = Nokogiri::XML('<OpenCalaisSimple>' + m.post_match) do |config|
574
+ #strict xml parsing, disallow network connections
575
+ config.strict.nonet
576
+ end #block
577
+ end
578
+ else #@output_format == 'application/json'
579
+ response = JSON.parse(http.response) #response should now be a Hash
580
+
581
+ end #if
582
+
583
+ case response.class.to_s
584
+ when 'NilClass'
585
+ result = ResponseData.new(nil,'ERR: cannot parse response data - source invalid?')
586
+ when 'Nokogiri::XML::Document'
587
+ result = ResponseData.new(response, nil)
588
+ when 'Hash'
589
+ result = ResponseData.new(response, nil)
590
+ else
591
+ result = ResponseData.new(nil,'ERR: cannot parse response data - unrecognized format!')
592
+ end
593
+
594
+
595
+ else #non-200 response
596
+ result = ResponseData.new nil,
597
+ "ERR: OpenCalais service responded with #{http.response_header.status} - response body: '#{http.response}'"
598
+ end
268
599
 
269
600
  @callbacks.each { |c| c.call(result) }
270
601
 
@@ -287,6 +618,7 @@ module DoverToCalais
287
618
  end #method
288
619
 
289
620
 
621
+
290
622
  alias_method :analyze_this, :analyse_this
291
623
  public :to_calais, :analyse_this
292
624
  private :get_src_data
@@ -0,0 +1,46 @@
1
+ require 'ohm'
2
+ require 'ohm/contrib'
3
+
4
+ module DoverToCalais
5
+ class EntityModel < Ohm::Model
6
+ attribute :name
7
+ attribute :type
8
+ attribute :calais_id
9
+ set :relations, :RelationModel
10
+ set :events, :EventModel
11
+
12
+
13
+ index :name
14
+ index :type
15
+ index :calais_id
16
+
17
+ def validate
18
+ assert_present :name
19
+ assert_present :type
20
+ assert_present :calais_id
21
+ end
22
+
23
+ class RelationModel < Ohm::Model
24
+ include Ohm::DataTypes
25
+
26
+ attribute :subject, Type::Hash
27
+ attribute :object, Type::Hash
28
+ attribute :verb
29
+ attribute :detection
30
+ attribute :calais_id
31
+
32
+ index :subject
33
+
34
+ end #class
35
+
36
+ class EventModel < Ohm::Model
37
+ include Ohm::DataTypes
38
+
39
+ attribute :calais_id
40
+ attribute :info_hash, Type::Hash
41
+
42
+ end #class
43
+
44
+ end #class
45
+
46
+ end
@@ -1,23 +1,3 @@
1
- # ontology.rb
2
- #
3
- # Copyright 2013 Fred <fred@fred-Veriton-X270>
4
- #
5
- # This program is free software; you can redistribute it and/or modify
6
- # it under the terms of the GNU General Public License as published by
7
- # the Free Software Foundation; either version 2 of the License, or
8
- # (at your option) any later version.
9
- #
10
- # This program is distributed in the hope that it will be useful,
11
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- # GNU General Public License for more details.
14
- #
15
- # You should have received a copy of the GNU General Public License
16
- # along with this program; if not, write to the Free Software
17
- # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18
- # MA 02110-1301, USA.
19
- #
20
- #
21
1
 
22
2
 
23
3
  module CalaisOntology
@@ -67,25 +47,38 @@ CALAIS_EVENTS = %w(
67
47
  Alliance
68
48
  AnalystEarningsEstimate
69
49
  AnalystRecommendation
50
+ ArmedAttack
51
+ ArmsPurchaseSale
70
52
  Arrest
71
53
  Bankruptcy
72
54
  BonusSharesIssuance
73
55
  BusinessRelation
74
56
  Buybacks
57
+ CandidatePosition
75
58
  CompanyAccountingChange
59
+ CompanyAffiliates
60
+ CompanyCompetitor
61
+ CompanyCustomer
76
62
  CompanyEarningsAnnouncement
77
63
  CompanyEarningsGuidance
64
+ CompanyEmployeesNumber
78
65
  CompanyExpansion
79
66
  CompanyForceMajeure
67
+ CompanyFounded
80
68
  CompanyInvestment
81
69
  CompanyLaborIssues
82
70
  CompanyLayoffs
83
71
  CompanyLegalIssues
72
+ CompanyLocation
84
73
  CompanyListingChange
85
74
  CompanyMeeting
86
75
  CompanyNameChange
76
+ CompanyProduct
87
77
  CompanyReorganization
88
78
  CompanyRestatement
79
+ CompanyTechnology
80
+ CompanyUsingProduct
81
+ CompanyTicker
89
82
  ConferenceCall
90
83
  Conviction
91
84
  CreditRating
@@ -94,19 +87,32 @@ CALAIS_EVENTS = %w(
94
87
  DiplomaticRelations
95
88
  Dividend
96
89
  EmploymentChange
90
+ EmploymentRelation
97
91
  EnvironmentalIssue
92
+ EquityFinancing
98
93
  Extinction
94
+ FamilyRelation
99
95
  FDAPhase
100
96
  Indictment
97
+ IndicesChanges
101
98
  IPO
102
99
  JointVenture
103
100
  ManMadeDisaster
104
101
  Merger
102
+ MilitaryAction
105
103
  MovieRelease
106
104
  MusicAlbumRelease
107
105
  NaturalDisaster
108
106
  PatentFiling
109
107
  PatentIssuance
108
+ PersonAttributes
109
+ PersonCareer
110
+ PersonCommunication
111
+ PersonEducation
112
+ PersonEmailAddress
113
+ PersonLocation
114
+ PersonParty
115
+ PersonRelation
110
116
  PersonTravel
111
117
  PoliticalEndorsement
112
118
  PoliticalRelationship
@@ -114,14 +120,15 @@ CALAIS_EVENTS = %w(
114
120
  ProductIssues
115
121
  ProductRecall
116
122
  ProductRelease
117
- SocialTags
123
+ Quotation
118
124
  SecondaryIssuance
119
- PersonCommunication
120
125
  StockSplit
121
126
  Trial
122
127
  VotingResult
123
128
  )
124
129
 
130
+
131
+
125
132
  CALAIS_TOPICS = %w(
126
133
  Business_Finance
127
134
  Disaster_Accident
@@ -1,3 +1,3 @@
1
1
  module DoverToCalais
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dover_to_calais
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Fred Heath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-10 00:00:00.000000000 Z
11
+ date: 2014-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -78,6 +78,86 @@ dependencies:
78
78
  - - ! '>='
79
79
  - !ruby/object:Gem::Version
80
80
  version: 0.1.9
81
+ - !ruby/object:Gem::Dependency
82
+ name: json
83
+ requirement: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ~>
86
+ - !ruby/object:Gem::Version
87
+ version: '1.5'
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: 1.5.5
91
+ type: :runtime
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ~>
96
+ - !ruby/object:Gem::Version
97
+ version: '1.5'
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: 1.5.5
101
+ - !ruby/object:Gem::Dependency
102
+ name: ohm
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ~>
106
+ - !ruby/object:Gem::Version
107
+ version: '2.0'
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: 2.0.0
111
+ type: :runtime
112
+ prerelease: false
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: '2.0'
118
+ - - ! '>='
119
+ - !ruby/object:Gem::Version
120
+ version: 2.0.0
121
+ - !ruby/object:Gem::Dependency
122
+ name: ohm-contrib
123
+ requirement: !ruby/object:Gem::Requirement
124
+ requirements:
125
+ - - ~>
126
+ - !ruby/object:Gem::Version
127
+ version: '2.0'
128
+ - - ! '>='
129
+ - !ruby/object:Gem::Version
130
+ version: 2.0.0
131
+ type: :runtime
132
+ prerelease: false
133
+ version_requirements: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ~>
136
+ - !ruby/object:Gem::Version
137
+ version: '2.0'
138
+ - - ! '>='
139
+ - !ruby/object:Gem::Version
140
+ version: 2.0.0
141
+ - !ruby/object:Gem::Dependency
142
+ name: em-throttled_queue
143
+ requirement: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ~>
146
+ - !ruby/object:Gem::Version
147
+ version: '1.1'
148
+ - - ! '>='
149
+ - !ruby/object:Gem::Version
150
+ version: 1.1.0
151
+ type: :runtime
152
+ prerelease: false
153
+ version_requirements: !ruby/object:Gem::Requirement
154
+ requirements:
155
+ - - ~>
156
+ - !ruby/object:Gem::Version
157
+ version: '1.1'
158
+ - - ! '>='
159
+ - !ruby/object:Gem::Version
160
+ version: 1.1.0
81
161
  - !ruby/object:Gem::Dependency
82
162
  name: bundler
83
163
  requirement: !ruby/object:Gem::Requirement
@@ -164,11 +244,14 @@ files:
164
244
  - README.md
165
245
  - Rakefile
166
246
  - dover_to_calais.gemspec
247
+ - features/data_mining.feature
167
248
  - features/data_sources.feature
168
249
  - features/filtering.feature
250
+ - features/step_definitions/data_mining_steps.rb
169
251
  - features/step_definitions/data_sources_steps.rb
170
252
  - features/step_definitions/filtering_steps.rb
171
253
  - lib/dover_to_calais.rb
254
+ - lib/dover_to_calais/models.rb
172
255
  - lib/dover_to_calais/ontology.rb
173
256
  - lib/dover_to_calais/version.rb
174
257
  - test/test_file_1.doc
@@ -202,8 +285,10 @@ signing_key:
202
285
  specification_version: 4
203
286
  summary: An easy-to-use wrapper round the OpenCalais semantic analysis web service.
204
287
  test_files:
288
+ - features/data_mining.feature
205
289
  - features/data_sources.feature
206
290
  - features/filtering.feature
291
+ - features/step_definitions/data_mining_steps.rb
207
292
  - features/step_definitions/data_sources_steps.rb
208
293
  - features/step_definitions/filtering_steps.rb
209
294
  - test/test_file_1.doc