dover_to_calais 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- OWJmOWEyMGFjNDk2ZjZiODYyNjQ1NDM2YjM0YjMyNzQ1MmUzZjg3MA==
4
+ MmI4YTNmZGZkYTViNDVlMzVmNGRiMGM3YTFlYzQ5YTc1ZTdmMDU3ZA==
5
5
  data.tar.gz: !binary |-
6
- MTllMDRiOTNlNDg2Y2FiNmY1MmQyMjAyMzViNWJiZWFmN2ZjYTY3ZA==
6
+ M2U0YjI4ZTJhM2E4M2U4MGM2N2E2OWZkYjUwYzExM2YyOWExZDYxNg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- OGI3NDU4YWU1YzllMjBiNTVlMmU3NzNhMDUzYmNhNWYzODY0ZTE3MDQzZmMx
10
- NmZiMDMxZjYzMTI3ZTdkYWU5MGNiNDc3ZTE2ZTRjYThjYjc5ZDQxNjFlZjU0
11
- MGRmZWY5ZGM4NTAwYjAyZTEyZmY5M2I5MDdjNDA4NWQ1MDE4MDM=
9
+ ZjA0NDgwNjM4MzQxZDYxZmMwNjFmYzg2MTc4MjI1MDY0ZmU2ZGI2ZWZmNTY4
10
+ MjU5OTBhZWVkY2YyYzEyODhiOTI4NWI3NGQwZjI4NTg4ZWNiNmVlZWQwZjAx
11
+ NGJjYjc5YzdhMWJmN2UyODQ1YzM0MGUxY2VkOGU4YmQ5NjU3OTc=
12
12
  data.tar.gz: !binary |-
13
- ODUyZGFhN2JhYjdjZDAyNmMxMTNhZjY0MjJhNWQ5YjU2OTY0OTQyNmU4MDkz
14
- NjljZTU1NDUzYTRhN2I2MTA3MmQ3MTM3MDYxODUyMjgzOGVkYTYzNzY4MjA1
15
- YTgyZDNkNjE3YjI1NWJiYTJkMzNjN2RiYzEzN2M1MWNmYzFhMzU=
13
+ YjY3NDk2ODY1YjE4ZjM3NDMzZWQyMzc5Njc3YzRiODgyODYxNjJjMGY3YmUz
14
+ MDJiNzQzMTZhZjQxYWJjOWIxMjIyZjU3YjE3YmE5MDVmMjk2YjA3Y2QxZmQz
15
+ MmQ1YjY0ZTljNGQ0NGRkODU4NTg0ZWIzYzM4YWRmZGIxYTgxNDM=
data/Rakefile CHANGED
@@ -3,5 +3,10 @@ require 'cucumber'
3
3
  require 'cucumber/rake/task'
4
4
 
5
5
  Cucumber::Rake::Task.new(:features) do |t|
6
- t.cucumber_opts = "features --format pretty API_KEY=#{ENV['API_KEY']}"
6
+ if ENV['TAGS'].empty?
7
+ t.cucumber_opts = "features --format pretty API_KEY=#{ENV['API_KEY']}"
8
+ else
9
+ t.cucumber_opts = "features --format pretty API_KEY=#{ENV['API_KEY']} --tags #{ENV['TAGS']}"
10
+ end
11
+
7
12
  end
@@ -21,6 +21,10 @@ Gem::Specification.new do |spec|
21
21
  spec.add_runtime_dependency "eventmachine", "~> 1.0", ">= 1.0.3"
22
22
  spec.add_runtime_dependency "em-http-request", "~> 1.1"
23
23
  spec.add_runtime_dependency "yomu", "~> 0.1", ">= 0.1.9"
24
+ spec.add_runtime_dependency "json", "~> 1.5", ">= 1.5.5"
25
+ spec.add_runtime_dependency "ohm", "~> 2.0", ">= 2.0.0"
26
+ spec.add_runtime_dependency "ohm-contrib", "~> 2.0", ">= 2.0.0"
27
+ spec.add_runtime_dependency "em-throttled_queue", "~> 1.1", ">= 1.1.0"
24
28
 
25
29
 
26
30
  spec.files = `git ls-files`.split($/)
@@ -0,0 +1,10 @@
1
+ Feature: Ability to detect relationships between entities and events
2
+
3
+ Background:
4
+ Given the file 'test_file_1.txt' is successfully processed with the rich output
5
+
6
+ @rich_output
7
+ Scenario: Filter an entity with the rich output format
8
+ When I filter the response on {:entity => 'Event', :value => 'Meeting'}
9
+ Then The output should be an error
10
+
@@ -1,7 +1,11 @@
1
- Feature: Able to handle wide range of data formats as input
2
- Scenario Outline: Processing various data-source formats
1
+ Feature: Able to handle wide range of data formats as input
2
+
3
+
4
+ @simple_output
5
+ Scenario Outline: Processing various data-source formats (Simple Format)
3
6
  Given the file <input>
4
- When DoverToCalais processes this file
7
+ When the Output format is set to 'Text/Simple'
8
+ And DoverToCalais processes this file
5
9
  Then the output should have no errors
6
10
 
7
11
  Examples:
@@ -12,3 +16,19 @@ Feature: Able to handle wide range of data formats as input
12
16
  |test_file_1.pdf|
13
17
  |test_file_1.rtf|
14
18
  |test_file_1.txt|
19
+
20
+ @rich_output
21
+ Scenario Outline: Processing various data-source formats (Rich Format)
22
+ Given the file <input>
23
+ When the Output format is set to 'Application/JSON'
24
+ And DoverToCalais processes this file
25
+ Then the output should have no errors
26
+
27
+ Examples:
28
+ | input |
29
+ |test_file_1.doc |
30
+ |test_file_1.html|
31
+ |test_file_1.odt|
32
+ |test_file_1.pdf|
33
+ |test_file_1.rtf|
34
+ |test_file_1.txt|
@@ -1,24 +1,30 @@
1
+ @simple_output
1
2
  Feature: Ability to select certain OpenCalais entities based on certain conditions
2
3
 
3
4
  Background:
4
- Given the file 'test_file_1.txt' is successfully processed
5
+ Given the file 'test_file_1.txt' is successfully processed with the simple output
5
6
 
6
7
 
7
- Scenario: Select all entities with a specific name
8
+ Scenario: Filter all entities with a specific name
8
9
  When I filter on {:entity => 'EmailAddress'}
9
10
  Then the output should have 2 entries
10
11
  And All entries should be named 'EmailAddress'
11
12
 
12
- Scenario: Select an entity with a specific value
13
+ Scenario: Filter an entity with a specific value
13
14
  When I filter on {:entity => 'Event', :value => 'Meeting'}
14
15
  Then the output should have 1 entries
15
16
  And All entries should be named 'Event'
16
17
  And All entries should have the value 'Meeting'
17
18
 
18
19
 
19
- Scenario: Select an entity only if another entity with a specific value exists in the data source
20
+ Scenario: Filter an entity only if another entity with a specific value exists in the data source
20
21
  When I filter on {:entity => 'Person', :given => {:entity => 'Event', :value => 'Meeting'}}
21
22
  Then the output should have 2 entries
22
23
  And All entries should be named 'Person'
23
24
  And One entry should have the value 'Roger Kay'
24
- And One entry should have the value 'David Bailey'
25
+ And One entry should have the value 'David Bailey'
26
+
27
+ Scenario: Filter all entities with a specific name
28
+ When I filter on {:entity => 'EmailAddress'}
29
+ Then the output should have 2 entries
30
+ And All entries should be named 'EmailAddress'
@@ -0,0 +1,32 @@
1
+ require 'eventmachine'
2
+ require 'em-http-request'
3
+ require 'yomu'
4
+ require 'rspec'
5
+ require_relative '../../lib/dover_to_calais'
6
+ #require_relative './filtering_steps.rb'
7
+
8
+
9
+ # N.B Cucumber must be run with the Environment variable 'API_KEY' set
10
+ # to the OpenCalais API Key value.
11
+
12
+ Given(/^the file '(\w+\.\w{3,4})' is successfully processed with the rich output$/) do |file|
13
+
14
+ steps %{
15
+ Given the file #{file}
16
+ When the Output format is set to 'Application/JSON'
17
+ And DoverToCalais processes this file
18
+ Then the output should have no errors
19
+ }
20
+
21
+ end
22
+
23
+ When(/^I filter the response on ({.+})$/) do |f|
24
+ @filtered_output = @output.filter(eval(f))
25
+
26
+ end
27
+
28
+ Then(/^The output should be an error$/) do
29
+ @filtered_output.match(/^ERR:\s/).should_not be_nil
30
+ end
31
+
32
+
@@ -4,27 +4,28 @@ require 'eventmachine'
4
4
  require 'em-http-request'
5
5
  require 'yomu'
6
6
  require 'rspec'
7
- require File.expand_path('../../../lib/dover_to_calais', __FILE__)
8
-
7
+ require_relative '../../lib/dover_to_calais'
9
8
 
10
9
  # N.B Cucumber must be run with the Environment variable 'API_KEY' set
11
10
  # to the OpenCalais API Key value.
12
11
 
13
12
 
13
+
14
14
  Given(/^the file (\w+\.\w{3,4})$/) do |arg1|
15
15
  puts arg1
16
16
  @input = Dir.pwd + '/test/' + arg1
17
17
  @output = nil
18
+
18
19
  end
19
20
 
20
21
 
21
22
 
22
23
  When(/^DoverToCalais processes this file$/) do
23
24
  EM.run {
24
-
25
25
  DoverToCalais::API_KEY = ENV['API_KEY']
26
+
26
27
  d1 = DoverToCalais::Dover.new(@input)
27
- d1.analyse_this
28
+ d1.analyse_this(@output_format)
28
29
  d1.to_calais do |response|
29
30
  @output = response
30
31
  EM.stop
@@ -33,6 +34,15 @@ When(/^DoverToCalais processes this file$/) do
33
34
  }
34
35
  end
35
36
 
37
+ When(/^the Output format is set to 'Text\/Simple'$/) do
38
+ @output_format = nil
39
+ end
40
+
41
+ When(/^the Output format is set to 'Application\/JSON'$/) do
42
+ @output_format = :rich
43
+ end
44
+
45
+
36
46
 
37
47
 
38
48
  Then(/^the output should have no errors$/) do
@@ -3,27 +3,31 @@ require 'eventmachine'
3
3
  require 'em-http-request'
4
4
  require 'yomu'
5
5
  require 'rspec'
6
- require File.expand_path('../../../lib/dover_to_calais', __FILE__)
7
-
6
+ #require File.expand_path('../../../lib/dover_to_calais', __FILE__)
7
+ require_relative '../../lib/dover_to_calais'
8
8
 
9
9
  # N.B Cucumber must be run with the Environment variable 'API_KEY' set
10
10
  # to the OpenCalais API Key value.
11
11
 
12
12
 
13
13
 
14
- Given(/^the file '(\w+\.\w{3,4})' is successfully processed$/) do |file|
14
+ Given(/^the file '(\w+\.\w{3,4})' is successfully processed with the simple output$/) do |file|
15
15
 
16
16
  steps %{
17
17
  Given the file #{file}
18
- When DoverToCalais processes this file
18
+ When the Output format is set to 'Text/Simple'
19
+ And DoverToCalais processes this file
19
20
  Then the output should have no errors
20
21
  }
21
22
 
22
23
  end
23
24
 
24
25
 
25
- When(/^I filter on ({.+})/) do |filter|
26
- @filtered_output = @output.filter(eval(filter))
26
+
27
+
28
+
29
+ When(/^I filter on ({.+})$/) do |f|
30
+ @filtered_output = @output.filter(eval(f))
27
31
 
28
32
  end
29
33
 
@@ -5,12 +5,22 @@ require 'nokogiri'
5
5
  require 'eventmachine'
6
6
  require 'em-http-request'
7
7
  require 'yomu'
8
+ require 'json'
9
+ require "dover_to_calais/models" #gem lib file
10
+ require 'ohm'
8
11
 
9
12
 
10
13
  module DoverToCalais
11
14
 
12
15
 
13
16
  PROXY = nil
17
+ REDIS = "redis://127.0.0.1:6379/6"
18
+
19
+ def self.flushdb
20
+ Ohm.redis = Redic.new(REDIS)
21
+ Ohm.redis.call "FLUSHDB"
22
+ end
23
+
14
24
 
15
25
  # The ResponseItem structure holds all potential text and attribute values of an OpenCalais
16
26
  # XML Simple format element.
@@ -66,65 +76,361 @@ module DoverToCalais
66
76
  # nil if none occurred
67
77
  #
68
78
  class ResponseData
69
- attr_reader :error
70
79
 
80
+
81
+
82
+ class Entity< Struct.new(:type, :name, :ref)
83
+
84
+ def to_hash
85
+ a_hash = {}
86
+ self.each_pair do |attr, value|
87
+ a_hash[attr] = value
88
+ end
89
+ a_hash
90
+ end
91
+
92
+ end
93
+
94
+ class GenericRelation< Struct.new(:subject, :verb, :object, :detection)
95
+
96
+ end #class
97
+
98
+ class Event
99
+
100
+ attr_reader :entities
101
+
102
+ def initialize(events_hash)
103
+ # @entities = entity_hash
104
+ events_hash.each do |k,v|
105
+ unless k.eql?('_typeGroup') || k.eql?('instances') || k.eql?('_typeReference')
106
+ k = 'type' if k.eql?('_type') #don't like the underscore
107
+ ## create and initialize an instance variable for this key/value pair
108
+ self.instance_variable_set("@#{k}", v)
109
+ ## create the getter that returns the instance variable
110
+ self.class.send(:define_method, k, proc{self.instance_variable_get("@#{k}")})
111
+ ## create the setter that sets the instance variable
112
+ self.class.send(:define_method, "#{k}=", proc{|v| self.instance_variable_set("@#{k}", v)})
113
+ end
114
+ end #block
115
+ end #method
116
+
117
+ def each_pair
118
+ self.instance_variables.each do |a|
119
+ yield a, self.instance_variable_get(a)
120
+ end
121
+ end
122
+
123
+ def [](attrib_name)
124
+ self.instance_variables.each do |a|
125
+ if "@#{a}" == attrib_name
126
+ self.instance_variable_get(a)
127
+ end
128
+ end
129
+ end
130
+
131
+ public :each_pair
132
+
133
+ end #class
134
+
135
+
136
+
137
+
138
+ attr_reader :error, :entities_store, :events_store, :generic_relations_store, :freds
71
139
  # creates a new ResponseData object, passing the name of the data source to be processed
72
140
  #
73
- # @param xml_data [ Nokogiri::XML::NodeSet, nil] the xml data returned by OpenCalais
141
+ # @param response_data [ Nokogiri::XML::NodeSet, Hash, nil] the XML or JSON data returned by OpenCalais
74
142
  # @param error [ String, nil] an error description if the OpenCalais call has failed
75
- def initialize(xml_data = nil, error = nil)
76
- if xml_data
77
- @raw = xml_data
143
+ def initialize(response_data = nil, error = nil)
144
+ if response_data.class.to_s == "Nokogiri::XML::Document"
145
+ @xml_data = response_data
146
+ elsif response_data.class.to_s == "Hash"
147
+ @json_data = response_data
148
+ prepare_data(response_data)
149
+
150
+
78
151
  else
79
152
  @error = error
80
153
  end
154
+
81
155
  end
82
156
 
83
157
  # Returns the response data as an XML string or an error, if one has occurred.
84
158
  #
85
159
  # @return [String] an XML string
86
160
  def to_s
87
- @raw ? @raw.to_s : @error
161
+ if @xml_data
162
+ @xml_data.to_s
163
+ elsif @json_data
164
+ @json_data.to_s
165
+ else
166
+ @error
167
+ end
168
+ end
169
+
170
+
171
+
172
+
173
+ # The method will first create three Hash instance variables, where it will store the
174
+ # Entities, Generic Relations and Events -respectively- from the OpenCalais response.
175
+ # The key on each Hash instance variable will be the OpenCalais ID and the value will
176
+ # be the values_hash for that ID.
177
+ # Secondly, the method will iterate through each Entity, find all of it's related
178
+ # Relations and Events and store them -in a relational manner- in Redis, via Ohm.
179
+ #
180
+ # Only applicable with the JSON (rich) output format
181
+ #
182
+ # @param Hash the OpenCalais JSON response, as a Hash
183
+ # @return [Hash] the entities store (the method is called for its side effects)
184
+
185
+ def prepare_data(results_hash)
186
+
187
+ @entities_store = {}
188
+ @generic_relations_store = {}
189
+ @events_store = {}
190
+ # find all Entities in response
191
+ @entities_store = results_hash.select{|key, hash| hash["_typeGroup"] == "entities"}
192
+ # find all GenericRelations in response
193
+ @generic_relations_store = results_hash.select{|key, hash| hash["_typeGroup"] == "relations" &&
194
+ hash["_type"] == "GenericRelations"}
195
+ # find all Events in response
196
+ @events_store = results_hash.select{|key, hash| hash["_typeGroup"] == "relations" &&
197
+ hash["_type"] != "GenericRelations"}
198
+
199
+ Ohm.redis = Redic.new(REDIS)
200
+
201
+
202
+ #for each Entity find all related Relations and Events and store them to Ohm/Redis
203
+ @entities_store.each_pair do |k, v|
204
+
205
+ entity_set = EntityModel.find(calais_id: k)
206
+
207
+ if entity_set.size > 0 #entity already exists in store
208
+ entity = entity_set.first
209
+ k = entity.calais_id
210
+ else #entity doesn't exist in store
211
+ entity = EntityModel.create(:name => v['name'], :type => v['_type'], :calais_id => k)
212
+ entity.save
213
+ end #if
214
+
215
+
216
+ #get all referenced relations
217
+ find_in_relations(k).each do |obj|
218
+
219
+ found_rel = get_relation(obj[0])
220
+ if found_rel
221
+
222
+ found_rel.subject = convert_to_hash(found_rel.subject)
223
+ found_rel.object = convert_to_hash(found_rel.object)
224
+
225
+ relation = EntityModel::RelationModel.create(:subject => found_rel.subject,
226
+ :object => found_rel.object,
227
+ :verb => found_rel.verb,
228
+ :detection => found_rel.detection,
229
+ :calais_id => obj[0])
230
+ entity.relations.add(relation)
231
+ end #if
232
+ end #each
233
+ #get all referenced events
234
+ find_in_events(k).each do |obj|
235
+ found_event = get_event(obj[0])
236
+ attribs = {}
237
+ if found_event
238
+
239
+ found_event.each_pair do |key, val|
240
+
241
+ key = key.to_s.slice(1, key.length-1)
242
+ attribs[key] = val
243
+
244
+ end #block
245
+
246
+ event = EntityModel::EventModel.create(:calais_id => obj[0], :info_hash => attribs)
247
+ entity.events.add(event)
248
+
249
+ end #if
250
+
251
+ end #each
252
+ end #each_pair
253
+ end #method
254
+
255
+
256
+
257
+ # Coverts an attribute to an appropriate Hash
258
+ #
259
+ # Only applicable with the JSON (rich) output format
260
+ #
261
+ # @param [String, DoverToCalais::ResponseData::Entity, Hash] an object
262
+ # @return a Hash value
263
+ def convert_to_hash(an_attribute)
264
+ h = {}
265
+ if an_attribute.class.to_s.eql?('String')
266
+ h[:name] = an_attribute
267
+ end
268
+
269
+ if an_attribute.class.to_s.eql?('DoverToCalais::ResponseData::Entity')
270
+ h = an_attribute.to_hash
271
+ end
272
+
273
+ if an_attribute.class.to_s.eql?('Hash')
274
+ h = an_attribute
275
+ end
276
+
277
+ h
278
+ end #method
279
+
280
+
281
+
282
+ # Retrieves the entity with the specified key (OpenCalais ID)
283
+ #
284
+ # Only applicable with the JSON (rich) output format
285
+ #
286
+ # @param String the OpenCalais ID
287
+ # @return an Entity Struct if a match is found, nil otherwise
288
+ def get_entity(ref_key)
289
+ if @entities_store.has_key?(ref_key)
290
+ Entity.new(@entities_store[ref_key]['_type'], @entities_store[ref_key]['name'], ref_key)
291
+ else
292
+ nil
293
+ end
88
294
  end
89
295
 
90
296
 
91
- # Filters the response object to extract relevant data.
297
+ # Retrieves the relation with the specified key (OpenCalais ID). The method will also
298
+ # de-reference any of its attributes that refer to other entities via an OpenCalais ID
299
+ # and will replace the references with the appropriate Entity structure, if applicable
300
+ #
301
+ # Only applicable with the JSON (rich) output format
302
+ #
303
+ # @param String the OpenCalais ID
304
+ # @return a GenericRelation Struct if a match is found, nil otherwise
305
+ def get_relation(ref_key)
306
+ if @generic_relations_store.key?(ref_key)
307
+
308
+ if @generic_relations_store[ref_key]['relationsubject']
309
+ gr_subject = @generic_relations_store[ref_key]['relationsubject'].match('^http://d.opencalais.com') ?
310
+ get_entity(@generic_relations_store[ref_key]['relationsubject']) :
311
+ @generic_relations_store[ref_key]['relationsubject']
312
+ else
313
+ gr_subject = 'N/A'
314
+ end
315
+
316
+
317
+ if @generic_relations_store[ref_key]['relationobject']
318
+ gr_object = @generic_relations_store[ref_key]['relationobject'].match('^http://d.opencalais.com') ?
319
+ get_entity(@generic_relations_store[ref_key]['relationobject']) :
320
+ @generic_relations_store[ref_key]['relationobject']
321
+ else
322
+ gr_object = 'N/A'
323
+ end
324
+
325
+ GenericRelation.new(gr_subject,
326
+ @generic_relations_store[ref_key]['verb'],
327
+ gr_object,
328
+ @generic_relations_store[ref_key]['instances'][0]['exact'] ||= 'N/A')
329
+ else
330
+ nil
331
+ end
332
+ end
333
+
334
+ def get_event(ref_key)
335
+
336
+ dereferenced_events = {}
337
+
338
+ if @events_store.key?(ref_key)
339
+
340
+ @events_store[ref_key].each do |k, v|
341
+
342
+ if v.class.to_s.eql?("String") && v.match('^http://d.opencalais.com')
343
+ dereferenced_events[k] = get_entity(v).to_hash
344
+ elsif v.class.to_s.eql?("String") && !v.match('^http://d.opencalais.com')
345
+ h = {}
346
+ h['name'] = v
347
+ dereferenced_events[k] = h
348
+ elsif v.class.to_s.eql?("Array")
349
+ h = {}
350
+ h['name'] = v[0]['exact']
351
+ dereferenced_events[k] = h
352
+ end
353
+ end
354
+
355
+ Event.new(dereferenced_events)
356
+ else
357
+ nil
358
+ end
359
+ end
360
+
361
+
362
+ # Selects a Hash of generic relations, where the relations' subject or object attributes
363
+ # match the specified OpenCalais ID.
364
+ #
365
+ # Only applicable with the JSON (rich) output format
366
+ #
367
+ # @param String the OpenCalais ID
368
+ # @return a Hash with the selected matches
369
+ def find_in_relations(ref_key)
370
+ @generic_relations_store.select{|key, hash| (hash["relationsubject"] == ref_key) ||
371
+ (hash["relationobject"] == ref_key) }
372
+
373
+ end
374
+
375
+ # Selects a Hash of events, where the events' key matches the
376
+ # specified OpenCalais ID.
377
+ #
378
+ # Only applicable with the JSON (rich) output format
379
+ #
380
+ # @param String the OpenCalais ID
381
+ # @return a Hash with the selected matches
382
+ def find_in_events(ref_key)
383
+ @events_store.select{|key, hash| hash.has_value?(ref_key) }
384
+ end
385
+
386
+
387
+
388
+ # Filters the xml response object to extract relevant data.
92
389
  #
93
390
  # @param params [Hash] a filter Hash (see code samples)
94
391
  # @return [Array<ResponseItem>, String] a list of relevant response items, or an 'ERR: ...' String on failure
95
392
  def filter(params)
393
+ unless @xml_data
394
+ return 'ERR: filter method only works with xml-based output!'
395
+
396
+ end
397
+
96
398
  result = Array.new
97
399
  begin
98
- if @raw
400
+ if @xml_data
99
401
 
100
402
  if params[:given]
101
- found = @raw.xpath("//#{params[:given][:entity]}[contains(text(), #{params[:given][:value].inspect})]")
403
+ found = @xml_data.xpath("//#{params[:given][:entity]}[contains(text(), #{params[:given][:value].inspect})]")
102
404
  if found.size > 0
103
- @raw.xpath("//#{params[:entity]}[contains(text(), #{params[:value].inspect})]").each do |node|
405
+ @xml_data.xpath("//#{params[:entity]}[contains(text(), #{params[:value].inspect})]").each do |node|
104
406
  result << create_response_item(node)
105
407
  end
106
408
  end
107
409
  else # no conditional
108
- @raw.xpath("//#{params[:entity]}[contains(text(), #{params[:value].inspect})]").each do |node|
410
+ @xml_data.xpath("//#{params[:entity]}[contains(text(), #{params[:value].inspect})]").each do |node|
109
411
  result << create_response_item(node)
110
412
  end
111
413
  end
112
414
 
113
415
  return result
114
416
  else # no xml data
115
- puts 'ERR: no valid xml data!'
417
+ return 'ERR: no valid xml data!'
116
418
 
117
419
  end #if
118
420
 
119
421
  rescue Exception=>e
120
- puts "ERR: #filter: #{e}"
422
+ return "ERR: #filter: #{e}"
121
423
 
122
424
  end
123
425
 
124
- #return result
426
+ return result
125
427
 
126
428
  end #method
127
429
 
430
+ # Creates a Response Item from an xml node.
431
+ #
432
+ # @param node [Nokogiri::XML::Node] an XML node
433
+ # @return [ResponseItem] a response item object
128
434
  def create_response_item(node)
129
435
  node_relevance = node.attribute('relevance').text.to_f if node.has_attribute?('relevance')
130
436
  node_count = node.attribute('count').text.to_i if node.has_attribute?('count')
@@ -133,17 +439,18 @@ module DoverToCalais
133
439
  node_orig_value = node.xpath('originalValue').text if node.name.eql?('SocialTag')
134
440
 
135
441
  ResponseItem.new(node.name,
136
- node.text,
137
- node_relevance,
138
- node_count,
139
- node_normalized,
140
- node_importance,
141
- node_orig_value )
442
+ node.text,
443
+ node_relevance,
444
+ node_count,
445
+ node_normalized,
446
+ node_importance,
447
+ node_orig_value )
142
448
 
143
449
  end
144
450
 
145
451
  public :filter
146
- private :create_response_item
452
+ private :create_response_item, :prepare_data, :convert_to_hash, :find_in_relations, :find_in_events,
453
+ :get_event, :get_entity
147
454
 
148
455
  end #class
149
456
 
@@ -211,13 +518,23 @@ module DoverToCalais
211
518
 
212
519
  end #method
213
520
 
521
+
522
+
523
+
524
+
214
525
  # Gets the source text parsed. If the parsing is successful, the data source is POSTed to OpenCalais
215
526
  # via an EventMachine request and a callback is set to manage the OpenCalais response.
216
- # All Dover object callbacks are then called with the request result yielded to them.
527
+ # All Dover object callbacks are then called with the request result yielded to them.
217
528
  #
218
529
  # @param N/A
219
530
  # @return a {Class ResponseData} object
220
- def analyse_this
531
+ def analyse_this(output_format=nil)
532
+
533
+ if output_format
534
+ @output_format = 'application/json'
535
+ else
536
+ @output_format = 'Text/Simple'
537
+ end
221
538
 
222
539
  @document = get_src_data(@data_src)
223
540
  begin
@@ -230,19 +547,19 @@ module DoverToCalais
230
547
 
231
548
 
232
549
  if DoverToCalais::PROXY &&
233
- DoverToCalais::PROXY.class.eql?('Hash') &&
234
- DoverToCalais::PROXY.keys[0].eql?(:proxy)
550
+ DoverToCalais::PROXY.class.eql?('Hash') &&
551
+ DoverToCalais::PROXY.keys[0].eql?(:proxy)
235
552
 
236
553
  connection_options = connection_options.merge(DoverToCalais::PROXY)
237
554
  end
238
555
 
239
556
  request_options = {
240
- :body => @document.to_s,
241
- :head => {
242
- 'x-calais-licenseID' => DoverToCalais::API_KEY,
243
- :content_type => 'TEXT/RAW',
244
- :enableMetadataType => 'GenericRelations,SocialTags',
245
- :outputFormat => 'Text/Simple'}
557
+ :body => @document.to_s,
558
+ :head => {
559
+ 'x-calais-licenseID' => DoverToCalais::API_KEY,
560
+ :content_type => 'TEXT/RAW',
561
+ :enableMetadataType => 'GenericRelations,SocialTags',
562
+ :outputFormat => @output_format}
246
563
  }
247
564
 
248
565
  http = EventMachine::HttpRequest.new(CALAIS_SERVICE, connection_options ).post request_options
@@ -251,20 +568,34 @@ module DoverToCalais
251
568
  http.callback do
252
569
 
253
570
  if http.response_header.status == 200
254
- http.response.match(/<OpenCalaisSimple>/) do |m|
255
- response = Nokogiri::XML('<OpenCalaisSimple>' + m.post_match) do |config|
256
- #strict xml parsing, disallow network connections
257
- config.strict.nonet
258
- end #block
259
- end #block
260
-
261
- result = response ?
262
- ResponseData.new(response, nil) :
263
- ResponseData.new(nil,'ERR: cannot find <OpenCalaisSimple> tag in response data - source invalid?')
264
- else #non-200 response header
265
- result = ResponseData.new nil,
266
- "ERR: OpenCalais service responded with #{http.response_header.status} - response body: '#{http.response}'"
267
- end
571
+ if @output_format == 'Text/Simple'
572
+ http.response.match(/<OpenCalaisSimple>/) do |m|
573
+ response = Nokogiri::XML('<OpenCalaisSimple>' + m.post_match) do |config|
574
+ #strict xml parsing, disallow network connections
575
+ config.strict.nonet
576
+ end #block
577
+ end
578
+ else #@output_format == 'application/json'
579
+ response = JSON.parse(http.response) #response should now be a Hash
580
+
581
+ end #if
582
+
583
+ case response.class.to_s
584
+ when 'NilClass'
585
+ result = ResponseData.new(nil,'ERR: cannot parse response data - source invalid?')
586
+ when 'Nokogiri::XML::Document'
587
+ result = ResponseData.new(response, nil)
588
+ when 'Hash'
589
+ result = ResponseData.new(response, nil)
590
+ else
591
+ result = ResponseData.new(nil,'ERR: cannot parse response data - unrecognized format!')
592
+ end
593
+
594
+
595
+ else #non-200 response
596
+ result = ResponseData.new nil,
597
+ "ERR: OpenCalais service responded with #{http.response_header.status} - response body: '#{http.response}'"
598
+ end
268
599
 
269
600
  @callbacks.each { |c| c.call(result) }
270
601
 
@@ -287,6 +618,7 @@ module DoverToCalais
287
618
  end #method
288
619
 
289
620
 
621
+
290
622
  alias_method :analyze_this, :analyse_this
291
623
  public :to_calais, :analyse_this
292
624
  private :get_src_data
@@ -0,0 +1,46 @@
1
+ require 'ohm'
2
+ require 'ohm/contrib'
3
+
4
+ module DoverToCalais
5
+ class EntityModel < Ohm::Model
6
+ attribute :name
7
+ attribute :type
8
+ attribute :calais_id
9
+ set :relations, :RelationModel
10
+ set :events, :EventModel
11
+
12
+
13
+ index :name
14
+ index :type
15
+ index :calais_id
16
+
17
+ def validate
18
+ assert_present :name
19
+ assert_present :type
20
+ assert_present :calais_id
21
+ end
22
+
23
+ class RelationModel < Ohm::Model
24
+ include Ohm::DataTypes
25
+
26
+ attribute :subject, Type::Hash
27
+ attribute :object, Type::Hash
28
+ attribute :verb
29
+ attribute :detection
30
+ attribute :calais_id
31
+
32
+ index :subject
33
+
34
+ end #class
35
+
36
+ class EventModel < Ohm::Model
37
+ include Ohm::DataTypes
38
+
39
+ attribute :calais_id
40
+ attribute :info_hash, Type::Hash
41
+
42
+ end #class
43
+
44
+ end #class
45
+
46
+ end
@@ -1,23 +1,3 @@
1
- # ontology.rb
2
- #
3
- # Copyright 2013 Fred <fred@fred-Veriton-X270>
4
- #
5
- # This program is free software; you can redistribute it and/or modify
6
- # it under the terms of the GNU General Public License as published by
7
- # the Free Software Foundation; either version 2 of the License, or
8
- # (at your option) any later version.
9
- #
10
- # This program is distributed in the hope that it will be useful,
11
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
- # GNU General Public License for more details.
14
- #
15
- # You should have received a copy of the GNU General Public License
16
- # along with this program; if not, write to the Free Software
17
- # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
18
- # MA 02110-1301, USA.
19
- #
20
- #
21
1
 
22
2
 
23
3
  module CalaisOntology
@@ -67,25 +47,38 @@ CALAIS_EVENTS = %w(
67
47
  Alliance
68
48
  AnalystEarningsEstimate
69
49
  AnalystRecommendation
50
+ ArmedAttack
51
+ ArmsPurchaseSale
70
52
  Arrest
71
53
  Bankruptcy
72
54
  BonusSharesIssuance
73
55
  BusinessRelation
74
56
  Buybacks
57
+ CandidatePosition
75
58
  CompanyAccountingChange
59
+ CompanyAffiliates
60
+ CompanyCompetitor
61
+ CompanyCustomer
76
62
  CompanyEarningsAnnouncement
77
63
  CompanyEarningsGuidance
64
+ CompanyEmployeesNumber
78
65
  CompanyExpansion
79
66
  CompanyForceMajeure
67
+ CompanyFounded
80
68
  CompanyInvestment
81
69
  CompanyLaborIssues
82
70
  CompanyLayoffs
83
71
  CompanyLegalIssues
72
+ CompanyLocation
84
73
  CompanyListingChange
85
74
  CompanyMeeting
86
75
  CompanyNameChange
76
+ CompanyProduct
87
77
  CompanyReorganization
88
78
  CompanyRestatement
79
+ CompanyTechnology
80
+ CompanyUsingProduct
81
+ CompanyTicker
89
82
  ConferenceCall
90
83
  Conviction
91
84
  CreditRating
@@ -94,19 +87,32 @@ CALAIS_EVENTS = %w(
94
87
  DiplomaticRelations
95
88
  Dividend
96
89
  EmploymentChange
90
+ EmploymentRelation
97
91
  EnvironmentalIssue
92
+ EquityFinancing
98
93
  Extinction
94
+ FamilyRelation
99
95
  FDAPhase
100
96
  Indictment
97
+ IndicesChanges
101
98
  IPO
102
99
  JointVenture
103
100
  ManMadeDisaster
104
101
  Merger
102
+ MilitaryAction
105
103
  MovieRelease
106
104
  MusicAlbumRelease
107
105
  NaturalDisaster
108
106
  PatentFiling
109
107
  PatentIssuance
108
+ PersonAttributes
109
+ PersonCareer
110
+ PersonCommunication
111
+ PersonEducation
112
+ PersonEmailAddress
113
+ PersonLocation
114
+ PersonParty
115
+ PersonRelation
110
116
  PersonTravel
111
117
  PoliticalEndorsement
112
118
  PoliticalRelationship
@@ -114,14 +120,15 @@ CALAIS_EVENTS = %w(
114
120
  ProductIssues
115
121
  ProductRecall
116
122
  ProductRelease
117
- SocialTags
123
+ Quotation
118
124
  SecondaryIssuance
119
- PersonCommunication
120
125
  StockSplit
121
126
  Trial
122
127
  VotingResult
123
128
  )
124
129
 
130
+
131
+
125
132
  CALAIS_TOPICS = %w(
126
133
  Business_Finance
127
134
  Disaster_Accident
@@ -1,3 +1,3 @@
1
1
  module DoverToCalais
2
- VERSION = "0.2.0"
2
+ VERSION = "0.2.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dover_to_calais
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Fred Heath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-10 00:00:00.000000000 Z
11
+ date: 2014-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -78,6 +78,86 @@ dependencies:
78
78
  - - ! '>='
79
79
  - !ruby/object:Gem::Version
80
80
  version: 0.1.9
81
+ - !ruby/object:Gem::Dependency
82
+ name: json
83
+ requirement: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - ~>
86
+ - !ruby/object:Gem::Version
87
+ version: '1.5'
88
+ - - ! '>='
89
+ - !ruby/object:Gem::Version
90
+ version: 1.5.5
91
+ type: :runtime
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ~>
96
+ - !ruby/object:Gem::Version
97
+ version: '1.5'
98
+ - - ! '>='
99
+ - !ruby/object:Gem::Version
100
+ version: 1.5.5
101
+ - !ruby/object:Gem::Dependency
102
+ name: ohm
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - ~>
106
+ - !ruby/object:Gem::Version
107
+ version: '2.0'
108
+ - - ! '>='
109
+ - !ruby/object:Gem::Version
110
+ version: 2.0.0
111
+ type: :runtime
112
+ prerelease: false
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: '2.0'
118
+ - - ! '>='
119
+ - !ruby/object:Gem::Version
120
+ version: 2.0.0
121
+ - !ruby/object:Gem::Dependency
122
+ name: ohm-contrib
123
+ requirement: !ruby/object:Gem::Requirement
124
+ requirements:
125
+ - - ~>
126
+ - !ruby/object:Gem::Version
127
+ version: '2.0'
128
+ - - ! '>='
129
+ - !ruby/object:Gem::Version
130
+ version: 2.0.0
131
+ type: :runtime
132
+ prerelease: false
133
+ version_requirements: !ruby/object:Gem::Requirement
134
+ requirements:
135
+ - - ~>
136
+ - !ruby/object:Gem::Version
137
+ version: '2.0'
138
+ - - ! '>='
139
+ - !ruby/object:Gem::Version
140
+ version: 2.0.0
141
+ - !ruby/object:Gem::Dependency
142
+ name: em-throttled_queue
143
+ requirement: !ruby/object:Gem::Requirement
144
+ requirements:
145
+ - - ~>
146
+ - !ruby/object:Gem::Version
147
+ version: '1.1'
148
+ - - ! '>='
149
+ - !ruby/object:Gem::Version
150
+ version: 1.1.0
151
+ type: :runtime
152
+ prerelease: false
153
+ version_requirements: !ruby/object:Gem::Requirement
154
+ requirements:
155
+ - - ~>
156
+ - !ruby/object:Gem::Version
157
+ version: '1.1'
158
+ - - ! '>='
159
+ - !ruby/object:Gem::Version
160
+ version: 1.1.0
81
161
  - !ruby/object:Gem::Dependency
82
162
  name: bundler
83
163
  requirement: !ruby/object:Gem::Requirement
@@ -164,11 +244,14 @@ files:
164
244
  - README.md
165
245
  - Rakefile
166
246
  - dover_to_calais.gemspec
247
+ - features/data_mining.feature
167
248
  - features/data_sources.feature
168
249
  - features/filtering.feature
250
+ - features/step_definitions/data_mining_steps.rb
169
251
  - features/step_definitions/data_sources_steps.rb
170
252
  - features/step_definitions/filtering_steps.rb
171
253
  - lib/dover_to_calais.rb
254
+ - lib/dover_to_calais/models.rb
172
255
  - lib/dover_to_calais/ontology.rb
173
256
  - lib/dover_to_calais/version.rb
174
257
  - test/test_file_1.doc
@@ -202,8 +285,10 @@ signing_key:
202
285
  specification_version: 4
203
286
  summary: An easy-to-use wrapper round the OpenCalais semantic analysis web service.
204
287
  test_files:
288
+ - features/data_mining.feature
205
289
  - features/data_sources.feature
206
290
  - features/filtering.feature
291
+ - features/step_definitions/data_mining_steps.rb
207
292
  - features/step_definitions/data_sources_steps.rb
208
293
  - features/step_definitions/filtering_steps.rb
209
294
  - test/test_file_1.doc