calais 0.0.7 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +2 -2
- data/Rakefile +1 -1
- data/VERSION.yml +2 -2
- data/lib/calais.rb +1 -1
- data/lib/calais/client.rb +13 -12
- data/lib/calais/response.rb +39 -39
- data/spec/calais/client_spec.rb +2 -2
- metadata +9 -7
data/README.markdown
CHANGED
@@ -32,7 +32,7 @@ This will return an object containing information extracted from the RDF respons
|
|
32
32
|
## Requirements ##
|
33
33
|
|
34
34
|
* [Ruby 1.8.5 or better](http://ruby-lang.org)
|
35
|
-
* [
|
35
|
+
* [nokogiri](http://nokogiri.rubyforge.org/nokogiri/), [libxml2](http://xmlsoft.org/), [libxslt](http://xmlsoft.org/xslt/)
|
36
36
|
* [curb](http://curb.rubyforge.org/), [libcurl](http://curl.haxx.se/)
|
37
37
|
* [json](http://json.rubyforge.org/)
|
38
38
|
|
@@ -46,4 +46,4 @@ You can install the Calais gem via Rubygems (`gem install calais`) or by buildin
|
|
46
46
|
|
47
47
|
## Acknowledgements ##
|
48
48
|
|
49
|
-
* [Paul Legato](http://www.economaton.com/): Help all around with the new response processor and implementation of the 3.1 API.
|
49
|
+
* [Paul Legato](http://www.economaton.com/): Help all around with the new response processor and implementation of the 3.1 API.
|
data/Rakefile
CHANGED
@@ -18,7 +18,7 @@ begin
|
|
18
18
|
s.authors = ['Abhay Kumar']
|
19
19
|
s.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*"]
|
20
20
|
s.rubyforge_project = 'calais'
|
21
|
-
s.add_dependency '
|
21
|
+
s.add_dependency 'nokogiri', '>= 1.3.3'
|
22
22
|
s.add_dependency 'json', '>= 1.1.3'
|
23
23
|
s.add_dependency 'curb', '>= 0.1.4'
|
24
24
|
end
|
data/VERSION.yml
CHANGED
data/lib/calais.rb
CHANGED
data/lib/calais/client.rb
CHANGED
@@ -38,36 +38,37 @@ module Calais
|
|
38
38
|
|
39
39
|
def params_xml
|
40
40
|
check_params
|
41
|
-
|
42
|
-
|
41
|
+
document = Nokogiri::XML::Document.new
|
42
|
+
|
43
|
+
params_node = Nokogiri::XML::Node.new('c:params', document)
|
43
44
|
params_node['xmlns:c'] = 'http://s.opencalais.com/1/pred/'
|
44
45
|
params_node['xmlns:rdf'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
45
|
-
|
46
|
-
processing_node = XML::Node.new('c:processingDirectives')
|
46
|
+
|
47
|
+
processing_node = Nokogiri::XML::Node.new('c:processingDirectives', document)
|
47
48
|
processing_node['c:contentType'] = AVAILABLE_CONTENT_TYPES[@content_type] if @content_type
|
48
49
|
processing_node['c:outputFormat'] = AVAILABLE_OUTPUT_FORMATS[@output_format] if @output_format
|
49
50
|
processing_node['c:reltagBaseURL'] = @reltag_base_url.to_s if @reltag_base_url
|
50
|
-
|
51
|
+
|
51
52
|
processing_node['c:enableMetadataType'] = @metadata_enables.join(';') unless @metadata_enables.empty?
|
52
53
|
processing_node['c:discardMetadata'] = @metadata_discards.join(';') unless @metadata_discards.empty?
|
53
54
|
processing_node['c:omitOutputtingOriginalText'] = 'true' if @omit_outputting_original_text
|
54
|
-
|
55
|
-
user_node = XML::Node.new('c:userDirectives')
|
55
|
+
|
56
|
+
user_node = Nokogiri::XML::Node.new('c:userDirectives', document)
|
56
57
|
user_node['c:allowDistribution'] = @allow_distribution.to_s unless @allow_distribution.nil?
|
57
58
|
user_node['c:allowSearch'] = @allow_search.to_s unless @allow_search.nil?
|
58
59
|
user_node['c:externalID'] = @external_id.to_s if @external_id
|
59
60
|
user_node['c:submitter'] = @submitter.to_s if @submitter
|
60
|
-
|
61
|
+
|
61
62
|
params_node << processing_node
|
62
63
|
params_node << user_node
|
63
|
-
|
64
|
+
|
64
65
|
if @external_metadata
|
65
|
-
external_node = XML::Node.new('c:externalMetadata')
|
66
|
+
external_node = Nokogiri::XML::Node.new('c:externalMetadata', document)
|
66
67
|
external_node << @external_metadata
|
67
68
|
params_node << external_node
|
68
69
|
end
|
69
|
-
|
70
|
-
params_node.
|
70
|
+
|
71
|
+
params_node.to_xml(:indent => 2)
|
71
72
|
end
|
72
73
|
|
73
74
|
private
|
data/lib/calais/response.rb
CHANGED
@@ -47,14 +47,14 @@ module Calais
|
|
47
47
|
class Instance
|
48
48
|
attr_accessor :prefix, :exact, :suffix, :offset, :length
|
49
49
|
|
50
|
-
# Makes a new Instance object from an appropriate
|
50
|
+
# Makes a new Instance object from an appropriate Nokogiri::XML::Node.
|
51
51
|
def self.from_node(node)
|
52
52
|
instance = self.new
|
53
|
-
instance.prefix = node.
|
54
|
-
instance.exact
|
55
|
-
instance.suffix = node.
|
56
|
-
instance.offset = node.
|
57
|
-
instance.length = node.
|
53
|
+
instance.prefix = node.xpath("c:prefix[1]").first.content
|
54
|
+
instance.exact = node.xpath("c:exact[1]").first.content
|
55
|
+
instance.suffix = node.xpath("c:suffix[1]").first.content
|
56
|
+
instance.offset = node.xpath("c:offset[1]").first.content.to_i
|
57
|
+
instance.length = node.xpath("c:length[1]").first.content.to_i
|
58
58
|
|
59
59
|
instance
|
60
60
|
end
|
@@ -76,112 +76,112 @@ module Calais
|
|
76
76
|
|
77
77
|
private
|
78
78
|
def extract_data
|
79
|
-
doc = XML
|
79
|
+
doc = Nokogiri::XML(@raw_response)
|
80
80
|
|
81
|
-
if doc.root.
|
82
|
-
raise Calais::Error, doc.root.
|
81
|
+
if doc.root.xpath("/Error[1]").first
|
82
|
+
raise Calais::Error, doc.root.xpath("/Error/Exception").first.content
|
83
83
|
end
|
84
84
|
|
85
|
-
doc.root.
|
85
|
+
doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfometa]}')]/..").each do |node|
|
86
86
|
@language = node['language']
|
87
87
|
@submission_date = DateTime.parse node['submissionDate']
|
88
88
|
|
89
|
-
attributes = extract_attributes(node.
|
89
|
+
attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
|
90
90
|
|
91
91
|
@signature = attributes.delete('signature')
|
92
92
|
@submitter_code = attributes.delete('submitterCode')
|
93
93
|
|
94
|
-
node.remove
|
94
|
+
node.remove
|
95
95
|
end
|
96
96
|
|
97
|
-
doc.root.
|
97
|
+
doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfo]}')]/..").each do |node|
|
98
98
|
@request_id = node['calaisRequestID']
|
99
99
|
|
100
|
-
attributes = extract_attributes(node.
|
100
|
+
attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
|
101
101
|
|
102
102
|
@doc_title = attributes.delete('docTitle')
|
103
|
-
@doc_date = Date.parse
|
103
|
+
@doc_date = Date.parse(attributes.delete('docDate'))
|
104
104
|
|
105
|
-
node.remove
|
105
|
+
node.remove
|
106
106
|
end
|
107
107
|
|
108
|
-
@categories = doc.root.
|
108
|
+
@categories = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:doccat]}')]/..").map do |node|
|
109
109
|
category = Category.new
|
110
|
-
category.name = node.
|
111
|
-
score = node.
|
110
|
+
category.name = node.xpath("c:categoryName[1]").first.content
|
111
|
+
score = node.xpath("c:score[1]").first
|
112
112
|
category.score = score.content.to_f unless score.nil?
|
113
113
|
|
114
|
-
node.remove
|
114
|
+
node.remove
|
115
115
|
category
|
116
116
|
end
|
117
117
|
|
118
|
-
@relevances = doc.root.
|
119
|
-
subject_hash = node.
|
120
|
-
acc[subject_hash] = node.
|
118
|
+
@relevances = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) do |acc, node|
|
119
|
+
subject_hash = node.xpath("c:subject[1]").first[:resource].split('/')[-1]
|
120
|
+
acc[subject_hash] = node.xpath("c:relevance[1]").first.content.to_f
|
121
121
|
|
122
|
-
node.remove
|
122
|
+
node.remove
|
123
123
|
acc
|
124
124
|
end
|
125
125
|
|
126
|
-
@entities = doc.root.
|
126
|
+
@entities = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node|
|
127
127
|
extracted_hash = node['about'].split('/')[-1] rescue nil
|
128
128
|
|
129
129
|
entity = Entity.new
|
130
130
|
entity.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes)
|
131
131
|
entity.type = extract_type(node)
|
132
|
-
entity.attributes = extract_attributes(node.
|
132
|
+
entity.attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
|
133
133
|
|
134
134
|
entity.relevance = @relevances[extracted_hash]
|
135
135
|
entity.instances = extract_instances(doc, extracted_hash)
|
136
136
|
|
137
|
-
node.remove
|
137
|
+
node.remove
|
138
138
|
entity
|
139
139
|
end
|
140
140
|
|
141
|
-
@relations = doc.root.
|
141
|
+
@relations = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node|
|
142
142
|
extracted_hash = node['about'].split('/')[-1] rescue nil
|
143
143
|
|
144
144
|
relation = Relation.new
|
145
145
|
relation.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes)
|
146
146
|
relation.type = extract_type(node)
|
147
|
-
relation.attributes = extract_attributes(node.
|
147
|
+
relation.attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
|
148
148
|
relation.instances = extract_instances(doc, extracted_hash)
|
149
149
|
|
150
|
-
node.remove
|
150
|
+
node.remove
|
151
151
|
relation
|
152
152
|
end
|
153
153
|
|
154
|
-
@geographies = doc.root.
|
155
|
-
attributes = extract_attributes(node.
|
154
|
+
@geographies = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:geographies]}')]/..").map do |node|
|
155
|
+
attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
|
156
156
|
|
157
157
|
geography = Geography.new
|
158
158
|
geography.name = attributes.delete('name')
|
159
159
|
geography.calais_hash = attributes.delete('subject')
|
160
160
|
geography.attributes = attributes
|
161
161
|
|
162
|
-
node.remove
|
162
|
+
node.remove
|
163
163
|
geography
|
164
164
|
end
|
165
165
|
|
166
|
-
doc.root.
|
167
|
-
doc.root.
|
166
|
+
doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:defaultlangid]}')]/..").each { |node| node.remove }
|
167
|
+
doc.root.xpath("./*").each { |node| node.remove }
|
168
168
|
|
169
169
|
return
|
170
170
|
end
|
171
171
|
|
172
172
|
def extract_instances(doc, hash)
|
173
|
-
doc.root.
|
174
|
-
instance_node.
|
173
|
+
doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:instances]}')]/..").select do |instance_node|
|
174
|
+
instance_node.xpath("c:subject[1]").first[:resource].split("/")[-1] == hash
|
175
175
|
end.map do |instance_node|
|
176
176
|
instance = Instance.from_node(instance_node)
|
177
|
-
instance_node.remove
|
177
|
+
instance_node.remove
|
178
178
|
|
179
179
|
instance
|
180
180
|
end
|
181
181
|
end
|
182
182
|
|
183
183
|
def extract_type(node)
|
184
|
-
node.
|
184
|
+
node.xpath("*[name()='rdf:type']")[0]['resource'].split('/')[-1]
|
185
185
|
rescue
|
186
186
|
nil
|
187
187
|
end
|
data/spec/calais/client_spec.rb
CHANGED
@@ -32,7 +32,7 @@ end
|
|
32
32
|
describe Calais::Client, :params_xml do
|
33
33
|
it 'returns an xml encoded string' do
|
34
34
|
client = Calais::Client.new(:content => SAMPLE_DOCUMENT, :license_id => LICENSE_ID)
|
35
|
-
client.params_xml.should == %[<c:params xmlns:c
|
35
|
+
client.params_xml.should == %[<c:params xmlns:c=\"http://s.opencalais.com/1/pred/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n <c:processingDirectives/>\n <c:userDirectives/>\n</c:params>]
|
36
36
|
|
37
37
|
client.content_type = :xml
|
38
38
|
client.output_format = :json
|
@@ -76,4 +76,4 @@ describe Calais::Client, :enlighten do
|
|
76
76
|
@client.enlighten
|
77
77
|
@client.instance_variable_get(:@client).url.should == Calais::BETA_REST_ENDPOINT
|
78
78
|
end
|
79
|
-
end
|
79
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: calais
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Abhay Kumar
|
@@ -9,18 +9,18 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-09-18 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
16
|
+
name: nokogiri
|
17
17
|
type: :runtime
|
18
18
|
version_requirement:
|
19
19
|
version_requirements: !ruby/object:Gem::Requirement
|
20
20
|
requirements:
|
21
21
|
- - ">="
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version:
|
23
|
+
version: 1.3.3
|
24
24
|
version:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: json
|
@@ -62,6 +62,8 @@ files:
|
|
62
62
|
- lib/calais/response.rb
|
63
63
|
has_rdoc: true
|
64
64
|
homepage: http://github.com/abhay/calais
|
65
|
+
licenses: []
|
66
|
+
|
65
67
|
post_install_message:
|
66
68
|
rdoc_options:
|
67
69
|
- --charset=UTF-8
|
@@ -82,11 +84,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
82
84
|
requirements: []
|
83
85
|
|
84
86
|
rubyforge_project: calais
|
85
|
-
rubygems_version: 1.3.
|
87
|
+
rubygems_version: 1.3.5
|
86
88
|
signing_key:
|
87
89
|
specification_version: 2
|
88
90
|
summary: A Ruby interface to the Calais Web Service
|
89
91
|
test_files:
|
90
|
-
- spec/helper.rb
|
91
|
-
- spec/calais/response_spec.rb
|
92
92
|
- spec/calais/client_spec.rb
|
93
|
+
- spec/calais/response_spec.rb
|
94
|
+
- spec/helper.rb
|