calais 0.0.7 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +2 -2
- data/Rakefile +1 -1
- data/VERSION.yml +2 -2
- data/lib/calais.rb +1 -1
- data/lib/calais/client.rb +13 -12
- data/lib/calais/response.rb +39 -39
- data/spec/calais/client_spec.rb +2 -2
- metadata +9 -7
data/README.markdown
CHANGED
@@ -32,7 +32,7 @@ This will return an object containing information extracted from the RDF respons
|
|
32
32
|
## Requirements ##
|
33
33
|
|
34
34
|
* [Ruby 1.8.5 or better](http://ruby-lang.org)
|
35
|
-
* [
|
35
|
+
* [nokogiri](http://nokogiri.rubyforge.org/nokogiri/), [libxml2](http://xmlsoft.org/), [libxslt](http://xmlsoft.org/xslt/)
|
36
36
|
* [curb](http://curb.rubyforge.org/), [libcurl](http://curl.haxx.se/)
|
37
37
|
* [json](http://json.rubyforge.org/)
|
38
38
|
|
@@ -46,4 +46,4 @@ You can install the Calais gem via Rubygems (`gem install calais`) or by buildin
|
|
46
46
|
|
47
47
|
## Acknowledgements ##
|
48
48
|
|
49
|
-
* [Paul Legato](http://www.economaton.com/): Help all around with the new response processor and implementation of the 3.1 API.
|
49
|
+
* [Paul Legato](http://www.economaton.com/): Help all around with the new response processor and implementation of the 3.1 API.
|
data/Rakefile
CHANGED
@@ -18,7 +18,7 @@ begin
|
|
18
18
|
s.authors = ['Abhay Kumar']
|
19
19
|
s.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*"]
|
20
20
|
s.rubyforge_project = 'calais'
|
21
|
-
s.add_dependency '
|
21
|
+
s.add_dependency 'nokogiri', '>= 1.3.3'
|
22
22
|
s.add_dependency 'json', '>= 1.1.3'
|
23
23
|
s.add_dependency 'curb', '>= 0.1.4'
|
24
24
|
end
|
data/VERSION.yml
CHANGED
data/lib/calais.rb
CHANGED
data/lib/calais/client.rb
CHANGED
@@ -38,36 +38,37 @@ module Calais
|
|
38
38
|
|
39
39
|
def params_xml
|
40
40
|
check_params
|
41
|
-
|
42
|
-
|
41
|
+
document = Nokogiri::XML::Document.new
|
42
|
+
|
43
|
+
params_node = Nokogiri::XML::Node.new('c:params', document)
|
43
44
|
params_node['xmlns:c'] = 'http://s.opencalais.com/1/pred/'
|
44
45
|
params_node['xmlns:rdf'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
45
|
-
|
46
|
-
processing_node = XML::Node.new('c:processingDirectives')
|
46
|
+
|
47
|
+
processing_node = Nokogiri::XML::Node.new('c:processingDirectives', document)
|
47
48
|
processing_node['c:contentType'] = AVAILABLE_CONTENT_TYPES[@content_type] if @content_type
|
48
49
|
processing_node['c:outputFormat'] = AVAILABLE_OUTPUT_FORMATS[@output_format] if @output_format
|
49
50
|
processing_node['c:reltagBaseURL'] = @reltag_base_url.to_s if @reltag_base_url
|
50
|
-
|
51
|
+
|
51
52
|
processing_node['c:enableMetadataType'] = @metadata_enables.join(';') unless @metadata_enables.empty?
|
52
53
|
processing_node['c:discardMetadata'] = @metadata_discards.join(';') unless @metadata_discards.empty?
|
53
54
|
processing_node['c:omitOutputtingOriginalText'] = 'true' if @omit_outputting_original_text
|
54
|
-
|
55
|
-
user_node = XML::Node.new('c:userDirectives')
|
55
|
+
|
56
|
+
user_node = Nokogiri::XML::Node.new('c:userDirectives', document)
|
56
57
|
user_node['c:allowDistribution'] = @allow_distribution.to_s unless @allow_distribution.nil?
|
57
58
|
user_node['c:allowSearch'] = @allow_search.to_s unless @allow_search.nil?
|
58
59
|
user_node['c:externalID'] = @external_id.to_s if @external_id
|
59
60
|
user_node['c:submitter'] = @submitter.to_s if @submitter
|
60
|
-
|
61
|
+
|
61
62
|
params_node << processing_node
|
62
63
|
params_node << user_node
|
63
|
-
|
64
|
+
|
64
65
|
if @external_metadata
|
65
|
-
external_node = XML::Node.new('c:externalMetadata')
|
66
|
+
external_node = Nokogiri::XML::Node.new('c:externalMetadata', document)
|
66
67
|
external_node << @external_metadata
|
67
68
|
params_node << external_node
|
68
69
|
end
|
69
|
-
|
70
|
-
params_node.
|
70
|
+
|
71
|
+
params_node.to_xml(:indent => 2)
|
71
72
|
end
|
72
73
|
|
73
74
|
private
|
data/lib/calais/response.rb
CHANGED
@@ -47,14 +47,14 @@ module Calais
|
|
47
47
|
class Instance
|
48
48
|
attr_accessor :prefix, :exact, :suffix, :offset, :length
|
49
49
|
|
50
|
-
# Makes a new Instance object from an appropriate
|
50
|
+
# Makes a new Instance object from an appropriate Nokogiri::XML::Node.
|
51
51
|
def self.from_node(node)
|
52
52
|
instance = self.new
|
53
|
-
instance.prefix = node.
|
54
|
-
instance.exact
|
55
|
-
instance.suffix = node.
|
56
|
-
instance.offset = node.
|
57
|
-
instance.length = node.
|
53
|
+
instance.prefix = node.xpath("c:prefix[1]").first.content
|
54
|
+
instance.exact = node.xpath("c:exact[1]").first.content
|
55
|
+
instance.suffix = node.xpath("c:suffix[1]").first.content
|
56
|
+
instance.offset = node.xpath("c:offset[1]").first.content.to_i
|
57
|
+
instance.length = node.xpath("c:length[1]").first.content.to_i
|
58
58
|
|
59
59
|
instance
|
60
60
|
end
|
@@ -76,112 +76,112 @@ module Calais
|
|
76
76
|
|
77
77
|
private
|
78
78
|
def extract_data
|
79
|
-
doc = XML
|
79
|
+
doc = Nokogiri::XML(@raw_response)
|
80
80
|
|
81
|
-
if doc.root.
|
82
|
-
raise Calais::Error, doc.root.
|
81
|
+
if doc.root.xpath("/Error[1]").first
|
82
|
+
raise Calais::Error, doc.root.xpath("/Error/Exception").first.content
|
83
83
|
end
|
84
84
|
|
85
|
-
doc.root.
|
85
|
+
doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfometa]}')]/..").each do |node|
|
86
86
|
@language = node['language']
|
87
87
|
@submission_date = DateTime.parse node['submissionDate']
|
88
88
|
|
89
|
-
attributes = extract_attributes(node.
|
89
|
+
attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
|
90
90
|
|
91
91
|
@signature = attributes.delete('signature')
|
92
92
|
@submitter_code = attributes.delete('submitterCode')
|
93
93
|
|
94
|
-
node.remove
|
94
|
+
node.remove
|
95
95
|
end
|
96
96
|
|
97
|
-
doc.root.
|
97
|
+
doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfo]}')]/..").each do |node|
|
98
98
|
@request_id = node['calaisRequestID']
|
99
99
|
|
100
|
-
attributes = extract_attributes(node.
|
100
|
+
attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
|
101
101
|
|
102
102
|
@doc_title = attributes.delete('docTitle')
|
103
|
-
@doc_date = Date.parse
|
103
|
+
@doc_date = Date.parse(attributes.delete('docDate'))
|
104
104
|
|
105
|
-
node.remove
|
105
|
+
node.remove
|
106
106
|
end
|
107
107
|
|
108
|
-
@categories = doc.root.
|
108
|
+
@categories = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:doccat]}')]/..").map do |node|
|
109
109
|
category = Category.new
|
110
|
-
category.name = node.
|
111
|
-
score = node.
|
110
|
+
category.name = node.xpath("c:categoryName[1]").first.content
|
111
|
+
score = node.xpath("c:score[1]").first
|
112
112
|
category.score = score.content.to_f unless score.nil?
|
113
113
|
|
114
|
-
node.remove
|
114
|
+
node.remove
|
115
115
|
category
|
116
116
|
end
|
117
117
|
|
118
|
-
@relevances = doc.root.
|
119
|
-
subject_hash = node.
|
120
|
-
acc[subject_hash] = node.
|
118
|
+
@relevances = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) do |acc, node|
|
119
|
+
subject_hash = node.xpath("c:subject[1]").first[:resource].split('/')[-1]
|
120
|
+
acc[subject_hash] = node.xpath("c:relevance[1]").first.content.to_f
|
121
121
|
|
122
|
-
node.remove
|
122
|
+
node.remove
|
123
123
|
acc
|
124
124
|
end
|
125
125
|
|
126
|
-
@entities = doc.root.
|
126
|
+
@entities = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node|
|
127
127
|
extracted_hash = node['about'].split('/')[-1] rescue nil
|
128
128
|
|
129
129
|
entity = Entity.new
|
130
130
|
entity.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes)
|
131
131
|
entity.type = extract_type(node)
|
132
|
-
entity.attributes = extract_attributes(node.
|
132
|
+
entity.attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
|
133
133
|
|
134
134
|
entity.relevance = @relevances[extracted_hash]
|
135
135
|
entity.instances = extract_instances(doc, extracted_hash)
|
136
136
|
|
137
|
-
node.remove
|
137
|
+
node.remove
|
138
138
|
entity
|
139
139
|
end
|
140
140
|
|
141
|
-
@relations = doc.root.
|
141
|
+
@relations = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node|
|
142
142
|
extracted_hash = node['about'].split('/')[-1] rescue nil
|
143
143
|
|
144
144
|
relation = Relation.new
|
145
145
|
relation.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes)
|
146
146
|
relation.type = extract_type(node)
|
147
|
-
relation.attributes = extract_attributes(node.
|
147
|
+
relation.attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
|
148
148
|
relation.instances = extract_instances(doc, extracted_hash)
|
149
149
|
|
150
|
-
node.remove
|
150
|
+
node.remove
|
151
151
|
relation
|
152
152
|
end
|
153
153
|
|
154
|
-
@geographies = doc.root.
|
155
|
-
attributes = extract_attributes(node.
|
154
|
+
@geographies = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:geographies]}')]/..").map do |node|
|
155
|
+
attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
|
156
156
|
|
157
157
|
geography = Geography.new
|
158
158
|
geography.name = attributes.delete('name')
|
159
159
|
geography.calais_hash = attributes.delete('subject')
|
160
160
|
geography.attributes = attributes
|
161
161
|
|
162
|
-
node.remove
|
162
|
+
node.remove
|
163
163
|
geography
|
164
164
|
end
|
165
165
|
|
166
|
-
doc.root.
|
167
|
-
doc.root.
|
166
|
+
doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:defaultlangid]}')]/..").each { |node| node.remove }
|
167
|
+
doc.root.xpath("./*").each { |node| node.remove }
|
168
168
|
|
169
169
|
return
|
170
170
|
end
|
171
171
|
|
172
172
|
def extract_instances(doc, hash)
|
173
|
-
doc.root.
|
174
|
-
instance_node.
|
173
|
+
doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:instances]}')]/..").select do |instance_node|
|
174
|
+
instance_node.xpath("c:subject[1]").first[:resource].split("/")[-1] == hash
|
175
175
|
end.map do |instance_node|
|
176
176
|
instance = Instance.from_node(instance_node)
|
177
|
-
instance_node.remove
|
177
|
+
instance_node.remove
|
178
178
|
|
179
179
|
instance
|
180
180
|
end
|
181
181
|
end
|
182
182
|
|
183
183
|
def extract_type(node)
|
184
|
-
node.
|
184
|
+
node.xpath("*[name()='rdf:type']")[0]['resource'].split('/')[-1]
|
185
185
|
rescue
|
186
186
|
nil
|
187
187
|
end
|
data/spec/calais/client_spec.rb
CHANGED
@@ -32,7 +32,7 @@ end
|
|
32
32
|
describe Calais::Client, :params_xml do
|
33
33
|
it 'returns an xml encoded string' do
|
34
34
|
client = Calais::Client.new(:content => SAMPLE_DOCUMENT, :license_id => LICENSE_ID)
|
35
|
-
client.params_xml.should == %[<c:params xmlns:c
|
35
|
+
client.params_xml.should == %[<c:params xmlns:c=\"http://s.opencalais.com/1/pred/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n <c:processingDirectives/>\n <c:userDirectives/>\n</c:params>]
|
36
36
|
|
37
37
|
client.content_type = :xml
|
38
38
|
client.output_format = :json
|
@@ -76,4 +76,4 @@ describe Calais::Client, :enlighten do
|
|
76
76
|
@client.enlighten
|
77
77
|
@client.instance_variable_get(:@client).url.should == Calais::BETA_REST_ENDPOINT
|
78
78
|
end
|
79
|
-
end
|
79
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: calais
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Abhay Kumar
|
@@ -9,18 +9,18 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-09-18 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
|
-
name:
|
16
|
+
name: nokogiri
|
17
17
|
type: :runtime
|
18
18
|
version_requirement:
|
19
19
|
version_requirements: !ruby/object:Gem::Requirement
|
20
20
|
requirements:
|
21
21
|
- - ">="
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version:
|
23
|
+
version: 1.3.3
|
24
24
|
version:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: json
|
@@ -62,6 +62,8 @@ files:
|
|
62
62
|
- lib/calais/response.rb
|
63
63
|
has_rdoc: true
|
64
64
|
homepage: http://github.com/abhay/calais
|
65
|
+
licenses: []
|
66
|
+
|
65
67
|
post_install_message:
|
66
68
|
rdoc_options:
|
67
69
|
- --charset=UTF-8
|
@@ -82,11 +84,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
82
84
|
requirements: []
|
83
85
|
|
84
86
|
rubyforge_project: calais
|
85
|
-
rubygems_version: 1.3.
|
87
|
+
rubygems_version: 1.3.5
|
86
88
|
signing_key:
|
87
89
|
specification_version: 2
|
88
90
|
summary: A Ruby interface to the Calais Web Service
|
89
91
|
test_files:
|
90
|
-
- spec/helper.rb
|
91
|
-
- spec/calais/response_spec.rb
|
92
92
|
- spec/calais/client_spec.rb
|
93
|
+
- spec/calais/response_spec.rb
|
94
|
+
- spec/helper.rb
|