calais 0.0.7 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.markdown CHANGED
@@ -32,7 +32,7 @@ This will return an object containing information extracted from the RDF respons
32
32
  ## Requirements ##
33
33
 
34
34
  * [Ruby 1.8.5 or better](http://ruby-lang.org)
35
- * [libxml-ruby](http://libxml.rubyforge.org/), [libxml2](http://xmlsoft.org/)
35
+ * [nokogiri](http://nokogiri.rubyforge.org/nokogiri/), [libxml2](http://xmlsoft.org/), [libxslt](http://xmlsoft.org/xslt/)
36
36
  * [curb](http://curb.rubyforge.org/), [libcurl](http://curl.haxx.se/)
37
37
  * [json](http://json.rubyforge.org/)
38
38
 
@@ -46,4 +46,4 @@ You can install the Calais gem via Rubygems (`gem install calais`) or by buildin
46
46
 
47
47
  ## Acknowledgements ##
48
48
 
49
- * [Paul Legato](http://www.economaton.com/): Help all around with the new response processor and implementation of the 3.1 API.
49
+ * [Paul Legato](http://www.economaton.com/): Help all around with the new response processor and implementation of the 3.1 API.
data/Rakefile CHANGED
@@ -18,7 +18,7 @@ begin
18
18
  s.authors = ['Abhay Kumar']
19
19
  s.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*"]
20
20
  s.rubyforge_project = 'calais'
21
- s.add_dependency 'libxml-ruby', '>= 0.5.4'
21
+ s.add_dependency 'nokogiri', '>= 1.3.3'
22
22
  s.add_dependency 'json', '>= 1.1.3'
23
23
  s.add_dependency 'curb', '>= 0.1.4'
24
24
  end
data/VERSION.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  ---
2
- :patch: 7
3
- :major: 0
4
2
  :minor: 0
3
+ :patch: 8
4
+ :major: 0
data/lib/calais.rb CHANGED
@@ -5,7 +5,7 @@ require 'iconv'
5
5
  require 'set'
6
6
 
7
7
  require 'rubygems'
8
- require 'xml/libxml'
8
+ require 'nokogiri'
9
9
  require 'json'
10
10
  require 'curb'
11
11
 
data/lib/calais/client.rb CHANGED
@@ -38,36 +38,37 @@ module Calais
38
38
 
39
39
  def params_xml
40
40
  check_params
41
-
42
- params_node = XML::Node.new('c:params')
41
+ document = Nokogiri::XML::Document.new
42
+
43
+ params_node = Nokogiri::XML::Node.new('c:params', document)
43
44
  params_node['xmlns:c'] = 'http://s.opencalais.com/1/pred/'
44
45
  params_node['xmlns:rdf'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
45
-
46
- processing_node = XML::Node.new('c:processingDirectives')
46
+
47
+ processing_node = Nokogiri::XML::Node.new('c:processingDirectives', document)
47
48
  processing_node['c:contentType'] = AVAILABLE_CONTENT_TYPES[@content_type] if @content_type
48
49
  processing_node['c:outputFormat'] = AVAILABLE_OUTPUT_FORMATS[@output_format] if @output_format
49
50
  processing_node['c:reltagBaseURL'] = @reltag_base_url.to_s if @reltag_base_url
50
-
51
+
51
52
  processing_node['c:enableMetadataType'] = @metadata_enables.join(';') unless @metadata_enables.empty?
52
53
  processing_node['c:discardMetadata'] = @metadata_discards.join(';') unless @metadata_discards.empty?
53
54
  processing_node['c:omitOutputtingOriginalText'] = 'true' if @omit_outputting_original_text
54
-
55
- user_node = XML::Node.new('c:userDirectives')
55
+
56
+ user_node = Nokogiri::XML::Node.new('c:userDirectives', document)
56
57
  user_node['c:allowDistribution'] = @allow_distribution.to_s unless @allow_distribution.nil?
57
58
  user_node['c:allowSearch'] = @allow_search.to_s unless @allow_search.nil?
58
59
  user_node['c:externalID'] = @external_id.to_s if @external_id
59
60
  user_node['c:submitter'] = @submitter.to_s if @submitter
60
-
61
+
61
62
  params_node << processing_node
62
63
  params_node << user_node
63
-
64
+
64
65
  if @external_metadata
65
- external_node = XML::Node.new('c:externalMetadata')
66
+ external_node = Nokogiri::XML::Node.new('c:externalMetadata', document)
66
67
  external_node << @external_metadata
67
68
  params_node << external_node
68
69
  end
69
-
70
- params_node.to_s
70
+
71
+ params_node.to_xml(:indent => 2)
71
72
  end
72
73
 
73
74
  private
@@ -47,14 +47,14 @@ module Calais
47
47
  class Instance
48
48
  attr_accessor :prefix, :exact, :suffix, :offset, :length
49
49
 
50
- # Makes a new Instance object from an appropriate LibXML::XML::Node.
50
+ # Makes a new Instance object from an appropriate Nokogiri::XML::Node.
51
51
  def self.from_node(node)
52
52
  instance = self.new
53
- instance.prefix = node.find_first("c:prefix").content
54
- instance.exact = node.find_first("c:exact").content
55
- instance.suffix = node.find_first("c:suffix").content
56
- instance.offset = node.find_first("c:offset").content.to_i
57
- instance.length = node.find_first("c:length").content.to_i
53
+ instance.prefix = node.xpath("c:prefix[1]").first.content
54
+ instance.exact = node.xpath("c:exact[1]").first.content
55
+ instance.suffix = node.xpath("c:suffix[1]").first.content
56
+ instance.offset = node.xpath("c:offset[1]").first.content.to_i
57
+ instance.length = node.xpath("c:length[1]").first.content.to_i
58
58
 
59
59
  instance
60
60
  end
@@ -76,112 +76,112 @@ module Calais
76
76
 
77
77
  private
78
78
  def extract_data
79
- doc = XML::Parser.string(@raw_response).parse
79
+ doc = Nokogiri::XML(@raw_response)
80
80
 
81
- if doc.root.find("/Error").first
82
- raise Calais::Error, doc.root.find("/Error/Exception").first.content
81
+ if doc.root.xpath("/Error[1]").first
82
+ raise Calais::Error, doc.root.xpath("/Error/Exception").first.content
83
83
  end
84
84
 
85
- doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfometa]}')]/..").each do |node|
85
+ doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfometa]}')]/..").each do |node|
86
86
  @language = node['language']
87
87
  @submission_date = DateTime.parse node['submissionDate']
88
88
 
89
- attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
89
+ attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
90
90
 
91
91
  @signature = attributes.delete('signature')
92
92
  @submitter_code = attributes.delete('submitterCode')
93
93
 
94
- node.remove!
94
+ node.remove
95
95
  end
96
96
 
97
- doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfo]}')]/..").each do |node|
97
+ doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfo]}')]/..").each do |node|
98
98
  @request_id = node['calaisRequestID']
99
99
 
100
- attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
100
+ attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
101
101
 
102
102
  @doc_title = attributes.delete('docTitle')
103
- @doc_date = Date.parse attributes.delete('docDate')
103
+ @doc_date = Date.parse(attributes.delete('docDate'))
104
104
 
105
- node.remove!
105
+ node.remove
106
106
  end
107
107
 
108
- @categories = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:doccat]}')]/..").map do |node|
108
+ @categories = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:doccat]}')]/..").map do |node|
109
109
  category = Category.new
110
- category.name = node.find_first("c:categoryName").content
111
- score = node.find_first("c:score")
110
+ category.name = node.xpath("c:categoryName[1]").first.content
111
+ score = node.xpath("c:score[1]").first
112
112
  category.score = score.content.to_f unless score.nil?
113
113
 
114
- node.remove!
114
+ node.remove
115
115
  category
116
116
  end
117
117
 
118
- @relevances = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) do |acc, node|
119
- subject_hash = node.find_first("c:subject")[:resource].split('/')[-1]
120
- acc[subject_hash] = node.find_first("c:relevance").content.to_f
118
+ @relevances = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) do |acc, node|
119
+ subject_hash = node.xpath("c:subject[1]").first[:resource].split('/')[-1]
120
+ acc[subject_hash] = node.xpath("c:relevance[1]").first.content.to_f
121
121
 
122
- node.remove!
122
+ node.remove
123
123
  acc
124
124
  end
125
125
 
126
- @entities = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node|
126
+ @entities = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node|
127
127
  extracted_hash = node['about'].split('/')[-1] rescue nil
128
128
 
129
129
  entity = Entity.new
130
130
  entity.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes)
131
131
  entity.type = extract_type(node)
132
- entity.attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
132
+ entity.attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
133
133
 
134
134
  entity.relevance = @relevances[extracted_hash]
135
135
  entity.instances = extract_instances(doc, extracted_hash)
136
136
 
137
- node.remove!
137
+ node.remove
138
138
  entity
139
139
  end
140
140
 
141
- @relations = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node|
141
+ @relations = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node|
142
142
  extracted_hash = node['about'].split('/')[-1] rescue nil
143
143
 
144
144
  relation = Relation.new
145
145
  relation.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes)
146
146
  relation.type = extract_type(node)
147
- relation.attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
147
+ relation.attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
148
148
  relation.instances = extract_instances(doc, extracted_hash)
149
149
 
150
- node.remove!
150
+ node.remove
151
151
  relation
152
152
  end
153
153
 
154
- @geographies = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:geographies]}')]/..").map do |node|
155
- attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
154
+ @geographies = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:geographies]}')]/..").map do |node|
155
+ attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
156
156
 
157
157
  geography = Geography.new
158
158
  geography.name = attributes.delete('name')
159
159
  geography.calais_hash = attributes.delete('subject')
160
160
  geography.attributes = attributes
161
161
 
162
- node.remove!
162
+ node.remove
163
163
  geography
164
164
  end
165
165
 
166
- doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:defaultlangid]}')]/..").each { |node| node.remove! }
167
- doc.root.find("./*").each { |node| node.remove! }
166
+ doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:defaultlangid]}')]/..").each { |node| node.remove }
167
+ doc.root.xpath("./*").each { |node| node.remove }
168
168
 
169
169
  return
170
170
  end
171
171
 
172
172
  def extract_instances(doc, hash)
173
- doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:instances]}')]/..").select do |instance_node|
174
- instance_node.find_first("c:subject")[:resource].split("/")[-1] == hash
173
+ doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:instances]}')]/..").select do |instance_node|
174
+ instance_node.xpath("c:subject[1]").first[:resource].split("/")[-1] == hash
175
175
  end.map do |instance_node|
176
176
  instance = Instance.from_node(instance_node)
177
- instance_node.remove!
177
+ instance_node.remove
178
178
 
179
179
  instance
180
180
  end
181
181
  end
182
182
 
183
183
  def extract_type(node)
184
- node.find("*[name()='rdf:type']")[0]['resource'].split('/')[-1]
184
+ node.xpath("*[name()='rdf:type']")[0]['resource'].split('/')[-1]
185
185
  rescue
186
186
  nil
187
187
  end
@@ -32,7 +32,7 @@ end
32
32
  describe Calais::Client, :params_xml do
33
33
  it 'returns an xml encoded string' do
34
34
  client = Calais::Client.new(:content => SAMPLE_DOCUMENT, :license_id => LICENSE_ID)
35
- client.params_xml.should == %[<c:params xmlns:c="http://s.opencalais.com/1/pred/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">\n <c:processingDirectives/>\n <c:userDirectives/>\n</c:params>]
35
+ client.params_xml.should == %[<c:params xmlns:c=\"http://s.opencalais.com/1/pred/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n <c:processingDirectives/>\n <c:userDirectives/>\n</c:params>]
36
36
 
37
37
  client.content_type = :xml
38
38
  client.output_format = :json
@@ -76,4 +76,4 @@ describe Calais::Client, :enlighten do
76
76
  @client.enlighten
77
77
  @client.instance_variable_get(:@client).url.should == Calais::BETA_REST_ENDPOINT
78
78
  end
79
- end
79
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: calais
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Abhay Kumar
@@ -9,18 +9,18 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-08 00:00:00 -05:00
12
+ date: 2009-09-18 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
- name: libxml-ruby
16
+ name: nokogiri
17
17
  type: :runtime
18
18
  version_requirement:
19
19
  version_requirements: !ruby/object:Gem::Requirement
20
20
  requirements:
21
21
  - - ">="
22
22
  - !ruby/object:Gem::Version
23
- version: 0.5.4
23
+ version: 1.3.3
24
24
  version:
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: json
@@ -62,6 +62,8 @@ files:
62
62
  - lib/calais/response.rb
63
63
  has_rdoc: true
64
64
  homepage: http://github.com/abhay/calais
65
+ licenses: []
66
+
65
67
  post_install_message:
66
68
  rdoc_options:
67
69
  - --charset=UTF-8
@@ -82,11 +84,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
82
84
  requirements: []
83
85
 
84
86
  rubyforge_project: calais
85
- rubygems_version: 1.3.1
87
+ rubygems_version: 1.3.5
86
88
  signing_key:
87
89
  specification_version: 2
88
90
  summary: A Ruby interface to the Calais Web Service
89
91
  test_files:
90
- - spec/helper.rb
91
- - spec/calais/response_spec.rb
92
92
  - spec/calais/client_spec.rb
93
+ - spec/calais/response_spec.rb
94
+ - spec/helper.rb