calais 0.0.7 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
data/README.markdown CHANGED
@@ -32,7 +32,7 @@ This will return an object containing information extracted from the RDF respons
32
32
  ## Requirements ##
33
33
 
34
34
  * [Ruby 1.8.5 or better](http://ruby-lang.org)
35
- * [libxml-ruby](http://libxml.rubyforge.org/), [libxml2](http://xmlsoft.org/)
35
+ * [nokogiri](http://nokogiri.rubyforge.org/nokogiri/), [libxml2](http://xmlsoft.org/), [libxslt](http://xmlsoft.org/xslt/)
36
36
  * [curb](http://curb.rubyforge.org/), [libcurl](http://curl.haxx.se/)
37
37
  * [json](http://json.rubyforge.org/)
38
38
 
@@ -46,4 +46,4 @@ You can install the Calais gem via Rubygems (`gem install calais`) or by buildin
46
46
 
47
47
  ## Acknowledgements ##
48
48
 
49
- * [Paul Legato](http://www.economaton.com/): Help all around with the new response processor and implementation of the 3.1 API.
49
+ * [Paul Legato](http://www.economaton.com/): Help all around with the new response processor and implementation of the 3.1 API.
data/Rakefile CHANGED
@@ -18,7 +18,7 @@ begin
18
18
  s.authors = ['Abhay Kumar']
19
19
  s.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*"]
20
20
  s.rubyforge_project = 'calais'
21
- s.add_dependency 'libxml-ruby', '>= 0.5.4'
21
+ s.add_dependency 'nokogiri', '>= 1.3.3'
22
22
  s.add_dependency 'json', '>= 1.1.3'
23
23
  s.add_dependency 'curb', '>= 0.1.4'
24
24
  end
data/VERSION.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  ---
2
- :patch: 7
3
- :major: 0
4
2
  :minor: 0
3
+ :patch: 8
4
+ :major: 0
data/lib/calais.rb CHANGED
@@ -5,7 +5,7 @@ require 'iconv'
5
5
  require 'set'
6
6
 
7
7
  require 'rubygems'
8
- require 'xml/libxml'
8
+ require 'nokogiri'
9
9
  require 'json'
10
10
  require 'curb'
11
11
 
data/lib/calais/client.rb CHANGED
@@ -38,36 +38,37 @@ module Calais
38
38
 
39
39
  def params_xml
40
40
  check_params
41
-
42
- params_node = XML::Node.new('c:params')
41
+ document = Nokogiri::XML::Document.new
42
+
43
+ params_node = Nokogiri::XML::Node.new('c:params', document)
43
44
  params_node['xmlns:c'] = 'http://s.opencalais.com/1/pred/'
44
45
  params_node['xmlns:rdf'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
45
-
46
- processing_node = XML::Node.new('c:processingDirectives')
46
+
47
+ processing_node = Nokogiri::XML::Node.new('c:processingDirectives', document)
47
48
  processing_node['c:contentType'] = AVAILABLE_CONTENT_TYPES[@content_type] if @content_type
48
49
  processing_node['c:outputFormat'] = AVAILABLE_OUTPUT_FORMATS[@output_format] if @output_format
49
50
  processing_node['c:reltagBaseURL'] = @reltag_base_url.to_s if @reltag_base_url
50
-
51
+
51
52
  processing_node['c:enableMetadataType'] = @metadata_enables.join(';') unless @metadata_enables.empty?
52
53
  processing_node['c:discardMetadata'] = @metadata_discards.join(';') unless @metadata_discards.empty?
53
54
  processing_node['c:omitOutputtingOriginalText'] = 'true' if @omit_outputting_original_text
54
-
55
- user_node = XML::Node.new('c:userDirectives')
55
+
56
+ user_node = Nokogiri::XML::Node.new('c:userDirectives', document)
56
57
  user_node['c:allowDistribution'] = @allow_distribution.to_s unless @allow_distribution.nil?
57
58
  user_node['c:allowSearch'] = @allow_search.to_s unless @allow_search.nil?
58
59
  user_node['c:externalID'] = @external_id.to_s if @external_id
59
60
  user_node['c:submitter'] = @submitter.to_s if @submitter
60
-
61
+
61
62
  params_node << processing_node
62
63
  params_node << user_node
63
-
64
+
64
65
  if @external_metadata
65
- external_node = XML::Node.new('c:externalMetadata')
66
+ external_node = Nokogiri::XML::Node.new('c:externalMetadata', document)
66
67
  external_node << @external_metadata
67
68
  params_node << external_node
68
69
  end
69
-
70
- params_node.to_s
70
+
71
+ params_node.to_xml(:indent => 2)
71
72
  end
72
73
 
73
74
  private
@@ -47,14 +47,14 @@ module Calais
47
47
  class Instance
48
48
  attr_accessor :prefix, :exact, :suffix, :offset, :length
49
49
 
50
- # Makes a new Instance object from an appropriate LibXML::XML::Node.
50
+ # Makes a new Instance object from an appropriate Nokogiri::XML::Node.
51
51
  def self.from_node(node)
52
52
  instance = self.new
53
- instance.prefix = node.find_first("c:prefix").content
54
- instance.exact = node.find_first("c:exact").content
55
- instance.suffix = node.find_first("c:suffix").content
56
- instance.offset = node.find_first("c:offset").content.to_i
57
- instance.length = node.find_first("c:length").content.to_i
53
+ instance.prefix = node.xpath("c:prefix[1]").first.content
54
+ instance.exact = node.xpath("c:exact[1]").first.content
55
+ instance.suffix = node.xpath("c:suffix[1]").first.content
56
+ instance.offset = node.xpath("c:offset[1]").first.content.to_i
57
+ instance.length = node.xpath("c:length[1]").first.content.to_i
58
58
 
59
59
  instance
60
60
  end
@@ -76,112 +76,112 @@ module Calais
76
76
 
77
77
  private
78
78
  def extract_data
79
- doc = XML::Parser.string(@raw_response).parse
79
+ doc = Nokogiri::XML(@raw_response)
80
80
 
81
- if doc.root.find("/Error").first
82
- raise Calais::Error, doc.root.find("/Error/Exception").first.content
81
+ if doc.root.xpath("/Error[1]").first
82
+ raise Calais::Error, doc.root.xpath("/Error/Exception").first.content
83
83
  end
84
84
 
85
- doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfometa]}')]/..").each do |node|
85
+ doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfometa]}')]/..").each do |node|
86
86
  @language = node['language']
87
87
  @submission_date = DateTime.parse node['submissionDate']
88
88
 
89
- attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
89
+ attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
90
90
 
91
91
  @signature = attributes.delete('signature')
92
92
  @submitter_code = attributes.delete('submitterCode')
93
93
 
94
- node.remove!
94
+ node.remove
95
95
  end
96
96
 
97
- doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfo]}')]/..").each do |node|
97
+ doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfo]}')]/..").each do |node|
98
98
  @request_id = node['calaisRequestID']
99
99
 
100
- attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
100
+ attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
101
101
 
102
102
  @doc_title = attributes.delete('docTitle')
103
- @doc_date = Date.parse attributes.delete('docDate')
103
+ @doc_date = Date.parse(attributes.delete('docDate'))
104
104
 
105
- node.remove!
105
+ node.remove
106
106
  end
107
107
 
108
- @categories = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:doccat]}')]/..").map do |node|
108
+ @categories = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:doccat]}')]/..").map do |node|
109
109
  category = Category.new
110
- category.name = node.find_first("c:categoryName").content
111
- score = node.find_first("c:score")
110
+ category.name = node.xpath("c:categoryName[1]").first.content
111
+ score = node.xpath("c:score[1]").first
112
112
  category.score = score.content.to_f unless score.nil?
113
113
 
114
- node.remove!
114
+ node.remove
115
115
  category
116
116
  end
117
117
 
118
- @relevances = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) do |acc, node|
119
- subject_hash = node.find_first("c:subject")[:resource].split('/')[-1]
120
- acc[subject_hash] = node.find_first("c:relevance").content.to_f
118
+ @relevances = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) do |acc, node|
119
+ subject_hash = node.xpath("c:subject[1]").first[:resource].split('/')[-1]
120
+ acc[subject_hash] = node.xpath("c:relevance[1]").first.content.to_f
121
121
 
122
- node.remove!
122
+ node.remove
123
123
  acc
124
124
  end
125
125
 
126
- @entities = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node|
126
+ @entities = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node|
127
127
  extracted_hash = node['about'].split('/')[-1] rescue nil
128
128
 
129
129
  entity = Entity.new
130
130
  entity.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes)
131
131
  entity.type = extract_type(node)
132
- entity.attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
132
+ entity.attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
133
133
 
134
134
  entity.relevance = @relevances[extracted_hash]
135
135
  entity.instances = extract_instances(doc, extracted_hash)
136
136
 
137
- node.remove!
137
+ node.remove
138
138
  entity
139
139
  end
140
140
 
141
- @relations = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node|
141
+ @relations = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node|
142
142
  extracted_hash = node['about'].split('/')[-1] rescue nil
143
143
 
144
144
  relation = Relation.new
145
145
  relation.calais_hash = CalaisHash.find_or_create(extracted_hash, @hashes)
146
146
  relation.type = extract_type(node)
147
- relation.attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
147
+ relation.attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
148
148
  relation.instances = extract_instances(doc, extracted_hash)
149
149
 
150
- node.remove!
150
+ node.remove
151
151
  relation
152
152
  end
153
153
 
154
- @geographies = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:geographies]}')]/..").map do |node|
155
- attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
154
+ @geographies = doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:geographies]}')]/..").map do |node|
155
+ attributes = extract_attributes(node.xpath("*[contains(name(), 'c:')]"))
156
156
 
157
157
  geography = Geography.new
158
158
  geography.name = attributes.delete('name')
159
159
  geography.calais_hash = attributes.delete('subject')
160
160
  geography.attributes = attributes
161
161
 
162
- node.remove!
162
+ node.remove
163
163
  geography
164
164
  end
165
165
 
166
- doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:defaultlangid]}')]/..").each { |node| node.remove! }
167
- doc.root.find("./*").each { |node| node.remove! }
166
+ doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:defaultlangid]}')]/..").each { |node| node.remove }
167
+ doc.root.xpath("./*").each { |node| node.remove }
168
168
 
169
169
  return
170
170
  end
171
171
 
172
172
  def extract_instances(doc, hash)
173
- doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:instances]}')]/..").select do |instance_node|
174
- instance_node.find_first("c:subject")[:resource].split("/")[-1] == hash
173
+ doc.root.xpath("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:instances]}')]/..").select do |instance_node|
174
+ instance_node.xpath("c:subject[1]").first[:resource].split("/")[-1] == hash
175
175
  end.map do |instance_node|
176
176
  instance = Instance.from_node(instance_node)
177
- instance_node.remove!
177
+ instance_node.remove
178
178
 
179
179
  instance
180
180
  end
181
181
  end
182
182
 
183
183
  def extract_type(node)
184
- node.find("*[name()='rdf:type']")[0]['resource'].split('/')[-1]
184
+ node.xpath("*[name()='rdf:type']")[0]['resource'].split('/')[-1]
185
185
  rescue
186
186
  nil
187
187
  end
@@ -32,7 +32,7 @@ end
32
32
  describe Calais::Client, :params_xml do
33
33
  it 'returns an xml encoded string' do
34
34
  client = Calais::Client.new(:content => SAMPLE_DOCUMENT, :license_id => LICENSE_ID)
35
- client.params_xml.should == %[<c:params xmlns:c="http://s.opencalais.com/1/pred/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">\n <c:processingDirectives/>\n <c:userDirectives/>\n</c:params>]
35
+ client.params_xml.should == %[<c:params xmlns:c=\"http://s.opencalais.com/1/pred/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n <c:processingDirectives/>\n <c:userDirectives/>\n</c:params>]
36
36
 
37
37
  client.content_type = :xml
38
38
  client.output_format = :json
@@ -76,4 +76,4 @@ describe Calais::Client, :enlighten do
76
76
  @client.enlighten
77
77
  @client.instance_variable_get(:@client).url.should == Calais::BETA_REST_ENDPOINT
78
78
  end
79
- end
79
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: calais
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.7
4
+ version: 0.0.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Abhay Kumar
@@ -9,18 +9,18 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-06-08 00:00:00 -05:00
12
+ date: 2009-09-18 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
- name: libxml-ruby
16
+ name: nokogiri
17
17
  type: :runtime
18
18
  version_requirement:
19
19
  version_requirements: !ruby/object:Gem::Requirement
20
20
  requirements:
21
21
  - - ">="
22
22
  - !ruby/object:Gem::Version
23
- version: 0.5.4
23
+ version: 1.3.3
24
24
  version:
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: json
@@ -62,6 +62,8 @@ files:
62
62
  - lib/calais/response.rb
63
63
  has_rdoc: true
64
64
  homepage: http://github.com/abhay/calais
65
+ licenses: []
66
+
65
67
  post_install_message:
66
68
  rdoc_options:
67
69
  - --charset=UTF-8
@@ -82,11 +84,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
82
84
  requirements: []
83
85
 
84
86
  rubyforge_project: calais
85
- rubygems_version: 1.3.1
87
+ rubygems_version: 1.3.5
86
88
  signing_key:
87
89
  specification_version: 2
88
90
  summary: A Ruby interface to the Calais Web Service
89
91
  test_files:
90
- - spec/helper.rb
91
- - spec/calais/response_spec.rb
92
92
  - spec/calais/client_spec.rb
93
+ - spec/calais/response_spec.rb
94
+ - spec/helper.rb