calais 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +11 -0
- data/README.txt +1 -1
- data/lib/calais.rb +14 -5
- data/lib/calais/client.rb +3 -17
- data/lib/calais/response.rb +32 -56
- data/spec/calais_spec.rb +1 -1
- metadata +44 -72
- data.tar.gz.sig +0 -1
- metadata.gz.sig +0 -0
data/History.txt
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
== 0.0.5
|
2
|
+
|
3
|
+
* fixed error where classes weren't being required in the proper order on Ubuntu (reported by Jon Moses)
|
4
|
+
* New things coming back from the API. Fixing in tests.
|
5
|
+
|
6
|
+
== 0.0.4
|
7
|
+
|
8
|
+
* changed dependency from hpricot to libxml
|
9
|
+
* utf fun
|
10
|
+
* cleanup all around
|
11
|
+
|
1
12
|
== 0.0.3
|
2
13
|
|
3
14
|
* pluginized the library for Rails (via pius: http://gitorious.org/projects/calais-au-rails)
|
data/README.txt
CHANGED
data/lib/calais.rb
CHANGED
@@ -1,14 +1,20 @@
|
|
1
1
|
require 'digest/sha1'
|
2
2
|
require 'net/http'
|
3
|
-
require 'yaml'
|
4
3
|
require 'cgi'
|
4
|
+
require 'iconv'
|
5
5
|
|
6
6
|
require 'rubygems'
|
7
|
-
require '
|
7
|
+
require 'xml/libxml'
|
8
8
|
|
9
9
|
$KCODE = "UTF8"
|
10
|
+
require 'jcode'
|
10
11
|
|
11
|
-
|
12
|
+
$:.unshift File.expand_path(File.dirname(__FILE__)) + '/calais'
|
13
|
+
|
14
|
+
require 'name'
|
15
|
+
require 'relationship'
|
16
|
+
require 'response'
|
17
|
+
require 'client'
|
12
18
|
|
13
19
|
module Calais
|
14
20
|
POST_URL = "http://api.opencalais.com"
|
@@ -35,13 +41,16 @@ module Calais
|
|
35
41
|
|
36
42
|
class << self
|
37
43
|
def enlighten(*args, &block) Client.new(*args, &block).call(:enlighten) end
|
44
|
+
|
38
45
|
def process_document(*args, &block)
|
39
46
|
data, error = Calais.enlighten(*args, &block)
|
40
|
-
|
47
|
+
process_data(data, error)
|
41
48
|
end
|
49
|
+
|
50
|
+
def process_data(data, error=nil) Response.new(data, error) end
|
42
51
|
end
|
43
52
|
end
|
44
53
|
|
45
54
|
module Calais
|
46
|
-
VERSION = '0.0.
|
55
|
+
VERSION = '0.0.5'
|
47
56
|
end
|
data/lib/calais/client.rb
CHANGED
@@ -11,37 +11,23 @@ module Calais
|
|
11
11
|
yield(self) if block_given?
|
12
12
|
end
|
13
13
|
|
14
|
-
def call(method
|
14
|
+
def call(method)
|
15
15
|
method = method.intern unless method.is_a?(Symbol)
|
16
16
|
raise ArgumentError.new("Unknown method: #{method}") unless AVAILABLE_METHODS.keys.include? method
|
17
17
|
|
18
18
|
post_args = {
|
19
19
|
"licenseID" => @license_id,
|
20
|
-
"content" => @content,
|
20
|
+
"content" => Iconv.iconv('UTF-8//IGNORE', 'UTF-8', "#{@content} ").first[0..-2],
|
21
21
|
"paramsXML" => params_xml
|
22
22
|
}
|
23
23
|
|
24
24
|
url = URI.parse(POST_URL + AVAILABLE_METHODS[method])
|
25
25
|
resp, data = Net::HTTP.post_form(url, post_args)
|
26
26
|
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
def self.process_data(data, error=nil)
|
31
|
-
Calais::Response.new(data, error)
|
27
|
+
return resp.is_a?(Net::HTTPOK) ? data : [data, "API Error: #{resp}"]
|
32
28
|
end
|
33
29
|
|
34
30
|
private
|
35
|
-
def handle_response(resp, data, method, times)
|
36
|
-
if resp.is_a? Net::HTTPOK
|
37
|
-
[data, nil]
|
38
|
-
elsif times >= MAX_RETRIES
|
39
|
-
[data, "Too many retries: #{times}"]
|
40
|
-
else
|
41
|
-
call(method, times+1)
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
31
|
def params_xml
|
46
32
|
content_type = @content_type && AVAILABLE_CONTENT_TYPES.keys.include?(@content_type) ? AVAILABLE_CONTENT_TYPES[@content_type] : AVAILABLE_CONTENT_TYPES[DEFAULT_CONTENT_TYPE]
|
47
33
|
output_format = @output_format && AVAILABLE_OUTPUT_FORMATS.keys.include?(@output_format) ? AVAILABLE_OUTPUT_FORMATS[@output_format] : AVAILABLE_OUTPUT_FORMATS[DEFAULT_OUTPUT_FORMAT]
|
data/lib/calais/response.rb
CHANGED
@@ -7,15 +7,11 @@ module Calais
|
|
7
7
|
@names = []
|
8
8
|
@relationships = []
|
9
9
|
|
10
|
-
|
10
|
+
parse_raw(raw)
|
11
11
|
return if @error
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
signature_node = h_doc.root.search("//rdf:Description//c:signature//..").remove.first
|
16
|
-
language_node = h_doc.root.search("//rdf:Description//c:lang//..").remove.first
|
17
|
-
h_doc = parse_names(h_doc)
|
18
|
-
h_doc = parse_relationships(h_doc)
|
12
|
+
|
13
|
+
parse_names
|
14
|
+
parse_relationships
|
19
15
|
end
|
20
16
|
|
21
17
|
Name::TYPES.each_pair do |method_name, type|
|
@@ -25,30 +21,23 @@ module Calais
|
|
25
21
|
end
|
26
22
|
|
27
23
|
private
|
28
|
-
def
|
29
|
-
@
|
30
|
-
@
|
31
|
-
@error =
|
24
|
+
def parse_raw(raw)
|
25
|
+
@libxml = XML::Parser.string(XML::Parser.string(raw).parse.root.child.content).parse
|
26
|
+
@rdf = @libxml.to_s
|
27
|
+
@error = @libxml.find("/Error/Exception").first.content rescue @error
|
32
28
|
end
|
33
29
|
|
34
|
-
def parse_names
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
hash = ele.attributes["rdf:about"].split("/").last
|
40
|
-
|
41
|
-
detection_nodes = doc.root.search("//rdf:Description//c:subject//..").collect! do |ele|
|
42
|
-
ele unless ele.at("c:subject").attributes["rdf:resource"].match(hash).nil?
|
43
|
-
end.compact
|
30
|
+
def parse_names
|
31
|
+
@names = @libxml.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '/em/e/')]/..").map do |n|
|
32
|
+
name = n.find_first("c:name").content
|
33
|
+
type = n.find_first("rdf:type").properties.to_a.assoc("resource").last.split('/').last
|
34
|
+
hash = n.properties.to_a.assoc("about").last.split("/").last
|
44
35
|
|
45
|
-
locations =
|
46
|
-
start =
|
47
|
-
Range.new(start, start+
|
36
|
+
locations = @libxml.root.find("rdf:Description/c:subject[contains(@rdf:resource, '#{hash}')]/..").map do |n2|
|
37
|
+
start = n2.find_first("c:offset").content.to_i
|
38
|
+
Range.new(start, start+n2.find_first("c:length").content.to_i)
|
48
39
|
end
|
49
40
|
|
50
|
-
detection_nodes.remove
|
51
|
-
|
52
41
|
Name.new(
|
53
42
|
:name => name,
|
54
43
|
:hash => hash,
|
@@ -56,47 +45,34 @@ module Calais
|
|
56
45
|
:locations => locations
|
57
46
|
)
|
58
47
|
end
|
59
|
-
name_elements.remove
|
60
|
-
|
61
|
-
doc
|
62
48
|
end
|
63
49
|
|
64
|
-
def parse_relationships
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
hash = ele.attributes["rdf:about"].split("/").last
|
70
|
-
type = ele.at("rdf:type").attributes["rdf:resource"].split("/").last
|
50
|
+
def parse_relationships
|
51
|
+
@libxml.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '/em/r')]/..").each do |n|
|
52
|
+
hash = n.properties.to_a.assoc("about").last.split("/").last
|
53
|
+
type = n.find_first("rdf:type").properties.to_a.assoc("resource").last.split('/').last
|
54
|
+
|
71
55
|
metadata = {}
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
else
|
78
|
-
child.inner_html.strip
|
79
|
-
end
|
80
|
-
metadata[child.name.split(":").last] = value
|
56
|
+
|
57
|
+
n.to_a.each do |n2|
|
58
|
+
next if n2.name == "type" or n2.comment?
|
59
|
+
resource = n2.properties.to_a.assoc("resource")
|
60
|
+
metadata[n2.name] = resource ? Name.find_in_names(resource.last.split("/").last, @names) : n2.content.strip
|
81
61
|
end
|
82
62
|
|
83
|
-
locations =
|
84
|
-
|
85
|
-
|
86
|
-
start = ele.at("c:offset").inner_html.to_i
|
87
|
-
Range.new(start, start+ele.at("c:length").inner_html.to_i)
|
63
|
+
locations = @libxml.root.find("rdf:Description/c:subject[contains(@rdf:resource, '#{hash}')]/..").map do |n2|
|
64
|
+
start = n2.find_first("c:offset").content.to_i
|
65
|
+
Range.new(start, start+n2.find_first("c:length").content.to_i)
|
88
66
|
end
|
89
67
|
|
90
|
-
|
68
|
+
|
69
|
+
@relationships << Relationship.new(
|
91
70
|
:type => type,
|
92
71
|
:hash => hash,
|
93
72
|
:metadata => metadata,
|
94
73
|
:locations => locations
|
95
74
|
)
|
96
|
-
end
|
97
|
-
relationship_elements.remove
|
98
|
-
|
99
|
-
doc
|
75
|
+
end
|
100
76
|
end
|
101
77
|
end
|
102
78
|
end
|
data/spec/calais_spec.rb
CHANGED
@@ -52,7 +52,7 @@ describe Calais, ".process_document" do
|
|
52
52
|
it "returns relationships" do
|
53
53
|
@response.relationships.should_not be_nil
|
54
54
|
@response.relationships.should_not be_empty
|
55
|
-
@response.relationships.map {|r| r.type }.should == ["PersonProfessional"]
|
55
|
+
@response.relationships.map {|r| r.type }.should == ["Quotation", "Quotation", "PersonProfessional", "Quotation", "Quotation", "Quotation", "Quotation"]
|
56
56
|
end
|
57
57
|
|
58
58
|
end
|
metadata
CHANGED
@@ -1,57 +1,33 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.2
|
3
|
+
specification_version: 1
|
2
4
|
name: calais
|
3
5
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
5
|
-
|
6
|
-
|
7
|
-
|
6
|
+
version: 0.0.5
|
7
|
+
date: 2008-03-12 00:00:00 -07:00
|
8
|
+
summary: A Ruby interface to the Calais Web Service
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: info@opensynapse.net
|
12
|
+
homepage: http://calais.rubyforge.org
|
13
|
+
rubyforge_project: calais
|
14
|
+
description: "== Features * Accepts documents in text/plain, text/xml and text/html format. * Basic access to the Open Calais API's Enlighten action. * Output is RDF representation of input document. * Single function ability to tag a document and receive a response in RDF format, names in the document, and their relationships. == Synopsis This is a very basic wrapper to the Open Calais API. It uses the POST endpoint and currently supports the Enlighten action. Here's a simple call: Calais.enlighten(:content => \"The government of the United Kingdom has given corporations like fast food chain McDonald's the right to award high school qualifications to employees who complete a company training program.\", :content_type => :text, :license_id => LICENSE_ID) This is the easiest way to get the RDF-formated response from the OpenCalais service. If you want to do something more fun like getting all sorts of fun information about a document, you can try this: Calais.process_document(:content => \"The government of the United Kingdom has given corporations like fast food chain McDonald's the right to award high school qualifications to employees who complete a company training program.\", :content_type => :text, :license_id => LICENSE_ID) This will return an object containing the RDF representation of the text, the names in the text, and any relationships that exist there."
|
8
15
|
autorequire:
|
16
|
+
default_executable:
|
9
17
|
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
10
27
|
cert_chain:
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
MRswGQYKCZImiZPyLGQBGRYLb3BlbnN5bmFwc2UxEzARBgoJkiaJk/IsZAEZFgNu
|
15
|
-
ZXQwHhcNMDgwMjAzMDUwODQzWhcNMDkwMjAyMDUwODQzWjBBMQ0wCwYDVQQDDARp
|
16
|
-
bmZvMRswGQYKCZImiZPyLGQBGRYLb3BlbnN5bmFwc2UxEzARBgoJkiaJk/IsZAEZ
|
17
|
-
FgNuZXQwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCmaA3Od1p42luz
|
18
|
-
zDJepXD3VBFEmmeCUCOjs8rkGIlhRibBvAU8GB0hhkTUykeF6JvAp68FYtIqyTqM
|
19
|
-
EY7bnyYTWsvX7HrX/wGRshSKZPnxn2b0AnZ9T3QQZyUut1YQ5G+kBQrI76hz9ynA
|
20
|
-
l0mPCiGxrh+yUNTKt7KzOAzQbtPlqGiIzj+aYvzmdEsj24Ekm/11A/ntPnz+N/Wj
|
21
|
-
yS5c2tbfZdU8NfwfHCZQUBE4PROYCCjoly0QChvBQzKSZPrEpJB3EedMUyBc5m5E
|
22
|
-
TQ0u5aItr3isQchwo410x7ixzVveVzn4mchaGCZ3ZuPwaQkuI/7KSSWWH1LCouct
|
23
|
-
N7LsWR7jAgMBAAGjOTA3MAkGA1UdEwQCMAAwCwYDVR0PBAQDAgSwMB0GA1UdDgQW
|
24
|
-
BBRsRhkAWj4iWaut121ZcaOAKXG27DANBgkqhkiG9w0BAQUFAAOCAQEAFuqEVgKC
|
25
|
-
U6f237SZ/hzevOwRkaErF1EcaCEVzuNj+KNdbQOK9oOo+hHyos3jUo17TiUNDi+3
|
26
|
-
VJhw3cOkA/PEpa0ou0Vm8VIfXdp6dh62NhTKHBVwQ/qXHnn3aVuV/zIfOmi9WQ+t
|
27
|
-
mr7ehGTw7URly95GOESW4NKQ95p+iquAh/NGhtHGFt+nxjJGUkkYlnGVaxmmgof3
|
28
|
-
sP2hOrejIrD9jAoejiRhiA+IyEoaYJvlh+D+3MngvnyDFqHiFZgngM0fvTnMTsgT
|
29
|
-
avOOKhLsesocjiElkLMv8mwuY+L8P4tSvDTDKXxM9Bx/YagwgzYCqPoGtFdWI/GI
|
30
|
-
+keKvrmaTOJ7CQ==
|
31
|
-
-----END CERTIFICATE-----
|
32
|
-
|
33
|
-
date: 2008-02-07 00:00:00 -08:00
|
34
|
-
default_executable:
|
35
|
-
dependencies:
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: hoe
|
38
|
-
version_requirement:
|
39
|
-
version_requirements: !ruby/object:Gem::Requirement
|
40
|
-
requirements:
|
41
|
-
- - ">="
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
version: 1.5.0
|
44
|
-
version:
|
45
|
-
description: "== Features * Accepts documents in text/plain, text/xml and text/html format. * Basic access to the Open Calais API's Enlighten action. * Output is RDF representation of input document. * Single function ability to tag a document and receive a response in RDF format, names in the document, and their relationships. == Synopsis This is a very basic wrapper to the Open Calais API. It uses the POST endpoint and currently supports the Enlighten action. Here's a simple call: Calais.enlighten(:content => \"The government of the United Kingdom has given corporations like fast food chain McDonald's the right to award high school qualifications to employees who complete a company training program.\", :content_type => :text, :license_id => LICENSE_ID) This is the easiest way to get the RDF-formated response from the OpenCalais service. If you want to do something more fun like getting all sorts of fun information about a document, you can try this: Calais.process_document(:content => \"The government of the United Kingdom has given corporations like fast food chain McDonald's the right to award high school qualifications to employees who complete a company training program.\", :content_type => :text, :license_id => LICENSE_ID) This will return an object containing the RDF representation of the text, the names in the text, and any relationships that exist there."
|
46
|
-
email: info@opensynapse.net
|
47
|
-
executables: []
|
48
|
-
|
49
|
-
extensions: []
|
50
|
-
|
51
|
-
extra_rdoc_files:
|
52
|
-
- History.txt
|
53
|
-
- Manifest.txt
|
54
|
-
- README.txt
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Abhay Kumar
|
55
31
|
files:
|
56
32
|
- History.txt
|
57
33
|
- MIT-LICENSE
|
@@ -71,32 +47,28 @@ files:
|
|
71
47
|
- spec/fixtures/slovenia_euro.xml
|
72
48
|
- spec/helper.rb
|
73
49
|
- spec/spec.opts
|
74
|
-
|
75
|
-
|
76
|
-
post_install_message:
|
50
|
+
test_files: []
|
51
|
+
|
77
52
|
rdoc_options:
|
78
53
|
- --main
|
79
54
|
- README.txt
|
80
|
-
|
81
|
-
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
- !ruby/object:Gem::Version
|
86
|
-
version: "0"
|
87
|
-
version:
|
88
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
-
requirements:
|
90
|
-
- - ">="
|
91
|
-
- !ruby/object:Gem::Version
|
92
|
-
version: "0"
|
93
|
-
version:
|
94
|
-
requirements: []
|
55
|
+
extra_rdoc_files:
|
56
|
+
- History.txt
|
57
|
+
- Manifest.txt
|
58
|
+
- README.txt
|
59
|
+
executables: []
|
95
60
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
specification_version: 2
|
100
|
-
summary: A Ruby interface to the Calais Web Service
|
101
|
-
test_files: []
|
61
|
+
extensions: []
|
62
|
+
|
63
|
+
requirements: []
|
102
64
|
|
65
|
+
dependencies:
|
66
|
+
- !ruby/object:Gem::Dependency
|
67
|
+
name: hoe
|
68
|
+
version_requirement:
|
69
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: 1.4.0
|
74
|
+
version:
|
data.tar.gz.sig
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
��i/>�[ɵ:��m��<����
|
metadata.gz.sig
DELETED
Binary file
|