calais 0.0.3 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +11 -0
- data/README.txt +1 -1
- data/lib/calais.rb +14 -5
- data/lib/calais/client.rb +3 -17
- data/lib/calais/response.rb +32 -56
- data/spec/calais_spec.rb +1 -1
- metadata +44 -72
- data.tar.gz.sig +0 -1
- metadata.gz.sig +0 -0
data/History.txt
CHANGED
@@ -1,3 +1,14 @@
|
|
1
|
+
== 0.0.5
|
2
|
+
|
3
|
+
* fixed error where classes weren't being required in the proper order on Ubuntu (reported by Jon Moses)
|
4
|
+
* New things coming back from the API. Fixing in tests.
|
5
|
+
|
6
|
+
== 0.0.4
|
7
|
+
|
8
|
+
* changed dependency from hpricot to libxml
|
9
|
+
* utf fun
|
10
|
+
* cleanup all around
|
11
|
+
|
1
12
|
== 0.0.3
|
2
13
|
|
3
14
|
* pluginized the library for Rails (via pius: http://gitorious.org/projects/calais-au-rails)
|
data/README.txt
CHANGED
data/lib/calais.rb
CHANGED
@@ -1,14 +1,20 @@
|
|
1
1
|
require 'digest/sha1'
|
2
2
|
require 'net/http'
|
3
|
-
require 'yaml'
|
4
3
|
require 'cgi'
|
4
|
+
require 'iconv'
|
5
5
|
|
6
6
|
require 'rubygems'
|
7
|
-
require 'hpricot'
|
7
|
+
require 'xml/libxml'
|
8
8
|
|
9
9
|
$KCODE = "UTF8"
|
10
|
+
require 'jcode'
|
10
11
|
|
11
|
-
|
12
|
+
$:.unshift File.expand_path(File.dirname(__FILE__)) + '/calais'
|
13
|
+
|
14
|
+
require 'name'
|
15
|
+
require 'relationship'
|
16
|
+
require 'response'
|
17
|
+
require 'client'
|
12
18
|
|
13
19
|
module Calais
|
14
20
|
POST_URL = "http://api.opencalais.com"
|
@@ -35,13 +41,16 @@ module Calais
|
|
35
41
|
|
36
42
|
class << self
|
37
43
|
def enlighten(*args, &block) Client.new(*args, &block).call(:enlighten) end
|
44
|
+
|
38
45
|
def process_document(*args, &block)
|
39
46
|
data, error = Calais.enlighten(*args, &block)
|
40
|
-
|
47
|
+
process_data(data, error)
|
41
48
|
end
|
49
|
+
|
50
|
+
def process_data(data, error=nil) Response.new(data, error) end
|
42
51
|
end
|
43
52
|
end
|
44
53
|
|
45
54
|
module Calais
|
46
|
-
VERSION = '0.0.3'
|
55
|
+
VERSION = '0.0.5'
|
47
56
|
end
|
data/lib/calais/client.rb
CHANGED
@@ -11,37 +11,23 @@ module Calais
|
|
11
11
|
yield(self) if block_given?
|
12
12
|
end
|
13
13
|
|
14
|
-
def call(method
|
14
|
+
def call(method)
|
15
15
|
method = method.intern unless method.is_a?(Symbol)
|
16
16
|
raise ArgumentError.new("Unknown method: #{method}") unless AVAILABLE_METHODS.keys.include? method
|
17
17
|
|
18
18
|
post_args = {
|
19
19
|
"licenseID" => @license_id,
|
20
|
-
"content" => @content,
|
20
|
+
"content" => Iconv.iconv('UTF-8//IGNORE', 'UTF-8', "#{@content} ").first[0..-2],
|
21
21
|
"paramsXML" => params_xml
|
22
22
|
}
|
23
23
|
|
24
24
|
url = URI.parse(POST_URL + AVAILABLE_METHODS[method])
|
25
25
|
resp, data = Net::HTTP.post_form(url, post_args)
|
26
26
|
|
27
|
-
|
28
|
-
end
|
29
|
-
|
30
|
-
def self.process_data(data, error=nil)
|
31
|
-
Calais::Response.new(data, error)
|
27
|
+
return resp.is_a?(Net::HTTPOK) ? data : [data, "API Error: #{resp}"]
|
32
28
|
end
|
33
29
|
|
34
30
|
private
|
35
|
-
def handle_response(resp, data, method, times)
|
36
|
-
if resp.is_a? Net::HTTPOK
|
37
|
-
[data, nil]
|
38
|
-
elsif times >= MAX_RETRIES
|
39
|
-
[data, "Too many retries: #{times}"]
|
40
|
-
else
|
41
|
-
call(method, times+1)
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
31
|
def params_xml
|
46
32
|
content_type = @content_type && AVAILABLE_CONTENT_TYPES.keys.include?(@content_type) ? AVAILABLE_CONTENT_TYPES[@content_type] : AVAILABLE_CONTENT_TYPES[DEFAULT_CONTENT_TYPE]
|
47
33
|
output_format = @output_format && AVAILABLE_OUTPUT_FORMATS.keys.include?(@output_format) ? AVAILABLE_OUTPUT_FORMATS[@output_format] : AVAILABLE_OUTPUT_FORMATS[DEFAULT_OUTPUT_FORMAT]
|
data/lib/calais/response.rb
CHANGED
@@ -7,15 +7,11 @@ module Calais
|
|
7
7
|
@names = []
|
8
8
|
@relationships = []
|
9
9
|
|
10
|
-
|
10
|
+
parse_raw(raw)
|
11
11
|
return if @error
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
signature_node = h_doc.root.search("//rdf:Description//c:signature//..").remove.first
|
16
|
-
language_node = h_doc.root.search("//rdf:Description//c:lang//..").remove.first
|
17
|
-
h_doc = parse_names(h_doc)
|
18
|
-
h_doc = parse_relationships(h_doc)
|
12
|
+
|
13
|
+
parse_names
|
14
|
+
parse_relationships
|
19
15
|
end
|
20
16
|
|
21
17
|
Name::TYPES.each_pair do |method_name, type|
|
@@ -25,30 +21,23 @@ module Calais
|
|
25
21
|
end
|
26
22
|
|
27
23
|
private
|
28
|
-
def
|
29
|
-
@
|
30
|
-
@
|
31
|
-
@error =
|
24
|
+
def parse_raw(raw)
|
25
|
+
@libxml = XML::Parser.string(XML::Parser.string(raw).parse.root.child.content).parse
|
26
|
+
@rdf = @libxml.to_s
|
27
|
+
@error = @libxml.find("/Error/Exception").first.content rescue @error
|
32
28
|
end
|
33
29
|
|
34
|
-
def parse_names
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
hash = ele.attributes["rdf:about"].split("/").last
|
40
|
-
|
41
|
-
detection_nodes = doc.root.search("//rdf:Description//c:subject//..").collect! do |ele|
|
42
|
-
ele unless ele.at("c:subject").attributes["rdf:resource"].match(hash).nil?
|
43
|
-
end.compact
|
30
|
+
def parse_names
|
31
|
+
@names = @libxml.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '/em/e/')]/..").map do |n|
|
32
|
+
name = n.find_first("c:name").content
|
33
|
+
type = n.find_first("rdf:type").properties.to_a.assoc("resource").last.split('/').last
|
34
|
+
hash = n.properties.to_a.assoc("about").last.split("/").last
|
44
35
|
|
45
|
-
locations =
|
46
|
-
start =
|
47
|
-
Range.new(start, start+
|
36
|
+
locations = @libxml.root.find("rdf:Description/c:subject[contains(@rdf:resource, '#{hash}')]/..").map do |n2|
|
37
|
+
start = n2.find_first("c:offset").content.to_i
|
38
|
+
Range.new(start, start+n2.find_first("c:length").content.to_i)
|
48
39
|
end
|
49
40
|
|
50
|
-
detection_nodes.remove
|
51
|
-
|
52
41
|
Name.new(
|
53
42
|
:name => name,
|
54
43
|
:hash => hash,
|
@@ -56,47 +45,34 @@ module Calais
|
|
56
45
|
:locations => locations
|
57
46
|
)
|
58
47
|
end
|
59
|
-
name_elements.remove
|
60
|
-
|
61
|
-
doc
|
62
48
|
end
|
63
49
|
|
64
|
-
def parse_relationships
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
hash = ele.attributes["rdf:about"].split("/").last
|
70
|
-
type = ele.at("rdf:type").attributes["rdf:resource"].split("/").last
|
50
|
+
def parse_relationships
|
51
|
+
@libxml.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '/em/r')]/..").each do |n|
|
52
|
+
hash = n.properties.to_a.assoc("about").last.split("/").last
|
53
|
+
type = n.find_first("rdf:type").properties.to_a.assoc("resource").last.split('/').last
|
54
|
+
|
71
55
|
metadata = {}
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
else
|
78
|
-
child.inner_html.strip
|
79
|
-
end
|
80
|
-
metadata[child.name.split(":").last] = value
|
56
|
+
|
57
|
+
n.to_a.each do |n2|
|
58
|
+
next if n2.name == "type" or n2.comment?
|
59
|
+
resource = n2.properties.to_a.assoc("resource")
|
60
|
+
metadata[n2.name] = resource ? Name.find_in_names(resource.last.split("/").last, @names) : n2.content.strip
|
81
61
|
end
|
82
62
|
|
83
|
-
locations =
|
84
|
-
|
85
|
-
|
86
|
-
start = ele.at("c:offset").inner_html.to_i
|
87
|
-
Range.new(start, start+ele.at("c:length").inner_html.to_i)
|
63
|
+
locations = @libxml.root.find("rdf:Description/c:subject[contains(@rdf:resource, '#{hash}')]/..").map do |n2|
|
64
|
+
start = n2.find_first("c:offset").content.to_i
|
65
|
+
Range.new(start, start+n2.find_first("c:length").content.to_i)
|
88
66
|
end
|
89
67
|
|
90
|
-
|
68
|
+
|
69
|
+
@relationships << Relationship.new(
|
91
70
|
:type => type,
|
92
71
|
:hash => hash,
|
93
72
|
:metadata => metadata,
|
94
73
|
:locations => locations
|
95
74
|
)
|
96
|
-
end
|
97
|
-
relationship_elements.remove
|
98
|
-
|
99
|
-
doc
|
75
|
+
end
|
100
76
|
end
|
101
77
|
end
|
102
78
|
end
|
data/spec/calais_spec.rb
CHANGED
@@ -52,7 +52,7 @@ describe Calais, ".process_document" do
|
|
52
52
|
it "returns relationships" do
|
53
53
|
@response.relationships.should_not be_nil
|
54
54
|
@response.relationships.should_not be_empty
|
55
|
-
@response.relationships.map {|r| r.type }.should == ["PersonProfessional"]
|
55
|
+
@response.relationships.map {|r| r.type }.should == ["Quotation", "Quotation", "PersonProfessional", "Quotation", "Quotation", "Quotation", "Quotation"]
|
56
56
|
end
|
57
57
|
|
58
58
|
end
|
metadata
CHANGED
@@ -1,57 +1,33 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.2
|
3
|
+
specification_version: 1
|
2
4
|
name: calais
|
3
5
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
5
|
-
|
6
|
-
|
7
|
-
|
6
|
+
version: 0.0.5
|
7
|
+
date: 2008-03-12 00:00:00 -07:00
|
8
|
+
summary: A Ruby interface to the Calais Web Service
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: info@opensynapse.net
|
12
|
+
homepage: http://calais.rubyforge.org
|
13
|
+
rubyforge_project: calais
|
14
|
+
description: "== Features * Accepts documents in text/plain, text/xml and text/html format. * Basic access to the Open Calais API's Enlighten action. * Output is RDF representation of input document. * Single function ability to tag a document and receive a response in RDF format, names in the document, and their relationships. == Synopsis This is a very basic wrapper to the Open Calais API. It uses the POST endpoint and currently supports the Enlighten action. Here's a simple call: Calais.enlighten(:content => \"The government of the United Kingdom has given corporations like fast food chain McDonald's the right to award high school qualifications to employees who complete a company training program.\", :content_type => :text, :license_id => LICENSE_ID) This is the easiest way to get the RDF-formated response from the OpenCalais service. If you want to do something more fun like getting all sorts of fun information about a document, you can try this: Calais.process_document(:content => \"The government of the United Kingdom has given corporations like fast food chain McDonald's the right to award high school qualifications to employees who complete a company training program.\", :content_type => :text, :license_id => LICENSE_ID) This will return an object containing the RDF representation of the text, the names in the text, and any relationships that exist there."
|
8
15
|
autorequire:
|
16
|
+
default_executable:
|
9
17
|
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
10
27
|
cert_chain:
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
MRswGQYKCZImiZPyLGQBGRYLb3BlbnN5bmFwc2UxEzARBgoJkiaJk/IsZAEZFgNu
|
15
|
-
ZXQwHhcNMDgwMjAzMDUwODQzWhcNMDkwMjAyMDUwODQzWjBBMQ0wCwYDVQQDDARp
|
16
|
-
bmZvMRswGQYKCZImiZPyLGQBGRYLb3BlbnN5bmFwc2UxEzARBgoJkiaJk/IsZAEZ
|
17
|
-
FgNuZXQwggEiMA0GCSqGSIb3DQEBAQUAA4IBDwAwggEKAoIBAQCmaA3Od1p42luz
|
18
|
-
zDJepXD3VBFEmmeCUCOjs8rkGIlhRibBvAU8GB0hhkTUykeF6JvAp68FYtIqyTqM
|
19
|
-
EY7bnyYTWsvX7HrX/wGRshSKZPnxn2b0AnZ9T3QQZyUut1YQ5G+kBQrI76hz9ynA
|
20
|
-
l0mPCiGxrh+yUNTKt7KzOAzQbtPlqGiIzj+aYvzmdEsj24Ekm/11A/ntPnz+N/Wj
|
21
|
-
yS5c2tbfZdU8NfwfHCZQUBE4PROYCCjoly0QChvBQzKSZPrEpJB3EedMUyBc5m5E
|
22
|
-
TQ0u5aItr3isQchwo410x7ixzVveVzn4mchaGCZ3ZuPwaQkuI/7KSSWWH1LCouct
|
23
|
-
N7LsWR7jAgMBAAGjOTA3MAkGA1UdEwQCMAAwCwYDVR0PBAQDAgSwMB0GA1UdDgQW
|
24
|
-
BBRsRhkAWj4iWaut121ZcaOAKXG27DANBgkqhkiG9w0BAQUFAAOCAQEAFuqEVgKC
|
25
|
-
U6f237SZ/hzevOwRkaErF1EcaCEVzuNj+KNdbQOK9oOo+hHyos3jUo17TiUNDi+3
|
26
|
-
VJhw3cOkA/PEpa0ou0Vm8VIfXdp6dh62NhTKHBVwQ/qXHnn3aVuV/zIfOmi9WQ+t
|
27
|
-
mr7ehGTw7URly95GOESW4NKQ95p+iquAh/NGhtHGFt+nxjJGUkkYlnGVaxmmgof3
|
28
|
-
sP2hOrejIrD9jAoejiRhiA+IyEoaYJvlh+D+3MngvnyDFqHiFZgngM0fvTnMTsgT
|
29
|
-
avOOKhLsesocjiElkLMv8mwuY+L8P4tSvDTDKXxM9Bx/YagwgzYCqPoGtFdWI/GI
|
30
|
-
+keKvrmaTOJ7CQ==
|
31
|
-
-----END CERTIFICATE-----
|
32
|
-
|
33
|
-
date: 2008-02-07 00:00:00 -08:00
|
34
|
-
default_executable:
|
35
|
-
dependencies:
|
36
|
-
- !ruby/object:Gem::Dependency
|
37
|
-
name: hoe
|
38
|
-
version_requirement:
|
39
|
-
version_requirements: !ruby/object:Gem::Requirement
|
40
|
-
requirements:
|
41
|
-
- - ">="
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
version: 1.5.0
|
44
|
-
version:
|
45
|
-
description: "== Features * Accepts documents in text/plain, text/xml and text/html format. * Basic access to the Open Calais API's Enlighten action. * Output is RDF representation of input document. * Single function ability to tag a document and receive a response in RDF format, names in the document, and their relationships. == Synopsis This is a very basic wrapper to the Open Calais API. It uses the POST endpoint and currently supports the Enlighten action. Here's a simple call: Calais.enlighten(:content => \"The government of the United Kingdom has given corporations like fast food chain McDonald's the right to award high school qualifications to employees who complete a company training program.\", :content_type => :text, :license_id => LICENSE_ID) This is the easiest way to get the RDF-formated response from the OpenCalais service. If you want to do something more fun like getting all sorts of fun information about a document, you can try this: Calais.process_document(:content => \"The government of the United Kingdom has given corporations like fast food chain McDonald's the right to award high school qualifications to employees who complete a company training program.\", :content_type => :text, :license_id => LICENSE_ID) This will return an object containing the RDF representation of the text, the names in the text, and any relationships that exist there."
|
46
|
-
email: info@opensynapse.net
|
47
|
-
executables: []
|
48
|
-
|
49
|
-
extensions: []
|
50
|
-
|
51
|
-
extra_rdoc_files:
|
52
|
-
- History.txt
|
53
|
-
- Manifest.txt
|
54
|
-
- README.txt
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- Abhay Kumar
|
55
31
|
files:
|
56
32
|
- History.txt
|
57
33
|
- MIT-LICENSE
|
@@ -71,32 +47,28 @@ files:
|
|
71
47
|
- spec/fixtures/slovenia_euro.xml
|
72
48
|
- spec/helper.rb
|
73
49
|
- spec/spec.opts
|
74
|
-
|
75
|
-
|
76
|
-
post_install_message:
|
50
|
+
test_files: []
|
51
|
+
|
77
52
|
rdoc_options:
|
78
53
|
- --main
|
79
54
|
- README.txt
|
80
|
-
|
81
|
-
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
- !ruby/object:Gem::Version
|
86
|
-
version: "0"
|
87
|
-
version:
|
88
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
89
|
-
requirements:
|
90
|
-
- - ">="
|
91
|
-
- !ruby/object:Gem::Version
|
92
|
-
version: "0"
|
93
|
-
version:
|
94
|
-
requirements: []
|
55
|
+
extra_rdoc_files:
|
56
|
+
- History.txt
|
57
|
+
- Manifest.txt
|
58
|
+
- README.txt
|
59
|
+
executables: []
|
95
60
|
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
specification_version: 2
|
100
|
-
summary: A Ruby interface to the Calais Web Service
|
101
|
-
test_files: []
|
61
|
+
extensions: []
|
62
|
+
|
63
|
+
requirements: []
|
102
64
|
|
65
|
+
dependencies:
|
66
|
+
- !ruby/object:Gem::Dependency
|
67
|
+
name: hoe
|
68
|
+
version_requirement:
|
69
|
+
version_requirements: !ruby/object:Gem::Version::Requirement
|
70
|
+
requirements:
|
71
|
+
- - ">="
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: 1.4.0
|
74
|
+
version:
|
data.tar.gz.sig
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
��i/>�[ɵ:��m��<����
|
metadata.gz.sig
DELETED
Binary file
|