calais 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data.tar.gz.sig CHANGED
Binary file
data/History.txt CHANGED
@@ -1,3 +1,11 @@
1
+ == 0.0.2
2
+
3
+ * cleanup in the specs
4
+ * cleaner parsing
5
+ * named entities now report their locations (offset ranges) in the document
6
+ * relationships now carry their hash, a metadata hash, and their locations
7
+ * moved Name and Relationship out of Calais::Response into lib/calais/name.rb and lib/calais/relationship.rb
8
+
1
9
  == 0.0.1
2
10
 
3
11
  * Access to Calais's Enlighten action
data/MIT-LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2008 Abhay Kumar info@opensynapse.net
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ 'Software'), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17
+ IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18
+ CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19
+ TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20
+ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/Manifest.txt CHANGED
@@ -1,9 +1,12 @@
1
1
  History.txt
2
+ MIT-LICENSE
2
3
  Manifest.txt
3
4
  README.txt
4
5
  Rakefile
5
6
  lib/calais.rb
6
7
  lib/calais/client.rb
8
+ lib/calais/name.rb
9
+ lib/calais/relationship.rb
7
10
  lib/calais/response.rb
8
11
  spec/calais_spec.rb
9
12
  spec/fixtures/.gitignore
data/README.txt CHANGED
@@ -30,7 +30,7 @@ This will return an object containing the RDF representation of the text, the na
30
30
 
31
31
  == Install
32
32
 
33
- TODO
33
+ You can install the Calais gem via RubyGems (gem install calais) or by building from source.
34
34
 
35
35
  == Authors
36
36
 
data/lib/calais.rb CHANGED
@@ -43,5 +43,5 @@ module Calais
43
43
  end
44
44
 
45
45
  module Calais
46
- VERSION = '0.0.1'
46
+ VERSION = '0.0.2'
47
47
  end
@@ -0,0 +1,13 @@
1
+ module Calais
2
+ class Name
3
+ attr_accessor :name, :type, :hash, :locations
4
+
5
+ def initialize(args={})
6
+ args.each {|k,v| send("#{k}=", v)}
7
+ end
8
+
9
+ def self.find_in_names(hash, names)
10
+ names.select {|name| name.hash == hash }.first
11
+ end
12
+ end
13
+ end
@@ -0,0 +1,9 @@
1
+ module Calais
2
+ class Relationship
3
+ attr_accessor :type, :hash, :metadata, :locations
4
+
5
+ def initialize(args={})
6
+ args.each {|k,v| send("#{k}=", v)}
7
+ end
8
+ end
9
+ end
@@ -8,38 +8,14 @@ module Calais
8
8
  @relationships = []
9
9
 
10
10
  parse_rdf(raw)
11
- parse_names
12
- parse_relationships
13
- end
14
-
15
- def to_dot
16
- used = []
17
- id = @hpricot.at("rdf:Description//c:document//..").attributes["c:externalID"]
18
-
19
- dot = "digraph \"#{id}\"\n"
20
- dot += "{\n"
21
- dot += "\tgraph [rankdir=LR, overlap=false];\n"
22
- dot += "\tnode [shape = circle];"
11
+ return if @error
23
12
 
24
- @relationships.each do |rel|
25
- dot += "\t\"#{rel.actor.name}\" -> \"#{rel.target.name}\""
26
- dot += " ["
27
- dot += "label=\""
28
- dot += "#{rel.metadata} " if rel.metadata
29
- dot += "(#{rel.type})"
30
- dot += "\"];\n"
31
- used |= [rel.actor.hash, rel.target.hash]
32
- end
33
-
34
- @names.each do |name|
35
- dot += "\t\"#{name.name}\";\n" unless used.include?(name.hash)
36
- end
37
- dot += "}\n"
38
-
39
- f = File.open("#{id}.dot", 'w')
40
- f.puts dot
41
- f.close
42
-
13
+ h_doc = Hpricot.XML(@rdf)
14
+ document_node = h_doc.root.search("//rdf:Description//c:document//..").remove.first
15
+ signature_node = h_doc.root.search("//rdf:Description//c:signature//..").remove.first
16
+ language_node = h_doc.root.search("//rdf:Description//c:lang//..").remove.first
17
+ h_doc = parse_names(h_doc)
18
+ h_doc = parse_relationships(h_doc)
43
19
  end
44
20
 
45
21
  private
@@ -49,59 +25,72 @@ module Calais
49
25
  @error = Hpricot.XML(response).at("/Error/Exception").inner_html rescue @error
50
26
  end
51
27
 
52
- def parse_names
53
- @names = @hpricot.root.search("rdf:Description//c:name//..").map do |ele|
54
- Calais::Response::Name.new(
55
- :name => ele.at("c:name").inner_html,
56
- :hash => ele.attributes["rdf:about"].split('/').last,
57
- :type => ele.at("rdf:type").attributes["rdf:resource"].split('/').last
58
- )
59
- end unless @error
60
- end
61
-
62
- def parse_relationships
63
- doc = Hpricot.XML(@rdf)
64
- doc.search("rdf:Description//c:docId//..").remove
65
- doc.search("rdf:Description//c:document//..").remove
66
- doc.search("rdf:Description//c:name//..").remove
67
-
68
- @relationships = doc.root.search("rdf:Description").map do |ele|
69
- relationship = ele.at("rdf:type")
70
- actor = relationship.next_sibling
71
- metadata = actor.next_sibling.attributes["rdf:resource"] ? nil : actor.next_sibling.inner_html.strip
72
- target = metadata ? actor.next_sibling.next_sibling : actor.next_sibling
28
+ def parse_names(doc)
29
+ name_elements = doc.root.search("//rdf:Description//c:name//..")
30
+ @names = name_elements.map do |ele|
31
+ name = ele.at("c:name").inner_html
32
+ type = ele.at("rdf:type").attributes["rdf:resource"].split("/").last
33
+ hash = ele.attributes["rdf:about"].split("/").last
73
34
 
74
- actor_name = actor ? Name.find_in_names(actor.attributes["rdf:resource"].split('/').last, @names) : nil
75
- target_name = target ? Name.find_in_names(target.attributes["rdf:resource"].split('/').last, @names) : nil
76
-
77
- Calais::Response::Relationship.new(
78
- :type => relationship.attributes["rdf:resource"].split('/').last,
79
- :actor => actor_name,
80
- :target => target_name,
81
- :metadata => metadata
35
+ detection_nodes = doc.root.search("//rdf:Description//c:subject//..").collect! do |ele|
36
+ ele unless ele.at("c:subject").attributes["rdf:resource"].match(hash).nil?
37
+ end.compact
38
+
39
+ locations = detection_nodes.map do |ele|
40
+ start = ele.at("c:offset").inner_html.to_i
41
+ Range.new(start, start+ele.at("c:length").inner_html.to_i)
42
+ end
43
+
44
+ detection_nodes.remove
45
+
46
+ Name.new(
47
+ :name => name,
48
+ :hash => hash,
49
+ :type => type,
50
+ :locations => locations
82
51
  )
83
52
  end
84
- end
85
-
86
- class Name
87
- include Comparable
88
- attr_accessor :name, :type, :hash
89
-
90
- def initialize(args={})
91
- args.each {|k,v| send("#{k}=", v)}
53
+ name_elements.remove
54
+
55
+ doc
92
56
  end
93
57
 
94
- def self.find_in_names(hash, names)
95
- names.select {|name| name.hash == hash }.first
96
- end
97
- end
98
-
99
- class Relationship
100
- attr_accessor :type, :actor, :target, :metadata
101
-
102
- def initialize(args={})
103
- args.each {|k,v| send("#{k}=", v)}
58
+ def parse_relationships(doc)
59
+ relationship_elements = doc.root.search("rdf:Description")
60
+ @relationships = relationship_elements.map do |ele|
61
+ next if ele.at("c:docId")
62
+
63
+ hash = ele.attributes["rdf:about"].split("/").last
64
+ type = ele.at("rdf:type").attributes["rdf:resource"].split("/").last
65
+ metadata = {}
66
+ ele.children.each do |child|
67
+ next if child.comment? || child.name == "rdf:type"
68
+
69
+ value = if child.attributes["rdf:resource"]
70
+ Name.find_in_names(child.attributes["rdf:resource"].split("/").last, @names) rescue nil
71
+ else
72
+ child.inner_html.strip
73
+ end
74
+ metadata[child.name.split(":").last] = value
75
+ end
76
+
77
+ locations = doc.root.search("//rdf:Description//c:docId//..").collect! do |ele|
78
+ ele unless ele.at("c:subject").attributes["rdf:resource"].match(hash).nil?
79
+ end.compact.map do |ele|
80
+ start = ele.at("c:offset").inner_html.to_i
81
+ Range.new(start, start+ele.at("c:length").inner_html.to_i)
82
+ end
83
+
84
+ Relationship.new(
85
+ :type => type,
86
+ :hash => hash,
87
+ :metadata => metadata,
88
+ :locations => locations
89
+ )
90
+ end.compact
91
+ relationship_elements.remove
92
+
93
+ doc
104
94
  end
105
- end
106
95
  end
107
96
  end
data/spec/calais_spec.rb CHANGED
@@ -36,17 +36,25 @@ describe Calais::Client, ".new" do
36
36
  end
37
37
 
38
38
  describe Calais, ".process_document" do
39
+ before(:all) { @response = Calais.process_document(:content => SAMPLE_DOCUMENT, :content_type => :xml, :license_id => LICENSE_ID) }
40
+
39
41
  it "returns a Calais::Response" do
40
- response = Calais.process_document(:content => SAMPLE_DOCUMENT, :content_type => :xml, :license_id => LICENSE_ID)
41
- response.should_not be_nil
42
- response.should be_a_kind_of(Calais::Response)
42
+ @response.should_not be_nil
43
+ @response.should be_a_kind_of(Calais::Response)
43
44
  end
44
-
45
- it "returns a Calais::Response (with relationships)" do
46
- response = Calais.process_document(:content => File.read(File.join(File.dirname(__FILE__), 'fixtures', 'bicycles_austrailia.xml')), :content_type => :xml, :license_id => LICENSE_ID)
47
- response.should_not be_nil
48
- response.should be_a_kind_of(Calais::Response)
45
+
46
+ it "returns names" do
47
+ @response.names.should_not be_nil
48
+ @response.names.should_not be_empty
49
+ @response.names.map {|n| n.name }.sort.should == ["Australia", "Australia", "Cycling Promotion Fund", "Ian Christie", "car manufacturers", "car market", "car sales", "company car"]
49
50
  end
51
+
52
+ it "returns relationships" do
53
+ @response.relationships.should_not be_nil
54
+ @response.relationships.should_not be_empty
55
+ @response.relationships.map {|r| r.type }.should == ["PersonProfessional"]
56
+ end
57
+
50
58
  end
51
59
 
52
60
  describe Calais::Client, ".call" do
@@ -67,6 +75,6 @@ describe Calais::Client, ".params_xml" do
67
75
  it "returns an xml encoded string" do
68
76
  client = Calais::Client.new(:content => SAMPLE_DOCUMENT, :content_type => :xml, :license_id => LICENSE_ID)
69
77
  client.send("params_xml").should_not be_nil
70
- client.send("params_xml").should == %[<c:params xmlns:c=\"http://s.opencalais.com/1/pred/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"><c:processingDirectives c:contentType=\"TEXT/XML\" c:outputFormat=\"XML/RDF\"></c:processingDirectives><c:userDirectives c:allowDistribution=\"false\" c:allowSearch=\"false\" c:externalID=\"dc68d5a382724c2238d9f22ba9c0b4d2581569d8\" c:submitter=\"calais.rb\"></c:userDirectives><c:externalMetadata></c:externalMetadata></c:params>]
78
+ client.send("params_xml").should == %[<c:params xmlns:c=\"http://s.opencalais.com/1/pred/\" xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"><c:processingDirectives c:contentType=\"TEXT/XML\" c:outputFormat=\"XML/RDF\"></c:processingDirectives><c:userDirectives c:allowDistribution=\"false\" c:allowSearch=\"false\" c:externalID=\"4a661f3cd285d43fa4df971e14e623eb51748e27\" c:submitter=\"calais.rb\"></c:userDirectives><c:externalMetadata></c:externalMetadata></c:params>]
71
79
  end
72
80
  end
data/spec/helper.rb CHANGED
@@ -4,5 +4,5 @@ require 'yaml'
4
4
 
5
5
  require File.dirname(__FILE__) + '/../lib/calais'
6
6
 
7
- SAMPLE_DOCUMENT = File.read(File.join(File.dirname(__FILE__), 'fixtures/slovenia_euro.xml'))
7
+ SAMPLE_DOCUMENT = File.read(File.join(File.dirname(__FILE__), 'fixtures/bicycles_austrailia.xml'))
8
8
  LICENSE_ID = YAML.load(File.read(File.join(File.dirname(__FILE__), 'fixtures/calais.yml')))['key']
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: calais
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Abhay Kumar
@@ -30,7 +30,7 @@ cert_chain:
30
30
  +keKvrmaTOJ7CQ==
31
31
  -----END CERTIFICATE-----
32
32
 
33
- date: 2008-02-02 00:00:00 -08:00
33
+ date: 2008-02-07 00:00:00 -08:00
34
34
  default_executable:
35
35
  dependencies:
36
36
  - !ruby/object:Gem::Dependency
@@ -54,11 +54,14 @@ extra_rdoc_files:
54
54
  - README.txt
55
55
  files:
56
56
  - History.txt
57
+ - MIT-LICENSE
57
58
  - Manifest.txt
58
59
  - README.txt
59
60
  - Rakefile
60
61
  - lib/calais.rb
61
62
  - lib/calais/client.rb
63
+ - lib/calais/name.rb
64
+ - lib/calais/relationship.rb
62
65
  - lib/calais/response.rb
63
66
  - spec/calais_spec.rb
64
67
  - spec/fixtures/.gitignore
metadata.gz.sig CHANGED
Binary file