calais 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data.tar.gz.sig +0 -0
- data/History.txt +3 -0
- data/Manifest.txt +5 -3
- data/README.txt +8 -4
- data/Rakefile +1 -1
- data/lib/calais.rb +32 -29
- data/lib/calais/client.rb +92 -29
- data/lib/calais/response.rb +183 -65
- data/spec/calais/client_spec.rb +79 -0
- data/spec/calais/response_spec.rb +127 -0
- data/spec/calais_spec.rb +1 -112
- data/spec/fixtures/bicycles_australia.response.json +483 -0
- data/spec/fixtures/bicycles_australia.response.rdf +122 -0
- data/spec/fixtures/{bicycles_austrailia.xml → bicycles_australia.xml} +0 -0
- data/spec/helper.rb +4 -2
- metadata +78 -47
- metadata.gz.sig +0 -0
- data/lib/calais/name.rb +0 -27
- data/lib/calais/relationship.rb +0 -9
data.tar.gz.sig
ADDED
Binary file
|
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -6,12 +6,14 @@ Rakefile
|
|
6
6
|
init.rb
|
7
7
|
lib/calais.rb
|
8
8
|
lib/calais/client.rb
|
9
|
-
lib/calais/name.rb
|
10
|
-
lib/calais/relationship.rb
|
11
9
|
lib/calais/response.rb
|
10
|
+
spec/calais/client_spec.rb
|
11
|
+
spec/calais/response_spec.rb
|
12
12
|
spec/calais_spec.rb
|
13
13
|
spec/fixtures/.gitignore
|
14
|
-
spec/fixtures/
|
14
|
+
spec/fixtures/bicycles_australia.response.json
|
15
|
+
spec/fixtures/bicycles_australia.response.rdf
|
16
|
+
spec/fixtures/bicycles_australia.xml
|
15
17
|
spec/fixtures/calais.yml.sample
|
16
18
|
spec/fixtures/slovenia_euro.xml
|
17
19
|
spec/helper.rb
|
data/README.txt
CHANGED
@@ -6,8 +6,8 @@ A Ruby interface to the Open Calais Web Service (http://opencalais.com)
|
|
6
6
|
* Accepts documents in text/plain, text/xml and text/html format.
|
7
7
|
* Basic access to the Open Calais API's Enlighten action.
|
8
8
|
* Output is RDF representation of input document.
|
9
|
-
* Single function ability to
|
10
|
-
|
9
|
+
* Single function ability to extract names, entities and geographies from given text.
|
10
|
+
|
11
11
|
== Synopsis
|
12
12
|
|
13
13
|
This is a very basic wrapper to the Open Calais API. It uses the POST endpoint and currently supports the Enlighten action. Here's a simple call:
|
@@ -17,16 +17,17 @@ This is a very basic wrapper to the Open Calais API. It uses the POST endpoint a
|
|
17
17
|
This is the easiest way to get the RDF-formatted response from the OpenCalais service.
|
18
18
|
|
19
19
|
If you want to do something more fun like getting all sorts of fun information about a document, you can try this:
|
20
|
-
|
20
|
+
|
21
21
|
Calais.process_document(:content => "The government of the United Kingdom has given corporations like fast food chain McDonald's the right to award high school qualifications to employees who complete a company training program.", :content_type => :text, :license_id => LICENSE_ID)
|
22
22
|
|
23
|
-
This will return an object containing
|
23
|
+
This will return an object containing information extracted from the RDF response.
|
24
24
|
|
25
25
|
== Requirements
|
26
26
|
|
27
27
|
* Ruby 1.8.5 or better
|
28
28
|
* Uses the following standard libraries: digest/sha1, net/http, yaml, cgi
|
29
29
|
* libxml2 / libxml-ruby
|
30
|
+
* curb, libcurl
|
30
31
|
|
31
32
|
== Install
|
32
33
|
|
@@ -37,3 +38,6 @@ You can install the Calais gem via Rubygems (gem install calais) or by building
|
|
37
38
|
Abhay Kumar
|
38
39
|
http://opensynapse.net
|
39
40
|
|
41
|
+
== Acknowledgements
|
42
|
+
|
43
|
+
* Paul Legato (http://www.economaton.com/). Help all around with the new response processor and implementation of the latest API.
|
data/Rakefile
CHANGED
data/lib/calais.rb
CHANGED
@@ -2,55 +2,58 @@ require 'digest/sha1'
|
|
2
2
|
require 'net/http'
|
3
3
|
require 'cgi'
|
4
4
|
require 'iconv'
|
5
|
+
require 'set'
|
5
6
|
|
6
7
|
require 'rubygems'
|
7
8
|
require 'xml/libxml'
|
9
|
+
require 'json'
|
10
|
+
require 'curb'
|
8
11
|
|
9
12
|
$KCODE = "UTF8"
|
10
13
|
require 'jcode'
|
11
14
|
|
12
15
|
$:.unshift File.expand_path(File.dirname(__FILE__)) + '/calais'
|
13
16
|
|
14
|
-
require 'name'
|
15
|
-
require 'relationship'
|
16
|
-
require 'response'
|
17
17
|
require 'client'
|
18
|
+
require 'response'
|
18
19
|
|
19
20
|
module Calais
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
:rdf => "XML/RDF"
|
24
|
-
}
|
25
|
-
DEFAULT_OUTPUT_FORMAT = :rdf
|
26
|
-
|
21
|
+
REST_ENDPOINT = "http://api.opencalais.com/enlighten/rest/"
|
22
|
+
BETA_REST_ENDPOINT = "http://beta.opencalais.com/enlighten/rest/"
|
23
|
+
|
27
24
|
AVAILABLE_CONTENT_TYPES = {
|
28
|
-
:xml =>
|
29
|
-
:
|
30
|
-
:
|
25
|
+
:xml => 'text/xml',
|
26
|
+
:text => 'text/txt',
|
27
|
+
:html => 'text/html',
|
28
|
+
:raw => 'text/raw'
|
31
29
|
}
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
:
|
30
|
+
|
31
|
+
AVAILABLE_OUTPUT_FORMATS = {
|
32
|
+
:rdf => 'xml/rdf',
|
33
|
+
:simple => 'text/simple',
|
34
|
+
:microformats => 'text/microformats',
|
35
|
+
:json => 'application/json'
|
38
36
|
}
|
39
|
-
|
37
|
+
|
38
|
+
KNOWN_ENABLES = ['GenericRelations']
|
39
|
+
KNOWN_DISCARDS = ['er/Company', 'er/Geo']
|
40
|
+
|
40
41
|
MAX_RETRIES = 5
|
41
|
-
|
42
|
+
HTTP_TIMEOUT = 60
|
43
|
+
MIN_CONTENT_SIZE = 100
|
44
|
+
MAX_CONTENT_SIZE = 100_000
|
45
|
+
|
42
46
|
class << self
|
43
|
-
def enlighten(*args, &block) Client.new(*args, &block).
|
47
|
+
def enlighten(*args, &block); Client.new(*args, &block).enlighten; end
|
44
48
|
|
45
|
-
def process_document(*args, &block)
|
46
|
-
|
47
|
-
|
49
|
+
def process_document(*args, &block)
|
50
|
+
client = Client.new(*args, &block)
|
51
|
+
client.output_format = :rdf
|
52
|
+
Response.new(client.enlighten)
|
48
53
|
end
|
49
|
-
|
50
|
-
def process_data(data, error=nil) Response.new(data, error) end
|
51
54
|
end
|
52
55
|
end
|
53
56
|
|
54
57
|
module Calais
|
55
|
-
VERSION = '0.0.
|
56
|
-
end
|
58
|
+
VERSION = '0.0.6'
|
59
|
+
end
|
data/lib/calais/client.rb
CHANGED
@@ -1,47 +1,110 @@
|
|
1
1
|
module Calais
|
2
2
|
class Client
|
3
|
-
|
3
|
+
# base attributes of the call
|
4
4
|
attr_accessor :content
|
5
|
-
attr_accessor :
|
6
|
-
|
5
|
+
attr_accessor :license_id
|
6
|
+
|
7
|
+
# processing directives
|
8
|
+
attr_accessor :content_type, :output_format, :reltag_base_url, :calculate_relevance, :omit_outputting_original_text
|
9
|
+
attr_accessor :metadata_enables, :metadata_discards
|
10
|
+
|
11
|
+
# user directives
|
12
|
+
attr_accessor :allow_distribution, :allow_search, :external_id, :submitter
|
13
|
+
|
7
14
|
attr_accessor :external_metadata
|
8
|
-
|
15
|
+
|
16
|
+
attr_accessor :use_beta
|
17
|
+
|
9
18
|
def initialize(options={}, &block)
|
10
19
|
options.each {|k,v| send("#{k}=", v)}
|
11
20
|
yield(self) if block_given?
|
12
21
|
end
|
13
|
-
|
14
|
-
def
|
15
|
-
method = method.intern unless method.is_a?(Symbol)
|
16
|
-
raise ArgumentError.new("Unknown method: #{method}") unless AVAILABLE_METHODS.keys.include? method
|
17
|
-
|
22
|
+
|
23
|
+
def enlighten
|
18
24
|
post_args = {
|
19
25
|
"licenseID" => @license_id,
|
20
26
|
"content" => Iconv.iconv('UTF-8//IGNORE', 'UTF-8', "#{@content} ").first[0..-2],
|
21
27
|
"paramsXML" => params_xml
|
22
28
|
}
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
29
|
+
|
30
|
+
@client ||= Curl::Easy.new
|
31
|
+
@client.url = @use_beta ? BETA_REST_ENDPOINT : REST_ENDPOINT
|
32
|
+
@client.timeout = HTTP_TIMEOUT
|
33
|
+
|
34
|
+
post_fields = post_args.map {|k,v| Curl::PostField.content(k, v) }
|
35
|
+
|
36
|
+
do_request(post_fields)
|
28
37
|
end
|
29
|
-
|
38
|
+
|
39
|
+
def params_xml
|
40
|
+
check_params
|
41
|
+
|
42
|
+
params_node = XML::Node.new('c:params')
|
43
|
+
params_node['xmlns:c'] = 'http://s.opencalais.com/1/pred/'
|
44
|
+
params_node['xmlns:rdf'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
45
|
+
|
46
|
+
processing_node = XML::Node.new('c:processingDirectives')
|
47
|
+
processing_node['c:contentType'] = AVAILABLE_CONTENT_TYPES[@content_type] if @content_type
|
48
|
+
processing_node['c:outputFormat'] = AVAILABLE_OUTPUT_FORMATS[@output_format] if @output_format
|
49
|
+
processing_node['c:reltagBaseURL'] = @reltag_base_url.to_s if @reltag_base_url
|
50
|
+
|
51
|
+
processing_node['c:enableMetadataType'] = @metadata_enables.join(';') unless @metadata_enables.empty?
|
52
|
+
processing_node['c:discardMetadata'] = @metadata_discards.join(';') unless @metadata_discards.empty?
|
53
|
+
processing_node['c:omitOutputtingOriginalText'] = 'true' if @omit_outputting_original_text
|
54
|
+
|
55
|
+
user_node = XML::Node.new('c:userDirectives')
|
56
|
+
user_node['c:allowDistribution'] = @allow_distribution.to_s unless @allow_distribution.nil?
|
57
|
+
user_node['c:allowSearch'] = @allow_search.to_s unless @allow_search.nil?
|
58
|
+
user_node['c:externalID'] = @external_id.to_s if @external_id
|
59
|
+
user_node['c:submitter'] = @submitter.to_s if @submitter
|
60
|
+
|
61
|
+
params_node << processing_node
|
62
|
+
params_node << user_node
|
63
|
+
|
64
|
+
if @external_metadata
|
65
|
+
external_node = XML::Node.new('c:externalMetadata')
|
66
|
+
external_node << @external_metadata
|
67
|
+
params_node << external_node
|
68
|
+
end
|
69
|
+
|
70
|
+
params_node.to_s
|
71
|
+
end
|
72
|
+
|
30
73
|
private
|
31
|
-
def
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
74
|
+
def check_params
|
75
|
+
raise 'missing content' if @content.nil? || @content.empty?
|
76
|
+
|
77
|
+
content_length = @content.length
|
78
|
+
raise 'content is too small' if content_length < MIN_CONTENT_SIZE
|
79
|
+
raise 'content is too large' if content_length > MAX_CONTENT_SIZE
|
80
|
+
|
81
|
+
raise 'missing license id' if @license_id.nil? || @license_id.empty?
|
82
|
+
|
83
|
+
raise 'unknown content type' unless AVAILABLE_CONTENT_TYPES.keys.include?(@content_type) if @content_type
|
84
|
+
raise 'unknown output format' unless AVAILABLE_OUTPUT_FORMATS.keys.include?(@output_format) if @output_format
|
85
|
+
|
86
|
+
%w[calculate_relevance allow_distribution allow_search].each do |variable|
|
87
|
+
value = self.send(variable)
|
88
|
+
unless NilClass === value || TrueClass === value || FalseClass === value
|
89
|
+
raise "expected a boolean value for #{variable} but got #{value}"
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
@metadata_enables ||= []
|
94
|
+
unknown_enables = Set.new(@metadata_enables) - KNOWN_ENABLES
|
95
|
+
raise "unknown metadata enables: #{unknown_enables.to_ainspect}" unless unknown_enables.empty?
|
96
|
+
|
97
|
+
@metadata_discards ||= []
|
98
|
+
unknown_discards = Set.new(@metadata_discards) - KNOWN_DISCARDS
|
99
|
+
raise "unknown metadata discards: #{unknown_discards.to_ainspect}" unless unknown_discards.empty?
|
100
|
+
end
|
101
|
+
|
102
|
+
def do_request(post_fields)
|
103
|
+
unless @client.http_post(post_fields)
|
104
|
+
raise 'unable to post to api endpoint'
|
105
|
+
end
|
106
|
+
|
107
|
+
@client.body_str
|
45
108
|
end
|
46
109
|
end
|
47
110
|
end
|
data/lib/calais/response.rb
CHANGED
@@ -1,77 +1,195 @@
|
|
1
1
|
module Calais
|
2
2
|
class Response
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
3
|
+
MATCHERS = {
|
4
|
+
:docinfo => 'DocInfo',
|
5
|
+
:docinfometa => 'DocInfoMeta',
|
6
|
+
:defaultlangid => 'DefaultLangId',
|
7
|
+
:doccat => 'DocCat',
|
8
|
+
:entities => 'type/em/e',
|
9
|
+
:relations => 'type/em/r',
|
10
|
+
:geographies => 'type/er',
|
11
|
+
:instances => 'type/sys/InstanceInfo',
|
12
|
+
:relevances => 'type/sys/RelevanceInfo',
|
13
|
+
}
|
14
|
+
|
15
|
+
attr_accessor :submitter_code, :signature, :language, :submission_date, :request_id, :doc_title, :doc_date
|
16
|
+
attr_accessor :hashes, :entities, :relations, :geographies, :categories
|
17
|
+
|
18
|
+
def initialize(rdf_string)
|
19
|
+
@raw_response = rdf_string
|
20
|
+
|
21
|
+
@hashes = []
|
22
|
+
@entities = []
|
23
|
+
@relations = []
|
24
|
+
@geographies = []
|
25
|
+
@relevances = {} # key = String hash, val = Float relevance
|
26
|
+
@categories = []
|
27
|
+
|
28
|
+
extract_data
|
29
|
+
end
|
30
|
+
|
31
|
+
class Entity
|
32
|
+
attr_accessor :hash, :type, :attributes, :relevance, :instances
|
33
|
+
end
|
34
|
+
|
35
|
+
class Relation
|
36
|
+
attr_accessor :hash, :type, :attributes, :instances
|
15
37
|
end
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
38
|
+
|
39
|
+
class Geography
|
40
|
+
attr_accessor :name, :hash, :attributes
|
41
|
+
end
|
42
|
+
|
43
|
+
class Category
|
44
|
+
attr_accessor :name, :score
|
45
|
+
end
|
46
|
+
|
47
|
+
class Instance
|
48
|
+
attr_accessor :prefix, :exact, :suffix, :offset, :length
|
49
|
+
|
50
|
+
# Makes a new Instance object from an appropriate LibXML::XML::Node.
|
51
|
+
def self.from_node(node)
|
52
|
+
instance = self.new
|
53
|
+
instance.prefix = node.find_first("c:prefix").content
|
54
|
+
instance.exact = node.find_first("c:exact").content
|
55
|
+
instance.suffix = node.find_first("c:suffix").content
|
56
|
+
instance.offset = node.find_first("c:offset").content.to_i
|
57
|
+
instance.length = node.find_first("c:length").content.to_i
|
58
|
+
|
59
|
+
instance
|
20
60
|
end
|
21
61
|
end
|
22
|
-
|
62
|
+
|
63
|
+
class CalaisHash
|
64
|
+
attr_accessor :value
|
65
|
+
|
66
|
+
def self.find_or_create(hash, hashes)
|
67
|
+
if !selected = hashes.select {|h| h.value == hash }.first
|
68
|
+
selected = self.new
|
69
|
+
selected.value = hash
|
70
|
+
hashes << selected
|
71
|
+
end
|
72
|
+
|
73
|
+
selected
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
23
77
|
private
|
24
|
-
def
|
25
|
-
|
26
|
-
|
27
|
-
|
78
|
+
def extract_data
|
79
|
+
doc = XML::Parser.string(@raw_response).parse
|
80
|
+
|
81
|
+
doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfometa]}')]/..").each do |node|
|
82
|
+
@language = node['language']
|
83
|
+
@submission_date = DateTime.parse node['submissionDate']
|
84
|
+
|
85
|
+
attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
|
86
|
+
|
87
|
+
@signature = attributes.delete('signature')
|
88
|
+
@submitter_code = attributes.delete('submitterCode')
|
89
|
+
|
90
|
+
node.remove!
|
91
|
+
end
|
92
|
+
|
93
|
+
doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfo]}')]/..").each do |node|
|
94
|
+
@request_id = node['calaisRequestID']
|
95
|
+
|
96
|
+
attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
|
97
|
+
|
98
|
+
@doc_title = attributes.delete('docTitle')
|
99
|
+
@doc_date = Date.parse attributes.delete('docDate')
|
100
|
+
|
101
|
+
node.remove!
|
102
|
+
end
|
103
|
+
|
104
|
+
@categories = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:doccat]}')]/..").map do |node|
|
105
|
+
category = Category.new
|
106
|
+
category.name = node.find_first("c:categoryName").content
|
107
|
+
category.score = node.find_first("c:score").content.to_f
|
108
|
+
|
109
|
+
node.remove!
|
110
|
+
category
|
111
|
+
end
|
112
|
+
|
113
|
+
@relevances = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) do |acc, node|
|
114
|
+
subject_hash = node.find_first("c:subject")[:resource].split('/')[-1]
|
115
|
+
acc[subject_hash] = node.find_first("c:relevance").content.to_f
|
116
|
+
|
117
|
+
node.remove!
|
118
|
+
acc
|
119
|
+
end
|
120
|
+
|
121
|
+
@entities = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node|
|
122
|
+
extracted_hash = node['about'].split('/')[-1] rescue nil
|
123
|
+
|
124
|
+
entity = Entity.new
|
125
|
+
entity.hash = CalaisHash.find_or_create(extracted_hash, @hashes)
|
126
|
+
entity.type = extract_type(node)
|
127
|
+
entity.attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
|
128
|
+
|
129
|
+
entity.relevance = @relevances[extracted_hash]
|
130
|
+
entity.instances = extract_instances(doc, extracted_hash)
|
131
|
+
|
132
|
+
node.remove!
|
133
|
+
entity
|
134
|
+
end
|
135
|
+
|
136
|
+
@relations = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node|
|
137
|
+
extracted_hash = node['about'].split('/')[-1] rescue nil
|
138
|
+
|
139
|
+
relation = Relation.new
|
140
|
+
relation.hash = CalaisHash.find_or_create(extracted_hash, @hashes)
|
141
|
+
relation.type = extract_type(node)
|
142
|
+
relation.attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
|
143
|
+
relation.instances = extract_instances(doc, extracted_hash)
|
144
|
+
|
145
|
+
node.remove!
|
146
|
+
relation
|
147
|
+
end
|
148
|
+
|
149
|
+
@geographies = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:geographies]}')]/..").map do |node|
|
150
|
+
attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
|
151
|
+
|
152
|
+
geography = Geography.new
|
153
|
+
geography.name = attributes.delete('name')
|
154
|
+
geography.hash = attributes.delete('subject')
|
155
|
+
geography.attributes = attributes
|
156
|
+
|
157
|
+
node.remove!
|
158
|
+
geography
|
159
|
+
end
|
160
|
+
|
161
|
+
doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:defaultlangid]}')]/..").each { |node| node.remove! }
|
162
|
+
doc.root.find("./*").each { |node| node.remove! }
|
163
|
+
|
164
|
+
return
|
28
165
|
end
|
29
|
-
|
30
|
-
def
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
Range.new(start, start+n2.find_first("c:length").content.to_i)
|
39
|
-
end
|
40
|
-
|
41
|
-
Name.new(
|
42
|
-
:name => name,
|
43
|
-
:hash => hash,
|
44
|
-
:type => type,
|
45
|
-
:locations => locations
|
46
|
-
)
|
166
|
+
|
167
|
+
def extract_instances(doc, hash)
|
168
|
+
doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:instances]}')]/..").select do |instance_node|
|
169
|
+
instance_node.find_first("c:subject")[:resource].split("/")[-1] == hash
|
170
|
+
end.map do |instance_node|
|
171
|
+
instance = Instance.from_node(instance_node)
|
172
|
+
instance_node.remove!
|
173
|
+
|
174
|
+
instance
|
47
175
|
end
|
48
176
|
end
|
49
|
-
|
50
|
-
def
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
Range.new(start, start+n2.find_first("c:length").content.to_i)
|
66
|
-
end
|
67
|
-
|
68
|
-
|
69
|
-
@relationships << Relationship.new(
|
70
|
-
:type => type,
|
71
|
-
:hash => hash,
|
72
|
-
:metadata => metadata,
|
73
|
-
:locations => locations
|
74
|
-
)
|
177
|
+
|
178
|
+
def extract_type(node)
|
179
|
+
node.find("*[name()='rdf:type']")[0]['resource'].split('/')[-1]
|
180
|
+
rescue
|
181
|
+
nil
|
182
|
+
end
|
183
|
+
|
184
|
+
def extract_attributes(nodes)
|
185
|
+
nodes.inject({}) do |hsh, node|
|
186
|
+
value = if node['resource']
|
187
|
+
extracted_hash = node['resource'].split('/')[-1] rescue nil
|
188
|
+
CalaisHash.find_or_create(extracted_hash, @hashes)
|
189
|
+
else
|
190
|
+
node.content
|
191
|
+
end
|
192
|
+
hsh.merge(node.name => value)
|
75
193
|
end
|
76
194
|
end
|
77
195
|
end
|