calais 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- data.tar.gz.sig +0 -0
- data/History.txt +3 -0
- data/Manifest.txt +5 -3
- data/README.txt +8 -4
- data/Rakefile +1 -1
- data/lib/calais.rb +32 -29
- data/lib/calais/client.rb +92 -29
- data/lib/calais/response.rb +183 -65
- data/spec/calais/client_spec.rb +79 -0
- data/spec/calais/response_spec.rb +127 -0
- data/spec/calais_spec.rb +1 -112
- data/spec/fixtures/bicycles_australia.response.json +483 -0
- data/spec/fixtures/bicycles_australia.response.rdf +122 -0
- data/spec/fixtures/{bicycles_austrailia.xml → bicycles_australia.xml} +0 -0
- data/spec/helper.rb +4 -2
- metadata +78 -47
- metadata.gz.sig +0 -0
- data/lib/calais/name.rb +0 -27
- data/lib/calais/relationship.rb +0 -9
data.tar.gz.sig
ADDED
Binary file
|
data/History.txt
CHANGED
data/Manifest.txt
CHANGED
@@ -6,12 +6,14 @@ Rakefile
|
|
6
6
|
init.rb
|
7
7
|
lib/calais.rb
|
8
8
|
lib/calais/client.rb
|
9
|
-
lib/calais/name.rb
|
10
|
-
lib/calais/relationship.rb
|
11
9
|
lib/calais/response.rb
|
10
|
+
spec/calais/client_spec.rb
|
11
|
+
spec/calais/response_spec.rb
|
12
12
|
spec/calais_spec.rb
|
13
13
|
spec/fixtures/.gitignore
|
14
|
-
spec/fixtures/
|
14
|
+
spec/fixtures/bicycles_australia.response.json
|
15
|
+
spec/fixtures/bicycles_australia.response.rdf
|
16
|
+
spec/fixtures/bicycles_australia.xml
|
15
17
|
spec/fixtures/calais.yml.sample
|
16
18
|
spec/fixtures/slovenia_euro.xml
|
17
19
|
spec/helper.rb
|
data/README.txt
CHANGED
@@ -6,8 +6,8 @@ A Ruby interface to the Open Calais Web Service (http://opencalais.com)
|
|
6
6
|
* Accepts documents in text/plain, text/xml and text/html format.
|
7
7
|
* Basic access to the Open Calais API's Enlighten action.
|
8
8
|
* Output is RDF representation of input document.
|
9
|
-
* Single function ability to
|
10
|
-
|
9
|
+
* Single function ability to extract names, entities and geographies from given text.
|
10
|
+
|
11
11
|
== Synopsis
|
12
12
|
|
13
13
|
This is a very basic wrapper to the Open Calais API. It uses the POST endpoint and currently supports the Enlighten action. Here's a simple call:
|
@@ -17,16 +17,17 @@ This is a very basic wrapper to the Open Calais API. It uses the POST endpoint a
|
|
17
17
|
This is the easiest way to get the RDF-formatted response from the OpenCalais service.
|
18
18
|
|
19
19
|
If you want to do something more fun like getting all sorts of fun information about a document, you can try this:
|
20
|
-
|
20
|
+
|
21
21
|
Calais.process_document(:content => "The government of the United Kingdom has given corporations like fast food chain McDonald's the right to award high school qualifications to employees who complete a company training program.", :content_type => :text, :license_id => LICENSE_ID)
|
22
22
|
|
23
|
-
This will return an object containing
|
23
|
+
This will return an object containing information extracted from the RDF response.
|
24
24
|
|
25
25
|
== Requirements
|
26
26
|
|
27
27
|
* Ruby 1.8.5 or better
|
28
28
|
* Uses the following standard libraries: digest/sha1, net/http, yaml, cgi
|
29
29
|
* libxml2 / libxml-ruby
|
30
|
+
* curb, libcurl
|
30
31
|
|
31
32
|
== Install
|
32
33
|
|
@@ -37,3 +38,6 @@ You can install the Calais gem via Rubygems (gem install calais) or by building
|
|
37
38
|
Abhay Kumar
|
38
39
|
http://opensynapse.net
|
39
40
|
|
41
|
+
== Acknowledgements
|
42
|
+
|
43
|
+
* Paul Legato (http://www.economaton.com/). Help all around with the new response processor and implementation of the latest API.
|
data/Rakefile
CHANGED
data/lib/calais.rb
CHANGED
@@ -2,55 +2,58 @@ require 'digest/sha1'
|
|
2
2
|
require 'net/http'
|
3
3
|
require 'cgi'
|
4
4
|
require 'iconv'
|
5
|
+
require 'set'
|
5
6
|
|
6
7
|
require 'rubygems'
|
7
8
|
require 'xml/libxml'
|
9
|
+
require 'json'
|
10
|
+
require 'curb'
|
8
11
|
|
9
12
|
$KCODE = "UTF8"
|
10
13
|
require 'jcode'
|
11
14
|
|
12
15
|
$:.unshift File.expand_path(File.dirname(__FILE__)) + '/calais'
|
13
16
|
|
14
|
-
require 'name'
|
15
|
-
require 'relationship'
|
16
|
-
require 'response'
|
17
17
|
require 'client'
|
18
|
+
require 'response'
|
18
19
|
|
19
20
|
module Calais
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
:rdf => "XML/RDF"
|
24
|
-
}
|
25
|
-
DEFAULT_OUTPUT_FORMAT = :rdf
|
26
|
-
|
21
|
+
REST_ENDPOINT = "http://api.opencalais.com/enlighten/rest/"
|
22
|
+
BETA_REST_ENDPOINT = "http://beta.opencalais.com/enlighten/rest/"
|
23
|
+
|
27
24
|
AVAILABLE_CONTENT_TYPES = {
|
28
|
-
:xml =>
|
29
|
-
:
|
30
|
-
:
|
25
|
+
:xml => 'text/xml',
|
26
|
+
:text => 'text/txt',
|
27
|
+
:html => 'text/html',
|
28
|
+
:raw => 'text/raw'
|
31
29
|
}
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
:
|
30
|
+
|
31
|
+
AVAILABLE_OUTPUT_FORMATS = {
|
32
|
+
:rdf => 'xml/rdf',
|
33
|
+
:simple => 'text/simple',
|
34
|
+
:microformats => 'text/microformats',
|
35
|
+
:json => 'application/json'
|
38
36
|
}
|
39
|
-
|
37
|
+
|
38
|
+
KNOWN_ENABLES = ['GenericRelations']
|
39
|
+
KNOWN_DISCARDS = ['er/Company', 'er/Geo']
|
40
|
+
|
40
41
|
MAX_RETRIES = 5
|
41
|
-
|
42
|
+
HTTP_TIMEOUT = 60
|
43
|
+
MIN_CONTENT_SIZE = 100
|
44
|
+
MAX_CONTENT_SIZE = 100_000
|
45
|
+
|
42
46
|
class << self
|
43
|
-
def enlighten(*args, &block) Client.new(*args, &block).
|
47
|
+
def enlighten(*args, &block); Client.new(*args, &block).enlighten; end
|
44
48
|
|
45
|
-
def process_document(*args, &block)
|
46
|
-
|
47
|
-
|
49
|
+
def process_document(*args, &block)
|
50
|
+
client = Client.new(*args, &block)
|
51
|
+
client.output_format = :rdf
|
52
|
+
Response.new(client.enlighten)
|
48
53
|
end
|
49
|
-
|
50
|
-
def process_data(data, error=nil) Response.new(data, error) end
|
51
54
|
end
|
52
55
|
end
|
53
56
|
|
54
57
|
module Calais
|
55
|
-
VERSION = '0.0.
|
56
|
-
end
|
58
|
+
VERSION = '0.0.6'
|
59
|
+
end
|
data/lib/calais/client.rb
CHANGED
@@ -1,47 +1,110 @@
|
|
1
1
|
module Calais
|
2
2
|
class Client
|
3
|
-
|
3
|
+
# base attributes of the call
|
4
4
|
attr_accessor :content
|
5
|
-
attr_accessor :
|
6
|
-
|
5
|
+
attr_accessor :license_id
|
6
|
+
|
7
|
+
# processing directives
|
8
|
+
attr_accessor :content_type, :output_format, :reltag_base_url, :calculate_relevance, :omit_outputting_original_text
|
9
|
+
attr_accessor :metadata_enables, :metadata_discards
|
10
|
+
|
11
|
+
# user directives
|
12
|
+
attr_accessor :allow_distribution, :allow_search, :external_id, :submitter
|
13
|
+
|
7
14
|
attr_accessor :external_metadata
|
8
|
-
|
15
|
+
|
16
|
+
attr_accessor :use_beta
|
17
|
+
|
9
18
|
def initialize(options={}, &block)
|
10
19
|
options.each {|k,v| send("#{k}=", v)}
|
11
20
|
yield(self) if block_given?
|
12
21
|
end
|
13
|
-
|
14
|
-
def
|
15
|
-
method = method.intern unless method.is_a?(Symbol)
|
16
|
-
raise ArgumentError.new("Unknown method: #{method}") unless AVAILABLE_METHODS.keys.include? method
|
17
|
-
|
22
|
+
|
23
|
+
def enlighten
|
18
24
|
post_args = {
|
19
25
|
"licenseID" => @license_id,
|
20
26
|
"content" => Iconv.iconv('UTF-8//IGNORE', 'UTF-8', "#{@content} ").first[0..-2],
|
21
27
|
"paramsXML" => params_xml
|
22
28
|
}
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
29
|
+
|
30
|
+
@client ||= Curl::Easy.new
|
31
|
+
@client.url = @use_beta ? BETA_REST_ENDPOINT : REST_ENDPOINT
|
32
|
+
@client.timeout = HTTP_TIMEOUT
|
33
|
+
|
34
|
+
post_fields = post_args.map {|k,v| Curl::PostField.content(k, v) }
|
35
|
+
|
36
|
+
do_request(post_fields)
|
28
37
|
end
|
29
|
-
|
38
|
+
|
39
|
+
def params_xml
|
40
|
+
check_params
|
41
|
+
|
42
|
+
params_node = XML::Node.new('c:params')
|
43
|
+
params_node['xmlns:c'] = 'http://s.opencalais.com/1/pred/'
|
44
|
+
params_node['xmlns:rdf'] = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
|
45
|
+
|
46
|
+
processing_node = XML::Node.new('c:processingDirectives')
|
47
|
+
processing_node['c:contentType'] = AVAILABLE_CONTENT_TYPES[@content_type] if @content_type
|
48
|
+
processing_node['c:outputFormat'] = AVAILABLE_OUTPUT_FORMATS[@output_format] if @output_format
|
49
|
+
processing_node['c:reltagBaseURL'] = @reltag_base_url.to_s if @reltag_base_url
|
50
|
+
|
51
|
+
processing_node['c:enableMetadataType'] = @metadata_enables.join(';') unless @metadata_enables.empty?
|
52
|
+
processing_node['c:discardMetadata'] = @metadata_discards.join(';') unless @metadata_discards.empty?
|
53
|
+
processing_node['c:omitOutputtingOriginalText'] = 'true' if @omit_outputting_original_text
|
54
|
+
|
55
|
+
user_node = XML::Node.new('c:userDirectives')
|
56
|
+
user_node['c:allowDistribution'] = @allow_distribution.to_s unless @allow_distribution.nil?
|
57
|
+
user_node['c:allowSearch'] = @allow_search.to_s unless @allow_search.nil?
|
58
|
+
user_node['c:externalID'] = @external_id.to_s if @external_id
|
59
|
+
user_node['c:submitter'] = @submitter.to_s if @submitter
|
60
|
+
|
61
|
+
params_node << processing_node
|
62
|
+
params_node << user_node
|
63
|
+
|
64
|
+
if @external_metadata
|
65
|
+
external_node = XML::Node.new('c:externalMetadata')
|
66
|
+
external_node << @external_metadata
|
67
|
+
params_node << external_node
|
68
|
+
end
|
69
|
+
|
70
|
+
params_node.to_s
|
71
|
+
end
|
72
|
+
|
30
73
|
private
|
31
|
-
def
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
74
|
+
def check_params
|
75
|
+
raise 'missing content' if @content.nil? || @content.empty?
|
76
|
+
|
77
|
+
content_length = @content.length
|
78
|
+
raise 'content is too small' if content_length < MIN_CONTENT_SIZE
|
79
|
+
raise 'content is too large' if content_length > MAX_CONTENT_SIZE
|
80
|
+
|
81
|
+
raise 'missing license id' if @license_id.nil? || @license_id.empty?
|
82
|
+
|
83
|
+
raise 'unknown content type' unless AVAILABLE_CONTENT_TYPES.keys.include?(@content_type) if @content_type
|
84
|
+
raise 'unknown output format' unless AVAILABLE_OUTPUT_FORMATS.keys.include?(@output_format) if @output_format
|
85
|
+
|
86
|
+
%w[calculate_relevance allow_distribution allow_search].each do |variable|
|
87
|
+
value = self.send(variable)
|
88
|
+
unless NilClass === value || TrueClass === value || FalseClass === value
|
89
|
+
raise "expected a boolean value for #{variable} but got #{value}"
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
@metadata_enables ||= []
|
94
|
+
unknown_enables = Set.new(@metadata_enables) - KNOWN_ENABLES
|
95
|
+
raise "unknown metadata enables: #{unknown_enables.to_ainspect}" unless unknown_enables.empty?
|
96
|
+
|
97
|
+
@metadata_discards ||= []
|
98
|
+
unknown_discards = Set.new(@metadata_discards) - KNOWN_DISCARDS
|
99
|
+
raise "unknown metadata discards: #{unknown_discards.to_ainspect}" unless unknown_discards.empty?
|
100
|
+
end
|
101
|
+
|
102
|
+
def do_request(post_fields)
|
103
|
+
unless @client.http_post(post_fields)
|
104
|
+
raise 'unable to post to api endpoint'
|
105
|
+
end
|
106
|
+
|
107
|
+
@client.body_str
|
45
108
|
end
|
46
109
|
end
|
47
110
|
end
|
data/lib/calais/response.rb
CHANGED
@@ -1,77 +1,195 @@
|
|
1
1
|
module Calais
|
2
2
|
class Response
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
3
|
+
MATCHERS = {
|
4
|
+
:docinfo => 'DocInfo',
|
5
|
+
:docinfometa => 'DocInfoMeta',
|
6
|
+
:defaultlangid => 'DefaultLangId',
|
7
|
+
:doccat => 'DocCat',
|
8
|
+
:entities => 'type/em/e',
|
9
|
+
:relations => 'type/em/r',
|
10
|
+
:geographies => 'type/er',
|
11
|
+
:instances => 'type/sys/InstanceInfo',
|
12
|
+
:relevances => 'type/sys/RelevanceInfo',
|
13
|
+
}
|
14
|
+
|
15
|
+
attr_accessor :submitter_code, :signature, :language, :submission_date, :request_id, :doc_title, :doc_date
|
16
|
+
attr_accessor :hashes, :entities, :relations, :geographies, :categories
|
17
|
+
|
18
|
+
def initialize(rdf_string)
|
19
|
+
@raw_response = rdf_string
|
20
|
+
|
21
|
+
@hashes = []
|
22
|
+
@entities = []
|
23
|
+
@relations = []
|
24
|
+
@geographies = []
|
25
|
+
@relevances = {} # key = String hash, val = Float relevance
|
26
|
+
@categories = []
|
27
|
+
|
28
|
+
extract_data
|
29
|
+
end
|
30
|
+
|
31
|
+
class Entity
|
32
|
+
attr_accessor :hash, :type, :attributes, :relevance, :instances
|
33
|
+
end
|
34
|
+
|
35
|
+
class Relation
|
36
|
+
attr_accessor :hash, :type, :attributes, :instances
|
15
37
|
end
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
38
|
+
|
39
|
+
class Geography
|
40
|
+
attr_accessor :name, :hash, :attributes
|
41
|
+
end
|
42
|
+
|
43
|
+
class Category
|
44
|
+
attr_accessor :name, :score
|
45
|
+
end
|
46
|
+
|
47
|
+
class Instance
|
48
|
+
attr_accessor :prefix, :exact, :suffix, :offset, :length
|
49
|
+
|
50
|
+
# Makes a new Instance object from an appropriate LibXML::XML::Node.
|
51
|
+
def self.from_node(node)
|
52
|
+
instance = self.new
|
53
|
+
instance.prefix = node.find_first("c:prefix").content
|
54
|
+
instance.exact = node.find_first("c:exact").content
|
55
|
+
instance.suffix = node.find_first("c:suffix").content
|
56
|
+
instance.offset = node.find_first("c:offset").content.to_i
|
57
|
+
instance.length = node.find_first("c:length").content.to_i
|
58
|
+
|
59
|
+
instance
|
20
60
|
end
|
21
61
|
end
|
22
|
-
|
62
|
+
|
63
|
+
class CalaisHash
|
64
|
+
attr_accessor :value
|
65
|
+
|
66
|
+
def self.find_or_create(hash, hashes)
|
67
|
+
if !selected = hashes.select {|h| h.value == hash }.first
|
68
|
+
selected = self.new
|
69
|
+
selected.value = hash
|
70
|
+
hashes << selected
|
71
|
+
end
|
72
|
+
|
73
|
+
selected
|
74
|
+
end
|
75
|
+
end
|
76
|
+
|
23
77
|
private
|
24
|
-
def
|
25
|
-
|
26
|
-
|
27
|
-
|
78
|
+
def extract_data
|
79
|
+
doc = XML::Parser.string(@raw_response).parse
|
80
|
+
|
81
|
+
doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfometa]}')]/..").each do |node|
|
82
|
+
@language = node['language']
|
83
|
+
@submission_date = DateTime.parse node['submissionDate']
|
84
|
+
|
85
|
+
attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
|
86
|
+
|
87
|
+
@signature = attributes.delete('signature')
|
88
|
+
@submitter_code = attributes.delete('submitterCode')
|
89
|
+
|
90
|
+
node.remove!
|
91
|
+
end
|
92
|
+
|
93
|
+
doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:docinfo]}')]/..").each do |node|
|
94
|
+
@request_id = node['calaisRequestID']
|
95
|
+
|
96
|
+
attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
|
97
|
+
|
98
|
+
@doc_title = attributes.delete('docTitle')
|
99
|
+
@doc_date = Date.parse attributes.delete('docDate')
|
100
|
+
|
101
|
+
node.remove!
|
102
|
+
end
|
103
|
+
|
104
|
+
@categories = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:doccat]}')]/..").map do |node|
|
105
|
+
category = Category.new
|
106
|
+
category.name = node.find_first("c:categoryName").content
|
107
|
+
category.score = node.find_first("c:score").content.to_f
|
108
|
+
|
109
|
+
node.remove!
|
110
|
+
category
|
111
|
+
end
|
112
|
+
|
113
|
+
@relevances = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relevances]}')]/..").inject({}) do |acc, node|
|
114
|
+
subject_hash = node.find_first("c:subject")[:resource].split('/')[-1]
|
115
|
+
acc[subject_hash] = node.find_first("c:relevance").content.to_f
|
116
|
+
|
117
|
+
node.remove!
|
118
|
+
acc
|
119
|
+
end
|
120
|
+
|
121
|
+
@entities = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:entities]}')]/..").map do |node|
|
122
|
+
extracted_hash = node['about'].split('/')[-1] rescue nil
|
123
|
+
|
124
|
+
entity = Entity.new
|
125
|
+
entity.hash = CalaisHash.find_or_create(extracted_hash, @hashes)
|
126
|
+
entity.type = extract_type(node)
|
127
|
+
entity.attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
|
128
|
+
|
129
|
+
entity.relevance = @relevances[extracted_hash]
|
130
|
+
entity.instances = extract_instances(doc, extracted_hash)
|
131
|
+
|
132
|
+
node.remove!
|
133
|
+
entity
|
134
|
+
end
|
135
|
+
|
136
|
+
@relations = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:relations]}')]/..").map do |node|
|
137
|
+
extracted_hash = node['about'].split('/')[-1] rescue nil
|
138
|
+
|
139
|
+
relation = Relation.new
|
140
|
+
relation.hash = CalaisHash.find_or_create(extracted_hash, @hashes)
|
141
|
+
relation.type = extract_type(node)
|
142
|
+
relation.attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
|
143
|
+
relation.instances = extract_instances(doc, extracted_hash)
|
144
|
+
|
145
|
+
node.remove!
|
146
|
+
relation
|
147
|
+
end
|
148
|
+
|
149
|
+
@geographies = doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:geographies]}')]/..").map do |node|
|
150
|
+
attributes = extract_attributes(node.find("*[contains(name(), 'c:')]"))
|
151
|
+
|
152
|
+
geography = Geography.new
|
153
|
+
geography.name = attributes.delete('name')
|
154
|
+
geography.hash = attributes.delete('subject')
|
155
|
+
geography.attributes = attributes
|
156
|
+
|
157
|
+
node.remove!
|
158
|
+
geography
|
159
|
+
end
|
160
|
+
|
161
|
+
doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:defaultlangid]}')]/..").each { |node| node.remove! }
|
162
|
+
doc.root.find("./*").each { |node| node.remove! }
|
163
|
+
|
164
|
+
return
|
28
165
|
end
|
29
|
-
|
30
|
-
def
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
Range.new(start, start+n2.find_first("c:length").content.to_i)
|
39
|
-
end
|
40
|
-
|
41
|
-
Name.new(
|
42
|
-
:name => name,
|
43
|
-
:hash => hash,
|
44
|
-
:type => type,
|
45
|
-
:locations => locations
|
46
|
-
)
|
166
|
+
|
167
|
+
def extract_instances(doc, hash)
|
168
|
+
doc.root.find("rdf:Description/rdf:type[contains(@rdf:resource, '#{MATCHERS[:instances]}')]/..").select do |instance_node|
|
169
|
+
instance_node.find_first("c:subject")[:resource].split("/")[-1] == hash
|
170
|
+
end.map do |instance_node|
|
171
|
+
instance = Instance.from_node(instance_node)
|
172
|
+
instance_node.remove!
|
173
|
+
|
174
|
+
instance
|
47
175
|
end
|
48
176
|
end
|
49
|
-
|
50
|
-
def
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
Range.new(start, start+n2.find_first("c:length").content.to_i)
|
66
|
-
end
|
67
|
-
|
68
|
-
|
69
|
-
@relationships << Relationship.new(
|
70
|
-
:type => type,
|
71
|
-
:hash => hash,
|
72
|
-
:metadata => metadata,
|
73
|
-
:locations => locations
|
74
|
-
)
|
177
|
+
|
178
|
+
def extract_type(node)
|
179
|
+
node.find("*[name()='rdf:type']")[0]['resource'].split('/')[-1]
|
180
|
+
rescue
|
181
|
+
nil
|
182
|
+
end
|
183
|
+
|
184
|
+
def extract_attributes(nodes)
|
185
|
+
nodes.inject({}) do |hsh, node|
|
186
|
+
value = if node['resource']
|
187
|
+
extracted_hash = node['resource'].split('/')[-1] rescue nil
|
188
|
+
CalaisHash.find_or_create(extracted_hash, @hashes)
|
189
|
+
else
|
190
|
+
node.content
|
191
|
+
end
|
192
|
+
hsh.merge(node.name => value)
|
75
193
|
end
|
76
194
|
end
|
77
195
|
end
|