talia_core 0.5.4 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION.yml +2 -2
- data/config/talia_core.yml.example +37 -35
- data/generators/talia_admin/templates/app/models/fake_source.rb +93 -0
- data/generators/talia_admin/templates/app/models/talia_collection.rb +13 -37
- data/generators/talia_base/talia_base_generator.rb +0 -1
- data/generators/talia_base/templates/app/controllers/custom_templates_controller.rb +2 -1
- data/generators/talia_base/templates/app/controllers/sources_controller.rb +1 -1
- data/generators/talia_base/templates/script/configure_talia +56 -73
- data/generators/talia_swicky/talia_swicky_generator.rb +18 -0
- data/generators/talia_swicky/templates/app/controllers/swicky_notebooks_controller.rb +111 -0
- data/generators/talia_swicky/templates/app/helpers/swicky_notebooks_helper.rb +29 -0
- data/generators/talia_swicky/templates/app/views/swicky_notebooks/index.builder +6 -0
- data/generators/talia_swicky/templates/app/views/swicky_notebooks/index.html.erb +10 -0
- data/generators/talia_swicky/templates/app/views/swicky_notebooks/show.html.erb +11 -0
- data/generators/talia_swicky/templates/test/fixtures/notebook.rdf +862 -0
- data/generators/talia_swicky/templates/test/functional/swicky_notebooks_controller_test.rb +44 -0
- data/lib/core_ext/boolean.rb +23 -0
- data/lib/core_ext/jdbc_rake_monkeypatch.rb +22 -0
- data/lib/core_ext/nil_class.rb +11 -0
- data/lib/core_ext/object.rb +34 -0
- data/lib/core_ext/string.rb +15 -0
- data/lib/custom_template.rb +3 -1
- data/lib/loader_helper.rb +16 -3
- data/lib/mysql.rb +7 -7
- data/lib/progressbar.rb +2 -2
- data/lib/swicky/exhibit_json/item.rb +129 -0
- data/lib/swicky/exhibit_json/item_collection.rb +129 -0
- data/lib/swicky/fragment.rb +0 -0
- data/lib/swicky/note.rb +7 -0
- data/lib/swicky/notebook.rb +78 -12
- data/lib/talia_core/active_source.rb +45 -13
- data/lib/talia_core/active_source_parts/class_methods.rb +154 -26
- data/lib/talia_core/active_source_parts/finders.rb +49 -26
- data/lib/talia_core/active_source_parts/predicate_handler.rb +71 -23
- data/lib/talia_core/active_source_parts/rdf/ntriples_reader.rb +13 -0
- data/lib/talia_core/active_source_parts/rdf/rdf_reader.rb +99 -0
- data/lib/talia_core/active_source_parts/rdf/rdfxml_reader.rb +12 -0
- data/lib/talia_core/active_source_parts/{rdf.rb → rdf_handler.rb} +52 -19
- data/lib/talia_core/active_source_parts/xml/generic_reader.rb +151 -260
- data/lib/talia_core/active_source_parts/xml/generic_reader_add_statements.rb +97 -0
- data/lib/talia_core/active_source_parts/xml/generic_reader_helpers.rb +88 -0
- data/lib/talia_core/active_source_parts/xml/generic_reader_import_statements.rb +239 -0
- data/lib/talia_core/active_source_parts/xml/rdf_builder.rb +14 -7
- data/lib/talia_core/active_source_parts/xml/source_builder.rb +7 -3
- data/lib/talia_core/active_source_parts/xml/source_reader.rb +17 -2
- data/lib/talia_core/collection.rb +192 -1
- data/lib/talia_core/data_types/data_loader.rb +88 -18
- data/lib/talia_core/data_types/data_record.rb +24 -2
- data/lib/talia_core/data_types/delayed_copier.rb +13 -3
- data/lib/talia_core/data_types/file_record.rb +24 -13
- data/lib/talia_core/data_types/file_store.rb +111 -94
- data/lib/talia_core/data_types/iip_data.rb +104 -23
- data/lib/talia_core/data_types/iip_loader.rb +102 -56
- data/lib/talia_core/data_types/image_data.rb +3 -1
- data/lib/talia_core/data_types/media_link.rb +4 -1
- data/lib/talia_core/data_types/mime_mapping.rb +65 -38
- data/lib/talia_core/data_types/path_helpers.rb +23 -17
- data/lib/talia_core/data_types/pdf_data.rb +9 -6
- data/lib/talia_core/data_types/simple_text.rb +5 -4
- data/lib/talia_core/data_types/xml_data.rb +53 -25
- data/lib/talia_core/dummy_handler.rb +3 -2
- data/lib/talia_core/errors.rb +13 -27
- data/lib/talia_core/initializer.rb +44 -4
- data/lib/talia_core/oai/active_source_model.rb +13 -6
- data/lib/talia_core/oai/active_source_oai_adapter.rb +13 -12
- data/lib/talia_core/rdf_import.rb +1 -1
- data/lib/talia_core/rdf_resource.rb +2 -1
- data/lib/talia_core/semantic_collection_wrapper.rb +143 -151
- data/lib/talia_core/semantic_property.rb +4 -0
- data/lib/talia_core/semantic_relation.rb +84 -33
- data/lib/talia_core/source.rb +45 -25
- data/lib/talia_core/source_fragment.rb +7 -0
- data/lib/talia_core/source_transfer_object.rb +3 -1
- data/lib/talia_core/source_types/agent.rb +16 -0
- data/lib/talia_core/source_types/dc_resource.rb +3 -3
- data/lib/talia_core/source_types/marcont_resource.rb +15 -0
- data/lib/talia_core/source_types/skos_concept.rb +17 -0
- data/lib/talia_dependencies.rb +1 -1
- data/lib/talia_util.rb +1 -1
- data/lib/talia_util/bar_progressor.rb +1 -1
- data/lib/talia_util/image_conversions.rb +8 -2
- data/lib/talia_util/import_job_helper.rb +40 -3
- data/lib/talia_util/io_helper.rb +15 -4
- data/lib/talia_util/progressable.rb +50 -1
- data/lib/talia_util/rake_tasks.rb +3 -21
- data/lib/talia_util/test_helpers.rb +6 -1
- data/lib/talia_util/util.rb +108 -27
- data/lib/talia_util/xml/base_builder.rb +28 -1
- data/lib/talia_util/xml/rdf_builder.rb +81 -5
- data/lib/tasks/talia_core_tasks.rake +2 -0
- data/test/core_ext/boolean_test.rb +26 -0
- data/test/core_ext/nil_class_test.rb +14 -0
- data/test/core_ext/object_test.rb +26 -0
- data/test/core_ext/string_test.rb +11 -0
- data/test/swicky/json_encoder_test.rb +51 -42
- data/test/swicky/notebook_test.rb +13 -6
- data/test/talia_core/active_source_finder_interface_test.rb +30 -0
- data/test/talia_core/active_source_test.rb +445 -34
- data/test/talia_core/collection_test.rb +332 -0
- data/test/talia_core/data_types/file_record_test.rb +2 -23
- data/test/talia_core/ntriples_reader_test.rb +49 -0
- data/test/talia_core/rdfxml_reader_test.rb +51 -0
- data/test/talia_core/source_test.rb +12 -0
- data/test/talia_util/import_job_helper_test.rb +19 -12
- metadata +190 -90
- data/config/database.yml +0 -19
- data/config/rdfstore.yml +0 -13
- data/config/talia_core.yml +0 -24
- data/generators/talia_base/templates/migrations/bj_migration.rb +0 -10
- data/lib/JXslt/jxslt.rb +0 -60
- data/lib/swicky/json_encoder.rb +0 -179
- data/lib/talia_core/agent.rb +0 -14
- data/lib/talia_core/background_jobs/job.rb +0 -82
- data/lib/talia_core/background_jobs/progress_job.rb +0 -68
- data/lib/talia_core/data_types/temp_file_handling.rb +0 -85
- data/lib/talia_core/ordered_source.rb +0 -228
- data/lib/talia_core/semantic_collection_item.rb +0 -94
- data/lib/talia_core/source_types/collection.rb +0 -15
- data/lib/talia_util/progressbar.rb +0 -236
- data/tasks/talia_core_tasks.rake +0 -2
- data/test/talia_core/ordered_source_test.rb +0 -394
- data/test/talia_core/semantic_collection_item_test.rb +0 -125
@@ -1,19 +1,56 @@
|
|
1
1
|
require 'hpricot'
|
2
2
|
require 'pathname'
|
3
|
+
require "set"
|
4
|
+
require "uri"
|
3
5
|
|
4
6
|
module TaliaCore
|
5
7
|
module ActiveSourceParts
|
6
8
|
module Xml
|
7
9
|
|
8
|
-
# Superclass for importers/readers of generic xml files.
|
9
|
-
#
|
10
|
-
# the
|
10
|
+
# Superclass for importers/readers of generic xml files. The idea is that the
|
11
|
+
# user can very easily create subclasses of this that can import almost any XML
|
12
|
+
# format imaginable - see the SourceReader class for a simple example.
|
11
13
|
#
|
12
|
-
#
|
14
|
+
# The result of the "import" is a hash (available through #sources) which contains
|
15
|
+
# all the data from the import file in a standardized format. This hash can then
|
16
|
+
# be processed by the ActiveSource class to create the actual sources.
|
13
17
|
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
18
|
+
# = Writing XML importers
|
19
|
+
#
|
20
|
+
# Writing an importer is quite easy, all it takes is to subclass this class and
|
21
|
+
# then describe the structure of the element using the methods defined here.
|
22
|
+
#
|
23
|
+
# The reader subclass should declare handlers for the various XML tags that are
|
24
|
+
# in the file. See GenericReaderImportStatements for an explanation of how the
|
25
|
+
# handlers work and how they are declared. This module also contains methods to
|
26
|
+
# retrieve data from the XML in order to use it in the import
|
27
|
+
#
|
28
|
+
# The GenericReaderAddStatements module contains the methods that are used to add data
|
29
|
+
# to the source that is currently being imported.
|
30
|
+
#
|
31
|
+
# In addition to the SourceReader class that can be used as an example, the
|
32
|
+
# other modules also contain some code examples for the mechanism.
|
33
|
+
#
|
34
|
+
# There are also some GenericReaderHelpers that can be used during the import.
|
35
|
+
#
|
36
|
+
# = Using an Importer
|
37
|
+
#
|
38
|
+
# The default way of using an importer is usually indirectly, through
|
39
|
+
# ActiveSource.create_from_xml. For direct use the sources_from_url or
|
40
|
+
# sources_from methods can be called - these are the entry points for the
|
41
|
+
# import process.
|
42
|
+
#
|
43
|
+
# = Result of the import operation
|
44
|
+
#
|
45
|
+
# The result of an import is an Array that contains a number of hashes. Each
|
46
|
+
# of those can be passed to ActiveSource.new to create a new source object
|
47
|
+
# with the given attributes.
|
48
|
+
#
|
49
|
+
# = Progress Reporting
|
50
|
+
#
|
51
|
+
# The class implements the TaliaUtil::Progressable interface, and if a
|
52
|
+
# progressor object is assigned, it will report the progress to it during
|
53
|
+
# the import operation.
|
17
54
|
class GenericReader
|
18
55
|
|
19
56
|
|
@@ -21,8 +58,17 @@ module TaliaCore
|
|
21
58
|
include TaliaUtil::IoHelper
|
22
59
|
include TaliaUtil::Progressable
|
23
60
|
include TaliaUtil::UriHelper
|
24
|
-
|
25
|
-
#
|
61
|
+
|
62
|
+
# Include all the parts
|
63
|
+
include GenericReaderImportStatements
|
64
|
+
extend GenericReaderImportStatements::Handlers
|
65
|
+
include GenericReaderAddStatements
|
66
|
+
include GenericReaderHelpers
|
67
|
+
|
68
|
+
# Class for the current import state. This contains the XML element that
|
69
|
+
# is currently imported, and the hash with the attributes for the currently
|
70
|
+
# importing source. In the importer, the current State will be available as
|
71
|
+
# @current
|
26
72
|
class State
|
27
73
|
attr_accessor :attributes, :element
|
28
74
|
end
|
@@ -35,7 +81,7 @@ module TaliaCore
|
|
35
81
|
open_generic(url, options) { |io| sources_from(io, progressor, url) }
|
36
82
|
end
|
37
83
|
|
38
|
-
#
|
84
|
+
# Read the sources from the given IO stream. You may specify a base
|
39
85
|
# url to help the reader to decide from where files should be opened.
|
40
86
|
def sources_from(source, progressor = nil, base_url=nil)
|
41
87
|
reader = self.new(source)
|
@@ -44,17 +90,6 @@ module TaliaCore
|
|
44
90
|
reader.sources
|
45
91
|
end
|
46
92
|
|
47
|
-
# Create a handler for an element from which a source will be created
|
48
|
-
def element(element_name, &handler_block)
|
49
|
-
element_handler(element_name, true, &handler_block)
|
50
|
-
end
|
51
|
-
|
52
|
-
# Create a handler for an element which will be processed but from which
|
53
|
-
# no source will be created
|
54
|
-
def plain_element(element_name, &handler_block)
|
55
|
-
element_handler(element_name, false, &handler_block)
|
56
|
-
end
|
57
|
-
|
58
93
|
# Set the reader to allow the use of root elements for import
|
59
94
|
def can_use_root
|
60
95
|
@use_root = true
|
@@ -71,8 +106,13 @@ module TaliaCore
|
|
71
106
|
|
72
107
|
private
|
73
108
|
|
74
|
-
# Adds an handler for the the given element.
|
75
|
-
#
|
109
|
+
# Adds a handler for the given element. This will basically create an instance method
|
110
|
+
# <element_name>_handler, and add some bookkeeping information to the class.
|
111
|
+
#
|
112
|
+
# See call_handler to see how handlers are called.
|
113
|
+
#
|
114
|
+
# The creating parameter will indicate whether the handler, when called, will create a new
|
115
|
+
# source or not.
|
76
116
|
def element_handler(element_name, creating, &handler_block)
|
77
117
|
element_name = "#{element_name}_handler".to_sym
|
78
118
|
raise(ArgumentError, "Duplicate handler for #{element_name}") if(self.respond_to?(element_name))
|
@@ -84,10 +124,31 @@ module TaliaCore
|
|
84
124
|
end
|
85
125
|
end # End class methods
|
86
126
|
|
127
|
+
# Create a new reader. This parses the XML contained in the source and makes
|
128
|
+
# the resulting XML document available to the reader
|
87
129
|
def initialize(source)
|
88
130
|
@doc = Hpricot.XML(source)
|
89
131
|
end
|
90
132
|
|
133
|
+
# Build a list of sources. This will return an array of hashes, and each
|
134
|
+
# hash can be used to create a new source with ActiveSource.new.
|
135
|
+
#
|
136
|
+
# The result will be cached and once read, subsequent calls will return
|
137
|
+
# the same set of "sources" again
|
138
|
+
#
|
139
|
+
# *Example of Result*:
|
140
|
+
#
|
141
|
+
# [
|
142
|
+
# {
|
143
|
+
# 'uri' => 'http://foobar.com',
|
144
|
+
# 'type' => 'TaliaCore::Collection',
|
145
|
+
# 'http://rdfbar/foo' => '<http://taliainstall/otherthing>'
|
146
|
+
# },
|
147
|
+
# {
|
148
|
+
# 'uri' => 'http://taliainstall/otherthing',
|
149
|
+
# 'type' => 'TaliaCore::DataTypes::DummySource'
|
150
|
+
# }
|
151
|
+
# ]
|
91
152
|
def sources
|
92
153
|
return @sources if(@sources)
|
93
154
|
@sources = {}
|
@@ -100,73 +161,108 @@ module TaliaCore
|
|
100
161
|
end
|
101
162
|
|
102
163
|
# This is the "base" for resolving file URLs. If a file URL is found
|
103
|
-
# to be relative, it will be relative to this URL
|
164
|
+
# to be relative, it will be relative to this URL.
|
165
|
+
#
|
166
|
+
# If no base URL was specified this will use the file system path to
|
167
|
+
# TALIA_ROOT
|
104
168
|
def base_file_url
|
105
169
|
@base_file_url ||= TALIA_ROOT
|
106
170
|
end
|
107
171
|
|
108
|
-
# Assign a new
|
172
|
+
# Assign a new base_file_url
|
109
173
|
def base_file_url=(new_base_url)
|
110
174
|
@base_file_url = base_for(new_base_url)
|
111
175
|
end
|
112
176
|
|
177
|
+
# This will add the given source to the global result. source_attribs is a hash
|
178
|
+
# with the attributes of one source. If that source already exists in the global
|
179
|
+
# results, the two versions will be merged:
|
180
|
+
#
|
181
|
+
# * If the property is a list of values (an Array) in both the new and the old
|
182
|
+
# version, these lists will be joined.
|
183
|
+
# * Otherwise, the old property will be overwritten by the new one
|
184
|
+
#
|
185
|
+
# The source_attribs *must* contain a URI, and they *must not* change a type
|
186
|
+
# field that is anything other than nil or TaliaCore::SourceTypes::DummySource
|
113
187
|
def add_source_with_check(source_attribs)
|
114
188
|
assit_kind_of(Hash, source_attribs)
|
189
|
+
# Check if we have a URI
|
115
190
|
if((uri = source_attribs['uri']).blank?)
|
116
191
|
raise(RuntimeError, "Problem reading from XML: Source without URI (#{source_attribs.inspect})")
|
117
192
|
else
|
118
|
-
uri = irify(uri)
|
119
|
-
|
120
|
-
@sources[uri]
|
121
|
-
|
122
|
-
|
123
|
-
|
193
|
+
source_attribs['uri'] = irify(uri) # "Irify" the URI (see UriHelper module)
|
194
|
+
@sources[uri] ||= {} # This is the hash in the global result for our uri
|
195
|
+
@sources[uri].each do |key, value| # Loop through existing results
|
196
|
+
next unless(new_value = source_attribs.delete(key)) # Skip all existing that are not in the new attributes
|
197
|
+
# Assert that a type only changes if it was DummySource before - any other type change would indicate some problem w/ the data
|
124
198
|
assit(!((key.to_sym == :type) && (value != 'TaliaCore::SourceTypes::DummySource') && (value != new_value)), "Type should not change during import, may be a format problem. (From #{value} to #{new_value})")
|
125
199
|
if(new_value.is_a?(Array) && value.is_a?(Array))
|
126
|
-
# If both are Array-types, the new elements will be appended
|
200
|
+
# If both new and old are Array-types, the new elements will be appended
|
127
201
|
# and duplicates will be removed
|
128
202
|
@sources[uri][key] = (value + new_value).uniq
|
129
203
|
else
|
130
|
-
# Otherwise just replace
|
204
|
+
# Otherwise just replace the old value with the new one
|
131
205
|
@sources[uri][key] = new_value
|
132
206
|
end
|
133
207
|
end
|
134
|
-
#
|
208
|
+
# Everything that is only in the new attributes can be merged in
|
135
209
|
@sources[uri].merge!(source_attribs)
|
136
210
|
end
|
137
211
|
end
|
138
212
|
|
213
|
+
# Returns a hash with all handlers that "create" (that is, they
|
214
|
+
# create a new source when called). This is taken from the class'
|
215
|
+
# create_handlers accessor
|
139
216
|
def create_handlers
|
140
217
|
@handlers ||= (self.class.create_handlers || {})
|
141
218
|
end
|
142
219
|
|
220
|
+
# Read a single source from an XML element.
|
221
|
+
# Pass in the XML element and an (optional)
|
222
|
+
# block. This will call the handler (or block, see call_handler)
|
223
|
+
# and add the result to the global result set using
|
224
|
+
# add_source_with_check
|
143
225
|
def read_source(element, &block)
|
144
226
|
attribs = call_handler(element, &block)
|
145
227
|
add_source_with_check(attribs) if(attribs)
|
146
228
|
end
|
147
229
|
|
230
|
+
# Like read_children_of, but using the standard progressor of the reader
|
148
231
|
def read_children_with_progress(element, &block)
|
149
232
|
run_with_progress('Xml Read', element.children.size) do |prog|
|
150
233
|
read_children_of(element, prog, &block)
|
151
234
|
end
|
152
235
|
end
|
153
236
|
|
237
|
+
# Read source data from each child of the given element
|
238
|
+
# using read_source. Optionally reports the progress to
|
239
|
+
# the given progressor.
|
154
240
|
def read_children_of(element, progress = nil, &block)
|
155
241
|
element.children.each do |element|
|
156
242
|
progress.inc if(progress)
|
157
|
-
next unless(element.is_a?(Hpricot::Elem))
|
243
|
+
next unless(element.is_a?(Hpricot::Elem)) # only use XML elements
|
158
244
|
read_source(element, &block)
|
159
245
|
end
|
160
246
|
end
|
161
247
|
|
248
|
+
# Same as use_root of the current class
|
162
249
|
def use_root
|
163
250
|
self.class.use_root
|
164
251
|
end
|
165
252
|
|
166
|
-
private
|
167
|
-
|
168
253
|
# Call the handler method for the given element. If a block is given, that
|
169
|
-
# will be called instead
|
254
|
+
# will be called instead. Pass in the XML element to read from.
|
255
|
+
#
|
256
|
+
# This saves the @current State object before calling the handler, and
|
257
|
+
# restores it after the call is complete. Thus nested calls will have their
|
258
|
+
# own state, but the state will be restored once you return to the
|
259
|
+
# parent handler.
|
260
|
+
#
|
261
|
+
# If a block is given, that block will be executed as the handler. Otherwise
|
262
|
+
# the system checks for the "<element.name>_handler" method, and calls it.
|
263
|
+
# (See also element_handler)
|
264
|
+
#
|
265
|
+
# If no block is given and no handler is found, an error is logged.
|
170
266
|
def call_handler(element)
|
171
267
|
handler_name = "#{element.name}_handler".to_sym
|
172
268
|
if(self.respond_to?(handler_name) || block_given?)
|
@@ -189,188 +285,20 @@ module TaliaCore
|
|
189
285
|
end
|
190
286
|
end
|
191
287
|
|
288
|
+
# Checks if the current status has an attribute hash, which means that there
|
289
|
+
# is a "current" source being created at the moment.
|
192
290
|
def chk_create
|
193
291
|
raise(RuntimeError, "Illegal operation when not creating a source") unless(@current.attributes)
|
194
292
|
end
|
195
293
|
|
196
|
-
#
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
set_element(predicate, object.is_a?(String) ? object : object.to_s, required)
|
205
|
-
end
|
206
|
-
end
|
207
|
-
|
208
|
-
# Adds a value with the given predicate and language/type information
|
209
|
-
def add_i18n(predicate, object, lang, type=nil)
|
210
|
-
object = object.blank? ? nil : TaliaCore::PropertyString.new(object, lang, type)
|
211
|
-
add(predicate, object)
|
212
|
-
end
|
213
|
-
|
214
|
-
# Adds a date field. This will attempt to parse the original string
|
215
|
-
# and write the result as an ISO 8601 compliant date string. Note
|
216
|
-
# that this won't be able to parse everything you throw at it, though.
|
217
|
-
def add_date(predicate, date, required = false, fmt = nil)
|
218
|
-
add(predicate, to_iso8601(parse_date(date, fmt)), required)
|
219
|
-
end
|
220
|
-
|
221
|
-
# Adds a date interval as an ISO 8601 compliant date string. See
|
222
|
-
# add_date for more info. If only one of the dates is given this
|
223
|
-
# will add a normal date string instead of an interval.
|
224
|
-
def add_date_interval(predicate, start_date, end_date, fmt = nil)
|
225
|
-
return if(start_date.blank? && end_date.blank?)
|
226
|
-
if(start_date.blank?)
|
227
|
-
add_date(predicate, start_date, true, fmt)
|
228
|
-
elsif(end_date.blank?)
|
229
|
-
add_date(predicate, end_date, true, fmt)
|
230
|
-
else
|
231
|
-
add(predicate, "#{to_iso8601(parse_date(start_date, fmt))}/#{to_iso8601(parse_date(end_date, fmt))}", required)
|
232
|
-
end
|
233
|
-
end
|
234
|
-
|
235
|
-
# Adds a relation for the given predicate
|
236
|
-
def add_rel(predicate, object, required = false)
|
237
|
-
object = check_objects(object)
|
238
|
-
if(!object)
|
239
|
-
raise(ArgumentError, "Relation with empty object on #{predicate} (#{@current.attributes['uri']}).") if(required)
|
240
|
-
return
|
241
|
-
end
|
242
|
-
if(object.kind_of?(Array))
|
243
|
-
object.each do |obj|
|
244
|
-
raise(ArgumentError, "Cannot add relation on database field <#{predicate}> - <#{object.inspect}>") if(ActiveSource.db_attr?(predicate))
|
245
|
-
set_element(predicate, "<#{irify(obj)}>", required)
|
246
|
-
end
|
247
|
-
else
|
248
|
-
raise(ArgumentError, "Cannot add relation on database field") if(ActiveSource.db_attr?(predicate))
|
249
|
-
set_element(predicate, "<#{irify(object)}>", required)
|
250
|
-
end
|
251
|
-
end
|
252
|
-
|
253
|
-
# Add a file to the source being imported. See the DataLoader module for a description of
|
254
|
-
# the possible options
|
255
|
-
def add_file(urls, options = {})
|
256
|
-
return if(urls.blank?)
|
257
|
-
urls = [ urls ] unless(urls.is_a?(Array))
|
258
|
-
files = urls.collect { |url| { :url => get_absolute_file_url(url), :options => options } }
|
259
|
-
@current.attributes[:files] = files if(files.size > 0)
|
260
|
-
end
|
261
|
-
|
262
|
-
# Gets an absolute path to the given file url, using the base_file_url
|
263
|
-
def get_absolute_file_url(url)
|
264
|
-
orig_url = url.to_s.strip
|
265
|
-
|
266
|
-
url = file_url(orig_url)
|
267
|
-
# If a file:// was stripped from the url, this means it will always point
|
268
|
-
# to a file
|
269
|
-
force_file = (orig_url != url)
|
270
|
-
# Indicates whether the base url is a network url or a file/directory
|
271
|
-
base_is_net = !base_file_url.is_a?(String)
|
272
|
-
# Try to find if we have a "net" URL if we aren't sure if this is a file. In
|
273
|
-
# case the base url is a network url, we'll always assume that the
|
274
|
-
# url is also a net thing. Otherwise we only have a net url if it contains a
|
275
|
-
# '://' string
|
276
|
-
is_net_url = !force_file && (base_is_net || url.include?('://'))
|
277
|
-
# The url is absolute if there is a : character to be found
|
278
|
-
|
279
|
-
|
280
|
-
if(is_net_url)
|
281
|
-
base_is_net ? join_url(base_file_url, url) : url
|
282
|
-
else
|
283
|
-
base_is_net ? url : join_files(base_file_url, url)
|
284
|
-
end
|
285
|
-
end
|
286
|
-
|
287
|
-
# Joins the two files. If the path is an absolute path,
|
288
|
-
# the base_dir is ignored
|
289
|
-
def join_files(base_dir, path)
|
290
|
-
if(Pathname.new(path).relative?)
|
291
|
-
File.join(base_dir, path)
|
292
|
-
else
|
293
|
-
path
|
294
|
-
end
|
295
|
-
end
|
296
|
-
|
297
|
-
# Joins the two url parts. If the path is an absolute URL,
|
298
|
-
# the base_url is ignored.
|
299
|
-
def join_url(base_url, path)
|
300
|
-
return path if(path.include?(':')) # Absolute URL contains ':'
|
301
|
-
if(path[0..0] == '/')
|
302
|
-
new_url = base_url.clone
|
303
|
-
new_url.path = path
|
304
|
-
new_url.to_s
|
305
|
-
else
|
306
|
-
(base_file_url + path).to_s
|
307
|
-
end
|
308
|
-
end
|
309
|
-
|
310
|
-
# Returns true if the given source was already imported. This can return false
|
311
|
-
# if you call this for the currently importing source.
|
312
|
-
def source_exists?(uri)
|
313
|
-
!@sources[uri].blank?
|
314
|
-
end
|
315
|
-
|
316
|
-
# Adds a source from the given sub-element. You may either pass a block with
|
317
|
-
# the code to import or the name of an already registered element. If the
|
318
|
-
# special value :from_all_sources is given, it will read from all sub-elements for which
|
319
|
-
# there are registered handlers
|
320
|
-
def add_source(sub_element = nil, &block)
|
321
|
-
if(sub_element)
|
322
|
-
if(sub_element == :from_all_sources)
|
323
|
-
read_children_of(@current.element)
|
324
|
-
else
|
325
|
-
@current.element.search("/#{sub_element}").each { |sub_elem| read_source(sub_elem, &block) }
|
326
|
-
end
|
327
|
-
else
|
328
|
-
raise(ArgumentError, "When adding elements on the fly, you must use a block") unless(block)
|
329
|
-
attribs = call_handler(@current.element, &block)
|
330
|
-
add_source_with_check(attribs) if(attribs)
|
331
|
-
end
|
332
|
-
end
|
333
|
-
|
334
|
-
# Returns true if the currently imported element already contains type information
|
335
|
-
# AND is of the given type.
|
336
|
-
def current_is_a?(type)
|
337
|
-
assit_kind_of(Class, type)
|
338
|
-
@current.attributes['type'] && ("TaliaCore::#{@current.attributes['type']}".constantize <= type)
|
339
|
-
end
|
340
|
-
|
341
|
-
# Adds a nested element. This will not change the currently importing source, but
|
342
|
-
# it will set the currently active element to the nested element.
|
343
|
-
# If a block is given, it will execute for each of the nested elements that
|
344
|
-
# are found. Otherwise, a method name must be given, and that method will
|
345
|
-
# be executed instead of the block
|
346
|
-
def nested(sub_element, handler_method = nil)
|
347
|
-
original_element = @current.element
|
348
|
-
begin
|
349
|
-
@current.element.search("#{sub_element}").each do |sub_elem|
|
350
|
-
@current.element = sub_elem
|
351
|
-
assit(block_given? ^ (handler_method.is_a?(Symbol)), 'Must have either a handler (x)or a block.')
|
352
|
-
block_given? ? yield : self.send(handler_method)
|
353
|
-
end
|
354
|
-
ensure
|
355
|
-
@current.element = original_element
|
356
|
-
end
|
357
|
-
end
|
358
|
-
|
359
|
-
# Imports another source like add_source and also assigns the new source as
|
360
|
-
# a part of the current one
|
361
|
-
def add_part(sub_element = nil, &block)
|
362
|
-
raise(RuntimeError, "Cannot add child before having an uri to refer to.") unless(@current.attributes['uri'])
|
363
|
-
@current.element.search("/#{sub_element}").each do |sub_elem|
|
364
|
-
attribs = call_handler(sub_elem, &block)
|
365
|
-
if(attribs)
|
366
|
-
attribs[N::TALIA.part_of.to_s] ||= []
|
367
|
-
attribs[N::TALIA.part_of.to_s] << "<#{@current.attributes['uri']}>"
|
368
|
-
add_source_with_check(attribs)
|
369
|
-
end
|
370
|
-
end
|
371
|
-
end
|
372
|
-
|
373
|
-
# Add a property to the source currently being imported
|
294
|
+
# Add a property to the source that is currently being imported. If no object is given, the method
|
295
|
+
# just exits, unless required is set, in which case an error will be raised for an empty object.
|
296
|
+
#
|
297
|
+
# Database properties will be added as a single string, while other (semantic) properties will
|
298
|
+
# always be added into an array (even if there is just a single object).
|
299
|
+
#
|
300
|
+
# This is the base code for adding elements, which is used for the add_* methods in
|
301
|
+
# GenericReaderAddStatements. This method should not usually be used directly.
|
374
302
|
def set_element(predicate, object, required)
|
375
303
|
chk_create
|
376
304
|
object = check_objects(object)
|
@@ -388,9 +316,12 @@ module TaliaCore
|
|
388
316
|
end
|
389
317
|
end
|
390
318
|
|
391
|
-
#
|
392
|
-
#
|
393
|
-
|
319
|
+
# Pass in a list of elements that are to be used as objects in RDF triples.
|
320
|
+
# This method will check the objects and remove any blank ones (which should
|
321
|
+
# not be added).
|
322
|
+
#
|
323
|
+
# If no non-blank element is found in the input, this will always return nil
|
324
|
+
def check_objects(objects)
|
394
325
|
if(objects.kind_of?(Array))
|
395
326
|
objects.reject! { |obj| obj.blank? }
|
396
327
|
(objects.size == 0) ? nil : objects
|
@@ -398,49 +329,9 @@ module TaliaCore
|
|
398
329
|
objects.blank? ? nil : objects
|
399
330
|
end
|
400
331
|
end
|
401
|
-
|
402
|
-
# Get an attribute from the current xml element
|
403
|
-
def from_attribute(attrib)
|
404
|
-
@current.element[attrib]
|
405
|
-
end
|
406
|
-
|
407
|
-
# Get the content of exactly one child element of type "elem" of the
|
408
|
-
# currently importing element.
|
409
|
-
#
|
410
|
-
# If elem is set to :self, this will give the content of the current element
|
411
|
-
def from_element(elem)
|
412
|
-
return @current.element.inner_text.strip if(elem == :self)
|
413
|
-
elements = all_elements(elem)
|
414
|
-
elements = elements.uniq if(elements.size > 1) # Try to ignore dupes
|
415
|
-
raise(ArgumentError, "More than one element of #{elem} in #{@current.element.inspect}") if(elements.size > 1)
|
416
|
-
elements.first
|
417
|
-
end
|
418
|
-
|
419
|
-
# Get the content of all child elements of type "elem" of the currently
|
420
|
-
# importing element
|
421
|
-
def all_elements(elem)
|
422
|
-
result = []
|
423
|
-
@current.element.search("/#{elem}").each { |el| result << el.inner_text.strip }
|
424
|
-
result
|
425
|
-
end
|
426
|
-
|
427
|
-
# Get the iso8601 string for the date
|
428
|
-
def to_iso8601(date)
|
429
|
-
return nil unless(date)
|
430
|
-
date = DateTime.parse(date) unless(date.respond_to?(:strftime))
|
431
|
-
date.strftime('%Y-%m-%dT%H:%M:%SZ')
|
432
|
-
end
|
433
|
-
|
434
|
-
# Parses the given string and returns it as a date object
|
435
|
-
def parse_date(date, fmt = nil)
|
436
|
-
return nil if(date.blank?)
|
437
|
-
return DateTime.strptime(date, fmt) if(fmt) # format given
|
438
|
-
return DateTime.new(date.to_i) if(date.size < 5) # this short should be a year
|
439
|
-
DateTime.parse(date)
|
440
|
-
end
|
441
332
|
|
442
333
|
end
|
443
334
|
|
444
335
|
end
|
445
336
|
end
|
446
|
-
end
|
337
|
+
end
|