talia_core 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +41 -0
- data/bin/talia +33 -0
- data/lib/JXslt/jxslt.rb +60 -0
- data/lib/acts_as_roled.rb +11 -0
- data/lib/core_ext/platform.rb +9 -0
- data/lib/core_ext/string.rb +6 -0
- data/lib/core_ext.rb +1 -0
- data/lib/custom_template.rb +4 -0
- data/lib/loader_helper.rb +62 -0
- data/lib/mysql.rb +1214 -0
- data/lib/progressbar.rb +236 -0
- data/lib/role.rb +12 -0
- data/lib/talia_cl/command_line.rb +39 -0
- data/lib/talia_cl/commands/standalone/cl_options.rb +9 -0
- data/lib/talia_cl/commands/standalone/standalone_generate.rb +75 -0
- data/lib/talia_cl/commands/standalone.rb +25 -0
- data/lib/talia_cl/commands/talia_console/cl_options.rb +55 -0
- data/lib/talia_cl/commands/talia_console/console_commands.rb +37 -0
- data/lib/talia_cl/commands/talia_console/talia_commands.rb +131 -0
- data/lib/talia_cl/commands/talia_console.rb +47 -0
- data/lib/talia_cl/core_commands.rb +11 -0
- data/lib/talia_cl.rb +47 -0
- data/lib/talia_core/active_source.rb +372 -0
- data/lib/talia_core/active_source_parts/class_methods.rb +378 -0
- data/lib/talia_core/active_source_parts/predicate_handler.rb +89 -0
- data/lib/talia_core/active_source_parts/rdf.rb +131 -0
- data/lib/talia_core/active_source_parts/sql_helper.rb +36 -0
- data/lib/talia_core/active_source_parts/xml/base_builder.rb +47 -0
- data/lib/talia_core/active_source_parts/xml/generic_reader.rb +363 -0
- data/lib/talia_core/active_source_parts/xml/rdf_builder.rb +88 -0
- data/lib/talia_core/active_source_parts/xml/source_builder.rb +73 -0
- data/lib/talia_core/active_source_parts/xml/source_reader.rb +20 -0
- data/lib/talia_core/agent.rb +14 -0
- data/lib/talia_core/background_jobs/job.rb +82 -0
- data/lib/talia_core/background_jobs/progress_job.rb +68 -0
- data/lib/talia_core/collection.rb +13 -0
- data/lib/talia_core/data_types/data_loader.rb +92 -0
- data/lib/talia_core/data_types/data_record.rb +105 -0
- data/lib/talia_core/data_types/delayed_copier.rb +76 -0
- data/lib/talia_core/data_types/file_record.rb +59 -0
- data/lib/talia_core/data_types/file_store.rb +306 -0
- data/lib/talia_core/data_types/iip_data.rb +153 -0
- data/lib/talia_core/data_types/iip_loader.rb +127 -0
- data/lib/talia_core/data_types/image_data.rb +32 -0
- data/lib/talia_core/data_types/media_link.rb +19 -0
- data/lib/talia_core/data_types/mime_mapping.rb +45 -0
- data/lib/talia_core/data_types/path_helpers.rb +77 -0
- data/lib/talia_core/data_types/pdf_data.rb +42 -0
- data/lib/talia_core/data_types/simple_text.rb +36 -0
- data/lib/talia_core/data_types/temp_file_handling.rb +85 -0
- data/lib/talia_core/data_types/xml_data.rb +169 -0
- data/lib/talia_core/dc_resource.rb +20 -0
- data/lib/talia_core/dummy_handler.rb +34 -0
- data/lib/talia_core/dummy_source.rb +20 -0
- data/lib/talia_core/errors.rb +25 -0
- data/lib/talia_core/initializer.rb +427 -0
- data/lib/talia_core/ordered_source.rb +228 -0
- data/lib/talia_core/rails_ext/actionpack/action_controller/record_identifier.rb +13 -0
- data/lib/talia_core/rails_ext/actionpack/action_controller.rb +1 -0
- data/lib/talia_core/rails_ext/actionpack.rb +1 -0
- data/lib/talia_core/rails_ext.rb +1 -0
- data/lib/talia_core/rdf_import.rb +90 -0
- data/lib/talia_core/rdf_resource.rb +159 -0
- data/lib/talia_core/semantic_collection_item.rb +93 -0
- data/lib/talia_core/semantic_collection_wrapper.rb +324 -0
- data/lib/talia_core/semantic_property.rb +7 -0
- data/lib/talia_core/semantic_relation.rb +67 -0
- data/lib/talia_core/source.rb +323 -0
- data/lib/talia_core/source_transfer_object.rb +38 -0
- data/lib/talia_core/workflow/base.rb +15 -0
- data/lib/talia_core/workflow/publication_workflow.rb +62 -0
- data/lib/talia_core/workflow.rb +300 -0
- data/lib/talia_core.rb +9 -0
- data/lib/talia_dependencies.rb +12 -0
- data/lib/talia_util/bar_progressor.rb +15 -0
- data/lib/talia_util/configuration/config_file.rb +48 -0
- data/lib/talia_util/configuration/database_config.rb +40 -0
- data/lib/talia_util/configuration/mysql_database_setup.rb +104 -0
- data/lib/talia_util/data_import.rb +91 -0
- data/lib/talia_util/image_conversions.rb +82 -0
- data/lib/talia_util/import_job_helper.rb +132 -0
- data/lib/talia_util/io_helper.rb +54 -0
- data/lib/talia_util/progressable.rb +38 -0
- data/lib/talia_util/progressbar.rb +236 -0
- data/lib/talia_util/rdf_update.rb +80 -0
- data/lib/talia_util/some_sigla.xml +1960 -0
- data/lib/talia_util/test_helpers.rb +151 -0
- data/lib/talia_util/util.rb +226 -0
- data/lib/talia_util/yaml_import.rb +80 -0
- data/lib/talia_util.rb +13 -0
- data/lib/user.rb +116 -0
- data/lib/version.rb +15 -0
- data/test/core_ext/string_test.rb +11 -0
- data/test/custom_template_test.rb +8 -0
- data/test/talia_core/active_source_predicate_test.rb +54 -0
- data/test/talia_core/active_source_rdf_test.rb +89 -0
- data/test/talia_core/active_source_test.rb +631 -0
- data/test/talia_core/data_types/data_loader_test.rb +123 -0
- data/test/talia_core/data_types/data_record_test.rb +40 -0
- data/test/talia_core/data_types/file_record_test.rb +171 -0
- data/test/talia_core/data_types/iip_data_test.rb +130 -0
- data/test/talia_core/data_types/image_data_test.rb +88 -0
- data/test/talia_core/data_types/pdf_data_test.rb +68 -0
- data/test/talia_core/data_types/xml_data_test.rb +134 -0
- data/test/talia_core/generic_xml_test.rb +83 -0
- data/test/talia_core/initializer_test.rb +36 -0
- data/test/talia_core/ordered_source_test.rb +398 -0
- data/test/talia_core/rdf_resource_test.rb +115 -0
- data/test/talia_core/semantic_collection_item_test.rb +129 -0
- data/test/talia_core/source_reader_test.rb +33 -0
- data/test/talia_core/source_test.rb +484 -0
- data/test/talia_core/source_transfer_object_test.rb +24 -0
- data/test/talia_core/workflow/publication_workflow_test.rb +242 -0
- data/test/talia_core/workflow/user_class_for_workflow.rb +35 -0
- data/test/talia_core/workflow/workflow_base_test.rb +21 -0
- data/test/talia_core/workflow_test.rb +19 -0
- data/test/talia_util/import_job_helper_test.rb +46 -0
- data/test/test_helper.rb +68 -0
- metadata +262 -0
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
require 'hpricot'
|
|
2
|
+
|
|
3
|
+
module TaliaCore
|
|
4
|
+
module ActiveSourceParts
|
|
5
|
+
module Xml
|
|
6
|
+
|
|
7
|
+
# Superclass for importers/readers of generic xml files. This is as close as possible
|
|
8
|
+
# to the SourceReader class, and will (obviously) only work if a subclass fleshes out
|
|
9
|
+
# the mappings.
|
|
10
|
+
#
|
|
11
|
+
# See the SourceReader class for a simple example.
|
|
12
|
+
#
|
|
13
|
+
# When adding new sources, the reader will always check if the element is already
|
|
14
|
+
# present. If attributes for one source are imported in more than one place, all
|
|
15
|
+
# subsequent calls will merge the newly imported attributes with the existing ones.
|
|
16
|
+
class GenericReader
|
|
17
|
+
|
|
18
|
+
extend TaliaUtil::IoHelper
|
|
19
|
+
include TaliaUtil::Progressable
|
|
20
|
+
|
|
21
|
+
# Helper class for state
|
|
22
|
+
# Holder for the import state: the attribute hash of the source that is
# currently being built and the XML element that is currently being read.
class State
  attr_accessor :attributes
  attr_accessor :element
end
|
|
25
|
+
|
|
26
|
+
class << self
|
|
27
|
+
|
|
28
|
+
# See the IoHelper class for help on the options. A progressor may
|
|
29
|
+
# be supplied on which the importer will report its progress.
|
|
30
|
+
# Opens the url through open_generic (passing on the options) and returns
# the sources read from the io object that open_generic yields.
def sources_from_url(url, options = nil, progressor = nil)
  open_generic(url, options) do |io|
    sources_from(io, progressor)
  end
end
|
|
33
|
+
|
|
34
|
+
# Creates a reader for the given source, assigns the (optional) progressor
# to it and returns the result of the reader's #sources call.
def sources_from(source, progressor = nil)
  importer = new(source)
  importer.progressor = progressor
  importer.sources
end
|
|
39
|
+
|
|
40
|
+
# Create a handler for an element from which a source will be created
|
|
41
|
+
# Registers the block as handler for element_name. The handler is flagged
# as "creating", meaning that a source is built from each matching element.
def element(element_name, &handler_block)
  creates_source = true
  element_handler(element_name, creates_source, &handler_block)
end
|
|
44
|
+
|
|
45
|
+
# Create a handler for an element which will be processed but from which
|
|
46
|
+
# no source will be created
|
|
47
|
+
# Registers the block as handler for element_name. The handler is flagged
# as "not creating": the element is processed, but no source is built from it.
def plain_element(element_name, &handler_block)
  creates_source = false
  element_handler(element_name, creates_source, &handler_block)
end
|
|
50
|
+
|
|
51
|
+
# Set the reader to allow the use of root elements for import
|
|
52
|
+
# Switches on the use_root flag of this reader class.
def can_use_root
  @use_root = true
end
|
|
55
|
+
|
|
56
|
+
# True if the reader should also check the root element, instead of
|
|
57
|
+
# only checking the children
|
|
58
|
+
# Returns the use_root flag, or false if the flag was never set.
def use_root
  @use_root ? @use_root : false
end
|
|
61
|
+
|
|
62
|
+
# Returns the registered handlers
|
|
63
|
+
attr_reader :create_handlers
|
|
64
|
+
|
|
65
|
+
private
|
|
66
|
+
|
|
67
|
+
# Adds a handler for the given element. The second parameter will
|
|
68
|
+
# indicate if the handler will create a new source or not
|
|
69
|
+
# Registers the handler block for the given element by defining an instance
# method named "<element_name>_handler" on the reader class.
#
# element_name:: name of the XML element that the handler is for
# creating:: true if the handler creates a new source, false otherwise
#
# Raises an ArgumentError if no block is passed, or if this class already
# defines a handler for the element.
def element_handler(element_name, creating, &handler_block)
  handler_name = "#{element_name}_handler".to_sym
  # Handlers are instance methods, so the duplicate check has to look at the
  # instance methods defined on this class. (Asking the class itself with
  # respond_to? only sees class-level methods and never detects a duplicate.)
  # Only methods defined directly on this class count, so handlers inherited
  # from a superclass can still be redefined.
  already_defined = instance_methods(false).any? { |name| name.to_s == handler_name.to_s }
  raise(ArgumentError, "Duplicate handler for #{handler_name}") if(already_defined)
  raise(ArgumentError, "Must pass block to handler for #{handler_name}") unless(handler_block)
  @create_handlers ||= {}
  @create_handlers[handler_name] = creating # Indicates whether a source is created
  # Define the handler block method
  define_method(handler_name, handler_block)
end
|
|
78
|
+
end # End class methods
|
|
79
|
+
|
|
80
|
+
# Parses the given source with Hpricot's XML parser and keeps the resulting
# document for the later import.
def initialize(source)
  @doc = Hpricot::XML(source)
end
|
|
83
|
+
|
|
84
|
+
# Reads the document (on the first call only) and returns the attribute
# hashes of all sources that were found, as an Array.
#
# If use_root is set and a handler exists for the root element, the root
# element itself is read. Otherwise the children of the root are read.
# The result is cached, so later calls do not read the document again.
def sources
  # Return the values on cached calls too. Returning @sources itself handed
  # out the internal Hash (keyed by uri) on every call after the first one.
  return @sources.values if(@sources)
  @sources = {}
  if(use_root && self.respond_to?("#{@doc.root.name}_handler".to_sym))
    run_with_progress('XmlRead', 1) { read_source(@doc.root) }
  else
    read_children_of(@doc.root)
  end
  @sources.values
end
|
|
94
|
+
|
|
95
|
+
# Stores the attribute hash of one source in @sources, keyed by the
# (irified) uri. If there are already attributes for that uri, the new
# ones are merged in:
#
# * if the old and the new value are both Arrays, the new elements are
#   appended and duplicates are removed
# * otherwise the new value replaces the old one
# * attributes that did not exist before are simply added
#
# Note that keys are removed from the hash that is passed in while merging.
# Raises a RuntimeError if the attributes contain no uri.
def add_source_with_check(source_attribs)
  assit_kind_of(Hash, source_attribs)
  uri = source_attribs['uri']
  raise(RuntimeError, "Problem reading from XML: Source without URI (#{source_attribs.inspect})") if(uri.blank?)

  uri = irify(uri)
  source_attribs['uri'] = uri
  existing = (@sources[uri] ||= {})
  existing.each do |key, old_value|
    incoming = source_attribs.delete(key)
    next unless(incoming)

    type_changed = (key.to_sym == :type) && (old_value != 'TaliaCore::DummySource') && (old_value != incoming)
    assit(!type_changed, "Type should not change during import, may be a format problem. (From #{old_value} to #{incoming})")
    both_lists = incoming.is_a?(Array) && old_value.is_a?(Array)
    existing[key] = both_lists ? (old_value + incoming).uniq : incoming
  end
  # Whatever is left was not known before and can be added as it is
  existing.merge!(source_attribs)
end
|
|
120
|
+
|
|
121
|
+
# Returns the handler table registered on the reader class (an empty Hash
# if the class has none). The table is cached in @handlers.
def create_handlers
  return @handlers if(@handlers)
  @handlers = (self.class.create_handlers || {})
end
|
|
124
|
+
|
|
125
|
+
# Runs the handler for the element (or the given block) and stores the
# resulting attributes, if the handler produced any.
def read_source(element, &block)
  source_attributes = call_handler(element, &block)
  return unless(source_attributes)
  add_source_with_check(source_attributes)
end
|
|
129
|
+
|
|
130
|
+
# Reads a source from each child of the element that is an Hpricot::Elem;
# other child nodes are skipped. The progress meter is sized by the number
# of children and is advanced once for every child, skipped or not.
def read_children_of(element, &block)
  run_with_progress('Xml Read', element.children.size) do |progress|
    element.children.each do |child|
      progress.inc
      read_source(child, &block) if(child.is_a?(Hpricot::Elem))
    end
  end
end
|
|
139
|
+
|
|
140
|
+
# Returns the use_root setting of the reader's class.
def use_root
  reader_class = self.class
  reader_class.use_root
end
|
|
143
|
+
|
|
144
|
+
private
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# Removes all characters that are illegal in IRIs, so that the
|
|
148
|
+
# URIs can be imported
|
|
149
|
+
# Replaces each of the characters { } | \ ^ ` and each whitespace character
# in the uri with a '+', and returns the result as the string form of an
# N::URI.
def irify(uri)
  sanitized = uri.to_s.gsub(/[{}|\\^`\s]/, '+')
  N::URI.new(sanitized).to_s
end
|
|
152
|
+
|
|
153
|
+
# Call the handler method for the given element. If a block is given, that
|
|
154
|
+
# will be called instead
|
|
155
|
+
# Runs the handler for the element: the given block if there is one,
# otherwise the "<element name>_handler" method.
#
# While the handler runs, @current holds a fresh State for the element. The
# State's attributes are an empty Hash if a source is being created (always
# the case when a block is given) and nil otherwise. The previous state is
# restored afterwards, even if the handler raises.
#
# Returns the attributes collected by the handler, or false (after logging a
# warning) if there is neither a block nor a handler method for the element.
def call_handler(element)
  handler_name = :"#{element.name}_handler"
  unless(self.respond_to?(handler_name) || block_given?)
    TaliaCore.logger.warn("Unknown element in import: #{element.name}")
    return false
  end

  outer_state = @current # Needed to get back here from recursive calls
  collected = nil
  begin
    creating = (create_handlers[handler_name] || block_given?)
    @current = State.new
    @current.attributes = (creating ? {} : nil)
    @current.element = element
    if(block_given?)
      yield
    else
      self.send(handler_name)
    end
    collected = @current.attributes
  ensure
    @current = outer_state
  end
  collected
end
|
|
176
|
+
|
|
177
|
+
# Raises a RuntimeError unless the current state has an attribute hash,
# that is, unless a source is being created at the moment.
def chk_create
  return if(@current.attributes)
  raise(RuntimeError, "Illegal operation when not creating a source")
end
|
|
180
|
+
|
|
181
|
+
# Adds a value for the given predicate (may also be a database field)
|
|
182
|
+
# Sets the string form of the object as value for the predicate. If the
# object is an Array, each of its elements is set in turn.
def add(predicate, object, required = false)
  unless(object.kind_of?(Array))
    set_element(predicate, object.to_s, required)
  else
    object.each do |single|
      set_element(predicate, single.to_s, required)
    end
  end
end
|
|
189
|
+
|
|
190
|
+
# Adds a date field. This will attempt to parse the original string
|
|
191
|
+
# and write the result as an ISO 8601 compliant date string. Note
|
|
192
|
+
# that this won't be able to parse everything you throw at it, though.
|
|
193
|
+
# Parses the date (see parse_date; fmt is an optional strptime format) and
# adds the result as value for the predicate.
def add_date(predicate, date, required = false, fmt = nil)
  parsed = parse_date(date, fmt)
  add(predicate, parsed, required)
end
|
|
196
|
+
|
|
197
|
+
# Adds a date interval as an ISO 8601 compliant date string. See
|
|
198
|
+
# add_date for more info. If only one of the dates is given this
|
|
199
|
+
# will add a normal date string instead of an interval.
|
|
200
|
+
# Adds a date interval ("<start>/<end>", both parsed by parse_date) as value
# for the predicate.
#
# predicate:: the predicate (or database field) to set
# start_date, end_date:: the limits of the interval, as strings
# fmt:: optional strptime format used for parsing both dates
#
# If both dates are blank, nothing is added. If only one of the dates is
# given, that date is added as a plain date instead of an interval.
def add_date_interval(predicate, start_date, end_date, fmt = nil)
  return if(start_date.blank? && end_date.blank?)
  if(start_date.blank?)
    # Only the end is known: use it (not the blank start date)
    add_date(predicate, end_date, true, fmt)
  elsif(end_date.blank?)
    # Only the start is known
    add_date(predicate, start_date, true, fmt)
  else
    # Both dates are known to be present here, so the value is required like
    # in the single-date branches. (There is no "required" parameter.)
    add(predicate, "#{parse_date(start_date, fmt)}/#{parse_date(end_date, fmt)}", true)
  end
end
|
|
210
|
+
|
|
211
|
+
# Adds a relation for the given predicate
|
|
212
|
+
# Adds a relation from the current source to the object (or to each of the
# objects, if an Array is given). Each target uri is irified and stored in
# the form "<uri>".
#
# Blank objects are dropped. If nothing is left, the call does nothing,
# unless the relation is required, in which case an ArgumentError is raised.
# An ArgumentError is also raised if the predicate is a database field.
def add_rel(predicate, object, required = false)
  object = check_objects(object)
  unless(object)
    raise(ArgumentError, "Relation with empty object on #{predicate} (#{@current.attributes['uri']}).") if(required)
    return
  end

  link = lambda do |target|
    raise(ArgumentError, "Cannot add relation on database field") if(ActiveSource.db_attr?(predicate))
    set_element(predicate, "<#{irify(target)}>", required)
  end
  object.kind_of?(Array) ? object.each { |target| link.call(target) } : link.call(object)
end
|
|
228
|
+
|
|
229
|
+
# Add a file to the source being imported
|
|
230
|
+
# Sets the :files attribute of the current source to one entry per url
# (a single url or an Array of urls may be given). Each entry is a Hash with
# the url as a string and the options. Does nothing if urls is blank.
def add_file(urls, options = {})
  return if(urls.blank?)
  url_list = urls.is_a?(Array) ? urls : [ urls ]
  files = url_list.map { |url| { :url => url.to_s, :options => options } }
  @current.attributes[:files] = files if(files.size > 0)
end
|
|
236
|
+
|
|
237
|
+
# Returns true if the given source was already imported. This can return false
|
|
238
|
+
# if you call this for the currently importing source.
|
|
239
|
+
# True if there is a non-blank entry for the uri in the sources that were
# read so far.
def source_exists?(uri)
  known = @sources[uri]
  !known.blank?
end
|
|
242
|
+
|
|
243
|
+
# Adds a source from the given sub-element. You may either pass a block with
|
|
244
|
+
# the code to import or the name of an already registered element. If the
|
|
245
|
+
# special value :from_all_sources is given, it will read from all sub-elements for which
|
|
246
|
+
# there are registered handlers
|
|
247
|
+
# Reads additional sources while another element is being processed:
#
# * :from_all_sources reads from all children of the current element
# * any other sub_element reads from each match of "/<sub_element>" on the
#   current element, passing on the block (if any)
# * without sub_element a block is mandatory, and the source is read from
#   the current element itself using that block
def add_source(sub_element = nil, &block)
  if(sub_element == :from_all_sources)
    read_children_of(@current.element)
  elsif(sub_element)
    @current.element.search("/#{sub_element}").each do |sub_elem|
      read_source(sub_elem, &block)
    end
  else
    raise(ArgumentError, "When adding elements on the fly, you must use a block") unless(block)
    new_attributes = call_handler(@current.element, &block)
    add_source_with_check(new_attributes) if(new_attributes)
  end
end
|
|
260
|
+
|
|
261
|
+
# Returns true if the currently imported element already contains type information
|
|
262
|
+
# AND is of the given type.
|
|
263
|
+
# Checks the 'type' attribute of the source that is currently being
# imported. Returns nil if there is no type yet; otherwise the result of
# comparing the class TaliaCore::<type> to the given class with <=.
def current_is_a?(type)
  assit_kind_of(Class, type)
  type_name = @current.attributes['type']
  type_name && ("TaliaCore::#{type_name}".constantize <= type)
end
|
|
267
|
+
|
|
268
|
+
# Adds a nested element. This will not change the currently importing source, but
|
|
269
|
+
# it will set the currently active element to the nested element.
|
|
270
|
+
# If a block is given, it will execute for each of the nested elements that
|
|
271
|
+
# are found. Otherwise, a method name must be given, and that method will
|
|
272
|
+
# be executed instead of the block
|
|
273
|
+
# Runs the block (or, if no block is given, the method named by
# handler_method) once for each match of sub_element below the current
# element. During each run the match is the current element; the original
# element is put back at the end, even if an error occurs.
def nested(sub_element, handler_method = nil)
  outer_element = @current.element
  begin
    outer_element.search(sub_element.to_s).each do |sub_elem|
      @current.element = sub_elem
      has_handler = handler_method.is_a?(Symbol)
      assit(block_given? ^ has_handler, 'Must have either a handler (x)or a block.')
      if(block_given?)
        yield
      else
        self.send(handler_method)
      end
    end
  ensure
    @current.element = outer_element
  end
end
|
|
285
|
+
|
|
286
|
+
# Imports another source like add_source and also assigns the new source as
|
|
287
|
+
# a part of the current one
|
|
288
|
+
# Reads a source from each match of "/<sub_element>" on the current element
# and adds a N::TALIA.part_of relation from each new source to the source
# that is currently being imported. The current source must already have
# an uri, otherwise a RuntimeError is raised.
def add_part(sub_element = nil, &block)
  raise(RuntimeError, "Cannot add child before having an uri to refer to.") unless(@current.attributes['uri'])
  @current.element.search("/#{sub_element}").each do |sub_elem|
    part_attributes = call_handler(sub_elem, &block)
    next unless(part_attributes)

    part_of = N::TALIA.part_of.to_s
    (part_attributes[part_of] ||= []) << "<#{@current.attributes['uri']}>"
    add_source_with_check(part_attributes)
  end
end
|
|
299
|
+
|
|
300
|
+
# Add a property to the source currently being imported
|
|
301
|
+
# Stores the object under the predicate in the attributes of the current
# source. For database fields the value is stored as it is; for all other
# predicates it is appended to the list of values for that predicate.
#
# A blank object is ignored, unless it is required, in which case an
# ArgumentError is raised. May only be used while a source is being created
# (see chk_create).
def set_element(predicate, object, required)
  chk_create
  object = check_objects(object)
  unless(object)
    raise(ArgumentError, "No object given, but is required for #{predicate}.") if(required)
    return
  end

  key = predicate.respond_to?(:uri) ? predicate.uri.to_s : predicate.to_s
  attributes = @current.attributes
  if(ActiveSource.db_attr?(key))
    assit(!object.is_a?(Array))
    attributes[key] = object
  else
    (attributes[key] ||= []) << object
  end
end
|
|
317
|
+
|
|
318
|
+
# Check the objects and sort out the blank ones (which should not be used).
|
|
319
|
+
# If no usable object remains, nil is returned.
|
|
320
|
+
# Sorts out the blank objects, which should not be used.
#
# objects:: a single object or an Array of objects
#
# For an Array, a new Array without the blank elements is returned (the
# Array that was passed in is left untouched), or nil if no element is left.
# For a single object, the object itself is returned, or nil if it is blank.
def check_objects(objects)
  if(objects.kind_of?(Array))
    # reject instead of reject!: don't modify the caller's array
    usable = objects.reject { |obj| obj.blank? }
    usable.empty? ? nil : usable
  else
    objects.blank? ? nil : objects
  end
end
|
|
328
|
+
|
|
329
|
+
# Get an attribute from the current xml element
|
|
330
|
+
# Looks up attrib on the current xml element (using the element's []).
def from_attribute(attrib)
  element = @current.element
  element[attrib]
end
|
|
333
|
+
|
|
334
|
+
# Get the content of exactly one child element of type "elem" of the
|
|
335
|
+
# currently importing element.
|
|
336
|
+
# Returns the content of the one child element of type "elem" of the
# current element, or nil if there is none. Several children with the same
# content count as one; if the contents differ, an ArgumentError is raised.
def from_element(elem)
  contents = all_elements(elem)
  contents = contents.uniq if(contents.size > 1) # Duplicates are tolerated
  if(contents.size > 1)
    raise(ArgumentError, "More than one element of #{elem} in #{@current.element.inspect}")
  end
  contents.first
end
|
|
342
|
+
|
|
343
|
+
# Get the content of all child elements of type "elem" of the currently
|
|
344
|
+
# importing element
|
|
345
|
+
# Returns the stripped inner text of each match of "/<elem>" on the current
# element, as an Array.
def all_elements(elem)
  matches = @current.element.search("/#{elem}")
  matches.map { |el| el.inner_text.strip }
end
|
|
350
|
+
|
|
351
|
+
# Parses the given string and returns it as a date object
|
|
352
|
+
# Turns the date string into a DateTime:
#
# * a blank date gives nil
# * if a format is given, the string is parsed with strptime
# * a string of less than 5 characters is taken to be a year
# * everything else is handed to DateTime.parse
def parse_date(date, fmt = nil)
  if(date.blank?)
    nil
  elsif(fmt)
    DateTime.strptime(date, fmt)
  elsif(date.size < 5)
    DateTime.new(date.to_i)
  else
    DateTime.parse(date)
  end
end
|
|
358
|
+
|
|
359
|
+
end
|
|
360
|
+
|
|
361
|
+
end
|
|
362
|
+
end
|
|
363
|
+
end
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
module TaliaCore
|
|
2
|
+
module ActiveSourceParts
|
|
3
|
+
module Xml
|
|
4
|
+
|
|
5
|
+
# Class for creating xml-rdf data
|
|
6
|
+
class RdfBuilder < BaseBuilder
|
|
7
|
+
|
|
8
|
+
# Writes a simple "flat" triple. If the object is a string, it will be
|
|
9
|
+
# treated as a "value" while an object (ActiveSource or N::URI) will be treated
|
|
10
|
+
# as a "link"
|
|
11
|
+
# Writes one rdf:Description for the subject, containing the single
# predicate with the given object. For a subject that responds to #uri the
# string form of the uri is used; a predicate that does not respond to #uri
# is wrapped in an N::URI.
def write_triple(subject, predicate, object)
  about = subject.respond_to?(:uri) ? subject.uri.to_s : subject
  predicate_uri = predicate.respond_to?(:uri) ? predicate : N::URI.new(predicate)
  @builder.rdf(:Description, "rdf:about" => about) { write_predicate(predicate_uri, [ object ]) }
end
|
|
18
|
+
|
|
19
|
+
# Writes a complete source to the rdf
|
|
20
|
+
# Writes one rdf:Description for the source, containing each of the
# source's direct predicates with the values the source has for it.
def write_source(source)
  @builder.rdf(:Description, 'rdf:about' => source.uri.to_s) do
    source.direct_predicates.each { |predicate| write_predicate(predicate, source[predicate]) }
  end
end
|
|
28
|
+
|
|
29
|
+
private
|
|
30
|
+
|
|
31
|
+
# Build the structure for the XML file and pass on to
|
|
32
|
+
# the given block
|
|
33
|
+
# Writes the enclosing rdf:RDF element (with the namespace declarations of
# the class) and yields to the block to write the content.
def build_structure
  @builder.rdf(:RDF, self.class.namespaces) { yield }
end
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Returns a Hash with an "xmlns:<shortcut>" => <uri string> entry for each
# of the shortcuts in N::Namespace. The Hash is built once and then cached.
def self.namespaces
  @namespaces ||= N::Namespace.shortcuts.inject({}) do |declarations, (key, value)|
    declarations["xmlns:#{key.to_s}"] = value.to_s
    declarations
  end
end
|
|
47
|
+
|
|
48
|
+
# Build an rdf/xml string for one predicate, with the given values
|
|
49
|
+
# Writes the predicate once for each of the given values.
def write_predicate(predicate, values)
  values.each do |single_value|
    write_single_predicate(predicate, single_value)
  end
end
|
|
52
|
+
|
|
53
|
+
# Writes the tag for one predicate and one value. A value that responds to
# #uri is written as a nested rdf:Description pointing to that uri. Any
# other value is written as text, with the attributes found by
# extract_values set on the predicate tag.
def write_single_predicate(predicate, value)
  if(value.respond_to?(:uri))
    @builder.tag!(predicate.to_name_s, {}) do
      @builder.rdf(:Description, 'rdf:about' => value.uri.to_s)
    end
  else
    tag_attributes = extract_values(value.to_s)
    text = tag_attributes.delete('value')
    @builder.tag!(predicate.to_name_s, tag_attributes) do
      @builder.text!(text)
    end
  end
end
|
|
65
|
+
|
|
66
|
+
# Splits up the value, extracting encoded language codes and RDF data types. The
|
|
67
|
+
# result will be returned as a hash, with the "true" value being "value"
|
|
68
|
+
# Takes the value string apart. The string is split at '^^', and a language
# code is extracted from each part (see extract_lang). The result is a Hash
# with the actual value as 'value', plus 'rdf:datatype' (the last part, if
# there was more than one) and 'xml:lang', if present.
def extract_values(value)
  properties = {}
  parts = value.split('^^').map { |part| extract_lang(part, properties) }
  properties['rdf:datatype'] = parts.last if(parts.size > 1)
  properties['value'] = (parts.first || '')
  properties
end
|
|
78
|
+
|
|
79
|
+
# Helper to extract a language string. The lang value, if any, will be added to the hash
|
|
80
|
+
# Splits the value at '@'. If there is more than one part, the last part is
# stored in the hash as 'xml:lang'. Returns the first part (or an empty
# string if there is none).
def extract_lang(value, hash)
  text, *rest = value.split('@')
  hash['xml:lang'] = rest.last unless(rest.empty?)
  text || ''
end
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
end
|
|
88
|
+
end
|
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
module TaliaCore
|
|
2
|
+
module ActiveSourceParts
|
|
3
|
+
module Xml
|
|
4
|
+
|
|
5
|
+
# Class to build source representations of ActiveSource objects. Talia
|
|
6
|
+
# uses a simple XML format to encode the Source Object. The format
|
|
7
|
+
# maps easily to a Hash as it is used for the new or write_attributes
|
|
8
|
+
# methods:
|
|
9
|
+
#
|
|
10
|
+
# <sources>
|
|
11
|
+
# <source>
|
|
12
|
+
# <attribute>
|
|
13
|
+
# <predicate>http://foobar/</predicate>
|
|
14
|
+
# <object>http://barbar/</object>
|
|
15
|
+
# </attribute>
|
|
16
|
+
# ...
|
|
17
|
+
# </source>
|
|
18
|
+
# <source>
|
|
19
|
+
# <attribute>
|
|
20
|
+
# <predicate>http://foobar/bar/</predicate>
|
|
21
|
+
# <value>val</value>
|
|
22
|
+
# <object>http://some_url</object>
|
|
23
|
+
# <value>another</value>
|
|
24
|
+
# ...
|
|
25
|
+
# </attribute>
|
|
26
|
+
# ...
|
|
27
|
+
# </source>
|
|
28
|
+
# ...
|
|
29
|
+
# </sources>
|
|
30
|
+
class SourceBuilder < BaseBuilder
|
|
31
|
+
|
|
32
|
+
# Builds the XML representation for a single source
|
|
33
|
+
# Writes a source element that contains one attribute entry for each of the
# source's attributes, followed by one entry for each direct predicate.
def write_source(source)
  @builder.source do
    source.attributes.each do |attrib, value|
      write_attribute(attrib, value)
    end
    source.direct_predicates.each do |pred|
      write_attribute(pred, source[pred])
    end
  end
end
|
|
39
|
+
|
|
40
|
+
private
|
|
41
|
+
|
|
42
|
+
# build an attribute entry in a source
|
|
43
|
+
# Writes an attribute element: the predicate (its uri, if it has one) goes
# into a predicate tag, followed by one target tag for each value. A value
# that does not respond to #each is treated as a single value.
def write_attribute(predicate, values)
  predicate_name = predicate.respond_to?(:uri) ? predicate.uri.to_s : predicate.to_s
  targets = values.respond_to?(:each) ? values : [ values ]
  @builder.attribute do
    @builder.predicate { @builder.text!(predicate_name) }
    targets.each { |target| write_target(target) }
  end
end
|
|
51
|
+
|
|
52
|
+
# Writes a value or object tag, depending on the target
|
|
53
|
+
# Writes an object tag with the target's uri if the target responds to
# #uri, and a value tag with the target's string form otherwise.
def write_target(target)
  unless(target.respond_to?(:uri))
    @builder.value { @builder.text!(target.to_s) }
  else
    @builder.object { @builder.text!(target.uri.to_s) }
  end
end
|
|
60
|
+
|
|
61
|
+
# Build the structure for the XML file and pass on to
|
|
62
|
+
# the given block
|
|
63
|
+
# Writes the enclosing sources element and yields to the block to write
# the content.
def build_structure
  @builder.sources { yield }
end
|
|
68
|
+
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
end
|
|
72
|
+
end
|
|
73
|
+
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
module TaliaCore
|
|
2
|
+
module ActiveSourceParts
|
|
3
|
+
module Xml
|
|
4
|
+
|
|
5
|
+
# Helper class to read an attribute hash from a Source XML
|
|
6
|
+
# Reader for the Source XML format. Registers a single handler, for the
# "source" element, that builds the attribute hash of one source.
class SourceReader < GenericReader

  element(:source) do
    # Each attribute element has one predicate: the value children are added
    # as plain values, the object children as relations.
    nested(:attribute) do
      predicate = from_element(:predicate)
      add(predicate, all_elements(:value))
      add_rel(predicate, all_elements(:object))
    end
    add_file(all_elements(:file))
  end

end
|
|
17
|
+
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
module TaliaCore
|
|
2
|
+
|
|
3
|
+
# Some item that "has the power to act". This can either be a person or another
|
|
4
|
+
# entity, like an institution or a corporation
|
|
5
|
+
# Source class for an agent. Declares the rdf type N::DCT.Agent and the two
# singular properties name (N::DCNS.title) and description
# (N::DCNS.description).
class Agent < Source

  has_rdf_type(N::DCT.Agent)

  singular_property(:name, N::DCNS.title)
  singular_property(:description, N::DCNS.description)

end
|
|
13
|
+
|
|
14
|
+
end
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
module TaliaCore
|
|
2
|
+
module BackgroundJobs
|
|
3
|
+
|
|
4
|
+
# Exception class for blocked jobs
|
|
5
|
+
# Raised when a job cannot be created because another job with the same tag
# is still active.
class JobBlockedError < RuntimeError; end
|
|
7
|
+
|
|
8
|
+
# A Background job run from Talia. The Job class is designed to be able to run long-running
|
|
9
|
+
# tasks both from the command line and Talia's background job runner.
|
|
10
|
+
class Job
|
|
11
|
+
|
|
12
|
+
# Creates a background job with progress metering. If a tag is given, it will attempt to block
|
|
13
|
+
# the creation of further jobs with the same tag. This will also use the runner script, called
|
|
14
|
+
# with the current ruby binary
|
|
15
|
+
# Submits the jobs (expanded to full command lines by make_jobs) to Bj.
#
# For each submitted job that has a tag, the jobs with that tag which are
# neither 'finished' nor 'dead' are looked up. If the result is not exactly
# one job, a JobBlockedError is raised. The job's id is stored in its
# environment as JOB_ID (unless already set) and the job is saved.
def self.submit_with_progress(jobs, options = {})
  commands = make_jobs(jobs)
  Bj.submit(commands, options) do |job|
    tag = job.tag
    if(tag)
      active = Bj.table.job.find(:all, :conditions => ["(state != 'finished' and state != 'dead' and tag = ?)", tag])
      # The error will break the transaction and leave the db in a clean state
      raise(JobBlockedError, "Tried to create another job with tag #{tag}.") unless(active.size == 1)
    end
    # The runner finds the job id in the environment
    job.env['JOB_ID'] ||= job.id.to_s
    job.save!
    job
  end
end
|
|
30
|
+
|
|
31
|
+
# Runs the block with an active progress meter, creating the progress object before starting, and
|
|
32
|
+
# deleting it from the db after completion. This way the progress_jobs table should remain mostly
|
|
33
|
+
# clean.
|
|
34
|
+
# Yields to the block with a progress record in place for the job given by
# the JOB_ID environment variable. The record is created if it does not
# exist yet. Raises a RuntimeError if JOB_ID is missing or no such job
# exists. ProgressJob.delete is called at the end in any case.
def self.run_progress_job
  begin
    job_id = ENV['JOB_ID']
    job_known = (job_id && Bj.table.job.exists?(job_id))
    raise(RuntimeError, "Cannot run job: Job id not given or non-existent (#{job_id})") unless(job_known)
    ProgressJob.create_progress!(job_id) unless(ProgressJob.exists?(:job_id => job_id))
    yield
  ensure
    # NOTE(review): delete is called with a conditions hash here. The stock
    # ActiveRecord delete expects ids - confirm that ProgressJob handles this.
    ProgressJob.delete(:job_id => ENV['JOB_ID'])
  end
end
|
|
43
|
+
|
|
44
|
+
# Runs the block with the progress meter for the current job. This may be used multiple times.
|
|
45
|
+
# Runs the block inside run_progress_job, passing it the progress record of
# the current job. Before the block runs, the record is reset with the
# message, the item count, a processed count of 0 and the current time;
# after the block, finish is called on it. Raises a RuntimeError if no
# progress record is found for the job.
def self.run_with_progress(message, item_count)
  run_progress_job do
    progress = ProgressJob.find(:first, :conditions => {:job_id => ENV['JOB_ID']})
    raise(RuntimeError, 'Progress meter not found for job.') unless progress
    progress.update_attributes(
      :item_count => item_count,
      :progress_message => message,
      :processed_count => 0,
      :started_at => Time.now
    )

    yield(progress)

    progress.finish
  end
end
|
|
59
|
+
|
|
60
|
+
private
|
|
61
|
+
|
|
62
|
+
# Returns the command used to call the ruby interpreter. This is the binary
# from the interpreter's own configuration if that can be executed, and
# plain 'ruby' (from the PATH) otherwise. If neither works, a warning is
# printed and nil is returned. A result that was found is cached.
def self.ruby
  return @ruby if(@ruby)
  # The configuration lives in RbConfig. The old ::Config alias does not
  # exist on current rubies, where using it fails with a NameError.
  require 'rbconfig'
  c = ::RbConfig::CONFIG
  ruby = File.join(c['bindir'], c['ruby_install_name']) << c['EXEEXT']
  @ruby = if system('%s -e 42' % ruby)
    ruby
  else
    system('%s -e 42' % 'ruby') ? 'ruby' : warn('no ruby in PATH/CONFIG')
  end
end
|
|
72
|
+
|
|
73
|
+
# Make the caller line for each job
|
|
74
|
+
# Builds the command line for each job (a single job or an Array of jobs
# may be given): the ruby interpreter, the script/runner script and the
# job's file in the jobs directory. Always returns an Array.
def self.make_jobs(jobs)
  job_list = jobs.kind_of?(Array) ? jobs : [ jobs ]
  job_list.map do |current_job|
    "#{ruby} #{File.join('.', 'script', 'runner')} #{File.join('jobs', current_job)}"
  end
end
|
|
78
|
+
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
end
|
|
82
|
+
end
|