talia_core 0.4.3 → 0.4.4
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +6 -27
- data/VERSION.yml +3 -2
- data/config/database.yml +11 -11
- data/config/talia_core.yml +11 -6
- data/config/talia_core.yml.example +11 -6
- data/generators/talia_base/talia_base_generator.rb +3 -0
- data/generators/talia_base/templates/README +1 -1
- data/generators/talia_base/templates/app/controllers/source_data_controller.rb +1 -1
- data/generators/talia_base/templates/app/controllers/sources_controller.rb +31 -12
- data/generators/talia_base/templates/app/helpers/sources_helper.rb +77 -1
- data/generators/talia_base/templates/app/views/layouts/sources.html.erb +22 -0
- data/generators/talia_base/templates/app/views/sources/_data_list.html.erb +17 -0
- data/generators/talia_base/templates/app/views/sources/_property_item.html.erb +10 -0
- data/generators/talia_base/templates/app/views/sources/_property_list.html.erb +13 -0
- data/generators/talia_base/templates/app/views/sources/index.html.erb +16 -11
- data/generators/talia_base/templates/app/views/sources/semantic_templates/default/default.html.erb +8 -19
- data/generators/talia_base/templates/config/routes.rb +11 -0
- data/generators/talia_base/templates/migrations/create_semantic_relations.rb +2 -1
- data/generators/talia_base/templates/public/images/core/arrow.png +0 -0
- data/generators/talia_base/templates/public/images/core/building.png +0 -0
- data/generators/talia_base/templates/public/images/core/contents_top_left.gif +0 -0
- data/generators/talia_base/templates/public/images/core/document-horizontal-text.png +0 -0
- data/generators/talia_base/templates/public/images/core/document.png +0 -0
- data/generators/talia_base/templates/public/images/core/gear.png +0 -0
- data/generators/talia_base/templates/public/images/core/group.png +0 -0
- data/generators/talia_base/templates/public/images/core/header_bg.gif +0 -0
- data/generators/talia_base/templates/public/images/core/image.png +0 -0
- data/generators/talia_base/templates/public/images/core/imagebig.png +0 -0
- data/generators/talia_base/templates/public/images/core/left_edge.gif +0 -0
- data/generators/talia_base/templates/public/images/core/letter.png +0 -0
- data/generators/talia_base/templates/public/images/core/line.png +0 -0
- data/generators/talia_base/templates/public/images/core/logo.gif +0 -0
- data/generators/talia_base/templates/public/images/core/map.png +0 -0
- data/generators/talia_base/templates/public/images/core/period.png +0 -0
- data/generators/talia_base/templates/public/images/core/person.png +0 -0
- data/generators/talia_base/templates/public/images/core/person_default.png +0 -0
- data/generators/talia_base/templates/public/images/core/place.png +0 -0
- data/generators/talia_base/templates/public/images/core/source.png +0 -0
- data/generators/talia_base/templates/public/images/core/television.png +0 -0
- data/generators/talia_base/templates/public/images/core/text.png +0 -0
- data/generators/talia_base/templates/public/images/core/type.png +0 -0
- data/generators/talia_base/templates/public/images/core/video.png +0 -0
- data/generators/talia_base/templates/public/stylesheets/img/arrow.png +0 -0
- data/generators/talia_base/templates/public/stylesheets/main.css +276 -0
- data/generators/talia_base/templates/script/configure_talia +1 -1
- data/generators/talia_base/templates/script/setup_talia_backend +2 -0
- data/lib/core_ext/platform.rb +1 -0
- data/lib/core_ext/string.rb +6 -0
- data/lib/talia_core/active_source.rb +62 -3
- data/lib/talia_core/active_source_parts/class_methods.rb +36 -122
- data/lib/talia_core/active_source_parts/finders.rb +158 -0
- data/lib/talia_core/active_source_parts/predicate_handler.rb +7 -8
- data/lib/talia_core/active_source_parts/xml/generic_reader.rb +95 -11
- data/lib/talia_core/active_source_parts/xml/rdf_builder.rb +6 -13
- data/lib/talia_core/active_source_parts/xml/source_reader.rb +8 -3
- data/lib/talia_core/data_types/data_loader.rb +14 -6
- data/lib/talia_core/data_types/data_record.rb +5 -1
- data/lib/talia_core/data_types/iip_data.rb +1 -1
- data/lib/talia_core/data_types/mime_mapping.rb +8 -3
- data/lib/talia_core/errors.rb +4 -0
- data/lib/talia_core/initializer.rb +1 -8
- data/lib/talia_core/property_string.rb +58 -0
- data/lib/talia_core/semantic_collection_item.rb +3 -2
- data/lib/talia_core/semantic_collection_wrapper.rb +236 -198
- data/lib/talia_core/source.rb +130 -178
- data/lib/talia_core/source_types/collection.rb +15 -0
- data/lib/talia_core/source_types/dc_resource.rb +22 -0
- data/lib/talia_core/source_types/dummy_source.rb +22 -0
- data/lib/talia_core.rb +0 -1
- data/lib/talia_util/import_job_helper.rb +44 -16
- data/lib/talia_util/io_helper.rb +21 -1
- data/lib/talia_util/rake_tasks.rb +48 -72
- data/lib/talia_util/rdf_update.rb +22 -13
- data/lib/talia_util/test_helpers.rb +1 -1
- data/lib/talia_util.rb +0 -2
- data/test/core_ext/string_test.rb +5 -0
- data/test/talia_core/active_source_test.rb +151 -14
- data/test/talia_core/generic_xml_test.rb +46 -2
- data/test/talia_core/initializer_test.rb +0 -1
- data/test/talia_core/property_string_test.rb +78 -0
- data/test/talia_core/source_reader_test.rb +5 -1
- data/test/talia_core/source_test.rb +23 -32
- data/test/talia_util/import_job_helper_test.rb +1 -1
- data/test/talia_util/io_helper_test.rb +44 -0
- metadata +399 -373
- data/generators/talia_base/templates/app/views/sources/semantic_templates/default/province.html.erb +0 -19
- data/lib/acts_as_roled.rb +0 -11
- data/lib/talia_core/collection.rb +0 -13
- data/lib/talia_core/dc_resource.rb +0 -20
- data/lib/talia_core/dummy_source.rb +0 -20
- data/lib/talia_core/rails_ext/actionpack/action_controller/record_identifier.rb +0 -13
- data/lib/talia_core/rails_ext/actionpack/action_controller.rb +0 -1
- data/lib/talia_core/rails_ext/actionpack.rb +0 -1
- data/lib/talia_core/rails_ext.rb +0 -1
- data/lib/talia_util/data_import.rb +0 -91
- data/lib/talia_util/yaml_import.rb +0 -80
data/lib/talia_core/source.rb
CHANGED
@@ -6,7 +6,7 @@ require 'dummy_handler'
|
|
6
6
|
require 'rdf_resource'
|
7
7
|
|
8
8
|
module TaliaCore
|
9
|
-
|
9
|
+
|
10
10
|
# This represents a Source in the Talia core system.
|
11
11
|
#
|
12
12
|
# Since data for the Source exists both in the database and in the RDF store, the
|
@@ -19,41 +19,32 @@ module TaliaCore
|
|
19
19
|
# * To ensure that the data is written, the save method should be called as
|
20
20
|
# necessary.
|
21
21
|
class Source < ActiveSource
|
22
|
-
|
22
|
+
# FIXME: Remove methods for old admin panel
|
23
|
+
|
23
24
|
has_one :workflow, :class_name => 'TaliaCore::Workflow::Base', :dependent => :destroy
|
24
|
-
|
25
|
+
|
25
26
|
# The uri will be wrapped into an object
|
26
27
|
def uri
|
27
28
|
N::URI.new(self[:uri])
|
28
29
|
end
|
29
|
-
|
30
|
+
|
30
31
|
# Indicates if this source belongs to the local store
|
31
32
|
def local
|
32
33
|
uri.local?
|
33
34
|
end
|
34
35
|
|
35
|
-
alias_method :ar_update_attributes, :update_attributes
|
36
|
-
# Wrapping for <tt>ActiveRecord</tt> <tt>update_attributes</tt>.
|
37
|
-
def update_attributes(attributes)
|
38
|
-
attributes, rdf_attributes = extract_attributes!(attributes)
|
39
|
-
rdf_attributes.each do |k,v|
|
40
|
-
send(k + "=", v)
|
41
|
-
send('save_' + k)
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
36
|
# Shortcut for assigning the primary_source status
|
46
37
|
def primary_source=(value)
|
47
38
|
value = value ? 'true' : 'false'
|
48
39
|
predicate_set(:talia, :primary_source, value)
|
49
40
|
end
|
50
|
-
|
41
|
+
|
51
42
|
# Indicates if the current source is considered "primary" in the local
|
52
43
|
# library
|
53
44
|
def primary_source
|
54
45
|
predicate(:talia, :primary_source) == true
|
55
46
|
end
|
56
|
-
|
47
|
+
|
57
48
|
# Searches for sources where <tt>property</tt> has one of the values given
|
58
49
|
# to this method. The result is a hash that contains one result list for
|
59
50
|
# each of the values, with the value as a key.
|
@@ -72,22 +63,22 @@ module TaliaCore
|
|
72
63
|
joins = 'LEFT JOIN semantic_relations ON semantic_relations.subject_id = active_sources.id '
|
73
64
|
joins << "LEFT JOIN active_sources AS t_sources ON semantic_relations.object_id = t_sources.id AND semantic_relations.object_type = 'TaliaCore::ActiveSource' "
|
74
65
|
joins << "LEFT JOIN semantic_properties ON semantic_relations.object_id = semantic_properties.id AND semantic_relations.object_type = 'TaliaCore::SemanticProperty' "
|
75
|
-
|
66
|
+
|
76
67
|
property = uri_string_for(property)
|
77
68
|
results = {}
|
78
69
|
for val in values
|
79
70
|
find(:all )
|
80
71
|
val_str = uri_string_for(val)
|
81
72
|
find_parms = params.merge(
|
82
|
-
|
83
|
-
|
73
|
+
:conditions => ['semantic_properties.value = ? OR t_sources.uri = ?', val_str, val_str],
|
74
|
+
:joins => joins
|
84
75
|
)
|
85
76
|
results[val] = find(:all, find_parms)
|
86
77
|
end
|
87
|
-
|
78
|
+
|
88
79
|
results
|
89
80
|
end
|
90
|
-
|
81
|
+
|
91
82
|
# Try to find a source for the given uri; if none exists, instantiate
|
92
83
|
# a new one, combining the N::LOCAL namespace and the given local name
|
93
84
|
#
|
@@ -101,35 +92,6 @@ module TaliaCore
|
|
101
92
|
result = find_by_uri(uri)
|
102
93
|
result ||= self.new(N::LOCAL.to_s + local_name.to_permalink)
|
103
94
|
end
|
104
|
-
|
105
|
-
# Find a list of sources which contains the given token inside the local name.
|
106
|
-
# This means that the namespace it will be excluded.
|
107
|
-
#
|
108
|
-
# Sources in system:
|
109
|
-
# * http://talia.org/one
|
110
|
-
# * http://talia.org/two
|
111
|
-
#
|
112
|
-
# Source.find_by_uri_token('a') # => [ ]
|
113
|
-
# Source.find_by_uri_token('o') # => [ 'http://talia.org/one', 'http://talia.org/two' ]
|
114
|
-
#
|
115
|
-
# NOTE: It internally use a MySQL function, as sql condition, to find the local name of the uri.
|
116
|
-
def self.find_by_uri_token(token, options = {})
|
117
|
-
find(:all, {
|
118
|
-
:conditions => [ "LOWER(SUBSTRING_INDEX(uri, '/', -1)) LIKE ?", '%' + token.downcase + '%' ],
|
119
|
-
:select => :uri,
|
120
|
-
:order => "uri ASC",
|
121
|
-
:limit => 10 }.merge!(options))
|
122
|
-
end
|
123
|
-
|
124
|
-
# Find the fist Source that matches the given URI.
|
125
|
-
# It's useful for admin pane, because users visit:
|
126
|
-
# /admin/sources/<source_id>/edit
|
127
|
-
# but that information is not enough, since we store
|
128
|
-
# into the database the whole reference as URI:
|
129
|
-
# http://localnode.org/av_media_sources/source_id
|
130
|
-
def self.find_by_partial_uri(id)
|
131
|
-
find(:first, :conditions => ["uri LIKE ?", '%' + id + '%'])
|
132
|
-
end
|
133
95
|
|
134
96
|
# Return a hash of direct predicates, grouped by namespace.
|
135
97
|
def grouped_direct_predicates
|
@@ -178,146 +140,136 @@ module TaliaCore
|
|
178
140
|
end
|
179
141
|
end
|
180
142
|
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
143
|
+
# Save, associate/disassociate given predicates attributes.
|
144
|
+
def save_predicates_attributes
|
145
|
+
each_predicate do |namespace, name, objects|
|
146
|
+
objects.each { |object| object.save if object.is_a?(Source) && object.new_record? }
|
147
|
+
self.predicate_replace(namespace, name, objects.to_s) if predicate_changed?(namespace, name, objects)
|
148
|
+
end
|
186
149
|
end
|
187
|
-
end
|
188
150
|
|
189
|
-
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
151
|
+
|
152
|
+
# Returns an array of labels for this source. You may give the name of the
|
153
|
+
# property that is used as a label, by default it uses rdf:label(s). If
|
154
|
+
# the given property is not set, it will return the local part of this
|
155
|
+
# Source's URI.
|
156
|
+
#
|
157
|
+
# In any case, the result will always be an Array with at least one element.
|
158
|
+
def labels(type = N::RDFS::label)
|
159
|
+
labels = get_attribute(type)
|
160
|
+
unless(labels && labels.size > 0)
|
161
|
+
labels = [uri.local_name]
|
162
|
+
end
|
163
|
+
|
164
|
+
labels
|
200
165
|
end
|
201
166
|
|
202
|
-
labels
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
# exist in the RDF, just the first is returned.)
|
207
|
-
def label(type = N::RDFS::label)
|
208
|
-
labels(type)[0]
|
209
|
-
end
|
210
|
-
|
211
|
-
# Return the titleized uri local name.
|
212
|
-
#
|
213
|
-
# http://localnode.org/source # => Source
|
214
|
-
def titleized
|
215
|
-
self.uri.local_name.titleize
|
216
|
-
end
|
217
|
-
|
218
|
-
# Equality test. Two sources are equal if they have the same URI
|
219
|
-
def ==(value)
|
220
|
-
value.is_a?(Source) && (value.uri == uri)
|
221
|
-
end
|
222
|
-
|
223
|
-
def normalize_uri(uri, label = '')
|
224
|
-
self.class.normalize_uri(uri, label)
|
225
|
-
end
|
226
|
-
|
227
|
-
protected
|
228
|
-
|
229
|
-
# Separates given attributes distinguishing between ActiveSource and RDF.
|
230
|
-
def extract_attributes!(attributes)
|
231
|
-
active_source_attributes = attributes.inject({}) do |active_source_attributes, column_values|
|
232
|
-
active_source_attributes[column_values.first] = attributes.delete(column_values.first) if self.class.column_names.include? column_values.first
|
233
|
-
active_source_attributes
|
167
|
+
# This returns a single label of the given type. (If multiple labels
|
168
|
+
# exist in the RDF, just the first is returned.)
|
169
|
+
def label(type = N::RDFS::label)
|
170
|
+
labels(type)[0]
|
234
171
|
end
|
235
172
|
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
#
|
242
|
-
# Cases:
|
243
|
-
# Homer Simpson
|
244
|
-
# # => Should instantiate a source with
|
245
|
-
# http://localnode.org/Homer_Simpson using N::LOCAL constant.
|
246
|
-
#
|
247
|
-
# "Homer Simpson"
|
248
|
-
# # => Should return the string itself, without the double quoting
|
249
|
-
# in order to add it directly to the RDF triple.
|
250
|
-
#
|
251
|
-
# http://springfield.org/Homer_Simpson
|
252
|
-
# # => Should instantiate a source with the given uri
|
253
|
-
def instantiate_source_or_rdf_object(attributes)
|
254
|
-
name_or_uri = attributes['titleized']
|
255
|
-
if /^\"[\w\s\d]+\"$/.match name_or_uri
|
256
|
-
name_or_uri[1..-2]
|
257
|
-
elsif attributes['uri'].blank? and attributes['source'].blank?
|
258
|
-
name_or_uri
|
259
|
-
elsif /^http:\/\//.match name_or_uri
|
260
|
-
Source.new(name_or_uri)
|
261
|
-
else
|
262
|
-
Source.find_or_instantiate_by_uri(normalize_uri(attributes['uri']), name_or_uri)
|
173
|
+
# Return the titleized uri local name.
|
174
|
+
#
|
175
|
+
# http://localnode.org/source # => Source
|
176
|
+
def titleized
|
177
|
+
self.uri.local_name.titleize
|
263
178
|
end
|
264
|
-
end
|
265
179
|
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
predicates.each do |predicate, objects|
|
270
|
-
block.call(namespace, predicate, objects.flatten)
|
271
|
-
end
|
180
|
+
# Equality test. Two sources are equal if they have the same URI
|
181
|
+
def ==(value)
|
182
|
+
value.is_a?(Source) && (value.uri == uri)
|
272
183
|
end
|
273
|
-
end
|
274
184
|
|
275
|
-
# Class methods
|
276
|
-
class << self
|
277
|
-
|
278
|
-
# Normalize the given uri.
|
279
|
-
#
|
280
|
-
# Example:
|
281
|
-
# normalize_uri('Lucca') # => http://www.talia.discovery-project.org/sources/Lucca
|
282
|
-
# normalize_uri('http://xmlns.com/foaf/0.1/Group') # => http://xmlns.com/foaf/0.1/Group
|
283
|
-
# normalize_uri('http://www.talia.discovery-project.org/sources/Lucca')
|
284
|
-
# # => http://www.talia.discovery-project.org/sources/Lucca
|
285
185
|
def normalize_uri(uri, label = '')
|
286
|
-
uri
|
287
|
-
uri = N::LOCAL+label.gsub(' ', '_') if uri == N::LOCAL.to_s
|
288
|
-
uri.to_s
|
186
|
+
self.class.normalize_uri(uri, label)
|
289
187
|
end
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
#
|
303
|
-
#
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
188
|
+
|
189
|
+
protected
|
190
|
+
|
191
|
+
# Look at the given attributes and choose to instantiate
|
192
|
+
# a Source or a RDF object (triple endpoint).
|
193
|
+
#
|
194
|
+
# Cases:
|
195
|
+
# Homer Simpson
|
196
|
+
# # => Should instantiate a source with
|
197
|
+
# http://localnode.org/Homer_Simpson using N::LOCAL constant.
|
198
|
+
#
|
199
|
+
# "Homer Simpson"
|
200
|
+
# # => Should return the string itself, without the double quoting
|
201
|
+
# in order to add it directly to the RDF triple.
|
202
|
+
#
|
203
|
+
# http://springfield.org/Homer_Simpson
|
204
|
+
# # => Should instantiate a source with the given uri
|
205
|
+
def instantiate_source_or_rdf_object(attributes)
|
206
|
+
name_or_uri = attributes['titleized']
|
207
|
+
if /^\"[\w\s\d]+\"$/.match name_or_uri
|
208
|
+
name_or_uri[1..-2]
|
209
|
+
elsif attributes['uri'].blank? and attributes['source'].blank?
|
210
|
+
name_or_uri
|
211
|
+
elsif /^http:\/\//.match name_or_uri
|
212
|
+
Source.new(name_or_uri)
|
213
|
+
else
|
214
|
+
Source.find_or_instantiate_by_uri(normalize_uri(attributes['uri']), name_or_uri)
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
# Iterate through grouped_predicates_attributes, yielding the given code.
|
219
|
+
def each_predicate(&block)
|
220
|
+
grouped_predicates_attributes.each do |namespace, predicates|
|
221
|
+
predicates.each do |predicate, objects|
|
222
|
+
block.call(namespace, predicate, objects.flatten)
|
223
|
+
end
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
# Class methods
|
228
|
+
class << self
|
229
|
+
|
230
|
+
# Normalize the given uri.
|
231
|
+
#
|
232
|
+
# Example:
|
233
|
+
# normalize_uri('Lucca') # => http://www.talia.discovery-project.org/sources/Lucca
|
234
|
+
# normalize_uri('http://xmlns.com/foaf/0.1/Group') # => http://xmlns.com/foaf/0.1/Group
|
235
|
+
# normalize_uri('http://www.talia.discovery-project.org/sources/Lucca')
|
236
|
+
# # => http://www.talia.discovery-project.org/sources/Lucca
|
237
|
+
def normalize_uri(uri, label = '')
|
238
|
+
uri = N::LOCAL if uri.blank?
|
239
|
+
uri = N::LOCAL+label.gsub(' ', '_') if uri == N::LOCAL.to_s
|
240
|
+
uri.to_s
|
241
|
+
end
|
242
|
+
|
243
|
+
end
|
244
|
+
|
245
|
+
# End of class methods
|
246
|
+
|
247
|
+
|
248
|
+
# Missing methods: This just checks whether the given method corresponds to a
|
249
|
+
# registered namespace. If yes, this will return a "dummy" handler that
|
250
|
+
# allows access to properties.
|
251
|
+
#
|
252
|
+
# This will allow invocations as namespace::name
|
253
|
+
def method_missing(method_name, *args)
|
254
|
+
# TODO: Add permission checking for all updates to the model
|
255
|
+
# TODO: Add permission checking for read access?
|
256
|
+
|
257
|
+
update = method_name.to_s[-1..-1] == '='
|
258
|
+
|
259
|
+
shortcut = if update
|
260
|
+
method_name.to_s[0..-2]
|
261
|
+
else
|
262
|
+
method_name.to_s
|
263
|
+
end
|
264
|
+
|
265
|
+
# Otherwise, check for the RDF predicate
|
266
|
+
registered = N::URI[shortcut.to_s]
|
267
|
+
|
268
|
+
return super(method_name, *args) unless(registered) # normal handler if not a registered uri
|
269
|
+
raise(ArgumentError, "Must give a namspace as argument") unless(registered.is_a?(N::Namespace))
|
270
|
+
|
271
|
+
DummyHandler.new(registered, self)
|
311
272
|
end
|
312
273
|
|
313
|
-
# Otherwise, check for the RDF predicate
|
314
|
-
registered = N::URI[shortcut.to_s]
|
315
|
-
|
316
|
-
return super(method_name, *args) unless(registered) # normal handler if not a registered uri
|
317
|
-
raise(ArgumentError, "Must give a namspace as argument") unless(registered.is_a?(N::Namespace))
|
318
|
-
|
319
|
-
DummyHandler.new(registered, self)
|
320
274
|
end
|
321
|
-
|
322
|
-
end
|
323
|
-
end
|
275
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module TaliaCore
|
2
|
+
module SourceTypes
|
3
|
+
|
4
|
+
# A generic resource that should contain the most important
|
5
|
+
# Dublin Core metadata fields
|
6
|
+
class DcResource < Source
|
7
|
+
|
8
|
+
# General metadata
|
9
|
+
singular_property :identifier, N::DCNS.identifier
|
10
|
+
simple_property :creators, N::DCNS.creator
|
11
|
+
singular_property :date, N::DCNS.date
|
12
|
+
singular_property :description, N::DCNS.description
|
13
|
+
simple_property :publishers, N::DCNS.publisher
|
14
|
+
singular_property :language, N::DCNS.language
|
15
|
+
simple_property :dc_subjects, N::DCNS.subject
|
16
|
+
singular_property :rights, N::DCNS.rights
|
17
|
+
singular_property :title, N::DCNS.title
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module TaliaCore
|
2
|
+
module SourceTypes
|
3
|
+
|
4
|
+
# Dummy source class. This will be created by some mechanisms that need to create a relation to a
|
5
|
+
# not-yet-existing source. The DummySource should only exist temporarily, if some are found inside
|
6
|
+
# the data store it may be a sign of an inconsistent or not completely initialized store.
|
7
|
+
class DummySource < Source
|
8
|
+
|
9
|
+
# Converts the current source into one with a "real" klass. Returns the new, converted source.
|
10
|
+
def self.make_real(klass)
|
11
|
+
assit_kind_of(Class, klass)
|
12
|
+
self['type'] = klass.name
|
13
|
+
save!
|
14
|
+
new_src = ActiveSource.find(uri)
|
15
|
+
assit_kind_of(klass, new_src)
|
16
|
+
new_src
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
end
|
data/lib/talia_core.rb
CHANGED
@@ -5,33 +5,57 @@ module TaliaUtil
|
|
5
5
|
# Helper methods that will be used during import job runs.
|
6
6
|
# The import jobs may use the following environment parameters:
|
7
7
|
#
|
8
|
-
# [*base_url*]
|
8
|
+
# [*base_url*] The base URL or directory. This will be prefixed to all urls, or if it is
|
9
9
|
# a local directory, it will be made the current directory during the import
|
10
|
-
# [*index*]
|
10
|
+
# [*index*] If given, the importer will try to read this document. While this will still
|
11
11
|
# support the old-style "hyper" format with sigla, it should usually contain a
|
12
12
|
# main element called "index" followed by "url" entries.
|
13
|
-
# [*xml*]
|
13
|
+
# [*xml*] URL of an XML file to import. This is incompatible with the "index" option.
|
14
14
|
# If neither "xml" nor "index" are given, the class will try to read the XML data from
|
15
15
|
# STDIN
|
16
|
-
# [*importer*]
|
16
|
+
# [*importer*] Name of the importer class to be used for the data. Uses the default class if not given
|
17
17
|
# [*reset_store*] - If this is set, the data store will be cleared before the import
|
18
|
-
# [*user*]
|
19
|
-
# [*pass*]
|
20
|
-
# [*callback*]
|
18
|
+
# [*user*] Username for HTTP authentication, if required
|
19
|
+
# [*pass*] Password for HTTP authentication, if required
|
20
|
+
# [*callback*] Name of a class. If given, the import will call the #before_import and #after_import
|
21
21
|
# methods on an object of that class. The call will receive a block which may be
|
22
22
|
# yielded to for each progress step and which can receive the overall number of
|
23
23
|
# steps
|
24
|
-
# [*extension*]
|
24
|
+
# [*extension*] Only used with index files; file extension to use
|
25
|
+
# [*duplicates*] How to deal with elements that already exist in the datastore. This may be
|
26
|
+
# set to one of the following options (default: :skip):
|
27
|
+
# * :add - Database fields will be updated and the system will add semantic
|
28
|
+
# properties as additional values, without removing any of the existing
|
29
|
+
# semantic relations. Example: If the data store already
|
30
|
+
# contains a title for an element, and the import file contains another
|
31
|
+
# for that element, the element will have two titles after the import.
|
32
|
+
# The system will not check for duplicates. Files will always be imported
|
33
|
+
# in addition to the existing ones.
|
34
|
+
# * :update - Database fields will be updated, and semantic properties will
|
35
|
+
# be overwritten with the new value(s). Semantic properties that are not
|
36
|
+
# included in the import data will be left untouched. In the example above,
|
37
|
+
# the element would only contain the new title. If the element also contained
|
38
|
+
# author information, and no author information was in the import file, the
|
39
|
+
# existing author information will be untouched. Existing files are replaced
|
40
|
+
# if the import contains new files
|
41
|
+
# * :overwrite - Database fields will be updated. All semantic data will be
|
42
|
+
# deleted before the import. Files are always removed.
|
43
|
+
# * :skip - If an element already exists, the import will be skipped.
|
44
|
+
#
|
45
|
+
# [*trace*] Enable tracing output for errors. (By default, this takes the rake task's setting
|
46
|
+
# if possible)
|
25
47
|
class ImportJobHelper
|
26
48
|
|
27
49
|
include IoHelper
|
28
50
|
|
29
|
-
attr_reader :importer, :credentials, :index_data, :xml_data, :reset, :callback, :base_url, :message_stream, :progressor
|
51
|
+
attr_reader :importer, :credentials, :index_data, :xml_data, :reset, :callback, :base_url, :message_stream, :progressor, :duplicates, :trace
|
30
52
|
|
31
53
|
# The message_stream will be used for printing progress messages
|
32
54
|
def initialize(message_stream = STDOUT, progressor = TaliaCore::BackgroundJobs::Job)
|
55
|
+
@trace = (defined?(Rake) ? Rake.application.options.trace : false) || ENV['trace']
|
33
56
|
@progressor = progressor
|
34
57
|
@message_stream = message_stream
|
58
|
+
@duplicates = ENV['duplicates'].to_sym if(ENV['duplicates'])
|
35
59
|
@importer = ENV['importer'] || 'TaliaCore::ActiveSourceParts::Xml::SourceReader'
|
36
60
|
@credentials = { :http_basic_authentication => [ENV['user'], ENV['pass']] } unless(ENV['user'].blank?)
|
37
61
|
assit(!(ENV['xml'] && ENV['index']), 'Not both xml and index parameters allowed')
|
@@ -59,6 +83,7 @@ module TaliaUtil
|
|
59
83
|
else
|
60
84
|
xml_url = ENV['xml']
|
61
85
|
xml_url = base_url + xml_url unless(File.exists?(xml_url))
|
86
|
+
@true_root = base_for(xml_url)
|
62
87
|
open_generic(xml_url, credentials) { |io| io.read }
|
63
88
|
end
|
64
89
|
else
|
@@ -78,15 +103,20 @@ module TaliaUtil
|
|
78
103
|
import_from_index(errors)
|
79
104
|
else
|
80
105
|
puts "Importing from single data file."
|
81
|
-
TaliaCore::ActiveSource.create_from_xml(xml_data, :progressor => progressor, :reader => importer, :errors => errors)
|
106
|
+
TaliaCore::ActiveSource.create_from_xml(xml_data, :progressor => progressor, :reader => importer, :base_file_uri => @true_root, :errors => errors, :duplicates => duplicates)
|
82
107
|
end
|
83
108
|
if(errors.size > 0)
|
84
109
|
puts "WARNING: #{errors.size} errors during import:"
|
85
|
-
errors.each { |e|
|
110
|
+
errors.each { |e| print_error e }
|
86
111
|
end
|
87
112
|
run_callback(:after_import)
|
88
113
|
end
|
89
114
|
|
115
|
+
def print_error(e)
|
116
|
+
puts e.message
|
117
|
+
puts e.backtrace if(trace)
|
118
|
+
end
|
119
|
+
|
90
120
|
def import_from_index(errors)
|
91
121
|
doc = Hpricot.XML(index_data)
|
92
122
|
hyper_format = (doc.root.name == 'sigla')
|
@@ -99,10 +129,8 @@ module TaliaUtil
|
|
99
129
|
elements.each do |element|
|
100
130
|
url = make_url_from("#{element.inner_text}#{ENV['extension']}")
|
101
131
|
begin
|
102
|
-
|
103
|
-
|
104
|
-
source_attributes = source_attributes + this_attribs
|
105
|
-
end
|
132
|
+
this_attribs = my_importer.sources_from_url(url, credentials)
|
133
|
+
source_attributes = source_attributes + this_attribs
|
106
134
|
rescue Exception => e
|
107
135
|
message_stream.puts "Problem importing #{url} (#{e.message})"
|
108
136
|
message_stream.puts e.backtrace
|
@@ -112,7 +140,7 @@ module TaliaUtil
|
|
112
140
|
end
|
113
141
|
# Write the data
|
114
142
|
TaliaCore::ActiveSource.progressor = progressor
|
115
|
-
TaliaCore::ActiveSource.create_multi_from(source_attributes, :errors => errors)
|
143
|
+
TaliaCore::ActiveSource.create_multi_from(source_attributes, :errors => errors, :duplicates => duplicates)
|
116
144
|
end
|
117
145
|
|
118
146
|
def make_url_from(url)
|
data/lib/talia_util/io_helper.rb
CHANGED
@@ -21,6 +21,23 @@ module TaliaUtil
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
+
# Will try to figure out the "base" (that is the parent directory or path)
|
25
|
+
# If the base is a directory, this will return the directory name, but if
|
26
|
+
# it is a URL, this will return a URI object.
|
27
|
+
def base_for(url)
|
28
|
+
url = file_url(url)
|
29
|
+
if(File.exist?(url))
|
30
|
+
file = File.expand_path(url)
|
31
|
+
File.directory?(file) ? file : File.dirname(file)
|
32
|
+
else
|
33
|
+
uri = URI.parse(url)
|
34
|
+
# Remove everything after the last '/'
|
35
|
+
uri.path.gsub!(/\/[^\/]+\Z/, '/')
|
36
|
+
uri.fragment = nil
|
37
|
+
uri
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
24
41
|
# Opens the given (web) URL, using URL encoding and necessary substitutions.
|
25
42
|
# The user must pass a block which will receive the io object from
|
26
43
|
# the url.
|
@@ -30,7 +47,10 @@ module TaliaUtil
|
|
30
47
|
#
|
31
48
|
# :http_basic_authentication => [login, password]
|
32
49
|
def open_from_url(url, options = {})
|
33
|
-
|
50
|
+
# Encode the URI (the inner decode will preserve already-encoded URIs and should
|
51
|
+
# do nothing to non-encoded URIs)
|
52
|
+
url = URI.encode(URI.decode(url))
|
53
|
+
|
34
54
|
url.gsub!(/\[/, '%5B') # URI class doesn't like unescaped brackets
|
35
55
|
url.gsub!(/\]/, '%5D')
|
36
56
|
open_args = [ url ]
|