talia_core 0.4.3 → 0.4.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (96) hide show
  1. data/README.rdoc +6 -27
  2. data/VERSION.yml +3 -2
  3. data/config/database.yml +11 -11
  4. data/config/talia_core.yml +11 -6
  5. data/config/talia_core.yml.example +11 -6
  6. data/generators/talia_base/talia_base_generator.rb +3 -0
  7. data/generators/talia_base/templates/README +1 -1
  8. data/generators/talia_base/templates/app/controllers/source_data_controller.rb +1 -1
  9. data/generators/talia_base/templates/app/controllers/sources_controller.rb +31 -12
  10. data/generators/talia_base/templates/app/helpers/sources_helper.rb +77 -1
  11. data/generators/talia_base/templates/app/views/layouts/sources.html.erb +22 -0
  12. data/generators/talia_base/templates/app/views/sources/_data_list.html.erb +17 -0
  13. data/generators/talia_base/templates/app/views/sources/_property_item.html.erb +10 -0
  14. data/generators/talia_base/templates/app/views/sources/_property_list.html.erb +13 -0
  15. data/generators/talia_base/templates/app/views/sources/index.html.erb +16 -11
  16. data/generators/talia_base/templates/app/views/sources/semantic_templates/default/default.html.erb +8 -19
  17. data/generators/talia_base/templates/config/routes.rb +11 -0
  18. data/generators/talia_base/templates/migrations/create_semantic_relations.rb +2 -1
  19. data/generators/talia_base/templates/public/images/core/arrow.png +0 -0
  20. data/generators/talia_base/templates/public/images/core/building.png +0 -0
  21. data/generators/talia_base/templates/public/images/core/contents_top_left.gif +0 -0
  22. data/generators/talia_base/templates/public/images/core/document-horizontal-text.png +0 -0
  23. data/generators/talia_base/templates/public/images/core/document.png +0 -0
  24. data/generators/talia_base/templates/public/images/core/gear.png +0 -0
  25. data/generators/talia_base/templates/public/images/core/group.png +0 -0
  26. data/generators/talia_base/templates/public/images/core/header_bg.gif +0 -0
  27. data/generators/talia_base/templates/public/images/core/image.png +0 -0
  28. data/generators/talia_base/templates/public/images/core/imagebig.png +0 -0
  29. data/generators/talia_base/templates/public/images/core/left_edge.gif +0 -0
  30. data/generators/talia_base/templates/public/images/core/letter.png +0 -0
  31. data/generators/talia_base/templates/public/images/core/line.png +0 -0
  32. data/generators/talia_base/templates/public/images/core/logo.gif +0 -0
  33. data/generators/talia_base/templates/public/images/core/map.png +0 -0
  34. data/generators/talia_base/templates/public/images/core/period.png +0 -0
  35. data/generators/talia_base/templates/public/images/core/person.png +0 -0
  36. data/generators/talia_base/templates/public/images/core/person_default.png +0 -0
  37. data/generators/talia_base/templates/public/images/core/place.png +0 -0
  38. data/generators/talia_base/templates/public/images/core/source.png +0 -0
  39. data/generators/talia_base/templates/public/images/core/television.png +0 -0
  40. data/generators/talia_base/templates/public/images/core/text.png +0 -0
  41. data/generators/talia_base/templates/public/images/core/type.png +0 -0
  42. data/generators/talia_base/templates/public/images/core/video.png +0 -0
  43. data/generators/talia_base/templates/public/stylesheets/img/arrow.png +0 -0
  44. data/generators/talia_base/templates/public/stylesheets/main.css +276 -0
  45. data/generators/talia_base/templates/script/configure_talia +1 -1
  46. data/generators/talia_base/templates/script/setup_talia_backend +2 -0
  47. data/lib/core_ext/platform.rb +1 -0
  48. data/lib/core_ext/string.rb +6 -0
  49. data/lib/talia_core/active_source.rb +62 -3
  50. data/lib/talia_core/active_source_parts/class_methods.rb +36 -122
  51. data/lib/talia_core/active_source_parts/finders.rb +158 -0
  52. data/lib/talia_core/active_source_parts/predicate_handler.rb +7 -8
  53. data/lib/talia_core/active_source_parts/xml/generic_reader.rb +95 -11
  54. data/lib/talia_core/active_source_parts/xml/rdf_builder.rb +6 -13
  55. data/lib/talia_core/active_source_parts/xml/source_reader.rb +8 -3
  56. data/lib/talia_core/data_types/data_loader.rb +14 -6
  57. data/lib/talia_core/data_types/data_record.rb +5 -1
  58. data/lib/talia_core/data_types/iip_data.rb +1 -1
  59. data/lib/talia_core/data_types/mime_mapping.rb +8 -3
  60. data/lib/talia_core/errors.rb +4 -0
  61. data/lib/talia_core/initializer.rb +1 -8
  62. data/lib/talia_core/property_string.rb +58 -0
  63. data/lib/talia_core/semantic_collection_item.rb +3 -2
  64. data/lib/talia_core/semantic_collection_wrapper.rb +236 -198
  65. data/lib/talia_core/source.rb +130 -178
  66. data/lib/talia_core/source_types/collection.rb +15 -0
  67. data/lib/talia_core/source_types/dc_resource.rb +22 -0
  68. data/lib/talia_core/source_types/dummy_source.rb +22 -0
  69. data/lib/talia_core.rb +0 -1
  70. data/lib/talia_util/import_job_helper.rb +44 -16
  71. data/lib/talia_util/io_helper.rb +21 -1
  72. data/lib/talia_util/rake_tasks.rb +48 -72
  73. data/lib/talia_util/rdf_update.rb +22 -13
  74. data/lib/talia_util/test_helpers.rb +1 -1
  75. data/lib/talia_util.rb +0 -2
  76. data/test/core_ext/string_test.rb +5 -0
  77. data/test/talia_core/active_source_test.rb +151 -14
  78. data/test/talia_core/generic_xml_test.rb +46 -2
  79. data/test/talia_core/initializer_test.rb +0 -1
  80. data/test/talia_core/property_string_test.rb +78 -0
  81. data/test/talia_core/source_reader_test.rb +5 -1
  82. data/test/talia_core/source_test.rb +23 -32
  83. data/test/talia_util/import_job_helper_test.rb +1 -1
  84. data/test/talia_util/io_helper_test.rb +44 -0
  85. metadata +399 -373
  86. data/generators/talia_base/templates/app/views/sources/semantic_templates/default/province.html.erb +0 -19
  87. data/lib/acts_as_roled.rb +0 -11
  88. data/lib/talia_core/collection.rb +0 -13
  89. data/lib/talia_core/dc_resource.rb +0 -20
  90. data/lib/talia_core/dummy_source.rb +0 -20
  91. data/lib/talia_core/rails_ext/actionpack/action_controller/record_identifier.rb +0 -13
  92. data/lib/talia_core/rails_ext/actionpack/action_controller.rb +0 -1
  93. data/lib/talia_core/rails_ext/actionpack.rb +0 -1
  94. data/lib/talia_core/rails_ext.rb +0 -1
  95. data/lib/talia_util/data_import.rb +0 -91
  96. data/lib/talia_util/yaml_import.rb +0 -80
@@ -6,7 +6,7 @@ require 'dummy_handler'
6
6
  require 'rdf_resource'
7
7
 
8
8
  module TaliaCore
9
-
9
+
10
10
  # This represents a Source in the Talia core system.
11
11
  #
12
12
  # Since data for the Source exists both in the database and in the RDF store, the
@@ -19,41 +19,32 @@ module TaliaCore
19
19
  # * To ensure that the data is written, the save method should be called as
20
20
  # necessary.
21
21
  class Source < ActiveSource
22
-
22
+ # FIXME: Remove methods for old admin panel
23
+
23
24
  has_one :workflow, :class_name => 'TaliaCore::Workflow::Base', :dependent => :destroy
24
-
25
+
25
26
  # The uri will be wrapped into an object
26
27
  def uri
27
28
  N::URI.new(self[:uri])
28
29
  end
29
-
30
+
30
31
  # Indicates if this source belongs to the local store
31
32
  def local
32
33
  uri.local?
33
34
  end
34
35
 
35
- alias_method :ar_update_attributes, :update_attributes
36
- # Wrapping for <tt>ActiveRecord</tt> <tt>update_attributes</tt>.
37
- def update_attributes(attributes)
38
- attributes, rdf_attributes = extract_attributes!(attributes)
39
- rdf_attributes.each do |k,v|
40
- send(k + "=", v)
41
- send('save_' + k)
42
- end
43
- end
44
-
45
36
  # Shortcut for assigning the primary_source status
46
37
  def primary_source=(value)
47
38
  value = value ? 'true' : 'false'
48
39
  predicate_set(:talia, :primary_source, value)
49
40
  end
50
-
41
+
51
42
  # Indicates if the current source is considered "primary" in the local
52
43
  # library
53
44
  def primary_source
54
45
  predicate(:talia, :primary_source) == true
55
46
  end
56
-
47
+
57
48
  # Searches for sources where <tt>property</tt> has one of the values given
58
49
  # to this method. The result is a hash that contains one result list for
59
50
  # each of the values, with the value as a key.
@@ -72,22 +63,22 @@ module TaliaCore
72
63
  joins = 'LEFT JOIN semantic_relations ON semantic_relations.subject_id = active_sources.id '
73
64
  joins << "LEFT JOIN active_sources AS t_sources ON semantic_relations.object_id = t_sources.id AND semantic_relations.object_type = 'TaliaCore::ActiveSource' "
74
65
  joins << "LEFT JOIN semantic_properties ON semantic_relations.object_id = semantic_properties.id AND semantic_relations.object_type = 'TaliaCore::SemanticProperty' "
75
-
66
+
76
67
  property = uri_string_for(property)
77
68
  results = {}
78
69
  for val in values
79
70
  find(:all )
80
71
  val_str = uri_string_for(val)
81
72
  find_parms = params.merge(
82
- :conditions => ['semantic_properties.value = ? OR t_sources.uri = ?', val_str, val_str],
83
- :joins => joins
73
+ :conditions => ['semantic_properties.value = ? OR t_sources.uri = ?', val_str, val_str],
74
+ :joins => joins
84
75
  )
85
76
  results[val] = find(:all, find_parms)
86
77
  end
87
-
78
+
88
79
  results
89
80
  end
90
-
81
+
91
82
  # Try to find a source for the given uri; if none exists, it instantiates
92
83
  # a new one, combining the N::LOCAL namespace and the given local name
93
84
  #
@@ -101,35 +92,6 @@ module TaliaCore
101
92
  result = find_by_uri(uri)
102
93
  result ||= self.new(N::LOCAL.to_s + local_name.to_permalink)
103
94
  end
104
-
105
- # Find a list of sources which contains the given token inside the local name.
106
- # This means that the namespace it will be excluded.
107
- #
108
- # Sources in system:
109
- # * http://talia.org/one
110
- # * http://talia.org/two
111
- #
112
- # Source.find_by_uri_token('a') # => [ ]
113
- # Source.find_by_uri_token('o') # => [ 'http://talia.org/one', 'http://talia.org/two' ]
114
- #
115
- # NOTE: It internally use a MySQL function, as sql condition, to find the local name of the uri.
116
- def self.find_by_uri_token(token, options = {})
117
- find(:all, {
118
- :conditions => [ "LOWER(SUBSTRING_INDEX(uri, '/', -1)) LIKE ?", '%' + token.downcase + '%' ],
119
- :select => :uri,
120
- :order => "uri ASC",
121
- :limit => 10 }.merge!(options))
122
- end
123
-
124
- # Find the fist Source that matches the given URI.
125
- # It's useful for admin pane, because users visit:
126
- # /admin/sources/<source_id>/edit
127
- # but that information is not enough, since we store
128
- # into the database the whole reference as URI:
129
- # http://localnode.org/av_media_sources/source_id
130
- def self.find_by_partial_uri(id)
131
- find(:first, :conditions => ["uri LIKE ?", '%' + id + '%'])
132
- end
133
95
 
134
96
  # Return an hash of direct predicates, grouped by namespace.
135
97
  def grouped_direct_predicates
@@ -178,146 +140,136 @@ module TaliaCore
178
140
  end
179
141
  end
180
142
 
181
- # Save, associate/disassociate given predicates attributes.
182
- def save_predicates_attributes
183
- each_predicate do |namespace, name, objects|
184
- objects.each { |object| object.save if object.is_a?(Source) && object.new_record? }
185
- self.predicate_replace(namespace, name, objects.to_s) if predicate_changed?(namespace, name, objects)
143
+ # Save, associate/disassociate given predicates attributes.
144
+ def save_predicates_attributes
145
+ each_predicate do |namespace, name, objects|
146
+ objects.each { |object| object.save if object.is_a?(Source) && object.new_record? }
147
+ self.predicate_replace(namespace, name, objects.to_s) if predicate_changed?(namespace, name, objects)
148
+ end
186
149
  end
187
- end
188
150
 
189
-
190
- # Returns an array of labels for this source. You may give the name of the
191
- # property that is used as a label, by default it uses rdf:label(s). If
192
- # the given property is not set, it will return the local part of this
193
- # Source's URI.
194
- #
195
- # In any case, the result will always be an Array with at least one elment.
196
- def labels(type = N::RDFS::label)
197
- labels = get_attribute(type)
198
- unless(labels && labels.size > 0)
199
- labels = [uri.local_name]
151
+
152
+ # Returns an array of labels for this source. You may give the name of the
153
+ # property that is used as a label, by default it uses rdfs:label. If
154
+ # the given property is not set, it will return the local part of this
155
+ # Source's URI.
156
+ #
157
+ # In any case, the result will always be an Array with at least one element.
158
+ def labels(type = N::RDFS::label)
159
+ labels = get_attribute(type)
160
+ unless(labels && labels.size > 0)
161
+ labels = [uri.local_name]
162
+ end
163
+
164
+ labels
200
165
  end
201
166
 
202
- labels
203
- end
204
-
205
- # This returns a single label of the given type. (If multiple labels
206
- # exist in the RDF, just the first is returned.)
207
- def label(type = N::RDFS::label)
208
- labels(type)[0]
209
- end
210
-
211
- # Return the titleized uri local name.
212
- #
213
- # http://localnode.org/source # => Source
214
- def titleized
215
- self.uri.local_name.titleize
216
- end
217
-
218
- # Equality test. Two sources are equal if they have the same URI
219
- def ==(value)
220
- value.is_a?(Source) && (value.uri == uri)
221
- end
222
-
223
- def normalize_uri(uri, label = '')
224
- self.class.normalize_uri(uri, label)
225
- end
226
-
227
- protected
228
-
229
- # Separates given attributes distinguishing between ActiveSource and RDF.
230
- def extract_attributes!(attributes)
231
- active_source_attributes = attributes.inject({}) do |active_source_attributes, column_values|
232
- active_source_attributes[column_values.first] = attributes.delete(column_values.first) if self.class.column_names.include? column_values.first
233
- active_source_attributes
167
+ # This returns a single label of the given type. (If multiple labels
168
+ # exist in the RDF, just the first is returned.)
169
+ def label(type = N::RDFS::label)
170
+ labels(type)[0]
234
171
  end
235
172
 
236
- [ active_source_attributes, attributes ]
237
- end
238
-
239
- # Look at the given attributes and choose to instantiate
240
- # a Source or a RDF object (triple endpoint).
241
- #
242
- # Cases:
243
- # Homer Simpson
244
- # # => Should instantiate a source with
245
- # http://localnode.org/Homer_Simpson using N::LOCAL constant.
246
- #
247
- # "Homer Simpson"
248
- # # => Should return the string itself, without the double quoting
249
- # in order to add it directly to the RDF triple.
250
- #
251
- # http://springfield.org/Homer_Simpson
252
- # # => Should instantiate a source with the given uri
253
- def instantiate_source_or_rdf_object(attributes)
254
- name_or_uri = attributes['titleized']
255
- if /^\"[\w\s\d]+\"$/.match name_or_uri
256
- name_or_uri[1..-2]
257
- elsif attributes['uri'].blank? and attributes['source'].blank?
258
- name_or_uri
259
- elsif /^http:\/\//.match name_or_uri
260
- Source.new(name_or_uri)
261
- else
262
- Source.find_or_instantiate_by_uri(normalize_uri(attributes['uri']), name_or_uri)
173
+ # Return the titleized uri local name.
174
+ #
175
+ # http://localnode.org/source # => Source
176
+ def titleized
177
+ self.uri.local_name.titleize
263
178
  end
264
- end
265
179
 
266
- # Iterate through grouped_predicates_attributes, yielding the given code.
267
- def each_predicate(&block)
268
- grouped_predicates_attributes.each do |namespace, predicates|
269
- predicates.each do |predicate, objects|
270
- block.call(namespace, predicate, objects.flatten)
271
- end
180
+ # Equality test. Two sources are equal if they have the same URI
181
+ def ==(value)
182
+ value.is_a?(Source) && (value.uri == uri)
272
183
  end
273
- end
274
184
 
275
- # Class methods
276
- class << self
277
-
278
- # Normalize the given uri.
279
- #
280
- # Example:
281
- # normalize_uri('Lucca') # => http://www.talia.discovery-project.org/sources/Lucca
282
- # normalize_uri('http://xmlns.com/foaf/0.1/Group') # => http://xmlns.com/foaf/0.1/Group
283
- # normalize_uri('http://www.talia.discovery-project.org/sources/Lucca')
284
- # # => http://www.talia.discovery-project.org/sources/Lucca
285
185
  def normalize_uri(uri, label = '')
286
- uri = N::LOCAL if uri.blank?
287
- uri = N::LOCAL+label.gsub(' ', '_') if uri == N::LOCAL.to_s
288
- uri.to_s
186
+ self.class.normalize_uri(uri, label)
289
187
  end
290
-
291
- end
292
-
293
- # End of class methods
294
-
295
-
296
- # Missing methods: This just check if the given method corresponds to a
297
- # registered namespace. If yes, this will return a "dummy" handler that
298
- # allows access to properties.
299
- #
300
- # This will allow invocations as namespace::name
301
- def method_missing(method_name, *args)
302
- # TODO: Add permission checking for all updates to the model
303
- # TODO: Add permission checking for read access?
304
-
305
- update = method_name.to_s[-1..-1] == '='
306
-
307
- shortcut = if update
308
- method_name.to_s[0..-2]
309
- else
310
- method_name.to_s
188
+
189
+ protected
190
+
191
+ # Look at the given attributes and choose to instantiate
192
+ # a Source or a RDF object (triple endpoint).
193
+ #
194
+ # Cases:
195
+ # Homer Simpson
196
+ # # => Should instantiate a source with
197
+ # http://localnode.org/Homer_Simpson using N::LOCAL constant.
198
+ #
199
+ # "Homer Simpson"
200
+ # # => Should return the string itself, without the double quoting
201
+ # in order to add it directly to the RDF triple.
202
+ #
203
+ # http://springfield.org/Homer_Simpson
204
+ # # => Should instantiate a source with the given uri
205
+ def instantiate_source_or_rdf_object(attributes)
206
+ name_or_uri = attributes['titleized']
207
+ if /^\"[\w\s\d]+\"$/.match name_or_uri
208
+ name_or_uri[1..-2]
209
+ elsif attributes['uri'].blank? and attributes['source'].blank?
210
+ name_or_uri
211
+ elsif /^http:\/\//.match name_or_uri
212
+ Source.new(name_or_uri)
213
+ else
214
+ Source.find_or_instantiate_by_uri(normalize_uri(attributes['uri']), name_or_uri)
215
+ end
216
+ end
217
+
218
+ # Iterate through grouped_predicates_attributes, yielding the given code.
219
+ def each_predicate(&block)
220
+ grouped_predicates_attributes.each do |namespace, predicates|
221
+ predicates.each do |predicate, objects|
222
+ block.call(namespace, predicate, objects.flatten)
223
+ end
224
+ end
225
+ end
226
+
227
+ # Class methods
228
+ class << self
229
+
230
+ # Normalize the given uri.
231
+ #
232
+ # Example:
233
+ # normalize_uri('Lucca') # => http://www.talia.discovery-project.org/sources/Lucca
234
+ # normalize_uri('http://xmlns.com/foaf/0.1/Group') # => http://xmlns.com/foaf/0.1/Group
235
+ # normalize_uri('http://www.talia.discovery-project.org/sources/Lucca')
236
+ # # => http://www.talia.discovery-project.org/sources/Lucca
237
+ def normalize_uri(uri, label = '')
238
+ uri = N::LOCAL if uri.blank?
239
+ uri = N::LOCAL+label.gsub(' ', '_') if uri == N::LOCAL.to_s
240
+ uri.to_s
241
+ end
242
+
243
+ end
244
+
245
+ # End of class methods
246
+
247
+
248
+ # Missing methods: This just checks if the given method corresponds to a
249
+ # registered namespace. If yes, this will return a "dummy" handler that
250
+ # allows access to properties.
251
+ #
252
+ # This will allow invocations as namespace::name
253
+ def method_missing(method_name, *args)
254
+ # TODO: Add permission checking for all updates to the model
255
+ # TODO: Add permission checking for read access?
256
+
257
+ update = method_name.to_s[-1..-1] == '='
258
+
259
+ shortcut = if update
260
+ method_name.to_s[0..-2]
261
+ else
262
+ method_name.to_s
263
+ end
264
+
265
+ # Otherwise, check for the RDF predicate
266
+ registered = N::URI[shortcut.to_s]
267
+
268
+ return super(method_name, *args) unless(registered) # normal handler if not a registered uri
269
+ raise(ArgumentError, "Must give a namspace as argument") unless(registered.is_a?(N::Namespace))
270
+
271
+ DummyHandler.new(registered, self)
311
272
  end
312
273
 
313
- # Otherwise, check for the RDF predicate
314
- registered = N::URI[shortcut.to_s]
315
-
316
- return super(method_name, *args) unless(registered) # normal handler if not a registered uri
317
- raise(ArgumentError, "Must give a namspace as argument") unless(registered.is_a?(N::Namespace))
318
-
319
- DummyHandler.new(registered, self)
320
274
  end
321
-
322
- end
323
- end
275
+ end
@@ -0,0 +1,15 @@
1
+ module TaliaCore
2
+ module SourceTypes
3
+
4
+ class Collection < DcResource
5
+
6
+ has_rdf_type N::DCNS.Collection
7
+ has_rdf_type N::SKOS.Collection
8
+ has_rdf_type N::DCMIT.Collection
9
+
10
+ simple_property :items, N::DCNS.hasPart
11
+
12
+ end
13
+
14
+ end
15
+ end
@@ -0,0 +1,22 @@
1
+ module TaliaCore
2
+ module SourceTypes
3
+
4
+ # A generic resource that should contain the most important
5
+ # Dublin Core metadata fields
6
+ class DcResource < Source
7
+
8
+ # General metadata
9
+ singular_property :identifier, N::DCNS.identifier
10
+ simple_property :creators, N::DCNS.creator
11
+ singular_property :date, N::DCNS.date
12
+ singular_property :description, N::DCNS.description
13
+ simple_property :publishers, N::DCNS.publisher
14
+ singular_property :language, N::DCNS.language
15
+ simple_property :dc_subjects, N::DCNS.subject
16
+ singular_property :rights, N::DCNS.rights
17
+ singular_property :title, N::DCNS.title
18
+
19
+ end
20
+
21
+ end
22
+ end
@@ -0,0 +1,22 @@
1
+ module TaliaCore
2
+ module SourceTypes
3
+
4
+ # Dummy source class. This will be created by some mechanisms that need to create a relation to a
5
+ # not-yet-existing source. The DummySource should only exist temporarily, if some are found inside
6
+ # the data store it may be a sign of an inconsistent or not completely initialized store.
7
+ class DummySource < Source
8
+
9
+ # Converts the current source into one with a "real" klass. Returns the new, converted source.
10
+ def self.make_real(klass)
11
+ assit_kind_of(Class, klass)
12
+ self['type'] = klass.name
13
+ save!
14
+ new_src = ActiveSource.find(uri)
15
+ assit_kind_of(klass, new_src)
16
+ new_src
17
+ end
18
+
19
+ end
20
+
21
+ end
22
+ end
data/lib/talia_core.rb CHANGED
@@ -3,5 +3,4 @@ require File.dirname(__FILE__) + '/talia_dependencies'
3
3
 
4
4
  # Load local things
5
5
  require 'talia_core/errors'
6
- require 'talia_core/rails_ext'
7
6
  require 'talia_core/initializer'
@@ -5,33 +5,57 @@ module TaliaUtil
5
5
  # Helper methods that will be used during import job runs.
6
6
  # The import jobs may use the following environment parameters:
7
7
  #
8
- # [*base_url*] - The base URL or directory. This will be prefixed to all urls, or if it is
8
+ # [*base_url*] The base URL or directory. This will be prefixed to all urls, or if it is
9
9
  # a local directory, it will be made the current directory during the import
10
- # [*index*] - If given, the importer will try to read this document. While this will still
10
+ # [*index*] If given, the importer will try to read this document. While this will still
11
11
  # support the old-style "hyper" format with sigla, it should usually contain a
12
12
  # main element called "index" followed by "url" entries.
13
- # [*xml*] - URL of an XML file to import. This is incompatible with the "index" option.
13
+ # [*xml*] URL of an XML file to import. This is incompatible with the "index" option.
14
14
  # If neither "xml" nor "index" are given, the class will try to read the XML data from
15
15
  # STDIN
16
- # [*importer*] - Name of the importer class to be used for the data. Uses the default class if not given
16
+ # [*importer*] Name of the importer class to be used for the data. Uses the default class if not given
17
17
  # [*reset_store*] - If this is set, the data store will be cleared before the import
18
- # [*user*] - Username for HTTP authentication, if required
19
- # [*pass*] - Password for HTTP authentication, if required
20
- # [*callback*] - Name of a class. If given, the import will call the #before_import and #after_import
18
+ # [*user*] Username for HTTP authentication, if required
19
+ # [*pass*] Password for HTTP authentication, if required
20
+ # [*callback*] Name of a class. If given, the import will call the #before_import and #after_import
21
21
  # methods on an object of that class. The call will receive a block which may be
22
22
  # yielded to for each progress step and which can receive the overall number of
23
23
  # steps
24
- # [*extension*] - Only used with index files; file extension to use
24
+ # [*extension*] Only used with index files; file extension to use
25
+ # [*duplicates*] How to deal with elements that already exist in the datastore. This may be
26
+ # set to one of the following options (default: :skip):
27
+ # * :add - Database fields will be updated and the system will add semantic
28
+ # properties as additional values, without removing any of the existing
29
+ # semantic relations. Example: If the data store already
30
+ # contains a title for an element, and the import file contains another
31
+ # for that element, the element will have two titles after the import.
32
+ # The system will not check for duplicates. Files will always be imported
33
+ # in addition to the existing ones.
34
+ # * :update - Database fields will be updated, and semantic properties will
35
+ # be overwritten with the new value(s). Semantic properties that are not
36
+ # included in the import data will be left untouched. In the example above,
37
+ # the element would only contain the new title. If the element also contained
38
+ # author information, and no author information was in the import file, the
39
+ # existing author information will be untouched. Existing files are replaced
40
+ # if the import contains new files
41
+ # * :overwrite - Database fields will be updated. All semantic data will be
42
+ # deleted before the import. Files are always removed.
43
+ # * :skip - If an element already exists, the import will be skipped.
44
+ #
45
+ # [*trace*] Enable tracing output for errors. (By default, this takes the rake task's setting
46
+ # if possible)
25
47
  class ImportJobHelper
26
48
 
27
49
  include IoHelper
28
50
 
29
- attr_reader :importer, :credentials, :index_data, :xml_data, :reset, :callback, :base_url, :message_stream, :progressor
51
+ attr_reader :importer, :credentials, :index_data, :xml_data, :reset, :callback, :base_url, :message_stream, :progressor, :duplicates, :trace
30
52
 
31
53
  # The message_stream will be used for printing progress messages
32
54
  def initialize(message_stream = STDOUT, progressor = TaliaCore::BackgroundJobs::Job)
55
+ @trace = (defined?(Rake) ? Rake.application.options.trace : false) || ENV['trace']
33
56
  @progressor = progressor
34
57
  @message_stream = message_stream
58
+ @duplicates = ENV['duplicates'].to_sym if(ENV['duplicates'])
35
59
  @importer = ENV['importer'] || 'TaliaCore::ActiveSourceParts::Xml::SourceReader'
36
60
  @credentials = { :http_basic_authentication => [ENV['user'], ENV['pass']] } unless(ENV['user'].blank?)
37
61
  assit(!(ENV['xml'] && ENV['index']), 'Not both xml and index parameters allowed')
@@ -59,6 +83,7 @@ module TaliaUtil
59
83
  else
60
84
  xml_url = ENV['xml']
61
85
  xml_url = base_url + xml_url unless(File.exists?(xml_url))
86
+ @true_root = base_for(xml_url)
62
87
  open_generic(xml_url, credentials) { |io| io.read }
63
88
  end
64
89
  else
@@ -78,15 +103,20 @@ module TaliaUtil
78
103
  import_from_index(errors)
79
104
  else
80
105
  puts "Importing from single data file."
81
- TaliaCore::ActiveSource.create_from_xml(xml_data, :progressor => progressor, :reader => importer, :errors => errors)
106
+ TaliaCore::ActiveSource.create_from_xml(xml_data, :progressor => progressor, :reader => importer, :base_file_uri => @true_root, :errors => errors, :duplicates => duplicates)
82
107
  end
83
108
  if(errors.size > 0)
84
109
  puts "WARNING: #{errors.size} errors during import:"
85
- errors.each { |e| puts e }
110
+ errors.each { |e| print_error e }
86
111
  end
87
112
  run_callback(:after_import)
88
113
  end
89
114
 
115
+ def print_error(e)
116
+ puts e.message
117
+ puts e.backtrace if(trace)
118
+ end
119
+
90
120
  def import_from_index(errors)
91
121
  doc = Hpricot.XML(index_data)
92
122
  hyper_format = (doc.root.name == 'sigla')
@@ -99,10 +129,8 @@ module TaliaUtil
99
129
  elements.each do |element|
100
130
  url = make_url_from("#{element.inner_text}#{ENV['extension']}")
101
131
  begin
102
- open_generic(url, credentials) do |io|
103
- this_attribs = my_importer.sources_from(io)
104
- source_attributes = source_attributes + this_attribs
105
- end
132
+ this_attribs = my_importer.sources_from_url(url, credentials)
133
+ source_attributes = source_attributes + this_attribs
106
134
  rescue Exception => e
107
135
  message_stream.puts "Problem importing #{url} (#{e.message})"
108
136
  message_stream.puts e.backtrace
@@ -112,7 +140,7 @@ module TaliaUtil
112
140
  end
113
141
  # Write the data
114
142
  TaliaCore::ActiveSource.progressor = progressor
115
- TaliaCore::ActiveSource.create_multi_from(source_attributes, :errors => errors)
143
+ TaliaCore::ActiveSource.create_multi_from(source_attributes, :errors => errors, :duplicates => duplicates)
116
144
  end
117
145
 
118
146
  def make_url_from(url)
@@ -21,6 +21,23 @@ module TaliaUtil
21
21
  end
22
22
  end
23
23
 
24
+ # Will try to figure out the "base" (that is the parent directory or path)
25
+ # If the base is a directory, this will return the directory name, but if
26
+ # it is a URL, this will return a URI object.
27
+ def base_for(url)
28
+ url = file_url(url)
29
+ if(File.exist?(url))
30
+ file = File.expand_path(url)
31
+ File.directory?(file) ? file : File.dirname(file)
32
+ else
33
+ uri = URI.parse(url)
34
+ # Remove everything after the last '/'
35
+ uri.path.gsub!(/\/[^\/]+\Z/, '/')
36
+ uri.fragment = nil
37
+ uri
38
+ end
39
+ end
40
+
24
41
  # Opens the given (web) URL, using URL encoding and necessary substitutions.
25
42
  # The user must pass a block which will receive the io object from
26
43
  # the url.
@@ -30,7 +47,10 @@ module TaliaUtil
30
47
  #
31
48
  # :http_basic_authentication => [login, password]
32
49
  def open_from_url(url, options = {})
33
- url = URI.encode(url)
50
+ # Encode the URI (the inner decode will preserve already-encoded URIs and should
51
+ # do nothing to non-encoded URIs)
52
+ url = URI.encode(URI.decode(url))
53
+
34
54
  url.gsub!(/\[/, '%5B') # URI class doesn't like unescaped brackets
35
55
  url.gsub!(/\]/, '%5D')
36
56
  open_args = [ url ]