talia_core 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. data/README.rdoc +41 -0
  2. data/bin/talia +33 -0
  3. data/lib/JXslt/jxslt.rb +60 -0
  4. data/lib/acts_as_roled.rb +11 -0
  5. data/lib/core_ext/platform.rb +9 -0
  6. data/lib/core_ext/string.rb +6 -0
  7. data/lib/core_ext.rb +1 -0
  8. data/lib/custom_template.rb +4 -0
  9. data/lib/loader_helper.rb +62 -0
  10. data/lib/mysql.rb +1214 -0
  11. data/lib/progressbar.rb +236 -0
  12. data/lib/role.rb +12 -0
  13. data/lib/talia_cl/command_line.rb +39 -0
  14. data/lib/talia_cl/commands/standalone/cl_options.rb +9 -0
  15. data/lib/talia_cl/commands/standalone/standalone_generate.rb +75 -0
  16. data/lib/talia_cl/commands/standalone.rb +25 -0
  17. data/lib/talia_cl/commands/talia_console/cl_options.rb +55 -0
  18. data/lib/talia_cl/commands/talia_console/console_commands.rb +37 -0
  19. data/lib/talia_cl/commands/talia_console/talia_commands.rb +131 -0
  20. data/lib/talia_cl/commands/talia_console.rb +47 -0
  21. data/lib/talia_cl/core_commands.rb +11 -0
  22. data/lib/talia_cl.rb +47 -0
  23. data/lib/talia_core/active_source.rb +372 -0
  24. data/lib/talia_core/active_source_parts/class_methods.rb +378 -0
  25. data/lib/talia_core/active_source_parts/predicate_handler.rb +89 -0
  26. data/lib/talia_core/active_source_parts/rdf.rb +131 -0
  27. data/lib/talia_core/active_source_parts/sql_helper.rb +36 -0
  28. data/lib/talia_core/active_source_parts/xml/base_builder.rb +47 -0
  29. data/lib/talia_core/active_source_parts/xml/generic_reader.rb +363 -0
  30. data/lib/talia_core/active_source_parts/xml/rdf_builder.rb +88 -0
  31. data/lib/talia_core/active_source_parts/xml/source_builder.rb +73 -0
  32. data/lib/talia_core/active_source_parts/xml/source_reader.rb +20 -0
  33. data/lib/talia_core/agent.rb +14 -0
  34. data/lib/talia_core/background_jobs/job.rb +82 -0
  35. data/lib/talia_core/background_jobs/progress_job.rb +68 -0
  36. data/lib/talia_core/collection.rb +13 -0
  37. data/lib/talia_core/data_types/data_loader.rb +92 -0
  38. data/lib/talia_core/data_types/data_record.rb +105 -0
  39. data/lib/talia_core/data_types/delayed_copier.rb +76 -0
  40. data/lib/talia_core/data_types/file_record.rb +59 -0
  41. data/lib/talia_core/data_types/file_store.rb +306 -0
  42. data/lib/talia_core/data_types/iip_data.rb +153 -0
  43. data/lib/talia_core/data_types/iip_loader.rb +127 -0
  44. data/lib/talia_core/data_types/image_data.rb +32 -0
  45. data/lib/talia_core/data_types/media_link.rb +19 -0
  46. data/lib/talia_core/data_types/mime_mapping.rb +45 -0
  47. data/lib/talia_core/data_types/path_helpers.rb +77 -0
  48. data/lib/talia_core/data_types/pdf_data.rb +42 -0
  49. data/lib/talia_core/data_types/simple_text.rb +36 -0
  50. data/lib/talia_core/data_types/temp_file_handling.rb +85 -0
  51. data/lib/talia_core/data_types/xml_data.rb +169 -0
  52. data/lib/talia_core/dc_resource.rb +20 -0
  53. data/lib/talia_core/dummy_handler.rb +34 -0
  54. data/lib/talia_core/dummy_source.rb +20 -0
  55. data/lib/talia_core/errors.rb +25 -0
  56. data/lib/talia_core/initializer.rb +427 -0
  57. data/lib/talia_core/ordered_source.rb +228 -0
  58. data/lib/talia_core/rails_ext/actionpack/action_controller/record_identifier.rb +13 -0
  59. data/lib/talia_core/rails_ext/actionpack/action_controller.rb +1 -0
  60. data/lib/talia_core/rails_ext/actionpack.rb +1 -0
  61. data/lib/talia_core/rails_ext.rb +1 -0
  62. data/lib/talia_core/rdf_import.rb +90 -0
  63. data/lib/talia_core/rdf_resource.rb +159 -0
  64. data/lib/talia_core/semantic_collection_item.rb +93 -0
  65. data/lib/talia_core/semantic_collection_wrapper.rb +324 -0
  66. data/lib/talia_core/semantic_property.rb +7 -0
  67. data/lib/talia_core/semantic_relation.rb +67 -0
  68. data/lib/talia_core/source.rb +323 -0
  69. data/lib/talia_core/source_transfer_object.rb +38 -0
  70. data/lib/talia_core/workflow/base.rb +15 -0
  71. data/lib/talia_core/workflow/publication_workflow.rb +62 -0
  72. data/lib/talia_core/workflow.rb +300 -0
  73. data/lib/talia_core.rb +9 -0
  74. data/lib/talia_dependencies.rb +12 -0
  75. data/lib/talia_util/bar_progressor.rb +15 -0
  76. data/lib/talia_util/configuration/config_file.rb +48 -0
  77. data/lib/talia_util/configuration/database_config.rb +40 -0
  78. data/lib/talia_util/configuration/mysql_database_setup.rb +104 -0
  79. data/lib/talia_util/data_import.rb +91 -0
  80. data/lib/talia_util/image_conversions.rb +82 -0
  81. data/lib/talia_util/import_job_helper.rb +132 -0
  82. data/lib/talia_util/io_helper.rb +54 -0
  83. data/lib/talia_util/progressable.rb +38 -0
  84. data/lib/talia_util/progressbar.rb +236 -0
  85. data/lib/talia_util/rdf_update.rb +80 -0
  86. data/lib/talia_util/some_sigla.xml +1960 -0
  87. data/lib/talia_util/test_helpers.rb +151 -0
  88. data/lib/talia_util/util.rb +226 -0
  89. data/lib/talia_util/yaml_import.rb +80 -0
  90. data/lib/talia_util.rb +13 -0
  91. data/lib/user.rb +116 -0
  92. data/lib/version.rb +15 -0
  93. data/test/core_ext/string_test.rb +11 -0
  94. data/test/custom_template_test.rb +8 -0
  95. data/test/talia_core/active_source_predicate_test.rb +54 -0
  96. data/test/talia_core/active_source_rdf_test.rb +89 -0
  97. data/test/talia_core/active_source_test.rb +631 -0
  98. data/test/talia_core/data_types/data_loader_test.rb +123 -0
  99. data/test/talia_core/data_types/data_record_test.rb +40 -0
  100. data/test/talia_core/data_types/file_record_test.rb +171 -0
  101. data/test/talia_core/data_types/iip_data_test.rb +130 -0
  102. data/test/talia_core/data_types/image_data_test.rb +88 -0
  103. data/test/talia_core/data_types/pdf_data_test.rb +68 -0
  104. data/test/talia_core/data_types/xml_data_test.rb +134 -0
  105. data/test/talia_core/generic_xml_test.rb +83 -0
  106. data/test/talia_core/initializer_test.rb +36 -0
  107. data/test/talia_core/ordered_source_test.rb +398 -0
  108. data/test/talia_core/rdf_resource_test.rb +115 -0
  109. data/test/talia_core/semantic_collection_item_test.rb +129 -0
  110. data/test/talia_core/source_reader_test.rb +33 -0
  111. data/test/talia_core/source_test.rb +484 -0
  112. data/test/talia_core/source_transfer_object_test.rb +24 -0
  113. data/test/talia_core/workflow/publication_workflow_test.rb +242 -0
  114. data/test/talia_core/workflow/user_class_for_workflow.rb +35 -0
  115. data/test/talia_core/workflow/workflow_base_test.rb +21 -0
  116. data/test/talia_core/workflow_test.rb +19 -0
  117. data/test/talia_util/import_job_helper_test.rb +46 -0
  118. data/test/test_helper.rb +68 -0
  119. metadata +262 -0
@@ -0,0 +1,68 @@
1
+ module TaliaCore
2
+ module BackgroundJobs
3
+
4
# Helper table to track the current status of a long-running task. Each record
# stores the id of the tracked job (job_id), a progress message, the total
# number of items (item_count) and the number of items handled so far
# (processed_count).
class ProgressJob < ActiveRecord::Base

  # Minimum interval (in seconds) between two database updates caused by #inc.
  DB_UPDATE_INTERVAL = 2

  validates_numericality_of :job_id, :only_integer => true
  # The Rails option is :greater_than_or_equal_to; the previously used
  # :greater_than_or_equal is not a known option of validates_numericality_of.
  validates_numericality_of :processed_count, :only_integer => true, :greater_than_or_equal_to => 0
  validates_numericality_of :item_count, :only_integer => true, :greater_than => 0

  # Create and save a new progress record for the given job. Raises if the
  # record is not valid (save! is used). Returns the new record.
  def self.create_progress!(job_id, message = '', item_count = 1)
    job_prog = new(:job_id => job_id, :progress_message => message, :item_count => item_count, :processed_count => 0)
    job_prog.save!
    job_prog
  end

  # Clears the progress records of jobs that are no longer active, that is:
  # jobs that do not exist in the job table any more or that are finished.
  #
  # NOTE(review): Bg is not defined in this file; it is presumably the handle
  # of the background job library (possibly meant to be "Bj") - confirm.
  def self.clear
    find(:all).each do |job_prog|
      # The id of the tracked job lives on the record, not on the class
      tracked_id = job_prog.job_id
      stale = !Bg.table.job.exists?(tracked_id) || Bg.table.job.find(tracked_id).finished?
      delete(job_prog.id) if(stale)
    end
  end

  # Increments the number of processed items. To avoid flooding the db, the same object will
  # save this value at most once every DB_UPDATE_INTERVAL seconds. Will return true if the
  # element was saved, false otherwise.
  def inc(inc_value = 1)
    self.processed_count = self.processed_count + inc_value
    if(!@last_update || ((Time.now - @last_update) > DB_UPDATE_INTERVAL))
      save!
      @last_update = Time.now
      true
    else
      false
    end
  end

  # Marks the job as complete by setting the processed count to the item
  # count. The record is always saved.
  def finish
    self.processed_count = self.item_count
    save!
  end

  # The percentage completed (integer arithmetic, capped at 100)
  def percentage
    [((self.processed_count * 100) / self.item_count), 100].min
  end

  # Elapsed time in seconds, or nil if no start time is set
  def elapsed
    return unless(self.started_at)
    Time.now - started_at
  end

  # Returns the estimated time (in seconds) remaining on the current job.
  # Returns nil if no estimate is possible, which is the case when the start
  # time is unknown or when the percentage is still at zero.
  def eta
    seconds = elapsed
    done = percentage
    return unless(seconds && done > 0)
    ((seconds * 100) / done) - seconds
  end

end
66
+
67
+ end
68
+ end
@@ -0,0 +1,13 @@
1
+ module TaliaCore
2
+
3
# A resource that declares the "Collection" types of several vocabularies and
# exposes its parts through the items property.
class Collection < DcResource

  # Declare the Collection class of each vocabulary as an RDF type, in the
  # order DCNS, SKOS, DCMIT.
  [N::DCNS, N::SKOS, N::DCMIT].each do |vocabulary|
    has_rdf_type vocabulary.Collection
  end

  # The items are connected through the hasPart predicate
  simple_property :items, N::DCNS.hasPart

end
12
+
13
+ end
@@ -0,0 +1,92 @@
1
+ module TaliaCore
2
+ module DataTypes
3
+
4
# Used for attaching data items by loading them from files and/or URLs. This will also attempt to
# create the correct data type for any given file.
module DataLoader

  module ClassMethods

    # Load the data from the given URL. If the mime_type option is given, the handler will always
    # use the parameter for the MIME type (which can be a Mime::Type object or a string like
    # 'text/html', or a mime type symbol). Only strings are converted through Mime::Type.lookup,
    # any other value is passed on as it is.
    #
    # *Attention:* This method will return an *Array* of data objects. This is for those cases
    # where a single data file will be processed into multiple objects (e.g. IIP data).
    #
    # If the mime type is not given, the method will attempt to automatically determine the
    # type, using the file extension (local files) or the content type of the response.
    #
    # The :http_credentials option may be used to pass login information for http like this:
    #  http_credentials = { :http_basic_authentication => [login, password] }
    # See the openuri documentation for more.
    #
    # You may pass the :location parameter to identify the "location" value for the new
    # data record. In general, this is not necessary. If the location is given, the system
    # will *always* attempt to determine the mime type through the location parameter, unless
    # an explicit mime type is given.
    #
    # All options may be given with symbol or string keys.
    def create_from_url(uri, options = {})
      mime_type = options[:mime_type] || options['mime_type']
      location = options[:location] || options['location']
      credentials = options[:http_credentials] || options['http_credentials']
      # If a Mime type is given, use that.
      if(mime_type)
        mime_type = Mime::Type.lookup(mime_type) if(mime_type.is_a?(String))
      elsif(location)
        mime_type = Mime::Type.lookup_by_extension(File.extname(location)[1..-1])
      end

      # Remove file:// from URIs to allow standard file URIs
      uri = file_url(uri)

      # We have different code paths for local and remote files. This is mainly because
      # the system will try to not open local files at all and just copy them around -
      # which will greatly speed up the operation.
      is_file = File.exist?(uri)

      location ||= File.basename(uri) if(is_file)
      # If we have a "standard" uri, we cut off at the last slash (the
      # File.basename would use the system file separator)
      last_slash = uri.rindex('/')
      location ||= last_slash ? uri[(last_slash + 1)..-1] : uri

      if(is_file)
        mime_type ||= Mime::Type.lookup_by_extension(File.extname(location)[1..-1])
        open_and_create(mime_type, location, uri, true)
      else
        open_from_url(uri, credentials) do |io|
          mime_type ||= Mime::Type.lookup(io.content_type)
          open_and_create(mime_type, location, io, false)
        end
      end
    end

    private

    # The main loader. This will handle the lookup from the mapping and the creating of the
    # data objects. Depending on the setting of is_file, the source parameter will be interpreted
    # in a different way. If it is a file, the file name will be passed in here. If it is
    # a URL, the method will receive the io object of the open connection as the source.
    #
    # The registered handler may be a method symbol (the method is called with all
    # parameters and its result is returned) or a class (a single record of that class
    # is created and returned inside an array). Raises an ArgumentError for any other
    # handler, or if the handler method does not exist.
    def open_and_create(mime_type, location, source, is_file)
      data_type = loader_type_from(mime_type)
      if(data_type.is_a?(Symbol))
        raise(ArgumentError, "No handler found for loading: #{data_type}") unless(self.respond_to?(data_type))
        self.send(data_type, mime_type, location, source, is_file)
      else
        raise(ArgumentError, "Registered handler for loading must be a method symbol or class. (#{data_type})") unless(data_type.is_a?(Class))
        data_record = data_type.new
        is_file ? data_record.create_from_file(location, source) : data_record.create_from_data(location, source)
        data_record.mime = mime_type.to_s
        data_record.location = location
        [ data_record ]
      end
    end

  end # Class methods end

end # Closing modules and such
91
+ end
92
+ end
@@ -0,0 +1,105 @@
1
+ module TaliaCore
2
+
3
+ # Contains all data types that are handled by the Talia system. All data elements
4
+ # should be subclasses of DataRecord
5
+ module DataTypes
6
+
7
# ActiveRecord interface to the data record in the database. This class only
# declares the data access methods (with empty bodies); subclasses provide
# the real implementations.
class DataRecord < ActiveRecord::Base
  # Attention: These need to come before the extends, otherwise it'll blow the
  # tests
  belongs_to :source, :class_name => 'TaliaCore::ActiveSource'
  before_create :set_mime_type # Mime type must be saved before the record is written

  extend MimeMapping

  # Declaration of main abstract methods ======================
  # Some notes: every subclass of DataRecord must implement
  # at least the following methods
  # See also: single-table inheritance

  # returns all bytes in the object as an array of unsigned integers
  def all_bytes
  end

  # Returns all_bytes as a binary string, or nil if there are no bytes.
  # The bytes are fetched only once, since reading them may be expensive
  # in subclasses.
  def content_string
    bytes = all_bytes
    bytes.pack('C*') if(bytes)
  end

  # returns the next byte from the object, or nil at EOS
  def get_byte(close_after_single_read=false)
  end

  # returns the current position of the read cursor
  def position
  end

  # adjust the position of the read cursor
  def seek(new_position)
  end

  # returns the size of the object in bytes
  def size
  end

  # reset the cursor to the initial state
  def reset
  end

  # Lookup the mime type for the extension of the given location (removing
  # the dot in front of the file extension). Works only for the file
  # types supported by Rails' Mime class. The result is always a string.
  def extract_mime_type(location)
    extension = File.extname(location).downcase[1..-1]
    Mime::Type.lookup_by_extension(extension).to_s
  end

  # The mime type of this record (the value of the "mime" attribute)
  def mime_type
    self.mime
  end

  attr_accessor :temp_path

  # class methods ============================================
  class << self

    # Find all data records about a specified source
    def find_data_records(id)
      find(:all, :conditions => { :source_id => id })
    end

    # Finds the first record with the given type and location. The type name
    # is camelized before the lookup. Raises ActiveRecord::RecordNotFound
    # if there is no such record.
    def find_by_type_and_location!(source_data_type, location)
      # TODO: Should it directly instantiate the STI sub-class?
      # In this case we should use the following line instead.
      #
      # source_data = source_data_type.classify.constantize.find_by_location(location, :limit => 1)
      #
      source_data = self.find(:first, :conditions => ["type = ? AND location = ?", source_data_type.camelize, location])
      raise ActiveRecord::RecordNotFound if source_data.nil?
      source_data
    end

  end

  private

  # Returns demodulized class name.
  def class_name
    self.class.name.demodulize
  end

  # set mime type if it hasn't been assigned already
  def set_mime_type
    assit(!self.location.blank?, "Location for #{self} should not be blank")
    if(!self.location.blank? && self.mime.blank?)
      # Set mime type for the record
      self.mime = extract_mime_type(self.location)
      assit_not_nil(self.mime, "Mime should not be nil (location was #{self.location})!")
    end
  end

end
104
+ end
105
+ end
@@ -0,0 +1,76 @@
1
+ require 'fileutils'
2
+
3
+ module TaliaCore
4
+ module DataTypes
5
+
6
# This is used for "delayed" copy operations. Basically this will create
# a file called "delayed_copy.sh" in the RAILS_ROOT, which can later be
# run as a bash script. This will allow the user to run the
# copy operation and be potentially faster than using the builtin copy
# operations (especially using JRuby)
#
# NOTE(review): Paths are written inside double quotes without any further
# escaping, so paths that contain characters like " or $ will not result
# in a correct script.
class DelayedCopier

  # Returns (and creates, if necessary) the file to write the delayed
  # copy operations to. An existing script from a previous run is backed
  # up before the new one is created.
  def self.delayed_copy_file
    @delayed_copy_file ||= begin
      backup_file if(File.exist?(delay_file_name))
      # A new script does not contain any mkdir commands yet
      @seen_dirs = {}
      file = File.open(delay_file_name, 'w')
      file.puts('#!/bin/bash')
      file
    end
  end

  # Adds the commands for copying source to target to the script. A mkdir
  # command for the target directory is added the first time that directory
  # is used in the current script.
  def self.cp(source, target)
    # Get the script first, creating it resets the seen directories
    script = delayed_copy_file
    source_path = File.expand_path(source)
    target_path = File.expand_path(target)
    target_dir = File.dirname(target_path)
    script.puts(%(mkdir -vp "#{target_dir}")) unless(dir_seen?(target_dir))
    script.puts(%(cp -v "#{source_path}" "#{target_path}"))
    script.flush
  end

  # Close the delayed copy file
  def self.close
    if(@delayed_copy_file)
      @delayed_copy_file.close
      @delayed_copy_file = nil
    end
  end

  # Helper methods. These stay publicly callable: a plain "private" has no
  # effect on methods defined with "def self.", so they always were.

  # Returns true if the directory has been seen before. If not, the
  # directory is remembered and false is returned.
  def self.dir_seen?(directory)
    @seen_dirs ||= {}
    return true if(@seen_dirs[directory])
    @seen_dirs[directory] = true
    false
  end

  # The file name for the delayed copy
  def self.delay_file_name
    File.join(RAILS_ROOT, 'delayed_copy.sh')
  end

  # Backs up an existing file if necessary. The script is moved to the
  # first free name of the form "delayed_copy_old_<n>.sh"
  def self.backup_file
    round = 1
    file_name = File.join(RAILS_ROOT, "delayed_copy_old_#{round}.sh")
    while(File.exist?(file_name))
      round += 1
      file_name = File.join(RAILS_ROOT, "delayed_copy_old_#{round}.sh")
    end
    FileUtils.mv(delay_file_name, file_name)
  end

end
74
+
75
+ end
76
+ end
@@ -0,0 +1,59 @@
1
+ module TaliaCore
2
+ module DataTypes
3
+
4
# Base class for all data records that use a plain file for data storage.
# The data access methods of DataRecord are mapped to the helper methods
# that the mixed-in modules are expected to provide.
class FileRecord < DataRecord
  include FileStore
  extend FileStore::ClassMethods

  include PathHelpers
  extend PathHelpers::ClassMethods

  include TempFileHandling
  extend TempFileHandling::ClassMethods

  include DataLoader
  extend DataLoader::ClassMethods
  extend IipLoader
  extend TaliaUtil::IoHelper # Data IO for class methods

  after_save :save_attachment, :write_file_after_save

  before_destroy :destroy_attachment

  # returns all bytes in the object as an array
  def all_bytes
    read_all_bytes
  end

  # returns the next byte from the object, or nil at EOS
  def get_byte(close_after_single_read=false)
    next_byte(close_after_single_read)
  end

  # returns the current position of the read cursor (binary access), which
  # is 0 as long as no position has been set
  def position
    @position.nil? ? 0 : @position
  end

  # reset the cursor to the initial state
  def reset
    set_position(0)
  end

  # set the new position of the reading cursor
  def seek(new_position)
    set_position(new_position)
  end

  # returns the size of the object in bytes
  def size
    data_size
  end

end
57
+
58
+ end
59
+ end