talia_core 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (119) hide show
  1. data/README.rdoc +41 -0
  2. data/bin/talia +33 -0
  3. data/lib/JXslt/jxslt.rb +60 -0
  4. data/lib/acts_as_roled.rb +11 -0
  5. data/lib/core_ext/platform.rb +9 -0
  6. data/lib/core_ext/string.rb +6 -0
  7. data/lib/core_ext.rb +1 -0
  8. data/lib/custom_template.rb +4 -0
  9. data/lib/loader_helper.rb +62 -0
  10. data/lib/mysql.rb +1214 -0
  11. data/lib/progressbar.rb +236 -0
  12. data/lib/role.rb +12 -0
  13. data/lib/talia_cl/command_line.rb +39 -0
  14. data/lib/talia_cl/commands/standalone/cl_options.rb +9 -0
  15. data/lib/talia_cl/commands/standalone/standalone_generate.rb +75 -0
  16. data/lib/talia_cl/commands/standalone.rb +25 -0
  17. data/lib/talia_cl/commands/talia_console/cl_options.rb +55 -0
  18. data/lib/talia_cl/commands/talia_console/console_commands.rb +37 -0
  19. data/lib/talia_cl/commands/talia_console/talia_commands.rb +131 -0
  20. data/lib/talia_cl/commands/talia_console.rb +47 -0
  21. data/lib/talia_cl/core_commands.rb +11 -0
  22. data/lib/talia_cl.rb +47 -0
  23. data/lib/talia_core/active_source.rb +372 -0
  24. data/lib/talia_core/active_source_parts/class_methods.rb +378 -0
  25. data/lib/talia_core/active_source_parts/predicate_handler.rb +89 -0
  26. data/lib/talia_core/active_source_parts/rdf.rb +131 -0
  27. data/lib/talia_core/active_source_parts/sql_helper.rb +36 -0
  28. data/lib/talia_core/active_source_parts/xml/base_builder.rb +47 -0
  29. data/lib/talia_core/active_source_parts/xml/generic_reader.rb +363 -0
  30. data/lib/talia_core/active_source_parts/xml/rdf_builder.rb +88 -0
  31. data/lib/talia_core/active_source_parts/xml/source_builder.rb +73 -0
  32. data/lib/talia_core/active_source_parts/xml/source_reader.rb +20 -0
  33. data/lib/talia_core/agent.rb +14 -0
  34. data/lib/talia_core/background_jobs/job.rb +82 -0
  35. data/lib/talia_core/background_jobs/progress_job.rb +68 -0
  36. data/lib/talia_core/collection.rb +13 -0
  37. data/lib/talia_core/data_types/data_loader.rb +92 -0
  38. data/lib/talia_core/data_types/data_record.rb +105 -0
  39. data/lib/talia_core/data_types/delayed_copier.rb +76 -0
  40. data/lib/talia_core/data_types/file_record.rb +59 -0
  41. data/lib/talia_core/data_types/file_store.rb +306 -0
  42. data/lib/talia_core/data_types/iip_data.rb +153 -0
  43. data/lib/talia_core/data_types/iip_loader.rb +127 -0
  44. data/lib/talia_core/data_types/image_data.rb +32 -0
  45. data/lib/talia_core/data_types/media_link.rb +19 -0
  46. data/lib/talia_core/data_types/mime_mapping.rb +45 -0
  47. data/lib/talia_core/data_types/path_helpers.rb +77 -0
  48. data/lib/talia_core/data_types/pdf_data.rb +42 -0
  49. data/lib/talia_core/data_types/simple_text.rb +36 -0
  50. data/lib/talia_core/data_types/temp_file_handling.rb +85 -0
  51. data/lib/talia_core/data_types/xml_data.rb +169 -0
  52. data/lib/talia_core/dc_resource.rb +20 -0
  53. data/lib/talia_core/dummy_handler.rb +34 -0
  54. data/lib/talia_core/dummy_source.rb +20 -0
  55. data/lib/talia_core/errors.rb +25 -0
  56. data/lib/talia_core/initializer.rb +427 -0
  57. data/lib/talia_core/ordered_source.rb +228 -0
  58. data/lib/talia_core/rails_ext/actionpack/action_controller/record_identifier.rb +13 -0
  59. data/lib/talia_core/rails_ext/actionpack/action_controller.rb +1 -0
  60. data/lib/talia_core/rails_ext/actionpack.rb +1 -0
  61. data/lib/talia_core/rails_ext.rb +1 -0
  62. data/lib/talia_core/rdf_import.rb +90 -0
  63. data/lib/talia_core/rdf_resource.rb +159 -0
  64. data/lib/talia_core/semantic_collection_item.rb +93 -0
  65. data/lib/talia_core/semantic_collection_wrapper.rb +324 -0
  66. data/lib/talia_core/semantic_property.rb +7 -0
  67. data/lib/talia_core/semantic_relation.rb +67 -0
  68. data/lib/talia_core/source.rb +323 -0
  69. data/lib/talia_core/source_transfer_object.rb +38 -0
  70. data/lib/talia_core/workflow/base.rb +15 -0
  71. data/lib/talia_core/workflow/publication_workflow.rb +62 -0
  72. data/lib/talia_core/workflow.rb +300 -0
  73. data/lib/talia_core.rb +9 -0
  74. data/lib/talia_dependencies.rb +12 -0
  75. data/lib/talia_util/bar_progressor.rb +15 -0
  76. data/lib/talia_util/configuration/config_file.rb +48 -0
  77. data/lib/talia_util/configuration/database_config.rb +40 -0
  78. data/lib/talia_util/configuration/mysql_database_setup.rb +104 -0
  79. data/lib/talia_util/data_import.rb +91 -0
  80. data/lib/talia_util/image_conversions.rb +82 -0
  81. data/lib/talia_util/import_job_helper.rb +132 -0
  82. data/lib/talia_util/io_helper.rb +54 -0
  83. data/lib/talia_util/progressable.rb +38 -0
  84. data/lib/talia_util/progressbar.rb +236 -0
  85. data/lib/talia_util/rdf_update.rb +80 -0
  86. data/lib/talia_util/some_sigla.xml +1960 -0
  87. data/lib/talia_util/test_helpers.rb +151 -0
  88. data/lib/talia_util/util.rb +226 -0
  89. data/lib/talia_util/yaml_import.rb +80 -0
  90. data/lib/talia_util.rb +13 -0
  91. data/lib/user.rb +116 -0
  92. data/lib/version.rb +15 -0
  93. data/test/core_ext/string_test.rb +11 -0
  94. data/test/custom_template_test.rb +8 -0
  95. data/test/talia_core/active_source_predicate_test.rb +54 -0
  96. data/test/talia_core/active_source_rdf_test.rb +89 -0
  97. data/test/talia_core/active_source_test.rb +631 -0
  98. data/test/talia_core/data_types/data_loader_test.rb +123 -0
  99. data/test/talia_core/data_types/data_record_test.rb +40 -0
  100. data/test/talia_core/data_types/file_record_test.rb +171 -0
  101. data/test/talia_core/data_types/iip_data_test.rb +130 -0
  102. data/test/talia_core/data_types/image_data_test.rb +88 -0
  103. data/test/talia_core/data_types/pdf_data_test.rb +68 -0
  104. data/test/talia_core/data_types/xml_data_test.rb +134 -0
  105. data/test/talia_core/generic_xml_test.rb +83 -0
  106. data/test/talia_core/initializer_test.rb +36 -0
  107. data/test/talia_core/ordered_source_test.rb +398 -0
  108. data/test/talia_core/rdf_resource_test.rb +115 -0
  109. data/test/talia_core/semantic_collection_item_test.rb +129 -0
  110. data/test/talia_core/source_reader_test.rb +33 -0
  111. data/test/talia_core/source_test.rb +484 -0
  112. data/test/talia_core/source_transfer_object_test.rb +24 -0
  113. data/test/talia_core/workflow/publication_workflow_test.rb +242 -0
  114. data/test/talia_core/workflow/user_class_for_workflow.rb +35 -0
  115. data/test/talia_core/workflow/workflow_base_test.rb +21 -0
  116. data/test/talia_core/workflow_test.rb +19 -0
  117. data/test/talia_util/import_job_helper_test.rb +46 -0
  118. data/test/test_helper.rb +68 -0
  119. metadata +262 -0
@@ -0,0 +1,68 @@
1
module TaliaCore
  module BackgroundJobs

    # Helper table to track the current status of a long-running task.
    #
    # The code in this class uses the attributes job_id, progress_message,
    # item_count (total number of items), processed_count (items done so far)
    # and started_at.
    class ProgressJob < ActiveRecord::Base

      # Minimum interval (in seconds) between two database writes done by #inc.
      DB_UPDATE_INTERVAL = 2

      validates_numericality_of :job_id, :only_integer => true
      # The lower-bound option is spelled :greater_than_or_equal_to in Rails'
      # numericality validation.
      validates_numericality_of :processed_count, :only_integer => true, :greater_than_or_equal_to => 0
      validates_numericality_of :item_count, :only_integer => true, :greater_than => 0

      # Creates and saves a new progress record for the given job, starting with
      # zero processed items. Raises if the record does not validate (save!).
      # Returns the new record.
      def self.create_progress!(job_id, message = '', item_count = 1)
        job_prog = new(:job_id => job_id, :progress_message => message, :item_count => item_count, :processed_count => 0)
        job_prog.save!
        job_prog
      end

      # Clears the progress for processes no longer active: a progress record
      # is deleted if its job no longer exists, or if the job is finished.
      #
      # NOTE(review): +Bg+ is not defined in this file; confirm that
      # Bg.table.job is the intended accessor for the job table.
      def self.clear
        find(:all).each do |job_prog|
          # The job id must be taken from the record that is being examined.
          job_id = job_prog.job_id
          delete(job_prog.id) if(!Bg.table.job.exists?(job_id) || Bg.table.job.find(job_id).finished?)
        end
      end

      # Increments the number of processed items. To avoid flooding the db, the same object will
      # only save this value at most once every DB_UPDATE_INTERVAL seconds. Will return true if the
      # element was saved, false otherwise.
      def inc(inc_value = 1)
        self.processed_count = self.processed_count + inc_value
        now = Time.now
        # Not yet saved by this object, or last save is old enough: write through
        return false if(@last_update && ((now - @last_update) <= DB_UPDATE_INTERVAL))
        save!
        @last_update = Time.now
        true
      end

      # Marks the job as complete by setting the processed count to the item
      # count, and saves the record.
      def finish
        self.processed_count = self.item_count
        save!
      end

      # The percentage completed (integer arithmetic, capped at 100)
      def percentage
        [((self.processed_count * 100) / self.item_count), 100].min
      end

      # Elapsed time in seconds since started_at. Returns nil if started_at
      # is not set.
      def elapsed
        return unless(self.started_at)
        Time.now - started_at
      end

      # Returns the estimated time remaining on the current job, extrapolated
      # from the elapsed time and the percentage completed. Returns nil if the
      # elapsed time is unknown (no started_at).
      #
      # NOTE(review): with 0 percent completed the division is by zero; with a
      # Float elapsed time this presumably yields Infinity - confirm callers
      # cope with that.
      def eta
        seconds = elapsed
        return unless(seconds)
        ((seconds * 100) / percentage) - seconds
      end

    end

  end
end
@@ -0,0 +1,13 @@
1
module TaliaCore

  # A DcResource that carries the "Collection" RDF type from the DCNS, SKOS
  # and DCMIT namespaces and exposes its DCNS.hasPart values as +items+.
  class Collection < DcResource

    # Register the Collection type of each namespace, in this order
    [N::DCNS, N::SKOS, N::DCMIT].each do |namespace|
      has_rdf_type namespace.Collection
    end

    # The members of the collection
    simple_property :items, N::DCNS.hasPart

  end

end
@@ -0,0 +1,92 @@
1
module TaliaCore
  module DataTypes

    # Used for attaching data items by loading them from files and/or URLs. This will also attempt to
    # create the correct data type for any given file.
    module DataLoader

      module ClassMethods

        # Load the data from the given URL. If the mime_type option is given, the handler will always
        # use the parameter for the MIME type (which can be a Mime::Type object or a string like
        # 'text/html', or a mime type symbol).
        #
        # *Attention:* This method will return an *Array* of data objects. This is for those cases,
        # where a single data file will be processed into multiple objects (e.g. IIP data).
        #
        # If the mime type is not given, the method will attempt to automatically determine the
        # type, using the file extension (local files) or the content type reported for the
        # opened URL.
        #
        # The :http_credentials option may be used to pass login information for http like this:
        # http_credentials = { :http_basic_authentication => [login, password] }
        # See the openuri documentation for more.
        #
        # You may pass the :location parameter to identify the "location" value for the new
        # data record. In general, this is not necessary. If the location is given, the system
        # will *always* attempt to determine the mime type through the location parameter, unless
        # an explicit mime type is given.
        #
        # All three options (:mime_type, :location, :http_credentials) may be given with
        # either symbol or string keys.
        def create_from_url(uri, options = {})
          mime_type = options[:mime_type] || options['mime_type']
          location = options[:location] || options['location']
          credentials = options[:http_credentials] || options['http_credentials']
          # If a Mime type is given, use that.
          if(mime_type)
            mime_type = Mime::Type.lookup(mime_type) if(mime_type.is_a?(String))
          elsif(location)
            mime_type = Mime::Type.lookup_by_extension(File.extname(location)[1..-1])
          end

          # Remove file:// from URIs to allow standard file URIs
          uri = file_url(uri)

          # We have different code paths for local and remote files. This is mainly because
          # the system will try to not open local files at all and just copy them around -
          # which will greatly speed up the operation.
          is_file = File.exist?(uri)

          location ||= File.basename(uri) if(is_file)
          # If we have a "standard" uri, we cut off at the last slash (the
          # File.basename would use the system file separator)
          location ||= uri.rindex('/') ? uri[(uri.rindex('/') + 1)..-1] : uri

          if(is_file)
            mime_type ||= Mime::Type.lookup_by_extension(File.extname(location)[1..-1])
            open_and_create(mime_type, location, uri, true)
          else
            open_from_url(uri, credentials) do |io|
              mime_type ||= Mime::Type.lookup(io.content_type)
              open_and_create(mime_type, location, io, false)
            end
          end

        end

        private

        # The main loader. This will handle the lookup from the mapping and the creating of the
        # data objects. Depending on the setting of is_file, the source parameter will be interpreted
        # in a different way. If it is a file, the file name will be passed in here. If it is
        # a URL, the method will receive the io object of the open connection as the source.
        #
        # The handler returned by loader_type_from may be a Symbol (the name of a method on
        # this object, which is called with all four parameters and whose result is returned)
        # or a Class (which is instantiated and filled from the source; the new object is
        # returned wrapped in an Array). Anything else raises an ArgumentError.
        def open_and_create(mime_type, location, source, is_file)
          data_type = loader_type_from(mime_type)
          if(data_type.is_a?(Symbol))
            raise(ArgumentError, "No handler found for loading: #{data_type}") unless(self.respond_to?(data_type))
            self.send(data_type, mime_type, location, source, is_file)
          else
            raise(ArgumentError, "Registered handler for loading must be a method symbol or class. (#{data_type})") unless(data_type.is_a?(Class))
            data_record = data_type.new
            is_file ? data_record.create_from_file(location, source) : data_record.create_from_data(location, source)
            data_record.mime = mime_type.to_s
            data_record.location = location
            [ data_record ]
          end
        end

      end # Class methods end

    end # Closing modules and such
  end
end
@@ -0,0 +1,105 @@
1
module TaliaCore

  # Contains all data types that are handled by the Talia system. All data elements
  # should be subclasses of DataRecord
  module DataTypes

    # ActiveRecord interface to the data record in the database
    class DataRecord < ActiveRecord::Base
      # Attention: These need to come before the extends, otherwise it'll blow the
      # tests
      belongs_to :source, :class_name => 'TaliaCore::ActiveSource'
      before_create :set_mime_type # Mime type must be saved before the record is written

      extend MimeMapping

      # Declaration of main abstract methods ======================
      # Some notes: every subclass of DataRecord must implement
      # at least the following methods. The implementations in this
      # class are empty and return nil.
      # See also: single-table inheritance

      # returns all bytes in the object as an array of unsigned integers
      def all_bytes
      end

      # Returns all_bytes as a binary string, or nil if all_bytes returns nil.
      def content_string
        # Evaluate all_bytes only once: subclasses may read the whole data
        # on each call.
        bytes = all_bytes
        bytes.pack('C*') if(bytes)
      end

      # returns the next byte from the object, or nil at EOS
      def get_byte(close_after_single_read=false)
      end

      # returns the current position of the read cursor
      def position
      end

      # adjust the position of the read cursor
      def seek(new_position)
      end

      # returns the size of the object in bytes
      def size
      end

      # reset the cursor to the initial state
      def reset
      end

      # Returns the mime type for the given location as a string. An unknown
      # extension results in the string form of whatever the lookup returns
      # for it.
      def extract_mime_type(location)
        # Lookup the mime type for the extension (removing the dot
        # in front of the file extension) Works only for the file
        # types supported by Rails' Mime class.
        Mime::Type.lookup_by_extension((File.extname(location).downcase)[1..-1]).to_s
      end

      # Alias-like reader for the +mime+ attribute
      def mime_type
        self.mime
      end

      attr_accessor :temp_path

      # class methods ============================================
      class << self

        # Find all data records about a specified source
        def find_data_records(id)
          find(:all, :conditions => { :source_id => id })
        end

        # Finds the first record with the given type (camelized and compared
        # to the +type+ column) and location. Raises
        # ActiveRecord::RecordNotFound if there is no such record.
        def find_by_type_and_location!(source_data_type, location)
          # TODO: Should it directly instantiate the STI sub-class?
          # In this case we should use the following line instead.
          #
          # source_data = source_data_type.classify.constantize.find_by_location(location, :limit => 1)
          #
          source_data = self.find(:first, :conditions => ["type = ? AND location = ?", source_data_type.camelize, location])
          raise ActiveRecord::RecordNotFound if source_data.nil?
          source_data
        end

      end

      private

      # Returns demodulized class name.
      def class_name
        self.class.name.demodulize
      end

      # set mime type if it hasn't been assigned already
      def set_mime_type
        assit(!self.location.blank?, "Location for #{self} should not be blank")
        if(!self.location.blank? && self.mime.blank?)
          # Set mime type for the record
          self.mime = extract_mime_type(self.location)
          assit_not_nil(self.mime, "Mime should not be nil (location was #{self.location})!")
        end
      end

    end
  end
end
@@ -0,0 +1,76 @@
1
+ require 'fileutils'
2
+
3
module TaliaCore
  module DataTypes

    # This is used for "delayed" copy operations. Basically this will create
    # a file called "delayed_copy.sh" in the RAILS_ROOT, which can later be
    # run as a bash script. This will allow the user to run the
    # copy operation and be potentially faster than using the builtin copy
    # operations (especially using JRuby)
    class DelayedCopier

      # Returns (and creates, if necessary) the file to write the delayed
      # copy operations to. An already existing script is moved to a backup
      # name first. Starting a new script also forgets which directories
      # already have a "mkdir" line, since those lines were in the old script.
      def self.delayed_copy_file
        @delayed_copy_file ||= begin
          backup_file if(File.exist?(delay_file_name))
          @seen_dirs = {}
          file = File.open(delay_file_name, 'w')
          file.puts('#!/bin/bash')
          file
        end
      end

      # Appends the commands for copying +source+ to +target+ to the script.
      # Both paths are expanded to absolute paths. A "mkdir -vp" line for the
      # target's directory is written only the first time that directory is
      # used in the current script. The script is flushed after each call.
      def self.cp(source, target)
        script = delayed_copy_file # must come first: a new script resets the seen directories
        source_path = File.expand_path(source)
        target_path = File.expand_path(target)
        target_dir = File.dirname(target_path)
        script.puts("mkdir -vp #{shell_quote(target_dir)}") unless(dir_seen?(target_dir))
        script.puts("cp -v #{shell_quote(source_path)} #{shell_quote(target_path)}")
        script.flush
      end

      # Close the delayed copy file. The next call to cp will start a new script.
      def self.close
        if(@delayed_copy_file)
          @delayed_copy_file.close
          @delayed_copy_file = nil
        end
      end

      # The methods below are internal helpers. They remain publicly callable:
      # a bare "private" does not apply to methods defined with "def self.".

      # Returns true if the directory was passed to this method before (since
      # the current script was started); otherwise remembers it and returns false.
      def self.dir_seen?(directory)
        @seen_dirs ||= {}
        return true if(@seen_dirs[directory])
        @seen_dirs[directory] = true
        false
      end

      # Wraps the path in double quotes for use in the bash script. The
      # characters that stay special inside bash double quotes (backslash,
      # double quote, backtick and dollar sign) are escaped with a backslash.
      def self.shell_quote(path)
        '"' + path.gsub(/[\\"`$]/) { |char| "\\#{char}" } + '"'
      end

      # The file name for the delayed copy
      def self.delay_file_name
        File.join(RAILS_ROOT, 'delayed_copy.sh')
      end

      # Backs up an existing script by moving it to the first unused
      # "delayed_copy_old_<n>.sh" name in RAILS_ROOT.
      def self.backup_file
        round = 1
        file_name = File.join(RAILS_ROOT, "delayed_copy_old_#{round}.sh")
        while(File.exist?(file_name))
          round += 1
          file_name = File.join(RAILS_ROOT, "delayed_copy_old_#{round}.sh")
        end
        FileUtils.mv(delay_file_name, file_name)
      end

    end

  end
end
@@ -0,0 +1,59 @@
1
module TaliaCore
  module DataTypes

    # Base class for all data records that use a plain file for data storage.
    # The byte-level accessors declared by DataRecord are implemented here by
    # delegating to helper methods that are not defined in this class
    # (presumably supplied by the mixins below).
    class FileRecord < DataRecord
      include FileStore
      extend FileStore::ClassMethods

      include PathHelpers
      extend PathHelpers::ClassMethods

      include TempFileHandling
      extend TempFileHandling::ClassMethods

      include DataLoader
      extend DataLoader::ClassMethods
      extend IipLoader
      extend TaliaUtil::IoHelper # Data IO for class methods

      after_save :save_attachment, :write_file_after_save

      before_destroy :destroy_attachment

      # All bytes of the object, as an array
      def all_bytes
        read_all_bytes
      end

      # The next byte of the object, or nil when the end is reached
      def get_byte(close_after_single_read=false)
        next_byte(close_after_single_read)
      end

      # Current position of the read cursor (binary access); 0 if no
      # position has been set yet
      def position
        @position.nil? ? 0 : @position
      end

      # Puts the read cursor back to the start
      def reset
        set_position(0)
      end

      # Moves the read cursor to the given position
      def seek(new_position)
        set_position(new_position)
      end

      # Size of the object in bytes
      def size
        data_size
      end

    end

  end
end