spread2rdf 0.0.1pre.1 → 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +8 -8
  2. data/Gemfile.ocra +6 -0
  3. data/VERSION +1 -1
  4. data/bin/spread2rdf +1 -1
  5. data/lib/spread2rdf/attributes.rb +1 -0
  6. data/lib/spread2rdf/cli.rb +128 -34
  7. data/lib/spread2rdf/coord.rb +49 -0
  8. data/lib/spread2rdf/mapping/cell.rb +105 -0
  9. data/lib/spread2rdf/mapping/column.rb +63 -0
  10. data/lib/spread2rdf/mapping/column_block.rb +23 -0
  11. data/lib/spread2rdf/mapping/default_cell_mappings.rb +26 -0
  12. data/lib/spread2rdf/mapping/element.rb +64 -0
  13. data/lib/spread2rdf/mapping/resource.rb +95 -0
  14. data/lib/spread2rdf/mapping/sheet.rb +80 -0
  15. data/lib/spread2rdf/mapping/spreadsheet.rb +56 -0
  16. data/lib/spread2rdf/mapping/statement.rb +22 -0
  17. data/lib/spread2rdf/mapping/worksheet.rb +12 -0
  18. data/lib/spread2rdf/namespace.rb +6 -2
  19. data/lib/spread2rdf/roo_helper.rb +45 -0
  20. data/lib/spread2rdf/schema/column.rb +37 -0
  21. data/lib/spread2rdf/{spreadsheet/sub_sheet.rb → schema/column_block.rb} +3 -2
  22. data/lib/spread2rdf/{spreadsheet → schema}/element.rb +12 -16
  23. data/lib/spread2rdf/schema/schema.rb +19 -0
  24. data/lib/spread2rdf/schema/sheet.rb +87 -0
  25. data/lib/spread2rdf/schema/sheet_dsl.rb +54 -0
  26. data/lib/spread2rdf/schema/spreadsheet.rb +49 -0
  27. data/lib/spread2rdf/schema/spreadsheet_dsl.rb +42 -0
  28. data/lib/spread2rdf/schema/statement_mapping_schema.rb +26 -0
  29. data/lib/spread2rdf/schema/worksheet.rb +47 -0
  30. data/lib/spread2rdf.rb +34 -2
  31. data/ontologies/unit-v1.1.ttl +8330 -0
  32. data/spread2rdf.gemspec +4 -2
  33. metadata +50 -26
  34. data/lib/spread2rdf/helper.rb +0 -14
  35. data/lib/spread2rdf/spreadsheet/column.rb +0 -48
  36. data/lib/spread2rdf/spreadsheet/column_mapping_context.rb +0 -156
  37. data/lib/spread2rdf/spreadsheet/coord.rb +0 -51
  38. data/lib/spread2rdf/spreadsheet/mapping_context.rb +0 -67
  39. data/lib/spread2rdf/spreadsheet/mapping_dsl.rb +0 -23
  40. data/lib/spread2rdf/spreadsheet/sheet.rb +0 -128
  41. data/lib/spread2rdf/spreadsheet/sheet_dsl.rb +0 -34
  42. data/lib/spread2rdf/spreadsheet/sheet_mapping_context.rb +0 -90
  43. data/lib/spread2rdf/spreadsheet/sub_sheet_mapping_context.rb +0 -55
  44. data/lib/spread2rdf/spreadsheet/worksheet.rb +0 -49
  45. data/lib/spread2rdf/spreadsheet.rb +0 -92
@@ -0,0 +1,80 @@
1
module Spread2RDF
  module Mapping
    # Mapping of a sheet-like schema element: splits the rows of the sheet
    # into one row range per resource and creates a Mapping::Resource for
    # each of these ranges.
    #
    # Subclasses are expected to provide #row_range.
    class Sheet < Element

      def initialize(sheet, parent)
        super
        @resources = []
      end

      # Creates a Mapping::Resource for every row range of #rows_per_resource.
      #
      # Returns an empty Array when there is no row range or the schema has
      # no columns, otherwise self.
      def map
        return [] if row_range.nil? || schema.columns.empty?
        rows_per_resource.each do |resource_range|
          @resources << Mapping::Resource.new(schema, self, resource_range)
        end
        self
      end

      # The row ranges of the individual resources, in row order (memoized).
      #
      # Every range starts at the first row of a resource and ends right
      # before the first row of the next resource; the last range ends at
      # the end of #row_range.
      def rows_per_resource
        return [] if row_range.nil?
        @rows_per_resource ||= begin
          range      = row_range
          first_rows = first_rows_of_resources(range)
          first_rows.each_with_index.map do |first_row, i|
            next_first_row = first_rows[i + 1]
            last_row = next_first_row ? next_first_row - 1 : range.end
            Range.new(first_row, last_row)
          end
        end
      end

      # The rows of +range+ on which a new resource starts.
      #
      # With a fixed row count per resource every n-th row (counted from the
      # beginning of the range) starts a resource; otherwise every row with
      # a non-blank cell in the subject column does.
      #
      # Raises a RuntimeError when neither a fixed row count nor a subject
      # column coordinate is available.
      def first_rows_of_resources(range)
        if (fix_row_count = schema.fix_row_count_per_resource)
          range.select { |row| (row - range.begin) % fix_row_count == 0 }
        else
          subject_column_coord = schema.subject_column.try(:coord)
          raise "no subject column for #{self}" if subject_column_coord.blank?
          range.reject do |row|
            cell(row: row, column: subject_column_coord).blank?
          end
        end
      end
      private :first_rows_of_resources

      ##########################################################################
      # Mapping::Element structure

      # The nearest Worksheet in the parent chain (self included), or nil
      # when the chain ends without one.
      def worksheet
        return self if self.is_a? Worksheet
        ancestor = self.parent
        ancestor = ancestor.parent until ancestor.is_a?(Worksheet) || ancestor.nil?
        ancestor
      end

      attr_reader :resources
      alias _children_ resources

      # The resource whose row range includes +row+, or nil when no row
      # range includes it (previously this case raised a TypeError, because
      # the resources were indexed with nil).
      def resource_by_row(row)
        index = rows_per_resource.find_index { |range| range.include? row }
        index && resource_by_index(index)
      end

      def resource_by_index(index)
        @resources[index]
      end

      ##########################################################################
      # Roo helper

      # The value of the cell at +coord+ on the source worksheet of this
      # sheet; String values are stripped.
      def cell_value(coord)
        value = ROO.cell(coord, schema.worksheet.source_name)
        value = value.strip if value.is_a? String
        value
      end
      alias cell cell_value

      # Delegates to ROO.roo with the source worksheet of this sheet.
      def roo(&block)
        ROO.roo(schema.worksheet.source_name, &block)
      end

    end
  end
end
@@ -0,0 +1,56 @@
1
module Spread2RDF
  module Mapping
    # Root of the mapping tree: loads the input file into ROO and maps the
    # worksheets of the schema.
    class Spreadsheet < Element

      attr_reader :input_file

      def initialize(schema, filename)
        super(schema, nil)
        @worksheets = {}
        @input_file = filename
        ROO.load(filename)
      end

      # Maps all worksheets in the order of schema.sorted_worksheets.
      # Returns self.
      def map
        schema.sorted_worksheets.each do |worksheet_schema|
          worksheet!(worksheet_schema)
        end
        self
      end

      # Resolves +name+ to a worksheet schema.
      #
      # A Schema::Worksheet is returned as is. A String or Symbol is looked
      # up in schema.worksheet; the name is converted to a Symbol first,
      # because the schema DSL registers worksheets under Symbol keys.
      #
      # Raises ArgumentError for any other kind of argument.
      def worksheet_schema(name)
        case name
        when Schema::Worksheet then name
        when String, Symbol    then schema.worksheet[name.to_sym]
        else
          raise ArgumentError,
                "expected a worksheet schema or name, got #{name.inspect}"
        end
      end

      ##########################################################################
      # Mapping::Element structure

      def spreadsheet
        self
      end

      def worksheets
        @worksheets.values
      end
      alias _children_ worksheets

      # The already created mapping of the given worksheet, or nil.
      def worksheet(name)
        @worksheets[worksheet_schema(name).name]
      end

      # The mapping of the given worksheet; it is created and mapped on the
      # first request.
      #
      # Always returns the Mapping::Worksheet (previously the first request
      # returned the result of its #map call instead).
      def worksheet!(name)
        sheet_schema = worksheet_schema(name)
        existing = @worksheets[sheet_schema.name]
        return existing if existing
        # register the mapping before it is mapped, as the original did
        mapping = @worksheets[sheet_schema.name] =
          Mapping::Worksheet.new(sheet_schema, self)
        mapping.map
        mapping
      end

    end
  end
end
@@ -0,0 +1,22 @@
1
module Spread2RDF
  module Mapping
    # Mixin for mapping elements that produce statements towards an object.
    # The including class provides schema, subject, predicate, statement
    # and statements.
    module Statement

      # Produces the statement(s) linking the subject to +object+ according
      # to the statement mapping mode of the schema:
      #
      # * :default     - a single subject-predicate-object statement
      # * :restriction - the subject becomes a subclass of a fresh
      #                  OWL restriction node on the predicate
      #
      # Any other mode produces nothing and returns nil.
      def statements_to_object(object)
        mode = schema.statement_mapping_mode
        if mode == :default
          statement(subject, predicate, object)
        elsif mode == :restriction
          restriction = RDF::Node.new
          triples = [
            [ subject,     RDF::RDFS.subClassOf,    restriction ],
            [ restriction, RDF.type,                RDF::OWL.Restriction ],
            [ restriction, RDF::OWL.onProperty,     predicate ],
            [ restriction, schema.restriction_mode, object ]
          ]
          statements(*triples)
        end
      end
      private :statements_to_object
    end
  end
end
@@ -0,0 +1,12 @@
1
module Spread2RDF
  module Mapping
    # Mapping of a complete worksheet.
    class Worksheet < Sheet

      # The rows to be mapped: from the row of the schema's start coordinate
      # to the last row reported by Roo for this worksheet.
      #
      # Returns nil when Roo reports no last row or when the start row lies
      # behind the last row. (A nil last row previously failed while the
      # range was built or compared.)
      def row_range
        first_row = Coord[schema.start].row
        last_row  = roo { |sheet| sheet.last_row }
        return nil if last_row.nil? || first_row > last_row
        first_row..last_row
      end

    end
  end
end
@@ -1,5 +1,9 @@
1
1
  module Spread2RDF
2
2
  module Namespace
3
+ DEFAULTS = {
4
+ UNIT: ::RDF::Vocabulary.new('http://qudt.org/vocab/unit#')
5
+ }
6
+
3
7
  class << self
4
8
  def [](name)
5
9
  name = name.to_sym
@@ -18,7 +22,7 @@ module Spread2RDF
18
22
  end
19
23
 
20
24
  def namespace
21
- @namespace ||= {}
25
+ @namespace ||= DEFAULTS
22
26
  end
23
27
 
24
28
  def namespaces
@@ -32,7 +36,7 @@ module Spread2RDF
32
36
  when RDF::Vocabulary, RDF::URI, String
33
37
  namespace_descriptor.to_s
34
38
  else
35
- raise "invalid namespace: #{namespace_descriptor.inspect}"
39
+ raise "invalid namespace: #{namespace_descriptor.inspect}:#{namespace_descriptor.class}"
36
40
  end
37
41
  end
38
42
 
@@ -0,0 +1,45 @@
1
require 'singleton'
module Spread2RDF
  # Singleton wrapper around the Roo spreadsheet object of the currently
  # loaded input file.
  class RooAdapter
    include Singleton

    # Opens +file+ with Roo::Excelx and keeps the resulting object.
    # For '.xlsm' files the options packed: :zip and file_warning: :ignore
    # are passed to Roo.
    #
    # TODO: make this work with other spreadsheets than Excel
    def load(file)
      options = {}
      if File.extname(file).downcase == '.xlsm'
        options[:packed]       = :zip
        options[:file_warning] = :ignore
      end
      @roo = Roo::Excelx.new(file, options)
    end

    # Makes +worksheet+ the default sheet of the Roo object.
    def select_worksheet(worksheet)
      @roo.default_sheet = worksheet
    end

    # Access to the underlying Roo object.
    #
    # * without +worksheet+: returns the Roo object untouched
    # * with +worksheet+, without a block: selects the worksheet as default
    #   sheet (it stays selected) and returns the Roo object
    # * with +worksheet+ and a block: yields the Roo object with the
    #   worksheet selected and returns the result of the block; the
    #   previous default sheet is restored afterwards, even when the block
    #   raises
    def roo(worksheet = nil)
      return @roo if worksheet.nil?
      last_default_sheet = @roo.default_sheet
      select_worksheet(worksheet)
      return @roo unless block_given?
      begin
        yield @roo
      ensure
        @roo.default_sheet = last_default_sheet
      end
    end

    # The content of the cell at +coord+ (a Coord or anything Coord[]
    # accepts) on +worksheet+.
    def cell(coord, worksheet = nil)
      coord = Coord[coord] unless coord.is_a? Coord
      @roo.cell(coord.column, coord.row, worksheet)
    end

  end

  ROO = RooAdapter.instance

end
@@ -0,0 +1,37 @@
1
module Spread2RDF
  module Schema
    # Schema of a single spreadsheet column.
    class Column < Element
      include StatementMapping

      self.attributes = {
        predicate: nil,
        object:    nil,
        statement: nil
      }

      attr_reader :coord # this is set by Worksheet#index_columns!

      alias sheet parent

      def to_s
        "#{super} of #{sheet}"
      end

      # How the cells of this column are turned into objects, derived from
      # the +object+ attribute:
      #
      # * nil                     - :to_string
      # * a Proc                  - :custom
      # * something with a :uri   - :new_resource
      # * something with a :from  - :resource_ref
      #
      # Raises a RuntimeError for any other specification.
      def object_mapping_mode
        return :to_string    if object.nil?
        return :custom       if object.is_a?(Proc)
        return :new_resource unless object[:uri].nil?
        return :resource_ref unless object[:from].nil?
        raise "mapping specification error: don't know how to map #{self}"
      end

      # The custom cell mapping of this column: the +object+ attribute when
      # it is a Proc, nil otherwise.
      def cell_mapping
        object.is_a?(Proc) ? object : nil
      end

    end
  end
end
@@ -1,6 +1,7 @@
1
1
  module Spread2RDF
2
- class Spreadsheet
3
- class SubSheet < Sheet
2
+ module Schema
3
+ class ColumnBlock < Sheet
4
+ include StatementMapping
4
5
 
5
6
  self.attributes = {
6
7
  predicate: nil,
@@ -1,7 +1,10 @@
1
+ require 'forwardable'
2
+
1
3
  module Spread2RDF
2
- class Spreadsheet
4
+ module Schema
3
5
  class Element
4
6
  include Attributes
7
+ extend Forwardable
5
8
 
6
9
  self.attributes = {
7
10
  name: nil,
@@ -11,17 +14,14 @@ module Spread2RDF
11
14
  attr_reader :parent
12
15
  attr_reader :block
13
16
 
17
+ def_delegators :parent, :spreadsheet
14
18
 
15
- def initialize(parent, attr={}, &block)
19
+ def initialize(parent, attr = {}, &block)
16
20
  @parent = parent
17
21
  @block = block
18
22
  init_attributes(attr)
19
23
  end
20
24
 
21
- def init
22
-
23
- end
24
-
25
25
  def name
26
26
  (@name or @source_name).try(:to_sym)
27
27
  end
@@ -30,21 +30,17 @@ module Spread2RDF
30
30
  (@source_name or @name).try(:to_s)
31
31
  end
32
32
 
33
- def spreadsheet
34
- parent.spreadsheet
33
+ def worksheet
34
+ return self if self.is_a? Worksheet
35
+ parent = self.parent
36
+ parent = parent.parent until parent.is_a? Worksheet or parent.nil?
37
+ parent
35
38
  end
36
39
 
37
40
  def to_s
38
41
  name = (self.name.to_s == self.source_name.to_s ?
39
42
  self.name : "#{self.name} (#{self.source_name})" )
40
- "#{self.class.name.split('::').last} #{name}"
41
- end
42
-
43
- private
44
-
45
- def create_context(parent_context, attr)
46
- context_class = self.class.const_get(:MappingContext)
47
- context_class.new(self, parent_context, attr)
43
+ "#{self.class.name.split('::').last}-schema #{name}"
48
44
  end
49
45
 
50
46
  end
@@ -0,0 +1,19 @@
1
module Spread2RDF
  module Schema

    # All schema definitions registered so far, in registration order.
    def self.definitions
      @@definitions ||= []
    end

    # Registers a new Spreadsheet schema built from the given arguments and
    # block. Returns the list of all definitions.
    def self.definition(*args, &block)
      definitions << Spreadsheet.new(*args, &block)
    end

    # Runs the CLI with the first registered definition as :schema option,
    # merged into +options+. Does nothing (and returns nil) when the CLI is
    # already running.
    def self.execute(options = {})
      return if CLI.running?
      CLI.run options.merge(schema: definitions.first)
    end

  end
end
@@ -0,0 +1,87 @@
1
module Spread2RDF
  module Schema
    # Schema of a sheet-like element: a named collection of columns with a
    # start coordinate and a subject specification.
    class Sheet < Element

      self.attributes = {
        start:                  :A2,
        subject:                nil,
        row_count_per_resource: nil
      }

      def initialize(parent, attr = {}, &block)
        super
        @column = {}
        @column_index = {}
      end

      # The +start+ attribute as a Coord.
      def start_coord
        Coord[start]
      end

      # The column registered in the column index under +coord+. An Integer
      # is converted to its column letter first.
      def column_by_coord(coord)
        key = coord.is_a?(Integer) ? Roo::Base.number_to_letter(coord) : coord
        @column_index[key]
      end

      # Without a name: the Hash of the columns of this sheet. With a name:
      # the column of that name in this sheet or, when the parent is a Sheet
      # too, in the parent; nil when there is none.
      def column_by_name(name = nil)
        return @column if name.nil?
        key = name.to_sym
        @column[key] || ( parent.is_a?(Sheet) && parent.column(key) ) || nil
      end
      alias column column_by_name

      def columns
        @column.values
      end

      # The range from the coord of the first column to the coord of the
      # last column. Where a coord is itself a Range, its begin resp. end
      # is taken.
      def column_range
        lower = columns.first.coord
        lower = lower.begin if lower.is_a? Range
        upper = columns.last.coord
        upper = upper.end if upper.is_a? Range
        lower .. upper
      end

      # Yields every column of this sheet; a ColumnBlock is not yielded
      # itself, its own columns are yielded instead.
      def each_column(&block)
        columns.each do |column|
          if column.is_a? ColumnBlock
            column.each_column(&block)
          else
            yield column
          end
        end
      end

      # The column holding the subjects: the one named by the :column entry
      # of the subject specification, :uri by default.
      def subject_column
        subject_column_name = self.subject.try(:fetch, :column, nil) || :uri
        @column[subject_column_name]
      end

      # The fixed number of rows per resource: the row_count_per_resource
      # attribute if set, 1 when there is no subject column, nil otherwise.
      def fix_row_count_per_resource
        row_count_per_resource || ( !subject_column && 1 ) || nil
      end

      # :bnode when the :uri entry of the subject specification (or the
      # subject specification itself) is :bnode, :from_column otherwise.
      def subject_mapping_mode
        uri_spec = subject.try(:fetch, :uri, nil) || subject
        uri_spec == :bnode ? :bnode : :from_column
      end

      # The namespace named by the :namespace entry of the :uri entry of the
      # subject specification, resolved through Namespace.
      def subject_namespace
        uri_spec = subject.try(:fetch, :uri, nil)
        Namespace.resolve_to_namespace(uri_spec.try(:fetch, :namespace, nil))
      end

      # The :type entry of the subject specification; RDF::RDFS.Class when
      # there is only a :sub_class_of entry; nil otherwise.
      def subject_resource_type
        subject.try(:fetch, :type, nil) ||
          (subject.try(:fetch, :sub_class_of, nil) && RDF::RDFS.Class) ||
          nil
      end

    end
  end
end
@@ -0,0 +1,54 @@
1
module Spread2RDF
  module Schema
    class Sheet
      # Evaluation context for the block of a worksheet or column block in
      # a schema definition.
      class DSL
        def initialize(spreadsheet_dsl, worksheet, filename, &block)
          @spreadsheet_dsl = spreadsheet_dsl
          @worksheet = worksheet
          @filename = filename
          instance_exec(&block) if block_given?
        end

        # Declares the column +name+ of the current sheet (created on first
        # use) and updates its attributes with +options+.
        # Returns the column schema.
        def column(name, options={}, &block)
          name = name.to_sym
          column = @worksheet.column[name] ||= Column.new(@worksheet, &block)
          column.update_attributes options.merge(name: name)
          column # TODO: chaining logic ...?
        end

        # Declares the column block +name+ of the current sheet (created on
        # first use), updates its attributes with +options+ and evaluates
        # the given block in a DSL context of the column block.
        def column_block(name, options={}, &block)
          name = name.to_sym
          sub_sheet = @worksheet.column[name] ||= ColumnBlock.new(@worksheet)
          sub_sheet.update_attributes options.merge(name: name)
          DSL.new(@spreadsheet_dsl, sub_sheet, @filename, &block)
        end

        # The content of the cell at +coord+ on the source worksheet of the
        # current sheet, passed through the block if one is given.
        # +options+ is accepted but not used.
        def cell(coord, options = {}, &block)
          content = ROO.cell(coord, @worksheet.source_name)
          content = block.call(content) if block_given?
          content
        end

        # Evaluates a template in this context, passing +args+ to it.
        #
        # +template+ is either the name of a template or the template Proc
        # itself (which is what calling a template name as a method in this
        # context returns).
        #
        # Raises ArgumentError when there is no template of the given name.
        def include(template, *args)
          body = template.is_a?(Proc) ? template : __template__(template)
          raise ArgumentError, "unknown template: #{template.inspect}" if body.nil?
          instance_exec(*args, &body)
        end

        def __template__(name)
          @spreadsheet_dsl.instance_variable_get(:@templates)[name]
        end
        private :__template__

        # The cell mapping of the given name defined in the schema; falls
        # back to the method of that name on Mapping::Cell::Default.
        def __cell_mapping__(name)
          @spreadsheet_dsl.instance_variable_get(:@cell_mappings)[name] or
            Mapping::Cell::Default.method(name).to_proc
        end
        private :__cell_mapping__

        # Unknown method names are resolved to templates first and to cell
        # mappings second.
        def method_missing(name, *args)
          __template__(name) or __cell_mapping__(name) or super
        end

      end
    end
  end
end
@@ -0,0 +1,49 @@
1
module Spread2RDF
  module Schema
    # Schema of a whole spreadsheet: a named set of worksheet schemas,
    # specified by the block of the schema definition.
    class Spreadsheet

      attr_reader :name
      attr_reader :worksheet

      def initialize(name, &block)
        @name = name
        @worksheet = {}
        @schema_spec = block
      end

      def spreadsheet
        self
      end

      def worksheets
        @worksheet.values
      end

      # The worksheets ordered such that worksheets without columns come
      # first and every other worksheet comes after the worksheets it
      # depends on.
      #
      # Raises a RuntimeError when no such order exists.
      def sorted_worksheets
        sorted, pending = worksheets.partition do |worksheet|
          worksheet.columns.empty?
        end
        until pending.empty?
          independent = pending.find_index do |worksheet|
            pending.none? { |other_worksheet| worksheet.depends_on? other_worksheet }
          end
          raise "schema contains cyclic dependencies" if independent.nil?
          sorted << pending.delete_at(independent)
        end
        sorted
      end

      # Maps +input_file+ with this schema and returns the resulting
      # Mapping::Spreadsheet. The mapping object is created before the
      # schema specification is evaluated.
      def map(input_file)
        mapping = Mapping::Spreadsheet.new(self, input_file)
        DSL.new(self, input_file).instance_exec(&@schema_spec)
        worksheets.each(&:init)
        mapping.map
        mapping
      end

    end
  end
end
@@ -0,0 +1,42 @@
1
module Spread2RDF
  module Schema
    class Spreadsheet
      # Evaluation context for the block of a schema definition.
      class DSL

        def initialize(schema, filename)
          @schema = schema
          @filename = filename
          @templates = {}
          @cell_mappings = {}
        end

        # Registers every name/namespace pair of the given Hash in Namespace.
        def namespaces(namespaces)
          namespaces.each { |name, namespace| Namespace[name] = namespace }
        end

        # Declares a worksheet.
        #
        # +name+ is the name of the worksheet in the source file. The schema
        # name of the worksheet is the :name option, or +name+ when that
        # option is missing. The remaining options become attributes of the
        # worksheet schema. The given block is evaluated in a Sheet::DSL
        # context of the worksheet.
        #
        # The +options+ Hash of the caller is not modified.
        def worksheet(name, options={}, &block)
          source_name = name
          attributes  = options.dup
          sheet_name  = ( attributes.delete(:name) || source_name ).to_sym
          worksheet = @schema.worksheet[sheet_name] ||= Worksheet.new(@schema)
          worksheet.update_attributes(
            attributes.merge(name: sheet_name, source_name: source_name))
          Sheet::DSL.new(self, worksheet, @filename, &block)
        end

        # Defines the template +name+. Raises a RuntimeError without a block.
        def template(name, &block)
          raise "required block for template #{name} missing" unless block_given?
          @templates[name.to_sym] = block
        end

        # Defines the cell mapping +name+. Raises a RuntimeError without a
        # block.
        def cell_mapping(name, &block)
          raise "required block for cell_mapping #{name} missing" unless block_given?
          @cell_mappings[name.to_sym] = block
        end

        # Unknown method names are resolved to the template of that name.
        def method_missing(name, *args)
          @templates[name] or super
        end

        def respond_to_missing?(name, include_private = false)
          @templates.key?(name) || super
        end

      end
    end
  end
end
@@ -0,0 +1,26 @@
1
module Spread2RDF
  module Schema
    # Mixin for schema elements with a +statement+ and a +predicate+
    # attribute.
    module StatementMapping

      # How the values of the element are turned into statements:
      #
      # * :ignore      - +statement+ is :none or there is no predicate
      # * :restriction - a restriction mode is specified
      # * :default     - otherwise
      def statement_mapping_mode
        if statement == :none || predicate.nil?
          :ignore
        elsif restriction_mode
          :restriction
        else
          :default
        end
      end

      # The restriction property to be used:
      #
      # * RDF::OWL.hasValue when +statement+ is :restriction
      # * the :restriction entry when +statement+ is a Hash (this branch
      #   previously called itself and never returned)
      # * false otherwise
      def restriction_mode
        case statement
        when :restriction then RDF::OWL.hasValue
        when Hash         then statement[:restriction]
        else false
        end
      end

    end
  end
end
@@ -0,0 +1,47 @@
1
module Spread2RDF
  module Schema
    # Schema of a complete worksheet.
    class Worksheet < Sheet

      def init
        index_columns!
      end

      # Assigns consecutive column letters to all columns (in each_column
      # order), beginning at the column of the start coordinate. Every
      # column is also registered under its letter in the column index of
      # each of its ancestors up to and including the worksheet.
      def index_columns!
        index = start_coord.column_as_number
        each_column do |column|
          index_letter = Roo::Base.number_to_letter(index)
          column.instance_variable_set :@coord, index_letter
          ancestor = column
          until ancestor.is_a? Worksheet
            ancestor = ancestor.parent
            column_index = ancestor.instance_variable_get :@column_index
            column_index[index_letter] = column
          end
          index += 1
        end
      end

      # The worksheet schemas referenced by the :from entry of the
      # resource-referencing columns of this worksheet, one entry per
      # such column.
      def dependent_sheets
        references = []
        each_column do |column|
          next unless column.object_mapping_mode == :resource_ref
          references << spreadsheet.worksheet[column.object[:from]]
        end
        references
      end

      # Whether this worksheet references +worksheet+, directly or through
      # other worksheets. A worksheet never depends on itself.
      #
      # +visited+ collects the worksheets already examined during one query;
      # callers normally omit it. It keeps the search from running forever
      # when the references between other worksheets form a cycle.
      def depends_on?(worksheet, visited = [])
        return false unless worksheet.is_a? Worksheet
        return false if worksheet == self
        visited << self
        dependent_sheets = self.dependent_sheets
        return false if dependent_sheets.empty?
        return true if dependent_sheets.include? worksheet
        dependent_sheets.any? do |dependent_sheet|
          !visited.include?(dependent_sheet) &&
            dependent_sheet.depends_on?(worksheet, visited)
        end
      end

    end
  end
end