spread2rdf 0.0.1pre.1 → 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +8 -8
  2. data/Gemfile.ocra +6 -0
  3. data/VERSION +1 -1
  4. data/bin/spread2rdf +1 -1
  5. data/lib/spread2rdf/attributes.rb +1 -0
  6. data/lib/spread2rdf/cli.rb +128 -34
  7. data/lib/spread2rdf/coord.rb +49 -0
  8. data/lib/spread2rdf/mapping/cell.rb +105 -0
  9. data/lib/spread2rdf/mapping/column.rb +63 -0
  10. data/lib/spread2rdf/mapping/column_block.rb +23 -0
  11. data/lib/spread2rdf/mapping/default_cell_mappings.rb +26 -0
  12. data/lib/spread2rdf/mapping/element.rb +64 -0
  13. data/lib/spread2rdf/mapping/resource.rb +95 -0
  14. data/lib/spread2rdf/mapping/sheet.rb +80 -0
  15. data/lib/spread2rdf/mapping/spreadsheet.rb +56 -0
  16. data/lib/spread2rdf/mapping/statement.rb +22 -0
  17. data/lib/spread2rdf/mapping/worksheet.rb +12 -0
  18. data/lib/spread2rdf/namespace.rb +6 -2
  19. data/lib/spread2rdf/roo_helper.rb +45 -0
  20. data/lib/spread2rdf/schema/column.rb +37 -0
  21. data/lib/spread2rdf/{spreadsheet/sub_sheet.rb → schema/column_block.rb} +3 -2
  22. data/lib/spread2rdf/{spreadsheet → schema}/element.rb +12 -16
  23. data/lib/spread2rdf/schema/schema.rb +19 -0
  24. data/lib/spread2rdf/schema/sheet.rb +87 -0
  25. data/lib/spread2rdf/schema/sheet_dsl.rb +54 -0
  26. data/lib/spread2rdf/schema/spreadsheet.rb +49 -0
  27. data/lib/spread2rdf/schema/spreadsheet_dsl.rb +42 -0
  28. data/lib/spread2rdf/schema/statement_mapping_schema.rb +26 -0
  29. data/lib/spread2rdf/schema/worksheet.rb +47 -0
  30. data/lib/spread2rdf.rb +34 -2
  31. data/ontologies/unit-v1.1.ttl +8330 -0
  32. data/spread2rdf.gemspec +4 -2
  33. metadata +50 -26
  34. data/lib/spread2rdf/helper.rb +0 -14
  35. data/lib/spread2rdf/spreadsheet/column.rb +0 -48
  36. data/lib/spread2rdf/spreadsheet/column_mapping_context.rb +0 -156
  37. data/lib/spread2rdf/spreadsheet/coord.rb +0 -51
  38. data/lib/spread2rdf/spreadsheet/mapping_context.rb +0 -67
  39. data/lib/spread2rdf/spreadsheet/mapping_dsl.rb +0 -23
  40. data/lib/spread2rdf/spreadsheet/sheet.rb +0 -128
  41. data/lib/spread2rdf/spreadsheet/sheet_dsl.rb +0 -34
  42. data/lib/spread2rdf/spreadsheet/sheet_mapping_context.rb +0 -90
  43. data/lib/spread2rdf/spreadsheet/sub_sheet_mapping_context.rb +0 -55
  44. data/lib/spread2rdf/spreadsheet/worksheet.rb +0 -49
  45. data/lib/spread2rdf/spreadsheet.rb +0 -92
@@ -0,0 +1,80 @@
module Spread2RDF
  module Mapping
    # Mapping of a sheet-like schema element onto rows of the loaded
    # spreadsheet. The rows are partitioned into consecutive row ranges, one
    # per resource, and a Mapping::Resource is created for each range.
    #
    # `row_range` is not defined here; it has to be provided by subclasses
    # (see e.g. Mapping::Worksheet#row_range).
    class Sheet < Element

      def initialize(sheet, parent)
        super
        @resources = []
      end

      # Creates one Mapping::Resource per row range of #rows_per_resource.
      #
      # Returns an empty Array when there is no row range or the schema has
      # no columns, otherwise self.
      def map
        #puts "processing #{self} in #{row_range}"
        return [] if row_range.nil? || schema.columns.empty?
        rows_per_resource.each do |resource_range|
          @resources << Mapping::Resource.new(schema, self, resource_range)
        end
        self
      end

      # Row ranges (memoized), one Range per resource.
      #
      # With a fixed row count per resource every n-th row starts a new
      # resource; otherwise every row with a non-blank cell in the subject
      # column does. Each range extends up to the row before the next start
      # row; the last range extends to the end of row_range.
      #
      # Raises a RuntimeError when neither a fixed row count nor a subject
      # column coordinate is available.
      def rows_per_resource
        return [] if row_range.nil?
        @rows_per_resource ||= begin
          first_rows = first_rows_of_resources
          first_rows.each_with_index.map do |first_row, i|
            last_row = (i + 1 == first_rows.count ? row_range.end : first_rows[i + 1] - 1)
            first_row..last_row
          end
        end
      end

      ##########################################################################
      # Mapping::Element structure

      # The enclosing Mapping::Worksheet: self if this already is one,
      # otherwise the nearest Worksheet ancestor (nil if there is none).
      def worksheet
        return self if self.is_a? Worksheet
        ancestor = self.parent
        ancestor = ancestor.parent until ancestor.nil? || ancestor.is_a?(Worksheet)
        ancestor
      end

      attr_reader :resources
      alias _children_ resources

      # The resource whose row range includes the given row, or nil when no
      # range contains it (previously this raised a TypeError from indexing
      # the resources Array with nil).
      def resource_by_row(row)
        index = rows_per_resource.find_index { |range| range.include? row }
        index && resource_by_index(index)
      end

      def resource_by_index(index)
        @resources[index]
      end

      ##########################################################################
      # Roo helper

      # Value of the cell at coord in the source worksheet of this sheet's
      # schema; String values are returned stripped.
      def cell_value(coord)
        value = ROO.cell(coord, schema.worksheet.source_name)
        value = value.strip if value.is_a? String
        value
      end
      alias cell cell_value

      # Delegates to ROO.roo with the source worksheet of this sheet's schema.
      def roo(&block)
        ROO.roo(schema.worksheet.source_name, &block)
      end

      private

      # Rows of row_range at which a new resource starts.
      def first_rows_of_resources
        if fix_row_count = schema.fix_row_count_per_resource
          row_range.select { |row| (row - row_range.begin) % fix_row_count == 0 }
        else
          subject_column_coord = schema.subject_column.try(:coord)
          raise "no subject column for #{self}" if subject_column_coord.blank?
          row_range.reject do |row|
            cell(row: row, column: subject_column_coord).blank?
          end
        end
      end

    end
  end
end
@@ -0,0 +1,56 @@
module Spread2RDF
  module Mapping
    # Root of the mapping tree: loads the input file into ROO and holds one
    # Mapping::Worksheet per worksheet schema, keyed by the schema name.
    class Spreadsheet < Element

      attr_reader :input_file

      def initialize(schema, filename)
        super(schema, nil)
        @worksheets = {}
        @input_file = filename
        ROO.load(filename)
      end

      # Maps all worksheets in the order given by schema.sorted_worksheets.
      # Returns self.
      def map
        schema.sorted_worksheets.each do |worksheet_schema|
          worksheet!(worksheet_schema)
        end
        self
      end

      # Resolves name to a worksheet schema. A Schema::Worksheet is returned
      # as is; a String or Symbol is looked up in schema.worksheet, which is
      # keyed by Symbol, hence the to_sym conversion.
      #
      # Raises ArgumentError for any other argument type.
      def worksheet_schema(name)
        case name
          when Schema::Worksheet then name
          when String, Symbol    then schema.worksheet[name.to_sym]
          else
            raise ArgumentError,
                  "expected a worksheet schema or name, got #{name.inspect}"
        end
      end

      ##########################################################################
      # Mapping::Element structure

      def spreadsheet
        self
      end

      def worksheets
        @worksheets.values
      end
      alias _children_ worksheets

      # The already created worksheet mapping for name, or nil.
      def worksheet(name)
        @worksheets[worksheet_schema(name).name]
      end

      # The worksheet mapping for name, created and mapped on first access.
      #
      # Always returns the Mapping::Worksheet; previously the first call
      # returned the result of Worksheet#map, which may be an empty Array.
      def worksheet!(name)
        ws_schema = worksheet_schema(name)
        existing = @worksheets[ws_schema.name]
        return existing if existing
        mapping = Mapping::Worksheet.new(ws_schema, self)
        # register before mapping, so the mapping is already retrievable
        # while its own map call is running
        @worksheets[ws_schema.name] = mapping
        mapping.map
        mapping
      end

    end
  end
end
@@ -0,0 +1,22 @@
module Spread2RDF
  module Mapping
    # Mixin producing the statements that link a subject to an object value.
    # The including class has to provide schema, subject, predicate,
    # statement and statements.
    module Statement

      # Emits the statement(s) for object according to
      # schema.statement_mapping_mode:
      #
      # :default     - a single subject/predicate/object statement
      # :restriction - subject becomes rdfs:subClassOf a blank-node
      #                owl:Restriction on predicate, with
      #                schema.restriction_mode pointing to object
      #
      # For every other mode nothing is emitted and nil is returned.
      def statements_to_object(object)
        mode = schema.statement_mapping_mode
        if mode == :default
          statement(subject, predicate, object)
        elsif mode == :restriction
          restriction = RDF::Node.new
          statements(
            [ subject,     RDF::RDFS.subClassOf,    restriction ],
            [ restriction, RDF.type,                RDF::OWL.Restriction ],
            [ restriction, RDF::OWL.onProperty,     predicate ],
            [ restriction, schema.restriction_mode, object ]
          )
        end
      end
      private :statements_to_object
    end
  end
end
@@ -0,0 +1,12 @@
module Spread2RDF
  module Mapping
    # Mapping of a complete worksheet.
    class Worksheet < Sheet

      # Range of rows to be mapped: from the row of the schema's start
      # coordinate to the last row reported by Roo for this worksheet.
      #
      # Returns nil when there is nothing to map, i.e. when the start row
      # lies behind the last row or when no last row is reported
      # (NOTE(review): Roo presumably returns nil for an empty sheet - confirm).
      def row_range
        first_row, last_row = roo do |sheet|
          [ Coord[schema.start].row, sheet.last_row ]
        end
        return nil if last_row.nil? || first_row > last_row
        first_row..last_row
      end

    end
  end
end
@@ -1,5 +1,9 @@
1
1
  module Spread2RDF
2
2
  module Namespace
3
+ DEFAULTS = {
4
+ UNIT: ::RDF::Vocabulary.new('http://qudt.org/vocab/unit#')
5
+ }
6
+
3
7
  class << self
4
8
  def [](name)
5
9
  name = name.to_sym
@@ -18,7 +22,7 @@ module Spread2RDF
18
22
  end
19
23
 
20
24
  def namespace
21
- @namespace ||= {}
25
+ @namespace ||= DEFAULTS
22
26
  end
23
27
 
24
28
  def namespaces
@@ -32,7 +36,7 @@ module Spread2RDF
32
36
  when RDF::Vocabulary, RDF::URI, String
33
37
  namespace_descriptor.to_s
34
38
  else
35
- raise "invalid namespace: #{namespace_descriptor.inspect}"
39
+ raise "invalid namespace: #{namespace_descriptor.inspect}:#{namespace_descriptor.class}"
36
40
  end
37
41
  end
38
42
 
@@ -0,0 +1,45 @@
require 'singleton'
module Spread2RDF
  # Singleton wrapper around the Roo spreadsheet object of the currently
  # loaded input file. Accessible through the ROO constant.
  class RooAdapter
    include Singleton

    # Opens file as an Excelx spreadsheet. For .xlsm files the options
    # packed: :zip and file_warning: :ignore are passed to Roo.
    # TODO: make this work with other spreadsheets than Excel
    def load(file)
      options = {}
      if File.extname(file).downcase == '.xlsm'
        options[:packed]       = :zip
        options[:file_warning] = :ignore
      end
      @roo = Roo::Excelx.new(file, options)
    end

    # Makes worksheet the default sheet of the loaded spreadsheet.
    def select_worksheet(worksheet)
      @roo.default_sheet = worksheet
    end

    # Access to the underlying Roo object.
    #
    # Without a worksheet the Roo object is returned as is. With a worksheet
    # but without a block the worksheet is selected as default sheet and the
    # Roo object is returned. With a block the worksheet is only selected for
    # the duration of the block: the previous default sheet is restored
    # afterwards (also when the block raises) and the block's result is
    # returned.
    def roo(worksheet = nil)
      return @roo if worksheet.nil?
      last_default_sheet = @roo.default_sheet
      select_worksheet(worksheet)
      return @roo unless block_given?
      begin
        yield @roo
      ensure
        @roo.default_sheet = last_default_sheet
      end
    end

    # Content of the cell at coord (a Coord or anything Coord[] accepts) on
    # the given worksheet. The worksheet is always passed on to Roo, also
    # when it is nil.
    def cell(coord, worksheet = nil)
      coord = Coord[coord] unless coord.is_a? Coord
      @roo.cell(coord.column, coord.row, worksheet)
    end

  end

  ROO = RooAdapter.instance

end
@@ -0,0 +1,37 @@
module Spread2RDF
  module Schema
    # Schema of a single spreadsheet column.
    class Column < Element
      include StatementMapping

      self.attributes = {
        predicate: nil,
        object:    nil,
        statement: nil
      }

      attr_reader :coord # this is set by Worksheet#index_columns!

      alias sheet parent

      def to_s
        "#{super} of #{sheet}"
      end

      # How cell values of this column become RDF objects, derived from the
      # object attribute:
      #
      # nil            -> :to_string
      # a Proc         -> :custom
      # has a :uri     -> :new_resource
      # has a :from    -> :resource_ref
      #
      # Raises a RuntimeError for any other object specification.
      def object_mapping_mode
        return :to_string    if object.nil?
        return :custom       if object.is_a?(Proc)
        return :new_resource unless object[:uri].nil?
        return :resource_ref unless object[:from].nil?
        raise "mapping specification error: don't know how to map #{self}"
      end

      # The custom cell mapping Proc, or nil when object is not a Proc.
      def cell_mapping
        object.is_a?(Proc) ? object : nil
      end

    end
  end
end
@@ -1,6 +1,7 @@
1
1
  module Spread2RDF
2
- class Spreadsheet
3
- class SubSheet < Sheet
2
+ module Schema
3
+ class ColumnBlock < Sheet
4
+ include StatementMapping
4
5
 
5
6
  self.attributes = {
6
7
  predicate: nil,
@@ -1,7 +1,10 @@
1
+ require 'forwardable'
2
+
1
3
  module Spread2RDF
2
- class Spreadsheet
4
+ module Schema
3
5
  class Element
4
6
  include Attributes
7
+ extend Forwardable
5
8
 
6
9
  self.attributes = {
7
10
  name: nil,
@@ -11,17 +14,14 @@ module Spread2RDF
11
14
  attr_reader :parent
12
15
  attr_reader :block
13
16
 
17
+ def_delegators :parent, :spreadsheet
14
18
 
15
- def initialize(parent, attr={}, &block)
19
+ def initialize(parent, attr = {}, &block)
16
20
  @parent = parent
17
21
  @block = block
18
22
  init_attributes(attr)
19
23
  end
20
24
 
21
- def init
22
-
23
- end
24
-
25
25
  def name
26
26
  (@name or @source_name).try(:to_sym)
27
27
  end
@@ -30,21 +30,17 @@ module Spread2RDF
30
30
  (@source_name or @name).try(:to_s)
31
31
  end
32
32
 
33
- def spreadsheet
34
- parent.spreadsheet
33
+ def worksheet
34
+ return self if self.is_a? Worksheet
35
+ parent = self.parent
36
+ parent = parent.parent until parent.is_a? Worksheet or parent.nil?
37
+ parent
35
38
  end
36
39
 
37
40
  def to_s
38
41
  name = (self.name.to_s == self.source_name.to_s ?
39
42
  self.name : "#{self.name} (#{self.source_name})" )
40
- "#{self.class.name.split('::').last} #{name}"
41
- end
42
-
43
- private
44
-
45
- def create_context(parent_context, attr)
46
- context_class = self.class.const_get(:MappingContext)
47
- context_class.new(self, parent_context, attr)
43
+ "#{self.class.name.split('::').last}-schema #{name}"
48
44
  end
49
45
 
50
46
  end
@@ -0,0 +1,19 @@
module Spread2RDF
  module Schema
    class << self
      # Registers a new spreadsheet schema built from the given arguments
      # and block. Returns the list of all definitions.
      def definition(*args, &block)
        definitions.push(Spreadsheet.new(*args, &block))
      end

      # All spreadsheet schemas registered so far.
      def definitions
        @@definitions ||= []
      end

      # Runs the CLI with the first registered schema merged into options,
      # unless the CLI is already running.
      def execute(options = {})
        return if CLI.running?
        CLI.run(options.merge(schema: definitions.first))
      end

    end

  end
end
@@ -0,0 +1,87 @@
module Spread2RDF
  module Schema
    # Schema of a sheet-like element: a named collection of columns together
    # with the specification of how subjects are derived from its rows.
    class Sheet < Element

      self.attributes = {
        start:                  :A2,
        subject:                nil,
        row_count_per_resource: nil
      }

      def initialize(parent, attr = {}, &block)
        super
        @column = {}        # column name (Symbol) => column schema
        @column_index = {}  # column letter => column schema
      end

      def start_coord
        Coord[start]
      end

      # The column schema at the given column coordinate; an Integer is
      # converted to its column letter first.
      def column_by_coord(coord)
        coord = Roo::Base.number_to_letter(coord) if coord.is_a? Integer
        @column_index[coord]
      end

      # Without a name: the Hash of all columns of this sheet.
      # With a name: the column of that name, looked up in this sheet first
      # and then in the parent sheet (if the parent is a Sheet); nil if it is
      # not found.
      def column_by_name(name = nil)
        return @column if name.nil?
        name = name.to_sym
        @column[name] || (parent.is_a?(Sheet) && parent.column(name)) || nil
      end
      alias column column_by_name

      def columns
        @column.values
      end

      # Range from the coordinate of the first column to the coordinate of
      # the last column; for columns whose coord is itself a Range, its begin
      # resp. end is used. Returns nil for a sheet without columns (this
      # previously failed with a NoMethodError on nil).
      def column_range
        return nil if columns.empty?
        first = columns.first.coord
        first = first.begin if first.is_a? Range
        last = columns.last.coord
        last = last.end if last.is_a? Range
        first .. last
      end

      # Yields every plain column, descending into column blocks.
      def each_column(&block)
        columns.each do |column|
          if column.is_a? ColumnBlock
            column.each_column(&block)
          else
            yield column
          end
        end
      end

      # The column providing the subject: the one named by subject[:column],
      # defaulting to the column named :uri.
      def subject_column
        column_name = self.subject.try(:fetch, :column, nil) || :uri
        @column[column_name]
      end

      # The fixed number of rows per resource: the configured
      # row_count_per_resource, or 1 when there is no subject column,
      # otherwise nil.
      def fix_row_count_per_resource
        row_count_per_resource || (!subject_column && 1) || nil
      end

      # :bnode when the subject (or its :uri entry) is :bnode, otherwise
      # :from_column.
      def subject_mapping_mode
        if (subject.try(:fetch, :uri, nil) || subject) == :bnode
          :bnode
        else
          :from_column
        end
      end

      def subject_namespace
        subject_namespace_name =
            subject.try(:fetch, :uri, nil).try(:fetch, :namespace, nil)
        Namespace.resolve_to_namespace(subject_namespace_name)
      end

      # The configured subject type; rdfs:Class when only a :sub_class_of is
      # configured; otherwise nil.
      def subject_resource_type
        subject.try(:fetch, :type, nil) ||
            (subject.try(:fetch, :sub_class_of, nil) && RDF::RDFS.Class) ||
            nil
      end

    end
  end
end
@@ -0,0 +1,54 @@
module Spread2RDF
  module Schema
    class Sheet
      # DSL evaluated inside a worksheet or column_block definition.
      class DSL
        def initialize(spreadsheet_dsl, worksheet, filename, &block)
          @spreadsheet_dsl = spreadsheet_dsl
          @worksheet = worksheet
          @filename = filename
          instance_exec(&block) if block_given?
        end

        # Defines the column name on the current sheet (or reuses an already
        # defined one) and updates its attributes with options.
        def column(name, options = {}, &block)
          name = name.to_sym
          column = @worksheet.column[name] ||= Column.new(@worksheet, &block)
          column.update_attributes options.merge(name: name)
          column # TODO: chaining logic ...?
        end

        # Defines the column block name on the current sheet (or reuses an
        # already defined one) and evaluates the block in a nested DSL.
        def column_block(name, options = {}, &block)
          name = name.to_sym
          sub_sheet = @worksheet.column[name] ||= ColumnBlock.new(@worksheet)
          sub_sheet.update_attributes options.merge(name: name)
          DSL.new(@spreadsheet_dsl, sub_sheet, @filename, &block)
        end

        # Content of the cell at coord of the current worksheet, passed
        # through the block if one is given. options is currently unused.
        def cell(coord, options = {}, &block)
          content = ROO.cell(coord, @worksheet.source_name)
          content = block.call(content) if block_given?
          content
        end

        # Evaluates the template registered under the given name in the
        # context of this DSL, passing args to it.
        def include(template, *args)
          instance_exec(*args, &__template__(template))
        end

        def __template__(name)
          @spreadsheet_dsl.instance_variable_get(:@templates)[name]
        end
        private :__template__

        # The cell mapping registered under name on the spreadsheet DSL, or
        # the default cell mapping of that name as a Proc. Returns nil when
        # neither exists, so that method_missing can fall through to super.
        def __cell_mapping__(name)
          custom = @spreadsheet_dsl.instance_variable_get(:@cell_mappings)[name]
          return custom if custom
          defaults = Mapping::Cell::Default
          defaults.method(name).to_proc if defaults.respond_to?(name)
        end
        private :__cell_mapping__

        # Unknown method names resolve to the template or cell mapping of
        # that name; anything else raises the usual NoMethodError.
        def method_missing(name, *args)
          __template__(name) or __cell_mapping__(name) or super
        end

        def respond_to_missing?(name, include_private = false)
          !!(__template__(name) || __cell_mapping__(name)) || super
        end

      end
    end
  end
end
@@ -0,0 +1,49 @@
module Spread2RDF
  module Schema
    # Schema of a whole spreadsheet: a name, the worksheet schemas keyed by
    # name and the block with the schema specification.
    class Spreadsheet

      attr_reader :name
      attr_reader :worksheet

      def initialize(name, &block)
        @name = name
        @worksheet = {}
        @schema_spec = block
      end

      def spreadsheet
        self
      end

      def worksheets
        @worksheet.values
      end

      # The worksheets in an order in which they can be mapped: worksheets
      # without columns come first, followed by the remaining ones such that
      # every worksheet comes after the worksheets it depends on.
      #
      # Raises a RuntimeError when the dependencies are cyclic.
      def sorted_worksheets
        ordered, pending = worksheets.partition do |candidate|
          candidate.columns.empty?
        end
        until pending.empty?
          # first worksheet not depending on any other still pending one
          position = pending.index do |candidate|
            pending.none? { |other| candidate.depends_on? other }
          end
          raise "schema contains cyclic dependencies" if position.nil?
          ordered << pending.delete_at(position)
        end
        ordered
      end

      # Maps input_file with this schema: loads the file, evaluates the
      # schema specification, initializes all worksheet schemas and runs the
      # mapping. Returns the Mapping::Spreadsheet.
      def map(input_file)
        Mapping::Spreadsheet.new(self, input_file).tap do |mapping|
          DSL.new(self, input_file).instance_exec(&@schema_spec)
          worksheets.each(&:init)
          mapping.map
        end
      end

    end
  end
end
@@ -0,0 +1,42 @@
module Spread2RDF
  module Schema
    class Spreadsheet
      # DSL in which the schema specification block of a spreadsheet schema
      # is evaluated.
      class DSL

        def initialize(schema, filename)
          @schema = schema
          @filename = filename
          @templates = {}
          @cell_mappings = {}
        end

        # Registers every name/namespace pair of the given Hash in Namespace.
        def namespaces(namespaces)
          namespaces.each { |name, namespace| Namespace[name] = namespace }
        end

        # Defines (or reopens) a worksheet schema. name is the name of the
        # worksheet in the source file; the schema is registered under
        # options[:name] if given, otherwise under name. The block is
        # evaluated in a Sheet::DSL for the worksheet.
        #
        # The options Hash of the caller is left untouched.
        def worksheet(name, options = {}, &block)
          source_name = name
          attributes = options.merge(source_name: source_name)
          name = ( attributes.delete(:name) || source_name ).to_sym
          worksheet = @schema.worksheet[name] ||= Worksheet.new(@schema)
          worksheet.update_attributes attributes.merge(name: name)
          Sheet::DSL.new(self, worksheet, @filename, &block)
        end

        # Registers block as template name. Raises a RuntimeError without a
        # block.
        def template(name, &block)
          raise "required block for template #{name} missing" unless block_given?
          @templates[name.to_sym] = block
        end

        # Registers block as cell mapping name. Raises a RuntimeError without
        # a block.
        def cell_mapping(name, &block)
          raise "required block for cell_mapping #{name} missing" unless block_given?
          @cell_mappings[name.to_sym] = block
        end

        # Unknown method names resolve to the template of that name.
        def method_missing(name, *args)
          @templates[name] or super
        end

        def respond_to_missing?(name, include_private = false)
          !!@templates[name] || super
        end

      end
    end
  end
end
@@ -0,0 +1,26 @@
module Spread2RDF
  module Schema
    # Mixin for schema elements with predicate and statement attributes.
    module StatementMapping

      # How statements for this element are generated:
      #
      # :ignore      - statement is :none or there is no predicate
      # :restriction - a restriction mode is configured
      # :default     - otherwise
      def statement_mapping_mode
        case
          when statement == :none then :ignore
          when predicate.nil?     then :ignore
          when restriction_mode   then :restriction
          else :default
        end
      end

      # The restriction property to be used: owl:hasValue when statement is
      # :restriction, the :restriction entry when statement is a Hash
      # (nil if the Hash has no such entry), otherwise false.
      def restriction_mode
        case statement
          when :restriction then RDF::OWL.hasValue
          # read the entry from the statement Hash; the former self-call
          # recursed infinitely
          when Hash         then statement[:restriction]
          else false
        end
      end

    end
  end
end
@@ -0,0 +1,47 @@
module Spread2RDF
  module Schema
    # Schema of a complete worksheet.
    class Worksheet < Sheet

      def init
        index_columns!
      end

      # Assigns consecutive column letters, starting at the column of the
      # start coordinate, to all plain columns (including those nested in
      # column blocks) and registers every column under its letter in the
      # column index of each enclosing sheet up to this worksheet.
      def index_columns!
        index = start_coord.column_as_number
        each_column do |column|
          index_letter = Roo::Base.number_to_letter(index)
          column.instance_variable_set :@coord, index_letter
          parent = column
          until parent.is_a? Worksheet
            parent = parent.parent
            column_index = parent.instance_variable_get :@column_index
            column_index[index_letter] = column
          end
          index += 1
        end
      end

      # The worksheet schemas referenced by the :resource_ref columns of this
      # worksheet, one entry per referencing column. An entry is nil when the
      # reference can not be resolved.
      def dependent_sheets
        references = []
        each_column do |column|
          if column.object_mapping_mode == :resource_ref
            references << spreadsheet.worksheet[column.object[:from]]
          end
        end
        references
      end

      # Whether this worksheet references the given worksheet directly or
      # through other worksheets. A worksheet never depends on itself.
      #
      # visited collects the worksheets already examined during the
      # traversal, so that reference cycles between other worksheets
      # terminate; callers don't need to pass it. Unresolved references are
      # skipped.
      def depends_on?(worksheet, visited = [])
        return false unless worksheet.is_a? Worksheet
        return false if worksheet == self
        return false if visited.include? self
        visited << self
        referenced = dependent_sheets.compact
        return false if referenced.empty?
        return true if referenced.include? worksheet
        referenced.any? do |dependent_sheet|
          dependent_sheet != self &&
              dependent_sheet.depends_on?(worksheet, visited)
        end
      end

    end
  end
end