spread2rdf 0.0.1pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,67 @@
1
+ module Spread2RDF
2
+ class Spreadsheet
3
+ class MappingContext
4
+ include Attributes
5
+
6
+ self.attributes = {
7
+ }
8
+
9
+ attr_reader :element
10
+ attr_reader :parent_context
11
+
12
+ attr_reader :graph
13
+
14
+ def initialize(element, parent_context, attr = {})
15
+ @element = element
16
+ @parent_context = parent_context
17
+ @graph = RDF::Repository.new
18
+ init_attributes(attr)
19
+ end
20
+
21
+ def sheet
22
+ @element.sheet
23
+ end
24
+
25
+ def worksheet
26
+ @element.worksheet
27
+ end
28
+
29
+ def spreadsheet
30
+ @element.spreadsheet
31
+ end
32
+
33
+ def cell_value(coord)
34
+ worksheet.cell(coord)
35
+ end
36
+
37
# Looks up the mapping registered for a cell of the current worksheet.
#
# coord - a Coord, or anything Coord[] accepts.
#
# Returns whatever worksheet.cell_mapping holds under the coordinate's
# symbol form (nil when nothing is stored there).
def cell(coord)
  normalized = coord.is_a?(Coord) ? coord : Coord[coord]
  worksheet.cell_mapping[normalized.to_sym]
end
41
+
42
+ def to_s
43
+ "#{self.class.name.split('::')[-2..-1].join('::')} of #{element}"
44
+ end
45
+
46
+ ##########################################################################
47
+ # statement generators
48
+
49
+ private
50
+
51
# Adds a single RDF statement to this context's graph.
#
# Accepts either three positional arguments (subject, predicate, object)
# or one Array holding those three values.
#
# Raises RuntimeError unless exactly three non-nil components are given.
def add_statement(*args)
  args = args.first if args.count == 1 && args.first.is_a?(Array)
  # `any?` instead of `one?`: a triple with two or three nil components
  # must be rejected just like a triple with a single nil.
  if args.count != 3 || args.any?(&:nil?)
    raise "internal error: trying to add a bad triple with nil value: #{args}"
  end
  @graph << RDF::Statement.new(*args)
end
alias statement add_statement
58
+
59
# Adds several statements by handing each triple to #statement.
#
# Accepts the triples either as separate arguments or wrapped in a single
# Array of triples. The wrapper is only removed when every element of the
# sole argument is itself an Array; otherwise that argument is a single
# triple and must not be split into its components.
#
# Returns the list of triples that were processed.
def add_statements(*args)
  sole = args.first
  wrapped = args.count == 1 && sole.is_a?(Array) && sole.all? { |item| item.is_a?(Array) }
  args = sole if wrapped
  args.each { |triple| statement(triple) }
end
alias statements add_statements
64
+
65
+ end
66
+ end
67
+ end
@@ -0,0 +1,23 @@
1
module Spread2RDF
  class Spreadsheet
    # Receiver for the top-level mapping definition block: registers
    # namespaces and declares the worksheets of a schema.
    class MappingDSL

      # schema - object exposing a `worksheet` Hash (name => Worksheet).
      def initialize(schema)
        @schema = schema
      end

      # Registers every name/namespace pair through Namespace[]=.
      def namespaces(namespaces)
        namespaces.each { |name, namespace| Namespace[name] = namespace }
      end

      # Declares (or re-opens) a worksheet and evaluates its definition block.
      #
      # name    - name of the sheet in the source document; also used as the
      #           worksheet's key unless options[:name] is given.
      # options - attributes for the worksheet. The hash is only read, never
      #           modified, so callers may pass shared or frozen hashes.
      #
      # Returns the Sheet::DSL instance that evaluated the block.
      def worksheet(name, options = {}, &block)
        source_name = name
        name = (options[:name] || source_name).to_sym
        worksheet = @schema.worksheet[name] ||= Worksheet.new(@schema)
        # merge overrides :name and :source_name, so no in-place edits of
        # `options` are needed.
        worksheet.update_attributes options.merge(name: name, source_name: source_name)
        Sheet::DSL.new(worksheet, &block)
      end

    end
  end
end
@@ -0,0 +1,128 @@
1
+ module Spread2RDF
2
+ class Spreadsheet
3
+ class Sheet < Element
4
+
5
+ self.attributes = {
6
+ subject: nil,
7
+ start: :A2,
8
+ row_count_per_resource: nil
9
+ }
10
+
11
+ def initialize(parent, options={}, &block)
12
+ super(parent, options, &block)
13
+ @column = {}
14
+ end
15
+
16
# Finds the Worksheet this sheet belongs to by walking up the parent chain.
#
# Returns self when it already is a Worksheet, the closest Worksheet
# ancestor otherwise, or nil when the chain ends without one.
def worksheet
  node = self
  node = node.parent until node.nil? || node.is_a?(Worksheet)
  node
end
22
+
23
# Column registry access.
#
# Without an argument, returns the Hash of columns defined on this sheet.
# With a name, returns the matching column of this sheet, falling back to
# the parent's lookup when the parent is a Sheet; nil when nothing matches.
def column(name = nil)
  return @column if name.nil?
  key = name.to_sym
  own = @column[key]
  return own if own
  inherited = parent.column(key) if parent.is_a?(Sheet)
  inherited || nil
end
28
+
29
+ def columns
30
+ @column.values
31
+ end
32
+
33
+ def cell(coord)
34
+ coord = Coord[coord]
35
+ spreadsheet.roo.cell(coord.column, coord.row, worksheet.source_name)
36
+ end
37
+
38
+ def start_coord
39
+ Coord[start]
40
+ end
41
+
42
+ def row_range
43
+ raise NotImplementedError, 'subclasses of Sheet must implement this method'
44
+ end
45
+
46
# Range spanned by this sheet's columns, from the first column's coord to
# the last column's coord. When an edge coord is itself a Range, its outer
# boundary (begin on the left, end on the right) is used.
def column_range
  leftmost = columns.first.coord
  rightmost = columns.last.coord
  leftmost = leftmost.begin if leftmost.is_a?(Range)
  rightmost = rightmost.end if rightmost.is_a?(Range)
  Range.new(leftmost, rightmost)
end
53
+
54
# Yields every leaf column of this sheet in definition order; SubSheet
# entries are not yielded themselves but traversed via their own
# each_column.
def each_column(&block)
  columns.each do |entry|
    entry.is_a?(SubSheet) ? entry.each_column(&block) : yield(entry)
  end
end
63
+
64
# Column that holds the subject identifier of each resource.
#
# The column name comes from subject[:column] when the subject
# specification is a Hash and defaults to :uri otherwise. Non-Hash
# specifications (nil, or a bare Symbol) are not sent #fetch, so the
# lookup does not depend on how Object#try treats objects lacking the
# method.
#
# Returns the column stored under that name, or nil.
def subject_column
  spec = subject
  configured = spec.fetch(:column, nil) if spec.is_a?(Hash)
  @column[configured || :uri]
end
69
+
70
+ def fix_row_count_per_resource
71
+ row_count_per_resource or ( !subject_column && 1 ) or nil
72
+ end
73
+
74
# Adds one statement to the graph of the enclosing worksheet.
#
# args - exactly three non-nil values (subject, predicate, object).
#
# Raises RuntimeError for any other argument list.
def add_triple(*args)
  # `any?` instead of `one?`: several nil components are just as invalid
  # as a single one.
  if args.count != 3 || args.any?(&:nil?)
    raise "internal error: trying to add a bad triple with nil value in #{self}: #{args}"
  end
  worksheet.graph << RDF::Statement.new(*args)
end
78
+
79
# Maps the given rows to resources.
#
# row_range - rows to process (defaults to this sheet's row_range).
# context   - parent mapping context handed to create_context.
#
# One context is created per row block returned by rows_per_resource.
# Unless this sheet is a SubSheet, each context is also stored in
# spreadsheet.worksheet_mapping under the worksheet's name (later blocks
# overwrite earlier ones).
#
# Returns the subjects of the created contexts, or [] for a nil range.
def map(row_range = self.row_range, context = nil)
  return [] if row_range.nil?
  top_level = !is_a?(SubSheet)
  rows_per_resource(row_range).map do |resource_rows|
    resource_context = create_context(context, row_range: resource_rows)
    spreadsheet.worksheet_mapping[worksheet.name] = resource_context if top_level
    resource_context.subject
  end
end
89
+
90
+ private
91
+
92
# Splits a row range into one sub-range per resource.
#
# With a fixed row count per resource, a new resource starts every that
# many rows. Otherwise a resource starts at each row whose subject-column
# cell is not blank; a missing subject column raises RuntimeError.
#
# Each returned Range runs from a start row to the row before the next
# start row; the last one ends at row_range.end. Returns [] for nil.
def rows_per_resource(row_range)
  return [] if row_range.nil?
  fixed_count = fix_row_count_per_resource
  first_rows =
    if fixed_count
      row_range.select { |row| ((row - row_range.begin) % fixed_count).zero? }
    else
      subject_column_coord = subject_column.try(:coord)
      raise "no subject column for #{self}" if subject_column_coord.blank?
      row_range.reject { |row| cell(row: row, column: subject_column_coord).blank? }
    end
  first_rows.each_with_index.map do |first_row, index|
    following = first_rows[index + 1]
    Range.new(first_row, following ? following - 1 : row_range.end)
  end
end
112
+
113
+ def roo_select
114
+ spreadsheet.roo.default_sheet = worksheet.source_name
115
+ end
116
+
117
# Runs the given block with this sheet's worksheet selected as the default
# sheet of the underlying document, then restores the previously selected
# sheet, even when the block raises.
#
# Without a block the worksheet is selected and left selected, and nil is
# returned.
#
# Returns the block's result.
def roo
  workbook = spreadsheet.roo
  previous_sheet = workbook.default_sheet
  roo_select
  return nil unless block_given?
  begin
    yield
  ensure
    # Restore on every exit path so a failing block cannot leave another
    # sheet's reads pointing at this worksheet.
    workbook.default_sheet = previous_sheet
  end
end
125
+
126
+ end
127
+ end
128
+ end
@@ -0,0 +1,34 @@
1
module Spread2RDF
  class Spreadsheet
    class Sheet
      # Evaluates a sheet definition block; the methods below are the
      # vocabulary available inside that block.
      class DSL
        # sheet - the sheet being defined. The block, when given, is run
        #         with this DSL object as self.
        def initialize(sheet, &block)
          @sheet = sheet
          instance_exec(&block) if block
        end

        # Defines a column (created on first mention) and applies the
        # options plus its name as attributes. Returns the column.
        def column(name, options = {}, &block)
          key = name.to_sym
          registry = @sheet.column
          definition = (registry[key] ||= Column.new(@sheet, &block))
          definition.update_attributes(options.merge(name: key))
          definition # TODO: chaining logic ...?
        end

        # Defines a nested sheet (created on first mention), applies the
        # options plus its name, and evaluates the block in a DSL bound to
        # it. Returns that nested DSL.
        def sub_sheet(name, options = {}, &block)
          key = name.to_sym
          registry = @sheet.column
          nested = (registry[key] ||= SubSheet.new(@sheet))
          nested.update_attributes(options.merge(name: key))
          Sheet::DSL.new(nested, &block)
        end
        alias column_block sub_sheet

        # Reads a cell from the sheet; when a block is given the content is
        # passed through it and the block's result is returned instead.
        def cell(coord, options = {}, &block)
          raw = @sheet.cell(coord)
          block ? block.call(raw) : raw
        end

      end
    end
  end
end
@@ -0,0 +1,90 @@
1
+ module Spread2RDF
2
+ class Spreadsheet
3
+ class Sheet
4
+ class MappingContext < Spreadsheet::MappingContext
5
+
6
+ self.attributes = {
7
+ row_range: nil
8
+ }
9
+
10
+ alias sheet element
11
+
12
+ def initialize(sheet, parent_context = nil, attr = {})
13
+ super
14
+ @objects = ( sheet.columns - [ sheet.subject_column ] ).map do |column|
15
+ column.map(row_range, self).compact.presence
16
+ end.compact
17
+ return if @objects.empty?
18
+ subject_description
19
+ statements_to_object
20
+ worksheet.graph << self.graph
21
+ end
22
+
23
+ ########################################################################
24
+ # subject mapping
25
+
26
+ # TODO: every new context instance (for the same cell) returns a different bnode, it must be stored ...
27
+ def subject
28
+ @subject ||= case subject_mapping_mode
29
+ when :bnode then RDF::Node.new
30
+ when :from_column then subject_resource_from_column
31
+ else raise 'unknown subject mapping type'
32
+ end
33
+ end
34
+ alias subject_resource subject
35
+
36
# RDF type to assert for the subject.
#
# Uses subject[:type] when given; otherwise RDF::RDFS.Class when
# subject[:sub_class_of] is given; otherwise nil. A subject specification
# that is not a Hash (nil, or a bare Symbol) carries neither key, so nil
# is returned without sending #fetch to it.
def subject_resource_type
  spec = sheet.subject
  return nil unless spec.is_a?(Hash)
  explicit_type = spec.fetch(:type, nil)
  return explicit_type if explicit_type
  spec.fetch(:sub_class_of, nil) ? RDF::RDFS.Class : nil
end
41
+
42
+ def subject_namespace
43
+ subject_namespace_name =
44
+ sheet.subject.try(:fetch, :uri, nil).try(:fetch, :namespace, nil)
45
+ Namespace.resolve_to_namespace(subject_namespace_name)
46
+ end
47
+
48
+ private
49
+
50
# Decides how the subject of a resource is produced.
#
# Returns :bnode when the subject specification is :bnode, either given
# directly or as the :uri entry of a Hash specification; :from_column in
# every other case. Only Hash specifications are sent #fetch, so a bare
# Symbol works regardless of how Object#try handles missing methods.
def subject_mapping_mode
  spec = sheet.subject
  spec = spec.fetch(:uri, nil) || spec if spec.is_a?(Hash)
  spec == :bnode ? :bnode : :from_column
end
58
+
59
+ def subject_name_suffix
60
+ cells = row_range.map do |row|
61
+ cell_value(row: row, column: sheet.subject_column.coord).presence
62
+ end.compact
63
+ raise "no subject found for #{sheet} in #{row_range}" if cells.empty?
64
+ raise "multiple subjects found for #{sheet} in #{row_range}: #{cells.inspect}" if cells.count > 1
65
+ cells.first
66
+ end
67
+
68
+ def subject_resource_from_column
69
+ namespace = subject_namespace
70
+ subject_suffix = Helper.resource_name(subject_name_suffix)
71
+ #puts "subject resource for #{sheet} in #{range}: " + RDF::URI.new("#{namespace}#{subject_suffix}" )
72
+ RDF::URI.new("#{namespace}#{subject_suffix}")
73
+ end
74
+
75
+ def subject_description
76
+ type = subject_resource_type
77
+ statement(subject, RDF.type, type) unless type.nil?
78
+ if type == RDF::RDFS.Class &&
79
+ super_class = sheet.subject.try(:fetch, :sub_class_of, nil)
80
+ statement(subject, RDF::RDFS.subClassOf, super_class)
81
+ end
82
+ end
83
+
84
+ def statements_to_object
85
+ end
86
+
87
+ end
88
+ end
89
+ end
90
+ end
@@ -0,0 +1,14 @@
1
+ module Spread2RDF
2
+ class Spreadsheet
3
+ class SubSheet < Sheet
4
+
5
+ self.attributes = {
6
+ predicate: nil,
7
+ statement: nil
8
+ }
9
+
10
+ alias coord column_range
11
+
12
+ end
13
+ end
14
+ end
@@ -0,0 +1,55 @@
1
+ module Spread2RDF
2
+ class Spreadsheet
3
+ class SubSheet
4
+ class MappingContext < Sheet::MappingContext
5
+
6
+ self.attributes = {
7
+ }
8
+
9
+ alias sub_sheet element
10
+ alias column_block element
11
+
12
+ private
13
+
14
+ ##########################################################################
15
+ # Statement mapping
16
+ # TODO: Duplication Column::MappingContext ! Share it ?
17
+
18
# Chooses how the link from the parent subject to this sub sheet's
# subject is emitted.
#
# Returns :ignore when the statement option is :none or no predicate is
# set, :restriction when restriction_mode yields a value, and :default
# otherwise. (The original listed the :none check twice; the second copy
# could never match and is dropped.)
def statement_mapping_mode
  return :ignore if column_block.statement == :none
  return :ignore if column_block.predicate.nil?
  restriction_mode ? :restriction : :default
end
27
+
28
# Restriction property configured through the statement option.
#
# Returns RDF::OWL.hasValue for the plain :restriction symbol, the
# :restriction entry for a Hash option, and nil for anything else.
def restriction_mode
  spec = column_block.statement
  return RDF::OWL.hasValue if :restriction == spec
  spec.is_a?(Hash) ? spec[:restriction] : nil
end
36
+
37
# Emits the statement(s) that connect the parent context's subject with
# this context's subject, depending on statement_mapping_mode:
#
# :default     - one statement using the configured predicate.
# :restriction - the parent subject becomes a subclass of a fresh
#                restriction node on the predicate, whose restriction
#                property (restriction_mode) points at this subject.
#
# Any other mode emits nothing and returns nil.
def statements_to_object
  mode = statement_mapping_mode
  if mode == :default
    statement(parent_context.subject, column_block.predicate, subject)
  elsif mode == :restriction
    restriction = RDF::Node.new
    statements(
      [parent_context.subject, RDF::RDFS.subClassOf, restriction],
      [restriction, RDF.type, RDF::OWL.Restriction],
      [restriction, RDF::OWL.onProperty, column_block.predicate],
      [restriction, restriction_mode, subject]
    )
  end
end
51
+
52
+ end
53
+ end
54
+ end
55
+ end
@@ -0,0 +1,49 @@
1
+ module Spread2RDF
2
+ class Spreadsheet
3
+ class Worksheet < Sheet
4
+
5
+ self.attributes = {
6
+ }
7
+
8
+ attr_reader :cell_mapping
9
+ attr_reader :graph
10
+
11
+ def initialize(parent, options={}, &block)
12
+ super
13
+ @cell_mapping = {}
14
+ @graph = RDF::Repository.new
15
+ end
16
+
17
+ def init
18
+ index_columns!
19
+ end
20
+
21
+ =begin
22
+ def cell_mapping_by_name(name)
23
+
24
+ end
25
+
26
+ def cell_mapping_by_coord(coord)
27
+
28
+ end
29
+ =end
30
+
31
# Assigns consecutive column letters to all leaf columns, beginning at the
# column of start_coord, by setting each column's @coord instance variable.
def index_columns!
  first_index = start_coord.column_as_number
  offset = 0
  each_column do |leaf|
    letter = Roo::Base.number_to_letter(first_index + offset)
    leaf.instance_variable_set(:@coord, letter)
    offset += 1
  end
end
39
+
40
# Rows of this worksheet that hold data: from the row of the start
# coordinate to the last row reported by the document.
#
# Returns nil when the document reports no last row (nil, e.g. for a
# sheet without content) or when the last row lies before the start row.
def row_range
  first_row, last_row = roo { [Coord[start].row, spreadsheet.roo.last_row] }
  # Guard before building the Range: a nil boundary would otherwise raise
  # instead of signalling "nothing to map".
  return nil if last_row.nil? || first_row > last_row
  first_row..last_row
end
44
+
45
+ private
46
+
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,92 @@
1
+ require 'spread2rdf/spreadsheet/coord'
2
+ require 'spread2rdf/spreadsheet/element'
3
+ require 'spread2rdf/spreadsheet/mapping_context'
4
+ require 'spread2rdf/spreadsheet/sheet'
5
+ require 'spread2rdf/spreadsheet/sub_sheet'
6
+ require 'spread2rdf/spreadsheet/worksheet'
7
+ require 'spread2rdf/spreadsheet/sheet_mapping_context'
8
+ require 'spread2rdf/spreadsheet/sub_sheet_mapping_context'
9
+ require 'spread2rdf/spreadsheet/column'
10
+ require 'spread2rdf/spreadsheet/column_mapping_context'
11
+
12
+ require 'spread2rdf/spreadsheet/sheet_dsl'
13
+ require 'spread2rdf/spreadsheet/mapping_dsl'
14
+
15
module Spread2RDF
  # A named spreadsheet mapping definition. Instances are created through
  # Spreadsheet.definition; #read loads a document, evaluates the mapping
  # block and maps all worksheets, after which #graph holds the result.
  class Spreadsheet

    attr_reader :name
    attr_reader :worksheet
    attr_reader :worksheet_mapping
    attr_reader :input_file
    attr_reader :roo

    # name  - name of the definition.
    # block - mapping specification, evaluated by #read in a MappingDSL.
    def initialize(name, &block)
      @name = name
      @worksheet = {}
      @worksheet_mapping = {}
      # Initialised so that #templates returns an empty list instead of
      # failing on nil; nothing in this file fills it yet.
      @template = {}
      @schema_spec = block
    end

    # Lets a Spreadsheet be used wherever an element's #spreadsheet is asked.
    def spreadsheet
      self
    end

    # All declared worksheets, in declaration order.
    def worksheets
      @worksheet.values
    end

    # All registered templates.
    def templates
      @template.values
    end

    # Reads the given file: opens it, evaluates the mapping block and maps
    # every worksheet. Returns self.
    def read(filename)
      @input_file = filename
      load_roo
      load_schema
      load_resources
      self
    end

    # Builds a new repository containing the graphs of all worksheets.
    def graph
      merged = RDF::Repository.new
      worksheets.each { |sheet| merged << sheet.graph }
      merged
    end
    alias to_rdf graph

    private

    # TODO: make this work with other spreadsheets than Excel
    def load_roo
      options = {}
      if File.extname(@input_file).downcase == '.xlsm'
        options[:packed] = :zip
        options[:file_warning] = :ignore
      end
      @roo = Roo::Excelx.new(@input_file, options)
    end

    # Evaluates the mapping block, then lets every worksheet initialise.
    def load_schema
      Spreadsheet::MappingDSL.new(self).instance_exec(&@schema_spec)
      worksheets.each(&:init)
    end

    # Maps every worksheet that declares at least one column.
    def load_resources
      worksheets.each do |sheet|
        next if sheet.column.empty?
        sheet.map
      end
    end

    class << self
      # Creates a Spreadsheet from the arguments and registers it.
      # Returns the list of all registered definitions.
      def definition(*args, &block)
        definitions << new(*args, &block)
      end
      private :new

      # All definitions registered so far.
      def definitions
        @@definitions ||= []
      end
    end

  end
end
@@ -0,0 +1,19 @@
1
module Spread2RDF
  # Version information, read from the VERSION file three directories above
  # this source file. The file content is split on '.', so every component
  # is a String (EXTRA is nil when there is no fourth component).
  module VERSION
    FILE = File.expand_path('../../../VERSION', __FILE__)
    MAJOR, MINOR, TINY, EXTRA = File.read(FILE).chomp.split('.')
    STRING = [MAJOR, MINOR, TINY, EXTRA].compact.join('.').freeze

    ##
    # @return [String] the full version string
    def self.to_s
      STRING
    end

    ##
    # @return [String] the full version string
    def self.to_str
      STRING
    end

    ##
    # @return [Array(String, String, String)] major, minor and tiny component
    def self.to_a
      [MAJOR, MINOR, TINY]
    end
  end
end
data/lib/spread2rdf.rb ADDED
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
+ require 'rubygems/package'
3
+
4
+ require 'optparse'
5
+
6
+ require 'active_support/core_ext'
7
+ require 'awesome_print'
8
+
9
+ require 'roo'
10
+ require 'spread2rdf/extensions/roo_xlsm_fix'
11
+
12
+ require 'linkeddata'
13
+
14
+ require 'spread2rdf/attributes'
15
+ require 'spread2rdf/version'
16
+ require 'spread2rdf/helper'
17
+ require 'spread2rdf/namespace'
18
+ require 'spread2rdf/spreadsheet'
19
+ require 'spread2rdf/cli'
20
+
21
+ module Spread2RDF
22
+ end
23
+
@@ -0,0 +1,35 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/spread2rdf/version', __FILE__)
3
+
4
Gem::Specification.new do |gem|
  gem.name          = 'spread2rdf'
  gem.authors       = ['Marcel Otto']
  gem.email         = %w[marcelotto.de@gmail.com]
  gem.summary       = %q{a DSL-based converter for spreadsheets to RDF}
  gem.description   = %q{Spread2RDF is a converter for complex spreadsheets to RDF and a DSL for specifying the mapping rules for this conversion.}
  gem.homepage      = 'http://github.com/marcelotto/spread2rdf'
  gem.license       = 'MIT'

  # Split on newlines only, so tracked files whose names contain spaces
  # stay intact.
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']
  gem.version       = Spread2RDF::VERSION.to_s
  gem.bindir        = 'bin'
  # Single, explicit executable list (an earlier assignment derived from
  # bin/ was always overwritten by this one).
  gem.executables   = ['spread2rdf']

  gem.required_ruby_version = '>= 1.9.3'

  gem.add_dependency('activesupport', '~> 3.2.3')
  gem.add_dependency('awesome_print')

  gem.add_dependency('roo', '~> 1.12.2')
  gem.add_dependency('rubyzip', '~> 1.0.0') # for the roo-xlsm-fix

  gem.add_dependency('linkeddata')

  gem.add_development_dependency('rake')
  gem.add_development_dependency('pry', '~> 0.9.12.2')
  gem.add_development_dependency('pry-nav', '~> 0.2.3')

end