spread2rdf 0.0.1pre.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,67 @@
1
module Spread2RDF
  class Spreadsheet
    # Base class of the mapping contexts. A context pairs a schema element
    # with the context it was created from and collects the RDF statements
    # generated for it in a repository of its own (see #graph).
    class MappingContext
      include Attributes

      self.attributes = {
      }

      attr_reader :element
      attr_reader :parent_context

      attr_reader :graph

      # @param element [Object] the schema element this context maps; it is
      #   asked for its +sheet+, +worksheet+ and +spreadsheet+
      # @param parent_context [MappingContext, nil] the enclosing context
      # @param attr [Hash] attribute values handed to +init_attributes+
      #   (presumably provided by the Attributes mixin - confirm there)
      def initialize(element, parent_context, attr = {})
        @element = element
        @parent_context = parent_context
        @graph = RDF::Repository.new
        init_attributes(attr)
      end

      # @return the sheet of the mapped element
      def sheet
        @element.sheet
      end

      # @return the worksheet of the mapped element
      def worksheet
        @element.worksheet
      end

      # @return the spreadsheet of the mapped element
      def spreadsheet
        @element.spreadsheet
      end

      # Raw content of the cell at +coord+, as returned by the worksheet.
      def cell_value(coord)
        worksheet.cell(coord)
      end

      # Looks up the entry stored for +coord+ in the worksheet's
      # +cell_mapping+ hash.
      #
      # @param coord [Coord, Object] a Coord, or anything Coord[] accepts
      def cell(coord)
        coord = Coord[coord] unless coord.is_a?(Coord)
        worksheet.cell_mapping[coord.to_sym]
      end

      # @return [String] the last two segments of the class name followed by
      #   the mapped element, e.g. "Sheet::MappingContext of ..."
      def to_s
        "#{self.class.name.split('::')[-2..-1].join('::')} of #{element}"
      end

      ##########################################################################
      # statement generators

      private

      # Adds one triple to this context's graph.
      #
      # Accepts either three separate terms or a single three-element Array.
      #
      # @raise [RuntimeError] unless exactly three terms are given, or if
      #   any of them is nil
      def add_statement(*args)
        args = args.first if args.count == 1 && args.first.is_a?(Array)
        # any? (not one?): a triple with two or three nil terms is just as
        # invalid as a triple with a single nil term.
        if args.count != 3 || args.any?(&:nil?)
          raise "internal error: trying to add a bad triple with nil value: #{args}"
        end
        @graph << RDF::Statement.new(*args)
      end
      alias statement add_statement

      # Adds several triples; each argument is passed on to #statement.
      # A single Array argument is unwrapped and treated as the list of
      # triples.
      def add_statements(*args)
        args = args.first if args.count == 1 && args.first.is_a?(Array)
        args.each { |triple| statement(triple) }
      end
      alias statements add_statements

    end
  end
end
@@ -0,0 +1,23 @@
1
module Spread2RDF
  class Spreadsheet
    # Top-level DSL object: the schema block of a spreadsheet definition is
    # evaluated against an instance of this class.
    class MappingDSL

      # @param schema [Object] the object the worksheets are registered on;
      #   it must expose a +worksheet+ hash
      def initialize(schema)
        @schema = schema
      end

      # Registers every name/namespace pair via +Namespace[name] = namespace+.
      #
      # @param namespaces [Hash] namespace name => namespace
      def namespaces(namespaces)
        namespaces.each { |name, namespace| Namespace[name] = namespace }
      end

      # Declares (or re-opens) a worksheet and evaluates +block+ in a
      # Sheet::DSL bound to it.
      #
      # The first argument is the name of the sheet in the source file; the
      # worksheet is registered under +options[:name]+ when given, otherwise
      # under the source name itself (converted to a Symbol).
      #
      # @param name [String, Symbol] name of the sheet in the source file
      # @param options [Hash] worksheet attributes; it is not modified
      # @return [Sheet::DSL]
      def worksheet(name, options = {}, &block)
        source_name = name
        # Work on a copy so the caller's hash keeps its :name entry and does
        # not silently gain a :source_name entry.
        attributes = options.dup
        name = (attributes.delete(:name) || source_name).to_sym
        worksheet = @schema.worksheet[name] ||= Worksheet.new(@schema)
        worksheet.update_attributes attributes.merge(name: name, source_name: source_name)
        Sheet::DSL.new(worksheet, &block)
      end

    end
  end
end
@@ -0,0 +1,128 @@
1
module Spread2RDF
  class Spreadsheet
    # A named collection of columns (and nested sub sheets) whose rows are
    # mapped to resources. Worksheet and SubSheet derive from this class.
    class Sheet < Element

      self.attributes = {
        subject: nil,
        start: :A2,
        row_count_per_resource: nil
      }

      def initialize(parent, options = {}, &block)
        super(parent, options, &block)
        @column = {}
      end

      # @return [Worksheet, nil] this sheet if it is a Worksheet, otherwise
      #   the nearest Worksheet found by walking up the parent chain
      def worksheet
        return self if is_a?(Worksheet)
        ancestor = parent
        ancestor = ancestor.parent until ancestor.nil? || ancestor.is_a?(Worksheet)
        ancestor
      end

      # Without an argument returns the hash of all columns of this sheet.
      # With a name, looks the column up here and, if it is not found, in
      # the enclosing sheets.
      #
      # @param name [#to_sym, nil]
      # @return [Hash, Object, nil, false] falsy when nothing is found
      def column(name = nil)
        return @column if name.nil?
        name = name.to_sym
        @column[name] || (parent.is_a?(Sheet) && parent.column(name)) || nil
      end

      # @return [Array] the columns and sub sheets in definition order
      def columns
        @column.values
      end

      # Reads the raw cell content from the underlying roo spreadsheet.
      #
      # @param coord anything Coord[] accepts
      def cell(coord)
        coord = Coord[coord]
        spreadsheet.roo.cell(coord.column, coord.row, worksheet.source_name)
      end

      # @return [Coord] the +start+ attribute as a Coord
      def start_coord
        Coord[start]
      end

      # @raise [NotImplementedError] always; subclasses provide the rows
      def row_range
        raise NotImplementedError, 'subclasses of Sheet must implement this method'
      end

      # @return [Range] from the first coord of the first column to the last
      #   coord of the last column (a column's coord may itself be a Range)
      def column_range
        first = columns.first.coord
        first = first.begin if first.is_a?(Range)
        last = columns.last.coord
        last = last.end if last.is_a?(Range)
        first..last
      end

      # Yields every plain column, descending into sub sheets.
      def each_column(&block)
        columns.each do |column|
          if column.is_a?(SubSheet)
            column.each_column(&block)
          else
            yield column
          end
        end
      end

      # @return the column named by +subject[:column]+, or the column named
      #   +:uri+ when the subject specification names none
      def subject_column
        #return nil unless subject_mapping_type == :from_column
        spec = subject
        # The subject may be a plain Symbol (e.g. :bnode) instead of a Hash;
        # only ask objects that actually support #fetch.
        column_name = (spec.fetch(:column, nil) if spec.respond_to?(:fetch)) || :uri
        @column[column_name]
      end

      # @return [Integer, nil] the configured +row_count_per_resource+, or 1
      #   when the sheet has no subject column, or nil otherwise
      def fix_row_count_per_resource
        row_count_per_resource || (!subject_column && 1) || nil
      end

      # Adds one triple to the worksheet graph.
      #
      # @raise [RuntimeError] unless exactly three non-nil terms are given
      def add_triple(*args)
        # any? (not one?): two or three nil terms are as invalid as one.
        if args.count != 3 || args.any?(&:nil?)
          raise "internal error: trying to add a bad triple with nil value in #{self}: #{args}"
        end
        worksheet.graph << RDF::Statement.new(*args)
      end

      # Creates one mapping context per resource found in +row_range+.
      # For sheets that are not sub sheets the context is also stored in
      # +spreadsheet.worksheet_mapping+ under the worksheet name.
      #
      # @param row_range [Range, nil]
      # @param context the parent mapping context, if any
      # @return [Array] the subject of each created context
      def map(row_range = self.row_range, context = nil)
        #puts "processing #{self} ..."
        return [] if row_range.nil?
        rows_per_resource(row_range).map do |resource_range|
          mapping = create_context(context, row_range: resource_range)
          spreadsheet.worksheet_mapping[worksheet.name] = mapping unless is_a?(SubSheet)
          mapping.subject
        end
      end

      private

      # Splits +row_range+ into one Range per resource.
      #
      # With a fixed row count every n-th row starts a resource; otherwise a
      # resource starts at each row whose subject cell is not blank. Each
      # resource extends up to the row before the next start (the last one
      # up to the end of +row_range+).
      #
      # @raise [RuntimeError] if neither a fixed row count nor a subject
      #   column coordinate is available
      def rows_per_resource(row_range)
        return [] if row_range.nil?
        fix_row_count = fix_row_count_per_resource
        first_rows =
          if fix_row_count
            row_range.select { |row| (row - row_range.begin) % fix_row_count == 0 }
          else
            subject_column_coord = subject_column.try(:coord)
            raise "no subject column for #{self}" if subject_column_coord.blank?
            row_range.reject { |row| cell(row: row, column: subject_column_coord).blank? }
          end
        first_rows.each_with_index.map do |first_row, i|
          last_row = (i + 1 == first_rows.count ? row_range.end : first_rows[i + 1] - 1)
          Range.new(first_row, last_row)
        end
      end

      # Makes this sheet's source sheet the default sheet of roo.
      def roo_select
        spreadsheet.roo.default_sheet = worksheet.source_name
      end

      # Selects this sheet in roo and, when a block is given, yields and
      # afterwards restores the previously selected sheet - also when the
      # block raises. Without a block the sheet stays selected.
      #
      # @return the block's result, or nil without a block
      def roo
        last_default_sheet = spreadsheet.roo.default_sheet
        roo_select
        return nil unless block_given?
        begin
          yield
        ensure
          spreadsheet.roo.default_sheet = last_default_sheet
        end
      end

    end
  end
end
@@ -0,0 +1,34 @@
1
module Spread2RDF
  class Spreadsheet
    class Sheet
      # DSL object against which the block of a worksheet or sub sheet
      # declaration is evaluated. It registers columns and nested sub sheets
      # on the wrapped sheet.
      class DSL
        # @param sheet the sheet being described; it must expose its column
        #   hash via +column+ and cell contents via +cell+
        # @yield evaluated with this DSL instance as +self+
        def initialize(sheet, &block)
          @sheet = sheet
          instance_exec(&block) if block
        end

        # Declares a column (creating it on first use) and updates its
        # attributes with +options+ plus its name.
        #
        # @param name [#to_sym]
        # @param options [Hash]
        # @return the column object
        def column(name, options = {}, &block)
          key = name.to_sym
          registry = @sheet.column
          definition = registry[key] || (registry[key] = Column.new(@sheet, &block))
          definition.update_attributes(options.merge(name: key))
          definition # TODO: chaining logic ...?
        end

        # Declares a nested sub sheet (creating it on first use), updates
        # its attributes and evaluates +block+ in a DSL bound to it.
        #
        # @param name [#to_sym]
        # @param options [Hash]
        # @return [Sheet::DSL] the DSL of the sub sheet
        def sub_sheet(name, options = {}, &block)
          key = name.to_sym
          registry = @sheet.column
          nested = registry[key] || (registry[key] = SubSheet.new(@sheet))
          nested.update_attributes(options.merge(name: key))
          Sheet::DSL.new(nested, &block)
        end
        alias column_block sub_sheet

        # Returns the content of the cell at +coord+, passed through
        # +block+ when one is given. +options+ is currently not used.
        def cell(coord, options = {}, &block)
          content = @sheet.cell(coord)
          block ? block.call(content) : content
        end

      end
    end
  end
end
@@ -0,0 +1,90 @@
1
module Spread2RDF
  class Spreadsheet
    class Sheet
      # Mapping context of a sheet for one resource, i.e. for the rows in
      # +row_range+. On construction it maps all non-subject columns and,
      # if that produced any objects, describes the subject and copies the
      # collected statements into the worksheet graph.
      class MappingContext < Spreadsheet::MappingContext

        self.attributes = {
          row_range: nil
        }

        alias sheet element

        # @param sheet the sheet being mapped
        # @param parent_context [MappingContext, nil]
        # @param attr [Hash] attribute values, e.g. +row_range+
        def initialize(sheet, parent_context = nil, attr = {})
          super
          @objects = (sheet.columns - [sheet.subject_column]).map do |column|
            column.map(row_range, self).compact.presence
          end.compact
          return if @objects.empty?
          subject_description
          statements_to_object
          worksheet.graph << graph
        end

        ########################################################################
        # subject mapping

        # TODO: every new context instance (for the same cell) returns a different bnode, it must be stored ...
        # @return the subject resource of this context (memoized): a fresh
        #   blank node, or a URI built from the subject column
        def subject
          @subject ||= case subject_mapping_mode
                       when :bnode       then RDF::Node.new
                       when :from_column then subject_resource_from_column
                       else raise 'unknown subject mapping type'
                       end
        end
        alias subject_resource subject

        # @return the configured +:type+ of the subject, RDFS Class when only
        #   +:sub_class_of+ is configured, or nil
        def subject_resource_type
          subject_option(:type) ||
            (subject_option(:sub_class_of) && RDF::RDFS.Class) ||
            nil
        end

        # @return the namespace resolved from +subject[:uri][:namespace]+
        def subject_namespace
          subject_namespace_name = subject_option(:namespace, subject_option(:uri))
          Namespace.resolve_to_namespace(subject_namespace_name)
        end

        private

        # Reads +key+ from a subject specification without assuming that it
        # is a Hash: the specification (and its +:uri+ entry) may also be nil
        # or a plain Symbol such as :bnode, in which case nil is returned.
        #
        # @param key [Symbol]
        # @param container the object to read from; defaults to the sheet's
        #   subject specification
        def subject_option(key, container = sheet.subject)
          container.fetch(key, nil) if container.respond_to?(:fetch)
        end

        # @return [Symbol] :bnode when the subject specification (or its
        #   +:uri+ entry) is :bnode, otherwise :from_column
        def subject_mapping_mode
          if (subject_option(:uri) || sheet.subject) == :bnode
            :bnode
          else
            :from_column
          end
        end

        # @return the single non-blank value of the subject column within
        #   +row_range+
        # @raise [RuntimeError] if there is no such value or more than one
        def subject_name_suffix
          cells = row_range.map do |row|
            cell_value(row: row, column: sheet.subject_column.coord).presence
          end.compact
          raise "no subject found for #{sheet} in #{row_range}" if cells.empty?
          raise "multiple subjects found for #{sheet} in #{row_range}: #{cells.inspect}" if cells.count > 1
          cells.first
        end

        # @return [RDF::URI] the subject namespace followed by the resource
        #   name derived from the subject cell
        def subject_resource_from_column
          namespace = subject_namespace
          subject_suffix = Helper.resource_name(subject_name_suffix)
          #puts "subject resource for #{sheet} in #{range}: " + RDF::URI.new("#{namespace}#{subject_suffix}" )
          RDF::URI.new("#{namespace}#{subject_suffix}")
        end

        # Emits the rdf:type statement of the subject and, for classes with
        # a configured +:sub_class_of+, the rdfs:subClassOf statement.
        def subject_description
          type = subject_resource_type
          statement(subject, RDF.type, type) unless type.nil?
          super_class = subject_option(:sub_class_of)
          if type == RDF::RDFS.Class && super_class
            statement(subject, RDF::RDFS.subClassOf, super_class)
          end
        end

        # Intentionally empty here; SubSheet::MappingContext overrides it to
        # link the parent subject to this context's subject.
        def statements_to_object
        end

      end
    end
  end
end
@@ -0,0 +1,14 @@
1
module Spread2RDF
  class Spreadsheet
    # A sheet nested inside another sheet. In addition to the Sheet
    # attributes it carries a +predicate+ and a +statement+ setting, and it
    # answers +coord+ with the range spanned by its own columns.
    class SubSheet < Sheet
      self.attributes = { predicate: nil, statement: nil }

      alias_method :coord, :column_range
    end
  end
end
@@ -0,0 +1,55 @@
1
module Spread2RDF
  class Spreadsheet
    class SubSheet
      # Mapping context of a sub sheet: besides describing its own subject
      # it links the subject of the parent context to that subject, either
      # with a plain statement or through an OWL restriction.
      class MappingContext < Sheet::MappingContext

        self.attributes = {
        }

        alias sub_sheet element
        alias column_block element

        private

        ##########################################################################
        # Statement mapping
        # TODO: Duplication Column::MappingContext ! Share it ?

        # @return [Symbol] :ignore when statements are switched off or no
        #   predicate is configured, :restriction when a restriction property
        #   is configured, otherwise :default
        def statement_mapping_mode
          case
          when column_block.statement == :none then :ignore
          when column_block.predicate.nil?     then :ignore
          when restriction_mode                then :restriction
          else                                      :default
          end
        end

        # @return the restriction property: owl:hasValue for +:restriction+,
        #   the +:restriction+ entry when +statement+ is a Hash, otherwise nil
        def restriction_mode
          setting = column_block.statement
          case setting
          when :restriction then RDF::OWL.hasValue
          when Hash         then setting[:restriction]
          else                   nil
          end
        end

        # Links the parent subject to this context's subject according to
        # #statement_mapping_mode; nothing is emitted in :ignore mode.
        def statements_to_object
          case statement_mapping_mode
          when :default
            statement(parent_context.subject, column_block.predicate, subject)
          when :restriction
            restriction_class = RDF::Node.new
            statements(
              [ parent_context.subject, RDF::RDFS.subClassOf, restriction_class ],
              [ restriction_class, RDF.type, RDF::OWL.Restriction ],
              [ restriction_class, RDF::OWL.onProperty, column_block.predicate ],
              [ restriction_class, restriction_mode, subject ]
            )
          end
        end

      end
    end
  end
end
@@ -0,0 +1,49 @@
1
module Spread2RDF
  class Spreadsheet
    # A top-level sheet. It owns the RDF repository that collects the
    # statements of all its mapping contexts and a +cell_mapping+ hash.
    class Worksheet < Sheet

      self.attributes = {
      }

      attr_reader :cell_mapping
      attr_reader :graph

      def initialize(parent, options = {}, &block)
        super
        @cell_mapping = {}
        @graph = RDF::Repository.new
      end

      # Prepares the worksheet for mapping by assigning column coordinates.
      def init
        index_columns!
      end

      # Not implemented yet (kept from the original as placeholders):
      #
      #   def cell_mapping_by_name(name)
      #   end
      #
      #   def cell_mapping_by_coord(coord)
      #   end

      # Assigns consecutive column letters to all plain columns (sub sheets
      # are flattened by each_column), starting at the column of the start
      # coordinate. The letter is written to each column's @coord variable.
      def index_columns!
        next_index = start_coord.column_as_number
        each_column do |column|
          letter = Roo::Base.number_to_letter(next_index)
          column.instance_variable_set(:@coord, letter)
          next_index += 1
        end
      end

      # @return [Range, nil] the rows from the start row to the last row
      #   reported by roo for this sheet, or nil when that range is empty
      def row_range
        rows = roo { (Coord[start].row..spreadsheet.roo.last_row) }
        rows if rows.begin <= rows.end
      end

    end
  end
end
@@ -0,0 +1,92 @@
1
+ require 'spread2rdf/spreadsheet/coord'
2
+ require 'spread2rdf/spreadsheet/element'
3
+ require 'spread2rdf/spreadsheet/mapping_context'
4
+ require 'spread2rdf/spreadsheet/sheet'
5
+ require 'spread2rdf/spreadsheet/sub_sheet'
6
+ require 'spread2rdf/spreadsheet/worksheet'
7
+ require 'spread2rdf/spreadsheet/sheet_mapping_context'
8
+ require 'spread2rdf/spreadsheet/sub_sheet_mapping_context'
9
+ require 'spread2rdf/spreadsheet/column'
10
+ require 'spread2rdf/spreadsheet/column_mapping_context'
11
+
12
+ require 'spread2rdf/spreadsheet/sheet_dsl'
13
+ require 'spread2rdf/spreadsheet/mapping_dsl'
14
+
15
module Spread2RDF
  # A spreadsheet definition: a named schema block plus, after #read, the
  # loaded input file and the worksheets mapped from it. Instances are
  # created through Spreadsheet.definition; +new+ is private.
  class Spreadsheet

    attr_reader :name
    attr_reader :worksheet
    attr_reader :worksheet_mapping
    attr_reader :input_file
    attr_reader :roo

    # @param name the name of the definition
    # @yield the schema specification, evaluated later in a MappingDSL
    def initialize(name, &block)
      @name = name
      @worksheet = {}
      @worksheet_mapping = {}
      # Must exist before #templates is called; it was never initialised, so
      # #templates raised NoMethodError on nil.
      @template = {}
      @schema_spec = block
    end

    # @return [Spreadsheet] self, so the root answers +spreadsheet+ too
    def spreadsheet
      self
    end

    # @return [Array] all registered worksheets
    def worksheets
      @worksheet.values
    end

    # @return [Array] all registered templates (empty unless some are added)
    def templates
      @template.values
    end

    # Opens +filename+, evaluates the schema and maps all worksheets.
    #
    # @param filename [String]
    # @return [Spreadsheet] self
    def read(filename)
      @input_file = filename
      load_roo
      load_schema
      load_resources
      self
    end

    # @return [RDF::Repository] a new repository containing the statements
    #   of all worksheet graphs
    def graph
      graph = RDF::Repository.new
      worksheets.each { |worksheet| graph << worksheet.graph }
      graph
    end
    alias to_rdf graph

    private

    # TODO: make this work with other spreadsheets than Excel
    # Opens the input file with Roo::Excelx; .xlsm files are opened with the
    # +packed: :zip+ and +file_warning: :ignore+ options.
    def load_roo
      options = {}
      if File.extname(@input_file).downcase == '.xlsm'
        options[:packed] = :zip
        options[:file_warning] = :ignore
      end
      @roo = Roo::Excelx.new(@input_file, options)
    end

    # Evaluates the schema block in a MappingDSL and initialises every
    # worksheet it declared.
    def load_schema
      Spreadsheet::MappingDSL.new(self).instance_exec(&@schema_spec)
      worksheets.each { |worksheet| worksheet.init }
    end

    # Maps every worksheet that declares at least one column.
    def load_resources
      worksheets.each do |worksheet|
        next if worksheet.column.empty?
        worksheet.map
      end
    end

    class << self
      # Creates a spreadsheet definition and registers it.
      #
      # @return [Array<Spreadsheet>] the list of all definitions
      def definition(*args, &block)
        definitions << new(*args, &block)
      end
      private :new

      # @return [Array<Spreadsheet>] all definitions registered so far
      def definitions
        @@definitions ||= []
      end
    end

  end
end
@@ -0,0 +1,19 @@
1
module Spread2RDF
  # Version information, read once at load time from the VERSION file three
  # directories above this file. The file content is split on dots into up
  # to four components; all components are Strings (missing ones are nil).
  module VERSION
    FILE = File.expand_path('../../../VERSION', __FILE__)
    MAJOR, MINOR, TINY, EXTRA = File.read(FILE).chomp.split('.')
    STRING = [MAJOR, MINOR, TINY, EXTRA].compact.join('.').freeze

    class << self
      ##
      # @return [String] the full version string
      def to_s
        STRING
      end

      ##
      # @return [String] the full version string (implicit conversion)
      def to_str
        STRING
      end

      ##
      # @return [Array(String, String, String)] major, minor and tiny
      #   components exactly as read from the file
      def to_a
        [MAJOR, MINOR, TINY]
      end
    end
  end
end
data/lib/spread2rdf.rb ADDED
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
+ require 'rubygems/package'
3
+
4
+ require 'optparse'
5
+
6
+ require 'active_support/core_ext'
7
+ require 'awesome_print'
8
+
9
+ require 'roo'
10
+ require 'spread2rdf/extensions/roo_xlsm_fix'
11
+
12
+ require 'linkeddata'
13
+
14
+ require 'spread2rdf/attributes'
15
+ require 'spread2rdf/version'
16
+ require 'spread2rdf/helper'
17
+ require 'spread2rdf/namespace'
18
+ require 'spread2rdf/spreadsheet'
19
+ require 'spread2rdf/cli'
20
+
21
+ module Spread2RDF
22
+ end
23
+
@@ -0,0 +1,35 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/spread2rdf/version', __FILE__)
3
+
4
# Gem specification of spread2rdf.
Gem::Specification.new do |gem|
  gem.name          = 'spread2rdf'
  gem.authors       = ['Marcel Otto']
  gem.email         = %w[marcelotto.de@gmail.com]
  gem.summary       = %q{a DSL-based converter for spreadsheets to RDF}
  gem.description   = %q{Spread2RDF is a converter for complex spreadsheets to RDF and a DSL for specifying the mapping rules for this conversion.}
  gem.homepage      = 'http://github.com/marcelotto/spread2rdf'
  gem.license       = 'MIT'

  # Split on newlines only: splitting on $\ (nil by default) splits on any
  # whitespace and would tear apart paths that contain spaces.
  gem.files         = `git ls-files`.split("\n")
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']
  gem.version       = Spread2RDF::VERSION.to_s
  gem.bindir        = 'bin'
  # The executable list is fixed; the earlier assignment derived from
  # gem.files was always overwritten by this one and has been dropped.
  gem.executables   = ['spread2rdf']

  gem.required_ruby_version = '>= 1.9.3'

  gem.add_dependency('activesupport', '~> 3.2.3')
  gem.add_dependency('awesome_print')

  gem.add_dependency('roo', '~> 1.12.2')
  gem.add_dependency('rubyzip', '~> 1.0.0') # for the roo-xlsm-fix

  gem.add_dependency('linkeddata')

  gem.add_development_dependency('rake')
  gem.add_development_dependency('pry', '~> 0.9.12.2')
  gem.add_development_dependency('pry-nav', '~> 0.2.3')

end