spread2rdf 0.0.1pre.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ MzBlOGJjZDMyMjBjMjNmNDA5MTk3MmY1NjMzMmE2OWNiZGE3MTljYw==
5
+ data.tar.gz: !binary |-
6
+ Y2IxMjY2MDBjZDk1OGYxNjE2YjlmM2UyZGZkYTQ2NmZjNDk5MWYyYQ==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ODlkODhkZDQ1NzZmNGVhY2E0MGIwNDgyOWVmZDFkODkyYjJlYmNiZjYyMzlh
10
+ ZmEzNjk4YjJiNjIyZjM1OTM2YTQ1ZGZlZDJjZTFhOTA1Mzc0ZWJlZTY4NjY1
11
+ YmEyNGZmNGM1ZjBkYTMxMTNmZWQ2YTZkNzVkOTViMDA2Y2I0ZTk=
12
+ data.tar.gz: !binary |-
13
+ MDExZWZiZjY4YjBlNDdkYzhkN2ExZjNmNTVhNWVkNGEwMDBmYzg0YTY0ZGM2
14
+ Njk3ZmQ4NmU3MmY3MjAxYzkwZjkzZThjM2VhZWMzYjYyZmJjYjBjMTc0MTdi
15
+ YzUwNzlhNTRhMjY0NDJiNzIxZTdiYTkzNTZlM2U0ZTBjZTNmODY=
data/.gitignore ADDED
@@ -0,0 +1,23 @@
1
+ .DS_Store
2
+ .idea
3
+ .idea45
4
+ *.gem
5
+ *.rbc
6
+ .bundle
7
+ .config
8
+ .yardoc
9
+ Gemfile.lock
10
+ InstalledFiles
11
+ _yardoc
12
+ coverage
13
+ doc/
14
+ lib/bundler/man
15
+ pkg
16
+ rdoc
17
+ spec/reports
18
+ test/tmp
19
+ test/version_tmp
20
+ testdata
21
+ tmp
22
+
23
+ lib/**/-*
data/Gemfile ADDED
@@ -0,0 +1,10 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in spread2rdf.gemspec
4
+ gemspec
5
+
6
+ group :development, :test do
7
+ gem 'rake'
8
+ gem 'pry', '~> 0.9.12.2'
9
+ gem 'pry-nav', '~> 0.2.3'
10
+ end
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Marcel Otto
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,44 @@
1
+ # Spread2RDF
2
+
3
+ Spread2RDF is a converter for complex spreadsheets to RDF and a DSL for
4
+ specifying the mapping rules for this conversion.
5
+
6
+ ## Features
7
+
8
+ * Supports Excel/Excelx, Google spreadsheets, OpenOffice, LibreOffice and CSV
9
+ spreadsheets as input, thanks to [Roo](https://github.com/Empact/roo).
10
+ (Currently, it has only been tested with Excel.
11
+ If you have a problem with another spreadsheet type,
12
+ [raise an issue](https://github.com/marcelotto/spread2rdf/issues).)
13
+ * Supports many RDF serialization formats for the output, thanks to
14
+ [RDF.rb](https://github.com/ruby-rdf/rdf).
15
+
16
+ ## Installation
17
+
18
+ Install [Ruby](http://www.ruby-lang.org/) and execute the following command
19
+ in a terminal:
20
+
21
+ $ gem install spread2rdf
22
+
23
+ ## Command-line interface
24
+
25
+ For a description of all available parameters, type the following in a terminal:
26
+
27
+ $ spread2rdf --help
28
+
29
+ ## Mapping DSL
30
+
31
+ A description will follow soon.
32
+
33
+ ## Contributing
34
+
35
+ 1. Fork it
36
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
37
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
38
+ 4. Push to the branch (`git push origin my-new-feature`)
39
+ 5. Create new Pull Request
40
+
41
+
42
+ ## Authors
43
+
44
+ * Marcel Otto
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1pre.1
data/bin/spread2rdf ADDED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env ruby
2
+ require 'spread2rdf'
3
+ Spread2RDF::Cli.new.run
@@ -0,0 +1,45 @@
1
module Spread2RDF
  # Mixin that gives a class a declarative set of attributes with defaults.
  #
  # A class declares its attributes with `self.attributes = { name: default }`;
  # an accessor is generated for every key. The class-level `attributes`
  # reader combines the superclass's declarations with the class's own, with
  # the class's own defaults taking precedence.
  module Attributes
    extend ActiveSupport::Concern

    module ClassMethods
      # Returns the Hash of attribute names to default values declared on
      # this class, merged over the attributes of the superclass (if the
      # superclass exposes a Hash via `attributes`).
      #
      # A class without own declarations yields the inherited attributes (or
      # an empty Hash) instead of failing on a nil `@attributes`.
      def attributes
        own = @attributes || {}
        inherited = superclass.attributes if superclass.respond_to?(:attributes)
        inherited.is_a?(Hash) ? inherited.merge(own) : own
      end

      # Declares attributes: generates an accessor for each key of +defaults+
      # and records the defaults, merging them into earlier declarations made
      # on the same class.
      def attributes=(defaults)
        defaults.each_key { |attribute| attr_accessor attribute }
        @attributes = (@attributes || {}).merge(defaults)
      end
    end

    # Sets the instance variable of every declared attribute, taking the value
    # from +initial_values+ when present and the declared default otherwise.
    #
    # Consumed keys are removed from +initial_values+ (the Hash is mutated);
    # the remaining, unrecognised entries are returned.
    def init_attributes(initial_values)
      self.class.attributes.each do |attribute, default_value|
        value = initial_values.delete(attribute)
        # Only a missing/nil value falls back to the default, so that an
        # explicit `false` is preserved.
        value = default_value if value.nil?
        instance_variable_set("@#{attribute}", value)
      end
      initial_values
    end

    # Overwrites the instance variables of declared attributes with the
    # values given in +update_values+; keys that are not declared attributes
    # are ignored. Returns +update_values+ unchanged.
    def update_attributes(update_values)
      declared = self.class.attributes
      update_values.each do |attribute, value|
        next unless declared.key?(attribute)
        instance_variable_set("@#{attribute}", value)
      end
      update_values
    end

    # Human-readable dump: the object's `to_s` followed by all declared
    # attributes with their current values (read through the accessors).
    def inspect
      values = self.class.attributes.map do |attribute, _default_value|
        "#{attribute}=#{send(attribute)}"
      end
      "#{self}: #{values.join(', ')}"
    end

  end
end
@@ -0,0 +1,91 @@
1
+ # coding: utf-8
2
+
3
module Spread2RDF
  # Command-line front end of spread2rdf.
  #
  # Parses ARGV on construction; #run then loads the schema specification,
  # reads the spreadsheet with the first defined Spreadsheet schema and writes
  # the resulting RDF graph to a file in the output directory.
  class Cli
    # Parses the command line. Terminates the process if required arguments
    # are missing or an option is invalid.
    def initialize
      parse_command_line!
    end

    # Runs the conversion.
    #
    # schema_spec_file - path of the schema specification to load; defaults to
    #                    the file given with -s/--schema.
    #
    # Aborts the process when the specification file is missing or defines no
    # schema. Returns self.
    def run(schema_spec_file = nil)
      schema_spec_file ||= @options[:schema_spec_file]
      abort "No schema specification file given" if schema_spec_file.nil?
      abort "Couldn't find schema specification file #{schema_spec_file}" unless
        File.exist?(schema_spec_file)
      load schema_spec_file
      abort "No schema specification found" if Spreadsheet.definitions.empty?
      puts "Reading #{@input_file} ..."
      @table = Spreadsheet.definitions.first
      @table.read(@input_file)
      write_output
      self
    end

    private

    # Parses the command line options into @options and stores the first
    # remaining argument as @input_file.
    #
    # On a parse error (including a missing input file or schema file) the
    # error and the usage text are written to stderr and the process exits
    # with status 1, so that calling scripts can detect the failure.
    def parse_command_line!(options={})
      @options = options
      optparse = OptionParser.new do |opts|
        opts.banner = 'Usage: spread2rdf [options] -s SPEC_FILE SPREAD_SHEET_FILE'

        opts.on( '-h', '--help', 'Display this information' ) do
          puts opts
          exit
        end

        @options[:output_dir] = '.'
        opts.on( '-o', '--output DIR', 'Output directory (default: current directory)' ) do |dir|
          abort "Output directory #{dir} doesn't exist" unless Dir.exist?(dir)
          @options[:output_dir] = dir
        end

        @options[:output_format] = 'ttl'
        opts.on( '-f', '--output-format FORMAT', 'Serialization format for the RDF data',
                 "FORMAT being one of: nt, n3, ttl, rdf, xml, html, json (default: ttl)") do |format|
          @options[:output_format] = format.strip.downcase
        end

        @options[:schema_spec_file] = nil
        opts.on( '-s', '--schema SPEC_FILE', 'Schema specification file (required)' ) do |file|
          @options[:schema_spec_file] = file
        end

      end

      optparse.parse!
      raise OptionParser::ParseError, 'required file arguments missing' if ARGV.empty?
      raise OptionParser::ParseError, 'required schema specification file missing' if @options[:schema_spec_file].nil?

      @input_file = ARGV.first
    rescue OptionParser::ParseError => e
      warn e.message
      warn optparse.help
      exit 1
    end

    # Path of the output file: the input file's base name with the output
    # format as extension, placed in the output directory.
    def output_filename
      name = File.basename(@input_file, File.extname(@input_file))
      File.join(@options[:output_dir], "#{name}.#{@options[:output_format]}")
    end

    # Serializes the RDF graph of the processed spreadsheet to the output
    # file, registering prefixes for all RDF vocabularies and all namespaces
    # defined through Namespace. Aborts when there is no RDF data.
    def write_output
      filename = output_filename
      # Build the graph once and reuse it for the emptiness check and output.
      graph = @table.try(:to_rdf)
      abort 'No RDF data to write!' if graph.blank?
      puts "Writing #{graph.count} RDF statements to #{filename} ... "
      # TODO: base_uri: ... for writer constructor
      RDF::Writer.open(filename) do |writer|
        RDF::Vocabulary.each do |vocabulary|
          writer.prefix vocabulary.__prefix__, vocabulary.to_s
        end
        Namespace.namespace.each do |name, namespace|
          writer.prefix name.to_s.downcase, namespace.to_s
        end
        graph.each_statement { |statement| writer << statement }
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ require 'zip'
2
+ #require 'zip/zipfilesystem'
3
+ # source (adapted to newer version of Roo and ruby-zip): https://gist.github.com/roblingle/1333908
4
+
5
+ # Easy access to xlsm files through the roo gem, version 1.10.0.
6
+ # The error that led me to write this is in the file below for google fodder. Not exactly sure what was causing
7
+ # the problem, so I'm not sure that this change won't break everything on your computer or summon zombies.
8
+ #
9
+ # Be sure to tell roo that you don't care about the extension mismatch:
10
+ # xl = Roo::Excelx.new("C:/path/to/spreadsheet_with_macro.xlsm", :zip, :warning)
11
+ #
12
# Patch of Roo::Excelx that unpacks the workbook from the path it was
# originally opened with, ignoring the zip file name Roo hands to
# #extract_content.
class Roo::Excelx

  alias_method :old_initialize, :initialize

  # Remembers the original file name before delegating to Roo's own
  # constructor.
  def initialize(filename, options = {})
    @original_file = filename
    old_initialize(filename, options)
  end

  # Extracts the files from the zip archive; replaces the method of the same
  # name in lib/roo/excelx.rb. The second argument is accepted for signature
  # compatibility only and is not used.
  def extract_content(tmpdir, zipfilename_unused)
    Zip::File.open(@original_file) { |archive| process_zipfile(tmpdir, @original_file, archive) }
  end

end
@@ -0,0 +1,14 @@
1
module Spread2RDF
  # Stateless helper functions, callable as Helper.resource_name(...).
  module Helper

    module_function

    # TODO: include this in the MappingContext(s)
    #
    # Turns a label into a resource name by replacing every ", " and every
    # remaining single space with a hyphen.
    def resource_name(string)
      string.gsub(/,? /, '-')
    end

  end
end
@@ -0,0 +1,50 @@
1
module Spread2RDF
  # Registry of named RDF namespaces.
  #
  # Lookups consult the registry first and fall back to a constant of that
  # name in the RDF module. Unknown constants referenced on Namespace (or on
  # Spread2RDF itself) are resolved through the same lookup.
  module Namespace
    class << self
      # Looks up a namespace by name: a registered one first, otherwise the
      # RDF constant of that name; nil when neither exists.
      def [](name)
        key = name.to_sym
        registered = namespace[key]
        return registered if registered
        (RDF.const_defined?(key) && RDF.const_get(key)) || nil
      end

      # Registers +namespace+ under +name+. Strings and RDF::URIs are wrapped
      # in a new RDF::Vocabulary; anything else but an RDF::Vocabulary raises
      # an ArgumentError.
      def []=(name, namespace)
        key = name.to_sym
        vocabulary =
          case namespace
          when RDF::Vocabulary then namespace
          when String, RDF::URI then RDF::Vocabulary.new(namespace)
          else raise ArgumentError, "expecting a namespace but got #{namespace}:#{namespace.class}"
          end
        self.namespace[key] = vocabulary
      end

      # The registry Hash (name Symbol => namespace), created on first use.
      def namespace
        @namespace ||= {}
      end

      # All registered namespaces, without their names.
      def namespaces
        namespace.values
      end

      # Resolves a namespace descriptor: a Symbol is looked up via Namespace[],
      # a vocabulary, URI or String is returned in its String form.
      def resolve_to_namespace(namespace_descriptor)
        return Namespace[namespace_descriptor] if namespace_descriptor.is_a?(Symbol)

        case namespace_descriptor
        when RDF::Vocabulary, RDF::URI, String
          namespace_descriptor.to_s
        else
          raise "invalid namespace: #{namespace_descriptor.inspect}"
        end
      end

      # Makes registered namespaces available as constants of Namespace.
      def const_missing(name)
        self[name] || super
      end
    end
  end
  NS = Namespace

  # Makes registered namespaces available as constants of Spread2RDF.
  def self.const_missing(name)
    Namespace[name] || super
  end

end
@@ -0,0 +1,48 @@
1
module Spread2RDF
  class Spreadsheet
    class Sheet
      # Schema element describing one column of a sheet and how its cells are
      # mapped to RDF (predicate, object mapping and statement mode).
      class Column < Element

        attr_reader :coord # this is set by Worksheet#index_columns!

        self.attributes = {
          predicate: nil,
          object:    nil,
          statement: nil
        }

        def initialize(sheet, options = {}, &block)
          super
        end

        alias sheet parent

        # Walks up the parent chain and returns the first Worksheet found,
        # or nil when there is none.
        def worksheet
          parent = self.parent
          parent = parent.parent until parent.nil? || parent.is_a?(Worksheet)
          parent
        end

        # Maps the cell(s) of this column in the given row(s).
        #
        # range   - an Integer row number or a Range of row numbers.
        # context - the parent mapping context; its subject is used as the
        #           subject of the cell mapping.
        #
        # The mapping context of each cell is cached in
        # worksheet.cell_mapping, so a cell is mapped only once.
        #
        # Returns the mapped object for an Integer row, or an Array of mapped
        # objects for a Range. Raises ArgumentError for any other range type.
        def map(range, context)
          case range
            when Integer
              coord = Coord[row: range, column: self.coord]
              # Assign the result of the whole `||=` expression, so that
              # `mapping` is also set when the cell was already cached.
              mapping = (worksheet.cell_mapping[coord.to_sym] ||=
                  create_context(context, row: range,
                                 subject: context.subject, predicate: predicate))
              mapping.object
            when Range
              range.map { |row| self.map(row, context) }
            else raise ArgumentError
          end
        end

        def to_s
          "#{super} of #{sheet}"
        end

      end
    end
  end
end
@@ -0,0 +1,156 @@
1
module Spread2RDF
  class Spreadsheet
    class Sheet
      class Column
        # Mapping context of a single cell: holds the cell value, derives the
        # RDF object from it and generates the statements for the cell.
        #
        # Helpers such as cell, cell_value, statement, statements, graph,
        # worksheet, sheet, spreadsheet, element and parent_context are not
        # defined here; presumably they come from Spreadsheet::MappingContext
        # (not visible in this file).
        class MappingContext < Spreadsheet::MappingContext

          self.attributes = {
            subject:   nil,
            predicate: nil,
            row:       nil
          }

          alias column element
          alias property predicate

          attr_reader :value

          # Reads the cell value and, unless it is blank, generates the
          # statements for the cell and adds this context's graph to the
          # worksheet graph.
          def initialize(sheet, parent_context = nil, attr = {})
            super
            @value = cell_value(row: row, column: column.coord)
            return if @value.blank?
            statements_to_object
            worksheet.graph << self.graph
          end

          # Coordinates of the cell this context maps.
          def cell_coord
            Coord[row: row, column: column.coord]
          end

          # The subject given explicitly, or else that of the parent context.
          def subject
            @subject or parent_context.try(:subject)
          end

          # The RDF object the cell value is mapped to (memoized).
          def object
            @object ||= @value && map_to_object(value)
          end

          # Raw value of the cell of column +name+ in the same row.
          # Raises a RuntimeError when the sheet has no such column.
          def value_of_column(name)
            cell_value(row: row, column: sibling_column(name).coord)
          end

          # Mapped object of the cell of column +name+ in the same row.
          # Raises a RuntimeError when the sheet has no such column.
          def object_of_column(name)
            cell(row: row, column: sibling_column(name).coord).object
          end

          ######################################################################
          # Value-to-object mapping

          private

          # Looks up another column of the same sheet by name.
          def sibling_column(name)
            other_column = sheet.column[name]
            raise "couldn't find column #{name} when mapping #{column}" if
                other_column.nil?
            other_column
          end

          # Maps a cell value to an RDF object according to the column's
          # object specification.
          def map_to_object(value)
            case object_mapping_mode
              when :to_string
                value
              when :resource_ref
                resolve_resource_ref
              when :new_resource
                create_resource_object
              when :custom
                # TODO execute a mapping block in the context of Column::MappingContext
              else
                raise 'internal error: unknown column mapping type'
            end
          end

          # Derives the object mapping mode from the column's `object`
          # attribute: nil, a Proc, or a Hash with a :uri or :from entry.
          def object_mapping_mode
            case
              when column.object.nil?         then :to_string
              when column.object.is_a?(Proc)  then :custom
              when !column.object[:uri].nil?  then :new_resource
              when !column.object[:from].nil? then :resource_ref
              else
                raise "mapping specification error: don't know how to map #{column}"
            end
          end

          # Resolves the cell value to the subject of exactly one statement in
          # the graph of the source worksheet, matched on the source predicate
          # (rdfs:label unless specified). Raises when the worksheet, or a
          # unique matching resource, cannot be found.
          def resolve_resource_ref
            source = column.object[:from]
            source = { worksheet: source } if source.is_a? Symbol
            raise ArgumentError, "expecting a Hash as source, but got #{source}" unless source.is_a? Hash
            source_worksheet = source[:worksheet]
            source_worksheet = spreadsheet.worksheet[source_worksheet]
            raise "#{column}: couldn't find source worksheet #{source[:worksheet]}" if source_worksheet.nil?
            source_predicate = source[:predicate] || RDF::RDFS.label
            result = source_worksheet.graph.query([nil, source_predicate, value])
            raise "#{column}: couldn't find a resource for #{value} in #{source_worksheet}" if result.empty?
            raise "#{column}: found multiple resources for #{value} in #{source_worksheet}: #{result.map(&:subject)}" if result.count > 1
            result.first.subject
          end

          # TODO: Should we reuse/share mapping logic with Sheet::MappingContext (#subject etc.)?
          #
          # Creates a new resource for the cell; only blank nodes (uri: :bnode)
          # are supported so far.
          def create_resource_object
            case
              when (column.object.try(:fetch, :uri, nil) || object) == :bnode
                RDF::Node.new
              else
                raise NotImplementedError
            end
          end


          ######################################################################
          # Statement mapping

          # Decides how statements are generated for the cell: not at all
          # (:ignore), as an OWL restriction (:restriction) or as a plain
          # triple (:default).
          def statement_mapping_mode
            case
              when column.statement == :none  then :ignore
              when column.predicate.nil?      then :ignore
              when restriction_mode           then :restriction
              else :default
            end
          end

          # The restriction property to use, or nil when the column is not
          # mapped as a restriction. `statement: :restriction` selects
          # owl:hasValue; a Hash selects the value of its :restriction entry.
          def restriction_mode
            restriction_mode = column.statement
            case restriction_mode
              when :restriction then RDF::OWL.hasValue
              when Hash         then restriction_mode[:restriction]
              else nil
            end
          end

          # Generates the statements for the cell according to the statement
          # mapping mode, then runs the column's block (if any) with the cell
          # value.
          def statements_to_object
            case statement_mapping_mode
              when :default
                statement(subject, predicate, object)
              when :restriction
                restriction_class = RDF::Node.new
                statements(
                    [ subject, RDF::RDFS.subClassOf, restriction_class ],
                    [ restriction_class, RDF.type, RDF::OWL.Restriction ],
                    [ restriction_class, RDF::OWL.onProperty, predicate ],
                    [ restriction_class, restriction_mode, object ]
                )
            end
            exec(value, &column.block) if column.block
          end

          # Evaluates +block+ in the context of this mapping context, passing
          # the cell value as block argument.
          def exec(value, &block)
            self.instance_exec(value, &block)
          end

        end
      end
    end
  end
end
@@ -0,0 +1,51 @@
1
module Spread2RDF
  class Spreadsheet
    # Cell coordinates consisting of a column (letters) and a row (number).
    #
    # Can be created from
    # - two arguments:         Coord.new('A', 12)
    # - a Hash:                Coord[column: 'A', row: 12]
    # - a cell reference:      Coord[:A12] or Coord['A12']
    class Coord < Struct.new(:column, :row)
      # A cell reference: column letters followed by the row number. The
      # column part must not match digits, otherwise multi-digit rows such as
      # "A12" would be split into column "A1" and row 2.
      CELL_REFERENCE = /\A([A-Za-z]+)(\d+)\z/

      # Raises a RuntimeError for a malformed cell reference and an
      # ArgumentError for an unsupported argument type or argument count.
      def initialize(*args)
        case args.length
          when 2 then super
          when 1
            case arg = args.first
              when Hash
                super(arg[:column], arg[:row])
              when Symbol, String
                match = CELL_REFERENCE.match(arg.to_s)
                raise "Invalid cell coordinates #{arg}" unless match
                super(match[1], match[2].to_i)
              else raise ArgumentError, "can't handle argument #{arg}"
            end
          else
            raise ArgumentError,
                  "wrong number of arguments (given #{args.length}, expected 1..2)"
        end
      end

      # The column as a 1-based number, as computed by Roo.
      def column_as_number
        Roo::Base.letter_to_number(column)
      end

      # The column as a 0-based index.
      def column_as_index
        column_as_number - 1
      end

      # The letter(s) of the column +count+ columns to the right.
      def increment_column(count = 1)
        self.class.increment_column(self.column, count)
      end

      # The cell reference, e.g. "A12".
      def to_s
        "#{column}#{row}"
      end

      def to_sym
        to_s.to_sym
      end

      class << self
        alias [] new

        # The letter(s) of the column +count+ columns to the right of +column+.
        def increment_column(column, count=1)
          Roo::Base.number_to_letter(Roo::Base.letter_to_number(column) + count)
        end
      end

    end
  end
end
@@ -0,0 +1,53 @@
1
module Spread2RDF
  class Spreadsheet
    # Base class of the schema elements. An element has a parent element, an
    # optional block and the attributes `name` and `source_name`, each of
    # which falls back to the other when unset.
    class Element
      include Attributes

      self.attributes = {
        name:        nil,
        source_name: nil
      }

      attr_reader :parent, :block

      # parent - the enclosing element.
      # attr   - Hash of initial attribute values.
      # block  - optional block, stored for later use.
      def initialize(parent, attr={}, &block)
        @parent = parent
        @block  = block
        init_attributes(attr)
      end

      # Intentionally empty.
      def init
      end

      # The element's name as a Symbol (falls back to the source name).
      def name
        (@name || @source_name).try(:to_sym)
      end

      # The element's source name as a String (falls back to the name).
      def source_name
        (@source_name || @name).try(:to_s)
      end

      # The spreadsheet this element belongs to, as reported by the parent.
      def spreadsheet
        parent.spreadsheet
      end

      # Unqualified class name followed by the name; the source name is added
      # in parentheses when it differs from the name.
      def to_s
        label  = self.name
        source = self.source_name
        label  = "#{label} (#{source})" unless label.to_s == source.to_s
        kind   = self.class.name.split('::').last
        "#{kind} #{label}"
      end

      private

      # Instantiates the MappingContext class belonging to this element's
      # class for this element.
      def create_context(parent_context, attr)
        self.class.const_get(:MappingContext).new(self, parent_context, attr)
      end

    end
  end
end
53
+