csv2rdf 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/csv2rdf.rb +9 -0
- data/lib/csv2rdf/converter.rb +123 -0
- data/lib/csv2rdf/vocabs.rb +10 -0
- metadata +80 -0
data/lib/csv2rdf.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
require 'rdf'
|
2
|
+
require 'rdf/ntriples'
|
3
|
+
require 'logger'
|
4
|
+
|
5
|
+
|
6
|
+
# Abstract super-class for all converters.
|
7
|
+
#
|
8
|
+
# == Usage
|
9
|
+
#
|
10
|
+
# Usually, a converter will be used as follows:
|
11
|
+
# converter = RatingConverter.new("path/to/file.csv", "path/to/output_file.nt", context_object)
|
12
|
+
# converter.convert
|
13
|
+
# converter.serialize
|
14
|
+
#
|
15
|
+
class Converter

  # The Logger object, defaults to STDOUT
  attr_accessor :log

  # Validates the input and output paths and sets up an empty output graph.
  #
  # ==== Attributes
  #
  # * +csv_in+ - Path to the CSV file that is going to be converted. Is tested for existence and type, might raise an
  #   +IOError+.
  # * +out_file+ - Path to the desired output file. The containing folder is tested for existence, might raise an
  #   +IOError+.
  # * +context+ - Optionally a context Hash, can be used to pass information to the converter.
  #
  # The logger is always created on STDOUT here; replace it afterwards via the
  # +log+ accessor if a different destination is needed.
  #
  def initialize(csv_in, out_file, context=nil)
    # File.exist? is used instead of the File.exists? alias, which was
    # deprecated for a long time and removed in Ruby 3.2.
    raise IOError, "file '#{csv_in}' does not exist" unless File.exist?(csv_in)
    raise IOError, "'#{csv_in}' is not a regular file" unless File.file?(csv_in)

    out_dir = File.dirname(out_file)
    raise IOError, "directory '#{out_dir}' does not exist" unless File.exist?(out_dir)

    @csv_in = csv_in
    @rdf_out = out_file
    @context = context
    @graph = RDF::Graph.new
    @log = Logger.new(STDOUT)
    @log.info("#{self.class}: converting #{@csv_in} to #{@rdf_out}")
  end

  # Turn any string into a URI component, useful for creating URIs from names and titles.
  # The conversion is based on ActiveSupport's +parameterize+ method.
  # NOTE(review): nothing visible here requires ActiveSupport; presumably it is
  # loaded elsewhere in the gem before this method is called - confirm.
  #
  # ==== Attributes
  #
  # * +name+ - the string to be converted
  # * +capitalize+ - if true, the individual components of the name will be capitalized
  #
  # ==== Examples
  #
  #   Converter.name2uri("Knud Möller")
  #   => "knud-moller"
  #
  #   Converter.name2uri("Knud Möller", true)
  #   => "Knud-Moller"
  #
  def self.name2uri(name, capitalize=false)
    # The literal +false+ replaces the FALSE constant, which no longer exists
    # in Ruby 3.2 and later.
    slug = name.parameterize.downcase
    return slug unless capitalize

    slug.split("-").map(&:capitalize).join("-")
  end

  # Convert the string of a German-style float ("1,59") into an actual float.
  # If passed an actual float, it will just return it.
  #
  # ==== Attributes
  #
  # * +float+ - the string to be converted
  #
  # ==== Examples
  #
  #   Converter.german_to_english_float("1,59")
  #   => 1.59
  #
  def self.german_to_english_float(float)
    # Going through to_s lets numeric input pass through unchanged.
    float.to_s.tr(",", ".").to_f
  end

  # Convert a string into a boolean.
  # For string.downcase == "ja" the method will return true, for all other
  # values (including +nil+) it will return false.
  #
  # ==== Attributes
  #
  # * +string+ - the string to be converted
  #
  # ==== Examples
  #
  #   Converter.ja_nein("ja")
  #   => true
  #   Converter.ja_nein("Ja")
  #   => true
  #   Converter.ja_nein("nein")
  #   => false
  #   Converter.ja_nein("hurtz")
  #   => false
  #   Converter.ja_nein(nil)
  #   => false
  #
  def self.ja_nein(string)
    # to_s makes a missing value (nil) count as "not ja" instead of raising.
    string.to_s.downcase.eql?("ja")
  end

  # The actual conversion takes place in this method. Implementations in sub-classes
  # need to build the output graph on the +graph+ instance variable.
  #
  # This base implementation always raises a RuntimeError.
  #
  def convert
    raise "method #{self.class.name}#convert() is not implemented!"
  end

  # After calling +convert+, +serialize+ will write the +graph+ to the desired output path
  # (+rdf_out+) as an N-Triples file (http://www.w3.org/TR/n-triples/).
  #
  def serialize
    RDF::Writer.open(@rdf_out) { |writer| writer << @graph }
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'rdf'
|
2
|
+
|
3
|
+
#
|
4
|
+
# Some widely used RDF vocabulary namespaces.
|
5
|
+
#
|
6
|
+
|
7
|
+
# schema.org terms.
SCHEMA = RDF::Vocabulary.new("http://schema.org/")
# W3C Basic Geo (WGS84 lat/long) terms. The namespace must end in "#" with no
# trailing ">", otherwise every term built from it is a wrong IRI.
GEO = RDF::Vocabulary.new("http://www.w3.org/2003/01/geo/wgs84_pos#")
# GoodRelations terms.
GR = RDF::Vocabulary.new("http://purl.org/goodrelations/v1#")
# Dublin Core terms.
DCT = RDF::Vocabulary.new("http://purl.org/dc/terms/")
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: csv2rdf
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Knud Möller
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-12-01 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rdf
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.0.9
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.0.9
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: activesupport
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 4.0.1
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 4.0.1
|
46
|
+
description: A super light-weight framework for converting arbitrary CSV files to
|
47
|
+
RDF.
|
48
|
+
email: knud@datalysator.com
|
49
|
+
executables: []
|
50
|
+
extensions: []
|
51
|
+
extra_rdoc_files: []
|
52
|
+
files:
|
53
|
+
- lib/csv2rdf.rb
|
54
|
+
- lib/csv2rdf/vocabs.rb
|
55
|
+
- lib/csv2rdf/converter.rb
|
56
|
+
homepage: https://github.com/knudmoeller/csv2rdf
|
57
|
+
licenses: []
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options: []
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ! '>='
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
70
|
+
requirements:
|
71
|
+
- - ! '>='
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0'
|
74
|
+
requirements: []
|
75
|
+
rubyforge_project:
|
76
|
+
rubygems_version: 1.8.25
|
77
|
+
signing_key:
|
78
|
+
specification_version: 3
|
79
|
+
summary: Convert CSV files to RDF
|
80
|
+
test_files: []
|