csv2rdf 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/csv2rdf.rb +9 -0
- data/lib/csv2rdf/converter.rb +123 -0
- data/lib/csv2rdf/vocabs.rb +10 -0
- metadata +80 -0
data/lib/csv2rdf.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
require 'rdf'
|
2
|
+
require 'rdf/ntriples'
|
3
|
+
require 'logger'
|
4
|
+
|
5
|
+
|
6
|
+
# Abstract super-class for all converters.
#
# == Usage
#
# Usually, a converter will be used as follows:
#   converter = RatingConverter.new("path/to/file.csv", "path/to/output_file.nt", context_object)
#   converter.convert
#   converter.serialize
#
class Converter

  # The Logger object, defaults to STDOUT
  attr_accessor :log

  # Validates the input and output paths, stores them together with the
  # optional context, creates an empty output graph and a Logger on STDOUT,
  # and logs an info line naming the input and output paths.
  #
  # ==== Attributes
  #
  # * +csv_in+ - Path to the CSV file that is going to be converted. Is tested for existence and type, might raise an
  #   +IOError+.
  # * +out_file+ - Path to the desired output file. The containing folder is tested for existence, might raise an
  #   +IOError+.
  # * +context+ - Optionally a context Hash, can be used to pass information to the converter.
  #
  # The logger is not a parameter; replace it after construction through the
  # +log+ accessor if STDOUT is not wanted.
  #
  def initialize(csv_in, out_file, context=nil)
    # File.exist? is used because the File.exists? alias was removed in
    # Ruby 3.2; with the alias these checks raise NoMethodError there instead
    # of the documented IOError.
    raise IOError, "file '#{csv_in}' does not exist" unless File.exist?(csv_in)
    raise IOError, "'#{csv_in}' is not a regular file" unless File.file?(csv_in)

    out_dir = File.dirname(out_file)
    raise IOError, "directory '#{out_dir}' does not exist" unless File.exist?(out_dir)

    @csv_in = csv_in
    @rdf_out = out_file
    @context = context
    @graph = RDF::Graph.new
    @log = Logger.new(STDOUT)
    @log.info("#{self.class}: converting #{@csv_in} to #{@rdf_out}")
  end

  # Turn any string into a URI component, useful for creating URIs from names and titles.
  # The conversion is based on ActiveSupport's +parameterize+ method, so
  # ActiveSupport's String extensions must already be loaded when this is
  # called (NOTE(review): nothing in this class requires them -- confirm the
  # gem's entry point does).
  #
  # ==== Attributes
  #
  # * +name+ - the string to be converted
  # * +capitalize+ - if true, the individual components of the name will be capitalized
  #
  # ==== Examples
  #
  #   Converter.name2uri("Knud Möller")
  #   => "knud-moller"
  #
  #   Converter.name2uri("Knud Möller", true)
  #   => "Knud-Moller"
  #
  def self.name2uri(name, capitalize=false)
    # The default is the +false+ literal: the FALSE constant was deprecated in
    # Ruby 2.4 and is no longer defined in Ruby 3, which made every
    # one-argument call raise NameError.
    slug = name.parameterize.downcase
    return slug unless capitalize

    slug.split("-").map(&:capitalize).join("-")
  end

  # Convert the string of a German-style float ("1,59") into an actual float.
  # If passed an actual float, it will just return it.
  # Only the decimal comma is translated; thousands separators are not handled.
  #
  # ==== Attributes
  #
  # * +float+ - the string to be converted
  #
  # ==== Examples
  #
  #   Converter.german_to_english_float("1,59")
  #   => 1.59
  #
  def self.german_to_english_float(float)
    float.to_s.tr(",", ".").to_f
  end

  # Convert a string into a boolean
  # For string.downcase == "ja" the method will return true, for all other
  # values (including +nil+, e.g. an empty CSV cell) it will return false.
  #
  # === Attributes
  #
  # * +string+ - the string to be converted
  #
  # === Examples
  #
  #   Converter.ja_nein("ja")
  #   => true
  #   Converter.ja_nein("Ja")
  #   => true
  #   Converter.ja_nein("nein")
  #   => false
  #   Converter.ja_nein("hurtz")
  #   => false
  #   Converter.ja_nein(nil)
  #   => false
  #
  def self.ja_nein(string)
    # Values without #downcase (nil, numbers) used to raise NoMethodError;
    # they now fall into the documented "all other values" case.
    return false unless string.respond_to?(:downcase)

    string.downcase.eql?("ja")
  end

  # The actual conversion takes place in this method. Implementations in sub-classes
  # need to build the output graph on the +graph+ instance variable.
  # This base implementation always raises a RuntimeError.
  #
  def convert
    raise "method #{self.class.name}#convert() is not implemented!"
  end

  # After calling +convert+, +serialize+ will write the +graph+ to the desired output path
  # (+rdf_out+) as an N-Triples file (http://www.w3.org/TR/n-triples/).
  #
  def serialize
    RDF::Writer.open(@rdf_out) { |writer| writer << @graph }
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
require 'rdf'
|
2
|
+
|
3
|
+
#
|
4
|
+
# Some widely used RDF vocabulary namespaces.
|
5
|
+
#
|
6
|
+
|
7
|
+
# schema.org vocabulary
SCHEMA = RDF::Vocabulary.new("http://schema.org/")
# W3C Basic Geo (WGS84 lat/long) vocabulary; the namespace must end in "#"
# (a trailing ">" would be copied into every term URI built from it).
GEO = RDF::Vocabulary.new("http://www.w3.org/2003/01/geo/wgs84_pos#")
# GoodRelations vocabulary
GR = RDF::Vocabulary.new("http://purl.org/goodrelations/v1#")
# Dublin Core terms
DCT = RDF::Vocabulary.new("http://purl.org/dc/terms/")
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: csv2rdf
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.0
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Knud Möller
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-12-01 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rdf
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.0.9
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.0.9
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: activesupport
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 4.0.1
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 4.0.1
|
46
|
+
description: A super light-weight framework for converting arbitrary CSV files to
|
47
|
+
RDF.
|
48
|
+
email: knud@datalysator.com
|
49
|
+
executables: []
|
50
|
+
extensions: []
|
51
|
+
extra_rdoc_files: []
|
52
|
+
files:
|
53
|
+
- lib/csv2rdf.rb
|
54
|
+
- lib/csv2rdf/vocabs.rb
|
55
|
+
- lib/csv2rdf/converter.rb
|
56
|
+
homepage: https://github.com/knudmoeller/csv2rdf
|
57
|
+
licenses: []
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options: []
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
none: false
|
64
|
+
requirements:
|
65
|
+
- - ! '>='
|
66
|
+
- !ruby/object:Gem::Version
|
67
|
+
version: '0'
|
68
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
69
|
+
none: false
|
70
|
+
requirements:
|
71
|
+
- - ! '>='
|
72
|
+
- !ruby/object:Gem::Version
|
73
|
+
version: '0'
|
74
|
+
requirements: []
|
75
|
+
rubyforge_project:
|
76
|
+
rubygems_version: 1.8.25
|
77
|
+
signing_key:
|
78
|
+
specification_version: 3
|
79
|
+
summary: Convert CSV files to RDF
|
80
|
+
test_files: []
|