tripleloop 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +64 -0
- data/lib/tripleloop.rb +15 -0
- data/lib/tripleloop/document_processor.rb +62 -0
- data/lib/tripleloop/extractor.rb +69 -0
- data/lib/tripleloop/rdf_writer.rb +61 -0
- data/lib/tripleloop/util.rb +56 -0
- data/spec/spec_helper.rb +5 -0
- data/spec/tripleloop/document_processor_spec.rb +114 -0
- data/spec/tripleloop/extractor_spec.rb +87 -0
- data/spec/tripleloop/rdf_writer_spec.rb +102 -0
- data/spec/tripleloop/util_spec.rb +113 -0
- metadata +112 -0
data/README.md
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# Tripleloop
|
2
|
+
|
3
|
+
A DSL for extracting data from hash-like objects into RDF statements (i.e. triples or quads).
|
4
|
+
|
5
|
+
## Usage
|
6
|
+
|
7
|
+
Start by creating some extractor classes. Each extractor maps one or several document fragments
|
8
|
+
to RDF statements.
|
9
|
+
|
10
|
+
```ruby
|
11
|
+
class ArticleCoreExtractor < Tripleloop::Extractor
|
12
|
+
bind(:doi) { |doc| RDF::DOI.send(doc[:doi]) }
|
13
|
+
|
14
|
+
map(:title) { |title| [doi, RDF::DC11.title, title, RDF::NPGG.articles] }
|
15
|
+
map(:published_date) { |date | [doi, RDF::DC11.date, Date.parse(date), RDF::NPGG.articles] }
|
16
|
+
map(:product) { |product| [doi, RDF::NPG.product, RDF::NPGP.nature, RDF::NPGG.articles] }
|
17
|
+
end
|
18
|
+
|
19
|
+
class SubjectsExtractor < Tripleloop::Extractor
|
20
|
+
bind(:doi) { |doc| RDF::DOI.send(doc[:doi]) }
|
21
|
+
|
22
|
+
map(:subjects) { |subjects|
|
23
|
+
subjects.map { |s|
|
24
|
+
[doi, RDF::NPG.hasSubject, RDF::NPGS.send(s) ]
|
25
|
+
}
|
26
|
+
}
|
27
|
+
end
|
28
|
+
```
|
29
|
+
|
30
|
+
Once defined, extractors can be composed into a DocumentProcessor class.
|
31
|
+
|
32
|
+
```ruby
|
33
|
+
class NPGProcessor < Tripleloop::DocumentProcessor
|
34
|
+
extractors :article_core, :subjects
|
35
|
+
end
|
36
|
+
```
|
37
|
+
|
38
|
+
The processor can then be fed with a collection of hash-like documents and return RDF data grouped by
|
39
|
+
extractor name.
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
data = NPGProcessor.batch_process(documents)
|
43
|
+
=> { :article_core => [[<RDF::URI:0x00000002651ce0(http://dx.doi.org/10.1038/481241e)>,
|
44
|
+
<RDF::URI:0x1b0c060(http://purl.org/dc/elements/1.1/title)>,
|
45
|
+
"Developmental biology: Watching cells die in real time"],...],
|
46
|
+
:subjects => [...] }
|
47
|
+
```
|
48
|
+
|
49
|
+
Notice that the output returned by the `batch_process` method is still a plain ruby data structure, and not an instance of RDF::Statement.
|
50
|
+
The actual job of instantiating RDF statements and writing them to disc is in fact the responsibility of the `Tripleloop::RDFWriter` class, which can be used as follows:
|
51
|
+
|
52
|
+
```ruby
|
53
|
+
Tripleloop::RDFWriter.new(data, :dataset_path => Pathname.new("my-datasets")).write
|
54
|
+
```
|
55
|
+
|
56
|
+
This will create the following two files:
|
57
|
+
|
58
|
+
- `my-datasets/article_core.nq`
|
59
|
+
- `my-datasets/subjects.nq`
|
60
|
+
|
61
|
+
When the `#write` method is executed, `RDFWriter` will internally generate RDF triples, delegating the RDF serialisation job to RDF.rb's [`RDF::Writer`](http://rubydoc.info/github/ruby-rdf/rdf/master/RDF/Writer).
|
62
|
+
The only logic involved in the implementation of `Tripleloop::RDFWriter#write` concerns the assignment of the right RDF serialisation format and file extension. When all the RDF statements
|
63
|
+
generated by an extractor also specify a graph (as in the example above), the writer will use the `RDF::NQuads::Writer`, falling back to `RDF::NTriples::Writer` otherwise.
|
64
|
+
|
data/lib/tripleloop.rb
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# Entry point of the tripleloop gem: loads the external dependencies first and
# then every library file that lives under lib/tripleloop.
require 'pathname'
require 'fileutils'
require 'rdf'
require 'rdf/ntriples'

# Root namespace, declared before the library files are loaded so that each of
# them can reopen it regardless of the form (`module Tripleloop` or
# `class Tripleloop::X`) it uses.
module Tripleloop; end

# Library files are loaded relative to this file rather than through
# $LOAD_PATH: appending lib/tripleloop to the global load path and requiring
# generic names such as 'util' or 'extractor' would pick up any same-named file
# found earlier on the path instead of the gem's own.
require_relative 'tripleloop/util'
require_relative 'tripleloop/extractor'
require_relative 'tripleloop/document_processor'
require_relative 'tripleloop/rdf_writer'
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Tripleloop
  # Runs a set of extractors against one hash-like document and collects the
  # statements each extractor returns.
  #
  # Subclasses register extractors with the `extractors` macro. A registered
  # name such as `:foo` is resolved to a `FooExtractor` constant looked up in
  # the namespace enclosing the processor subclass.
  class DocumentProcessor
    # Raised when a registered extractor name cannot be resolved to a class.
    class ExtractorNotFoundError < StandardError; end

    attr_reader :document

    # @param document a hash-like (or array-like) object; it is passed through
    #   Util.with_nested_fetch before being stored.
    def initialize(document)
      @document = Util.with_nested_fetch(document)
    end

    # Registers one or more extractors on the processor class.
    #
    # A trailing argument responding to #fetch is treated as an options hash
    # shared by all the names of the call; the `:context` option is a key path
    # selecting the document fragment handed to the extractor.
    def self.extractors(*args)
      options = args.last.respond_to?(:fetch) ? args.pop : {}
      @extractors ||= {}

      args.each { |ext| @extractors[ext] = options }
    end

    # @return [Hash] extractor name (Symbol) => value returned by the
    #   extractor's #extract. Memoized, so extractors run once per processor.
    def extracted_statements
      @extracted_statements ||= Hash[extractor_instances.map { |extractor|
        [extractor.name.to_sym, extractor.extract]
      }]
    end

    # Processes every document and merges the results by extractor name.
    #
    # Statements are accumulated into arrays owned by the returned hash, so the
    # arrays held by the individual processors are never mutated. Looking up an
    # extractor name that produced nothing still yields an empty array, but a
    # fresh one each time rather than a single object shared by all keys.
    #
    # @param documents an enumerable of hash-like documents
    # @return [Hash] extractor name => combined array of statements
    def self.batch_process(documents)
      combined = Hash.new { |_hash, _name| [] }

      documents.each do |doc|
        new(doc).extracted_statements.each do |name, statements|
          combined[name] = [] unless combined.key?(name)
          combined[name].concat(statements)
        end
      end

      combined
    end

    private

    # Instantiates each registered extractor with its context fragment.
    def extractor_instances
      @extractor_instances ||= registered_extractors.map { |ext, opts|
        extractor_class(ext).new(get_context(opts[:context]))
      }
    end

    # Extractors are stored in a class-level instance variable; a processor
    # class that never called `extractors` simply has none.
    def registered_extractors
      self.class.instance_variable_get(:@extractors) || {}
    end

    def extractor_class(extractor)
      class_name = Tripleloop::Util::String.classify("#{extractor}_extractor")
      scope.const_get(class_name)
    rescue NameError
      raise ExtractorNotFoundError, "Cannot find an extractor with class name '#{scope}::#{class_name}'"
    end

    # Namespace enclosing the processor's class.
    def scope
      Tripleloop::Util.module(self)
    end

    # Returns the fragment found at the `context` key path, or the whole
    # document when no context was configured.
    def get_context(context)
      context ? document.get_in(*context) : document
    end
  end
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module Tripleloop
  # Base class for extractors: objects mapping fragments of a hash-like
  # context to arrays of 3 (triple) or 4 (quad) elements.
  #
  # Subclasses use two class-level macros:
  # * `map(*path) { |fragment| ... }` registers a block for the fragment found
  #   at `path`; the block returns one triple/quad or an array of them.
  # * `bind(:name) { |context| ... }` defines an instance method `name` that
  #   map blocks can call.
  class Extractor
    # Raised when a map block returns something that is not a triple or a quad.
    class BrokenMappingError < StandardError; end

    def initialize(context)
      @context = context
    end

    # @return [String] snake-cased class name without namespace and without
    #   the trailing "_extractor" (e.g. "sample" for SampleExtractor).
    def name
      class_name = self.class.name.split('::').last
      Tripleloop::Util::String.snake_case(class_name).sub(/_extractor\z/, '')
    end

    # Registers `block` as the mapping for the fragment at key path `fragment`.
    def self.map(*fragment, &block)
      @fragment_map ||= {}
      @fragment_map.merge!(fragment => block)
    end

    # Defines an instance method `name` returning `block.call(context)`.
    #
    # The method is defined once, here, instead of being redefined on the class
    # every time an extractor is instantiated; it is therefore also available
    # to subclasses of the class declaring the binding.
    def self.bind(name, &block)
      @extractor_bindings ||= {}
      @extractor_bindings[name.to_sym] = block
      define_method(name.to_sym) { block.call(context) }
      block
    end

    def self.fragment_map
      @fragment_map || {}
    end

    # Runs every registered map block (in the extractor's own scope, so bound
    # methods are callable) and returns the flat list of triples/quads.
    #
    # @raise [BrokenMappingError] when a block returns anything other than an
    #   array of 3 or 4 items, or an array of such arrays.
    def extract
      self.class.fragment_map.reduce([]) do |memo, (path, block)|
        fragment = Tripleloop::Util.with_nested_fetch(context).get_in(*path)
        returned = instance_exec(fragment, &block)

        statements = nested_triples?(returned) ? returned : [returned]
        statements.each { |statement| ensure_triple_or_quad(statement) }
        memo.concat(statements)
      end
    end

    private

    attr_reader :context

    # True for an array whose items are all arrays (an empty array included,
    # which contributes no statements).
    def nested_triples?(value)
      value.is_a?(Array) && value.all? { |object| object.is_a?(Array) }
    end

    def ensure_triple_or_quad(value)
      return if is_triple_or_quad?(value)

      raise BrokenMappingError, "Cannot build a triple or a quad with #{value.inspect}."
    end

    # The Array check keeps nil and other non-array block results from blowing
    # up with NoMethodError before BrokenMappingError can be raised.
    def is_triple_or_quad?(value)
      value.is_a?(Array) && [3, 4].include?(value.length)
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
module Tripleloop
  # Turns the plain arrays produced by the extractors into RDF::Statement
  # instances and writes them to disk, one file per extractor.
  #
  # Options:
  # * `:dataset_path` - output folder (String or Pathname).
  # * `:path`         - older name of the same option, still honoured when
  #                     `:dataset_path` is not given.
  # When neither is supplied files are written to "datasets".
  class RDFWriter
    DEFAULT_DATASETS_PATH = "datasets".freeze

    # File extension used for each serialisation format.
    EXTENSIONS = {
      :ntriples => "nt",
      :nquads   => "nq"
    }.freeze

    # @param data [Hash] extractor name => array of 3 or 4 item arrays
    # @param opts [Hash] see the class documentation
    def initialize(data, opts={})
      @data = data
      @options = opts
    end

    # @return [Hash] extractor name => array of RDF::Statement (memoized)
    def statements
      @statements ||= Hash[@data.map { |extractor_name, raw_statements|
        [extractor_name, raw_statements.map { |s| as_statement(s) }]
      }]
    end

    # Creates the output folder and writes one file per extractor into it.
    def write
      FileUtils.mkdir_p(datasets_path)

      statements.each do |extractor, extracted_statements|
        build_writer(extractor, extracted_statements) do |writer|
          extracted_statements.each { |statement| writer << statement }
        end
      end
    end

    private

    attr_reader :options

    # Single source of truth for the output folder, so that the folder created
    # by #write is always the one the files are written to.
    #
    # @return [Pathname] the folder, or `filename` inside it when given
    def datasets_path(filename=nil)
      folder = options[:dataset_path] || options[:path] || DEFAULT_DATASETS_PATH
      path = Pathname.new(folder) # accepts both String and Pathname
      filename ? path.join(filename) : path
    end

    # Opens the RDF writer matching the statements' format on
    # "<folder>/<extractor>.<ext>" and yields it to the block.
    def build_writer(extractor, statements, &block)
      format_name = statements_format(statements)
      path = datasets_path("#{extractor}.#{extensions[format_name]}")

      RDF::Writer.for(format_name).open(path, &block)
    end

    # N-Quads when every statement carries a context, N-Triples otherwise.
    # (Not named `format`, which would shadow Kernel#format.)
    def statements_format(statements)
      statements.all?(&:has_context?) ? :nquads : :ntriples
    end

    def extensions
      EXTENSIONS
    end

    # Builds a statement from positional values; a missing fourth item leaves
    # the context nil.
    def as_statement(args)
      statement_args = Hash[
        [:subject, :predicate, :object, :context].zip(args)
      ]
      RDF::Statement.new(statement_args)
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Tripleloop
  # Small helpers shared by the rest of the library.
  module Util
    # Mixin adding key-path lookup to objects that respond to #[].
    module NestedFetch
      # Follows `keys` one level at a time.
      #
      # Returns self for an empty path. The walk stops as soon as it reaches a
      # value that cannot be traversed further (nil, a scalar, ...) and returns
      # that value, even when some keys are left. Enumerable values met along
      # the way are extended with NestedFetch.
      def get_in(*keys)
        return self if keys.empty?

        head, *rest = keys
        value = Util.with_nested_fetch(self[head])
        return value if rest.empty? || !value.respond_to?(:get_in)

        value.get_in(*rest)
      end
    end

    # String case conversions.
    module String
      module_function

      # "foo_bar_baz" => "FooBarBaz"
      def classify(string)
        string.split("_").map(&:capitalize).join
      end

      # "FooBar" => "foo_bar": an underscore is inserted before every capital
      # letter that follows another character.
      def snake_case(string)
        string.gsub(/(.)([A-Z])/,'\1_\2').downcase
      end
    end

    # Mixin for hash-like objects.
    module Hash
      # Returns a new hash whose keys are converted with #to_sym; values
      # responding to #keys are converted recursively (and, as a side effect,
      # extended with this module).
      def symbolize_keys
        each_with_object({}) do |(key, value), result|
          value = value.extend(Util::Hash).symbolize_keys if value.respond_to?(:keys)
          result[key.to_sym] = value
        end
      end
    end

    module_function

    # Extends Enumerable objects with NestedFetch (mutating and returning the
    # object itself); anything else is returned untouched.
    def with_nested_fetch(object)
      object.is_a?(Enumerable) ? object.extend(NestedFetch) : object
    end

    # Returns the namespace enclosing the class of `object`, or Kernel when
    # the class is top-level or anonymous (anonymous classes have a nil name).
    def module(object)
      constant(object.class.name.to_s.split('::')[0..-2])
    end

    # Resolves a list of constant names, each one looked up inside the
    # previous, starting from `context`. `names` is left unmodified.
    def constant(names, context=Kernel)
      names.reduce(context) { |scope, const_name| scope.const_get(const_name) }
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
require 'spec_helper'

describe Tripleloop::DocumentProcessor do
  # Fixture namespace holding the extractors and processors under test.
  module Example
    class FooExtractor < Tripleloop::Extractor
      map(:attrs, :foo) { |foo| [:subject, foo, :object] }
    end

    class BarExtractor < Tripleloop::Extractor
      map(:attrs, :bar) { |bar| [:subject, bar, :object] }
    end

    class BazExtractor < Tripleloop::Extractor
      map(:baz) do |baz|
        baz.map { |v| [:subject, v, :object] }
      end
    end

    class SampleProcessor < Tripleloop::DocumentProcessor
      extractors :foo, :bar
      extractors :baz, :context => [:attrs, :nested]
    end

    # Registers an extractor name with no matching class.
    class ProcessorWithMissingExtractor < Tripleloop::DocumentProcessor
      extractors :foo, :missing
    end
  end

  let(:document) do
    {
      :attrs => {
        :foo => "foo-value",
        :bar => "bar-value",
        :nested => {
          :baz => ["baz a", "baz b"]
        }
      }
    }
  end

  describe "#extracted_statements" do
    subject { Example::SampleProcessor.new(document) }

    context "when some of the registered extractors cannot be found" do
      it "raises an ExtractorNotFound error" do
        processor = Example::ProcessorWithMissingExtractor.new(document)

        expect {
          processor.extracted_statements
        }.to raise_error(Example::SampleProcessor::ExtractorNotFoundError, /Example::MissingExtractor/)
      end
    end

    context "when all the registered extractors can be found" do
      it "returns a hash mapping extractor names to extracted statements" do
        expected = {
          :foo => [[:subject, "foo-value", :object]],
          :bar => [[:subject, "bar-value", :object]],
          :baz => [[:subject, "baz a", :object],
                   [:subject, "baz b", :object]]
        }

        subject.extracted_statements.should eq(expected)
      end

      it "runs the extractors only once" do
        extractor_classes = [Example::FooExtractor, Example::BarExtractor, Example::BazExtractor]

        extractor_classes.each_with_index do |klass, i|
          extractor = double('extractor', :name => "extractor_#{i}")
          klass.stub(:new) { extractor }
          extractor.should_receive(:extract).once.and_return(:extracted)
        end

        subject.extracted_statements.should eq({
          :extractor_0 => :extracted,
          :extractor_1 => :extracted,
          :extractor_2 => :extracted
        })
      end
    end
  end

  describe ".batch_process" do
    # Three documents shaped like `document`, with values suffixed by index.
    let(:documents) do
      (0...3).map do |n|
        {
          :attrs => {
            :foo => "foo-value #{n}",
            :bar => "bar-value #{n}",
            :nested => {
              :baz => ["baz a #{n}", "baz b #{n}"]
            }
          }
        }
      end
    end

    subject { Example::SampleProcessor.batch_process(documents) }

    it "returns a hash of combined statements, grouped by extractor name" do
      subject.should eq({
        :foo => [
          [:subject, "foo-value 0", :object],
          [:subject, "foo-value 1", :object],
          [:subject, "foo-value 2", :object]
        ],
        :bar => [
          [:subject, "bar-value 0", :object],
          [:subject, "bar-value 1", :object],
          [:subject, "bar-value 2", :object]
        ],
        :baz => [
          [:subject, "baz a 0", :object],
          [:subject, "baz b 0", :object],
          [:subject, "baz a 1", :object],
          [:subject, "baz b 1", :object],
          [:subject, "baz a 2", :object],
          [:subject, "baz b 2", :object]
        ]
      })
    end
  end
end
|
@@ -0,0 +1,87 @@
|
|
1
|
+
require 'spec_helper'

describe Tripleloop::Extractor do
  # Maps one scalar fragment to a single triple and one enumerable fragment
  # to a list of triples.
  class SampleExtractor < Tripleloop::Extractor
    map(:path, :to, :key) { |fragment|
      [fragment, :predicate, :object]
    }

    map(:path, :to, :enumerable) { |enumerable|
      enumerable.map { |item|
        [:subject, item, :object]
      }
    }
  end

  # Returns a list whose second item has only two elements.
  class BrokenExtractor < Tripleloop::Extractor
    map(:path, :to, :key) { |fragment|
      [[:subject, fragment, :obj],
       [:subject, :obj]] # <= missing predicate
    }
  end

  let(:document) {{
    :path => {
      :to => {
        :key => :test_key,
        :enumerable => [ :foo, :bar, :baz]
      }
    }
  }}

  let(:extractor) {
    SampleExtractor.new(document)
  }

  describe "#extract" do
    let(:triples) { extractor.extract }

    it "maps a document fragment to a block" do
      triples.first.should eq([:test_key, :predicate, :object])
    end

    context "when a block returns multiple triples arguments" do
      it "concats the returned values to the extracted list" do
        triples[1..3].should eq([
          [:subject, :foo, :object],
          [:subject, :bar, :object],
          [:subject, :baz, :object]
        ])
      end
    end

    context "when a block does not return a valid constructor argument for RDF::Statement" do
      # The description names the error class the expectation actually checks.
      it "raises a BrokenMappingError" do
        expect {
          BrokenExtractor.new(document).extract
        }.to raise_error(Tripleloop::Extractor::BrokenMappingError)
      end
    end
  end

  # The macro under test is `bind`.
  describe ".bind" do
    let(:document) {{
      :doi => "10.1038/481241e",
      :title => "Sample document"
    }}

    class ExtractorWithBinding < Tripleloop::Extractor
      bind(:doi) { |doc| doc[:doi] }

      map(:title) { |title|
        [doi, RDF::DC11.title, title]
      }
    end

    it "defines a binding which can be used from within a map block" do
      extractor = ExtractorWithBinding.new(document)
      extractor.extract.should eq([["10.1038/481241e", RDF::DC11.title, "Sample document"]])
    end
  end

  describe "#name" do
    it "returns the extractor name (in snake case)" do
      extractor.name.should eq("sample")
    end
  end
end
|
@@ -0,0 +1,102 @@
|
|
1
|
+
require 'spec_helper'
require 'fakefs/spec_helpers'

describe Tripleloop::RDFWriter do
  class ExampleNs < RDF::Vocabulary("http://example.com/resources/"); end
  class SampleGraph < RDF::Vocabulary("http://example.com/graphs/"); end

  # Raw 3-item arrays, as returned by an extractor.
  let(:triples) {[
    [ExampleNs.my_resource, RDF::DC11.title, "My test resource"],
    [ExampleNs.my_resource, RDF::DC11.author, "sample author"]
  ]}

  # The statements expected for `triples`.
  let(:triples_as_rdf) {[
    RDF::Statement(:subject => ExampleNs.my_resource,
                   :predicate => RDF::DC11.title,
                   :object => "My test resource"),

    RDF::Statement(:subject => ExampleNs.my_resource,
                   :predicate => RDF::DC11.author,
                   :object => "sample author")
  ]}

  # Raw 4-item arrays: the last item is the graph.
  let(:quads) {[
    [ExampleNs.my_resource, RDF::DOAP.homepage, ExampleNs.my_resource, SampleGraph.projects],
    [ExampleNs.my_resource, RDF::DOAP.mailing_list, RDF::URI("mailto://example-list@mailman.example.com"), SampleGraph.projects]
  ]}

  # The statements expected for `quads`.
  let(:quads_as_rdf) {[
    RDF::Statement(:subject => ExampleNs.my_resource,
                   :predicate => RDF::DOAP.homepage,
                   :object => ExampleNs.my_resource,
                   :context => SampleGraph.projects),

    RDF::Statement(:subject => ExampleNs.my_resource,
                   :predicate => RDF::DOAP.mailing_list,
                   :object => RDF::URI("mailto://example-list@mailman.example.com"),
                   :context => SampleGraph.projects)
  ]}

  let(:statements) {{
    :triples => triples,
    :quads => quads
  }}

  let(:options) {{}}

  let(:rdf_writer) {
    Tripleloop::RDFWriter.new(statements, options)
  }

  describe "#statements" do
    it "returns 3 items long arrays as RDF triples" do
      rdf_writer.statements[:triples].should eq(triples_as_rdf)
    end

    it "returns 4 items long arrays as RDF quads" do
      rdf_writer.statements[:quads].should eq(quads_as_rdf)
      rdf_writer.statements[:quads].each do |statement|
        statement.context.should eq(SampleGraph.projects)
      end
    end
  end

  describe "#write" do
    include FakeFS::SpecHelpers

    # Every example writes to the fake file system before asserting on it.
    before do
      FakeFS.activate!
      rdf_writer.write
    end

    context "when a dataset path is not supplied in the options" do
      it "saves files in the standard 'datasets' folder" do
        File.directory?("datasets").should be_true
      end
    end

    context "when a dataset path is supplied" do
      let(:options) {{
        :dataset_path => Pathname.new("test-path")
      }}

      it "exports quads as .nq file" do
        File.read("test-path/quads.nq").split(/\s*\.\n/).should eq([
          "<#{ExampleNs.my_resource}> <#{RDF::DOAP.homepage}> <#{ExampleNs.my_resource}> <#{SampleGraph.projects}>",
          "<#{ExampleNs.my_resource}> <#{RDF::DOAP.mailing_list}> <mailto://example-list@mailman.example.com> <#{SampleGraph.projects}>",
        ])
      end

      # The file read below is N-Triples (.nt), hence the description.
      it "exports triples as .nt files" do
        File.read("test-path/triples.nt").split(/\s*\.\n/).should eq([
          "<#{ExampleNs.my_resource}> <#{RDF::DC11.title}> \"My test resource\"",
          "<#{ExampleNs.my_resource}> <#{RDF::DC11.author}> \"sample author\"",
        ])
      end
    end

    after do
      FakeFS.deactivate!
    end
  end
end
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'spec_helper'

describe Tripleloop::Util do
  subject { Tripleloop::Util }

  describe ".with_nested_fetch" do
    # Context descriptions match the literal each example passes in.
    context "when supplied argument is a hash" do
      it "extends it with the NestedFetch module" do
        subject.with_nested_fetch({}).should respond_to(:get_in)
      end
    end

    context "when supplied argument is an array" do
      it "extends it with the NestedFetch module" do
        subject.with_nested_fetch([]).should respond_to(:get_in)
      end
    end

    context "when supplied argument is not enumerable" do
      it "returns the supplied argument" do
        subject.with_nested_fetch(Object.new).should_not respond_to(:get_in)
      end
    end
  end

  describe ".module" do
    module Test
      module Foo
        class Bar; end
      end
      class Baz; end
    end

    context "when the supplied object's class is within a nested namespace" do
      it "returns the parent module as a constant" do
        subject.module(Test::Foo::Bar.new).should eq(Test::Foo)
        subject.module(Test::Baz.new).should eq(Test)
      end
    end

    context "when the supplied object class is not nested within a namespace" do
      it "returns the Kernel constant" do
        subject.module(Object.new).should eq(Kernel)
      end
    end
  end

  describe Tripleloop::Util::NestedFetch do
    describe "#get_in" do
      context "when object is a hash" do
        subject { Tripleloop::Util.with_nested_fetch({
          :path => {
            :to => {
              :value => :ok
            }
          }
        })}

        it "returns the value corresponding to the supplied path" do
          subject.get_in(:path, :to, :value).should eq(:ok)
        end

        it "returns nothing when the corresponding value cannot be found" do
          subject.get_in(:wrong, :path).should be_nil
        end
      end

      context "when object is an array" do
        subject { Tripleloop::Util.with_nested_fetch([
          [0,1,2,[
            [:ok]
          ]]
        ])}

        it "returns the value corresponding to the supplied path" do
          subject.get_in(0,3,0,0).should eq(:ok)
        end

        it "returns nothing when no corresponding value can be found" do
          subject.get_in(0,3,1).should be_nil
        end
      end
    end
  end

  describe Tripleloop::Util::String do
    subject { Tripleloop::Util::String }

    describe ".classify" do
      it "turns 'snake case' into 'camel case'" do
        subject.classify("foo_bar_baz").should eq("FooBarBaz")
      end
    end

    # Inverse conversion, used to derive extractor names from class names.
    describe ".snake_case" do
      it "turns 'camel case' into 'snake case'" do
        subject.snake_case("FooBarBaz").should eq("foo_bar_baz")
      end
    end
  end

  describe Tripleloop::Util::Hash do
    subject { {"foo" => 1,
               "bar" => 2,
               "baz" => {"k" => "v"}
    }.extend(Tripleloop::Util::Hash) }

    describe "#symbolize_keys" do
      it "returns a copy of the supplied hash replacing string keys with symbols" do
        subject.symbolize_keys.should eq({
          :foo => 1,
          :bar => 2,
          :baz => {:k => "v"}
        })
      end
    end
  end
end
|
metadata
ADDED
@@ -0,0 +1,112 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: tripleloop
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Andrea Fiore
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-02-22 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rdf
|
16
|
+
requirement: &12159420 !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: *12159420
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: rspec
|
27
|
+
requirement: &12277940 !ruby/object:Gem::Requirement
|
28
|
+
none: false
|
29
|
+
requirements:
|
30
|
+
- - ~>
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 2.12.0
|
33
|
+
type: :development
|
34
|
+
prerelease: false
|
35
|
+
version_requirements: *12277940
|
36
|
+
- !ruby/object:Gem::Dependency
|
37
|
+
name: fakefs
|
38
|
+
requirement: &12276080 !ruby/object:Gem::Requirement
|
39
|
+
none: false
|
40
|
+
requirements:
|
41
|
+
- - ~>
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 0.4.0
|
44
|
+
type: :development
|
45
|
+
prerelease: false
|
46
|
+
version_requirements: *12276080
|
47
|
+
- !ruby/object:Gem::Dependency
|
48
|
+
name: pry
|
49
|
+
requirement: &12274760 !ruby/object:Gem::Requirement
|
50
|
+
none: false
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
type: :development
|
56
|
+
prerelease: false
|
57
|
+
version_requirements: *12274760
|
58
|
+
- !ruby/object:Gem::Dependency
|
59
|
+
name: babosa
|
60
|
+
requirement: &12221100 !ruby/object:Gem::Requirement
|
61
|
+
none: false
|
62
|
+
requirements:
|
63
|
+
- - ! '>='
|
64
|
+
- !ruby/object:Gem::Version
|
65
|
+
version: '0'
|
66
|
+
type: :development
|
67
|
+
prerelease: false
|
68
|
+
version_requirements: *12221100
|
69
|
+
description: Simple tool for extracting RDF triples from Ruby hashes
|
70
|
+
email: andrea.giulio.fiore@googlemail.com
|
71
|
+
executables: []
|
72
|
+
extensions: []
|
73
|
+
extra_rdoc_files: []
|
74
|
+
files:
|
75
|
+
- README.md
|
76
|
+
- lib/tripleloop.rb
|
77
|
+
- lib/tripleloop/document_processor.rb
|
78
|
+
- lib/tripleloop/extractor.rb
|
79
|
+
- lib/tripleloop/rdf_writer.rb
|
80
|
+
- lib/tripleloop/util.rb
|
81
|
+
- spec/spec_helper.rb
|
82
|
+
- spec/tripleloop/document_processor_spec.rb
|
83
|
+
- spec/tripleloop/extractor_spec.rb
|
84
|
+
- spec/tripleloop/rdf_writer_spec.rb
|
85
|
+
- spec/tripleloop/util_spec.rb
|
86
|
+
homepage: http://github.com/afiore/tripleloop
|
87
|
+
licenses: []
|
88
|
+
post_install_message:
|
89
|
+
rdoc_options:
|
90
|
+
- --charset=UTF-8
|
91
|
+
require_paths:
|
92
|
+
- lib
|
93
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
94
|
+
none: false
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
98
|
+
version: '0'
|
99
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
100
|
+
none: false
|
101
|
+
requirements:
|
102
|
+
- - ! '>='
|
103
|
+
- !ruby/object:Gem::Version
|
104
|
+
version: '0'
|
105
|
+
requirements: []
|
106
|
+
rubyforge_project: tripleloop
|
107
|
+
rubygems_version: 1.8.17
|
108
|
+
signing_key:
|
109
|
+
specification_version: 2
|
110
|
+
summary: Simple tool for extracting RDF triples from Ruby hashes
|
111
|
+
test_files: []
|
112
|
+
has_rdoc:
|