tripleloop 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +64 -0
- data/lib/tripleloop.rb +15 -0
- data/lib/tripleloop/document_processor.rb +62 -0
- data/lib/tripleloop/extractor.rb +69 -0
- data/lib/tripleloop/rdf_writer.rb +61 -0
- data/lib/tripleloop/util.rb +56 -0
- data/spec/spec_helper.rb +5 -0
- data/spec/tripleloop/document_processor_spec.rb +114 -0
- data/spec/tripleloop/extractor_spec.rb +87 -0
- data/spec/tripleloop/rdf_writer_spec.rb +102 -0
- data/spec/tripleloop/util_spec.rb +113 -0
- metadata +112 -0
data/README.md
ADDED
@@ -0,0 +1,64 @@
# Tripleloop

A DSL for extracting data from hash-like objects into RDF statements (i.e. triples or quads).

## Usage

Start by creating some extractor classes. Each extractor maps one or several document fragments
to RDF statements.

```ruby
class ArticleCoreExtractor < Tripleloop::Extractor
  bind(:doi) { |doc| RDF::DOI.send(doc[:doi]) }

  map(:title)          { |title|   [doi, RDF::DC11.title, title, RDF::NPGG.articles] }
  map(:published_date) { |date|    [doi, RDF::DC11.date, Date.parse(date), RDF::NPGG.articles] }
  map(:product)        { |product| [doi, RDF::NPG.product, RDF::NPGP.nature, RDF::NPGG.articles] }
end

class SubjectsExtractor < Tripleloop::Extractor
  bind(:doi) { |doc| RDF::DOI.send(doc[:doi]) }

  map(:subjects) { |subjects|
    subjects.map { |s|
      [doi, RDF::NPG.hasSubject, RDF::NPGS.send(s)]
    }
  }
end
```

Once defined, extractors can be composed into a DocumentProcessor class.

```ruby
class NPGProcessor < Tripleloop::DocumentProcessor
  extractors :article_core, :subjects
end
```

The processor can then be fed a collection of hash-like documents and returns RDF data grouped by
extractor name.

```ruby
data = NPGProcessor.batch_process(documents)
=> { :article_core => [[<RDF::URI:0x00000002651ce0(http://dx.doi.org/10.1038/481241e)>,
                        <RDF::URI:0x1b0c060(http://purl.org/dc/elements/1.1/title)>,
                        "Developmental biology: Watching cells die in real time"], ...],
     :subjects => [...] }
```

Notice that the output returned by the `batch_process` method is still a plain Ruby data structure, not a collection of `RDF::Statement` instances.
The actual job of instantiating RDF statements and writing them to disk is the responsibility of the `Tripleloop::RDFWriter` class, which can be used as follows:

```ruby
Tripleloop::RDFWriter.new(data, :dataset_path => Pathname.new("my-datasets")).write
```

This will create the following two files:

- `my-datasets/article_core.nq`
- `my-datasets/subjects.nq`

When the `#write` method is executed, `RDFWriter` internally generates RDF statements, delegating the RDF serialisation job to RDF.rb's [`RDF::Writer`](http://rubydoc.info/github/ruby-rdf/rdf/master/RDF/Writer).
The only logic in the implementation of `Tripleloop::RDFWriter#write` concerns choosing the right RDF serialisation format and file extension. When all the RDF statements
generated by an extractor also specify a graph (as in the example above), the writer uses `RDF::NQuads::Writer`, falling back to `RDF::NTriples::Writer` otherwise.
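The fallback described in the last paragraph can be illustrated with a short sketch (editorial, not part of the packaged README): an extractor whose mappings return plain triples with no graph term, so `RDFWriter` serialises its output with `RDF::NTriples::Writer`. The `AuthorsExtractor`/`AuthorsProcessor` classes, the sample document, and the output path are made up for illustration.

```ruby
require 'tripleloop'

# Hypothetical extractor: each mapping returns a 3-element triple (no graph term),
# so RDFWriter falls back to the N-Triples serialiser for this extractor.
class AuthorsExtractor < Tripleloop::Extractor
  bind(:doi) { |doc| RDF::URI("http://dx.doi.org/#{doc[:doi]}") }

  map(:authors) { |authors|
    authors.map { |author| [doi, RDF::DC11.creator, author] }
  }
end

class AuthorsProcessor < Tripleloop::DocumentProcessor
  extractors :authors
end

documents = [{ :doi => "10.1038/481241e", :authors => ["A. Fiore"] }]
data = AuthorsProcessor.batch_process(documents)

# Triples only => my-datasets/authors.nt (N-Triples) rather than a .nq file
Tripleloop::RDFWriter.new(data, :dataset_path => Pathname.new("my-datasets")).write
```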
data/lib/tripleloop.rb
ADDED
@@ -0,0 +1,15 @@
basedir = File.realpath(File.dirname(File.dirname(__FILE__)))
$LOAD_PATH << "#{basedir}/lib/tripleloop"
$LOAD_PATH << "#{basedir}/lib/tripleloop/support"

module Tripleloop; end

require 'extractor'
require 'document_processor'
require 'rdf_writer'
require 'util'

require 'pathname'
require 'fileutils'
require 'rdf'
require 'rdf/ntriples'
data/lib/tripleloop/document_processor.rb
ADDED
@@ -0,0 +1,62 @@
module Tripleloop
  class DocumentProcessor
    attr_reader :document

    def initialize(document)
      @document = Util.with_nested_fetch(document)
    end

    def self.extractors(*args)
      options = args.last.respond_to?(:fetch) ? args.pop : {}
      @extractors ||= {}

      args.each do |ext|
        @extractors[ext] = options
      end
    end

    def extracted_statements
      @extracted_statements ||= Hash[extractor_instances.map { |extractor|
        [extractor.name.to_sym, extractor.extract]
      }]
    end

    def self.batch_process(documents)
      documents.map { |doc|
        self.new(doc).extracted_statements
      }.reduce(Hash.new([])) { |accu, statements|
        accu.merge(statements) { |k, olds, news|
          olds.concat(news)
        }
      }
    end

    private
    def extractor_instances
      extractors = self.class.instance_variable_get(:@extractors)

      @extractor_instances ||= extractors.map { |ext, opts|
        klass = extractor_class(ext)
        context = get_context(opts[:context])
        klass.new(context)
      }
    end

    def extractor_class(extractor)
      class_name = Tripleloop::Util::String.classify("#{extractor}_extractor")
      scope.const_get(class_name)
    rescue NameError
      raise ExtractorNotFoundError, "Cannot find an extractor with class name '#{scope}::#{class_name}'"
    end

    def scope
      Tripleloop::Util.module(self)
    end

    def get_context(context)
      context ? document.get_in(*context) : document
    end

    class ExtractorNotFoundError < StandardError; end
  end
end
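A usage sketch (editorial, not part of the package) of the `:context` option consumed by `get_context` above: the array of keys is resolved with `document.get_in(*context)`, so the extractor's blocks only ever see that fragment. The `MetaExtractor`/`MetaProcessor` classes, the document shape, and the placeholder terms are hypothetical.

```ruby
require 'tripleloop'

# The :context option scopes an extractor to a nested document fragment.
class MetaExtractor < Tripleloop::Extractor
  # `keywords` is fetched from the :meta sub-hash, not from the whole document
  map(:keywords) { |keywords|
    keywords.map { |kw| [:article, :hasKeyword, kw] }
  }
end

class MetaProcessor < Tripleloop::DocumentProcessor
  extractors :meta, :context => [:attrs, :meta]
end

document = { :attrs => { :meta => { :keywords => ["rdf", "ruby"] } } }
MetaProcessor.new(document).extracted_statements
# => { :meta => [[:article, :hasKeyword, "rdf"], [:article, :hasKeyword, "ruby"]] }
```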
data/lib/tripleloop/extractor.rb
ADDED
@@ -0,0 +1,69 @@
class Tripleloop::Extractor
  def initialize(context)
    @context = context
    bind_variables!
  end

  def name
    class_name = self.class.name.split('::').last
    Tripleloop::Util::String.snake_case(class_name).gsub(/_extractor$/, '')
  end

  def self.map(*fragment, &block)
    @fragment_map ||= {}
    @fragment_map.merge!(fragment => block)
  end

  def self.bind(name, &block)
    @extractor_bindings ||= {}
    @extractor_bindings[name.to_sym] = block
  end

  def self.fragment_map
    @fragment_map || {}
  end

  def extract
    self.class.fragment_map.reduce([]) do |memo, (path, block)|
      fragment = Tripleloop::Util.with_nested_fetch(context).get_in(*path)
      returned = instance_exec(fragment, &block)

      if nested_triples?(returned)
        returned.each do |value|
          ensure_triple_or_quad(value)
        end
        memo.concat(returned)
      else
        ensure_triple_or_quad(returned)
        memo << returned
      end
    end
  end

  private
  def nested_triples?(value)
    value.all? { |object| object.is_a?(Array) }
  end

  def ensure_triple_or_quad(value)
    message = "Cannot build a triple or a quad with #{value}."
    raise BrokenMappingError, message unless is_triple_or_quad?(value)
  end

  def is_triple_or_quad?(value)
    [3, 4].include? value.length
  end

  def bind_variables!
    klass = self.class
    extractor_bindings = klass.instance_variable_get(:@extractor_bindings) || {}
    extractor_bindings.each do |method, block|
      klass.send(:define_method, method) do
        block.call(context)
      end
    end
  end

  class BrokenMappingError < StandardError; end
  attr_reader :context
end
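A small sketch (editorial) of the validation enforced by `ensure_triple_or_quad` above: a map block that returns fewer than three terms makes `#extract` raise `BrokenMappingError`. The `PairExtractor` class and its document are hypothetical.

```ruby
require 'tripleloop'

# A map block must return a 3- or 4-element array (or a list of such arrays);
# anything else is rejected when #extract runs.
class PairExtractor < Tripleloop::Extractor
  map(:key) { |value| [:subject, value] } # only two terms
end

begin
  PairExtractor.new(:key => "some value").extract
rescue Tripleloop::Extractor::BrokenMappingError => e
  puts e.message # Cannot build a triple or a quad with [:subject, "some value"].
end
```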
data/lib/tripleloop/rdf_writer.rb
ADDED
@@ -0,0 +1,61 @@
class Tripleloop::RDFWriter
  def initialize(data, opts={})
    @data = data
    @options = opts
  end

  def statements
    @statements ||= Hash[@data.map { |extractor_name, statements|
      [extractor_name,
       statements.map { |s| as_statement(s) }]
    }]
  end

  def write
    FileUtils.mkdir_p(datasets_path)

    statements.each do |extractor, extracted_statements|
      build_writer(extractor, extracted_statements) do |writer|
        extracted_statements.each do |statement|
          writer << statement
        end
      end
    end
  end

  private
  attr_reader :options

  def datasets_path(filename=nil)
    path = Pathname.new(options.fetch(:path, "datasets"))
    filename ? path.join(filename) : path
  end

  def build_writer(extractor, statements, &block)
    statements_format = format(statements)
    ext = extensions[statements_format]
    folder_path = options.fetch(:dataset_path, datasets_path)
    path = folder_path.join("#{extractor}.#{ext}")

    FileUtils.mkdir_p(folder_path)
    RDF::Writer.for(statements_format).open(path, &block)
  end

  def format(statements)
    statements.all?(&:has_context?) ? :nquads : :ntriples
  end

  def extensions
    {
      :ntriples => "nt",
      :nquads => "nq"
    }
  end

  def as_statement(args)
    statement_args = Hash[
      [:subject, :predicate, :object, :context].zip(args)
    ]
    RDF::Statement.new(statement_args)
  end
end
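A sketch (editorial) of how `#statements` and `#write` behave for mixed input, assuming an ad-hoc `RDF::Vocabulary` for the example URIs: three-element arrays become plain `RDF::Statement`s written as N-Triples, while four-element arrays carry a context and are written as N-Quads.

```ruby
require 'tripleloop'

ns = RDF::Vocabulary.new("http://example.com/resources/") # hypothetical namespace

data = {
  :titles => [[ns.doc1, RDF::DC11.title, "A title"]],                 # triples -> titles.nt
  :links  => [[ns.doc1, RDF::DOAP.homepage, ns.doc1, ns.some_graph]]  # quads   -> links.nq
}

writer = Tripleloop::RDFWriter.new(data, :dataset_path => Pathname.new("out"))
writer.statements[:links].first.context # the graph term, ns.some_graph
writer.write                            # creates out/titles.nt and out/links.nq
```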
data/lib/tripleloop/util.rb
ADDED
@@ -0,0 +1,56 @@
module Tripleloop
  module Util
    module NestedFetch
      def get_in(*keys)
        return self if keys.empty?

        value = Util.with_nested_fetch(self[keys.shift])

        if value.respond_to?(:get_in) && !keys.empty?
          value.get_in(*keys)
        else
          value
        end
      end
    end

    module String
      module_function

      def classify(string)
        string.split("_").reduce("") { |accu, chunk|
          accu << chunk.capitalize
        }
      end

      def snake_case(string)
        string.gsub(/(.)([A-Z])/, '\1_\2').downcase
      end
    end

    module Hash
      def symbolize_keys
        self.reduce({}) { |accu, (k, v)|
          v = v.extend(Util::Hash).symbolize_keys if v.respond_to?(:keys)
          accu.merge(k.to_sym => v)
        }
      end
    end

    module_function
    def with_nested_fetch(object)
      object.is_a?(Enumerable) ? object.extend(NestedFetch) : object
    end

    def module(object)
      constant(object.class.name.split('::')[0..-2])
    end

    def constant(names, context=Kernel)
      return context if names.empty?

      const_name = names.shift
      constant(names, context.const_get(const_name))
    end
  end
end
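A few illustrative calls (editorial) against the helpers defined above; the inputs are made up.

```ruby
require 'tripleloop'

# NestedFetch: dig into nested hashes/arrays, returning nil on a missing path.
doc = Tripleloop::Util.with_nested_fetch({ :a => { :b => [10, 20] } })
doc.get_in(:a, :b, 1)        # => 20
doc.get_in(:missing, :path)  # => nil

# String helpers used to resolve extractor class names.
Tripleloop::Util::String.classify("foo_bar_extractor")   # => "FooBarExtractor"
Tripleloop::Util::String.snake_case("FooBarExtractor")   # => "foo_bar_extractor"

# Hash#symbolize_keys works recursively on nested hashes.
h = { "k" => { "nested" => 1 } }.extend(Tripleloop::Util::Hash)
h.symbolize_keys             # => { :k => { :nested => 1 } }
```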
data/spec/tripleloop/document_processor_spec.rb
ADDED
@@ -0,0 +1,114 @@
require 'spec_helper'

describe Tripleloop::DocumentProcessor do
  module Example
    class FooExtractor < Tripleloop::Extractor
      map(:attrs, :foo) { |foo| [:subject, foo, :object] }
    end

    class BarExtractor < Tripleloop::Extractor
      map(:attrs, :bar) { |bar| [:subject, bar, :object] }
    end

    class BazExtractor < Tripleloop::Extractor
      map(:baz) { |baz|
        baz.map { |v| [:subject, v, :object] }
      }
    end

    class SampleProcessor < Tripleloop::DocumentProcessor
      extractors :foo, :bar
      extractors :baz, :context => [:attrs, :nested]
    end

    class ProcessorWithMissingExtractor < Tripleloop::DocumentProcessor
      extractors :foo, :missing
    end
  end

  let(:document) {{
    :attrs => {
      :foo => "foo-value",
      :bar => "bar-value",
      :nested => {
        :baz => ["baz a", "baz b"]
      }
    }
  }}

  describe "#extracted_statements" do
    subject { Example::SampleProcessor.new(document) }

    context "when some of the registered extractors cannot be found" do
      it "raises an ExtractorNotFound error" do
        expect {
          Example::ProcessorWithMissingExtractor.new(document).extracted_statements
        }.to raise_error(Example::SampleProcessor::ExtractorNotFoundError, /Example::MissingExtractor/)
      end
    end

    context "when all the registered extractors can be found" do
      it "returns a hash mapping extractor names to extracted statements" do
        subject.extracted_statements.should eq({
          :foo => [[:subject, "foo-value", :object]],
          :bar => [[:subject, "bar-value", :object]],
          :baz => [[:subject, "baz a", :object],
                   [:subject, "baz b", :object]]
        })
      end

      it "runs the extractors only once" do
        [Example::FooExtractor, Example::BarExtractor, Example::BazExtractor].each_with_index do |klass, i|
          extractor = double('extractor', :name => "extractor_#{i}")
          klass.stub(:new) { extractor }
          extractor.should_receive(:extract).once.and_return { :extracted }
        end

        subject.extracted_statements.should eq({
          :extractor_0 => :extracted,
          :extractor_1 => :extracted,
          :extractor_2 => :extracted
        })
      end
    end
  end

  describe ".batch_process" do
    let(:documents) {
      3.times.map { |n| {
        :attrs => {
          :foo => "foo-value #{n}",
          :bar => "bar-value #{n}",
          :nested => {
            :baz => ["baz a #{n}", "baz b #{n}"]
          }
        }}
      }
    }

    subject { Example::SampleProcessor.batch_process(documents) }

    it "returns a hash of combined statements, grouped by extractor name" do
      subject.should eq({
        :foo => [
          [:subject, "foo-value 0", :object],
          [:subject, "foo-value 1", :object],
          [:subject, "foo-value 2", :object]
        ],
        :bar => [
          [:subject, "bar-value 0", :object],
          [:subject, "bar-value 1", :object],
          [:subject, "bar-value 2", :object]
        ],
        :baz => [
          [:subject, "baz a 0", :object],
          [:subject, "baz b 0", :object],
          [:subject, "baz a 1", :object],
          [:subject, "baz b 1", :object],
          [:subject, "baz a 2", :object],
          [:subject, "baz b 2", :object]
        ]
      })
    end
  end
end
data/spec/tripleloop/extractor_spec.rb
ADDED
@@ -0,0 +1,87 @@
require 'spec_helper'

describe Tripleloop::Extractor do
  class SampleExtractor < Tripleloop::Extractor
    map(:path, :to, :key) { |fragment|
      [fragment, :predicate, :object]
    }

    map(:path, :to, :enumerable) { |enumerable|
      enumerable.map { |item|
        [:subject, item, :object]
      }
    }
  end

  class BrokenExtractor < Tripleloop::Extractor
    map(:path, :to, :key) { |fragment|
      [[:subject, fragment, :obj],
       [:subject, :obj]] # <= missing predicate
    }
  end

  let(:document) {{
    :path => {
      :to => {
        :key => :test_key,
        :enumerable => [:foo, :bar, :baz]
      }
    }
  }}

  let(:extractor) {
    SampleExtractor.new(document)
  }

  describe "#extract" do
    let(:triples) { extractor.extract }

    it "maps a document fragment to a block" do
      triples.first.should eq([:test_key, :predicate, :object])
    end

    context "when a block returns multiple triples arguments" do
      it "concats the returned values to the extracted list" do
        triples[1..3].should eq([
          [:subject, :foo, :object],
          [:subject, :bar, :object],
          [:subject, :baz, :object]
        ])
      end
    end

    context "when a block does not return a valid constructor argument for RDF::Statement" do
      it "raises a BrokenMappingError" do
        expect {
          BrokenExtractor.new(document).extract
        }.to raise_error(Tripleloop::Extractor::BrokenMappingError)
      end
    end
  end

  describe ".bind" do
    let(:document) {{
      :doi => "10.1038/481241e",
      :title => "Sample document"
    }}

    class ExtractorWithBinding < Tripleloop::Extractor
      bind(:doi) { |doc| doc[:doi] }

      map(:title) { |title|
        [doi, RDF::DC11.title, title]
      }
    end

    it "defines a binding which can be used from within a map block" do
      extractor = ExtractorWithBinding.new(document)
      extractor.extract.should eq([["10.1038/481241e", RDF::DC11.title, "Sample document"]])
    end
  end

  describe "#name" do
    it "returns the extractor name (in snake case)" do
      extractor.name.should eq("sample")
    end
  end
end
data/spec/tripleloop/rdf_writer_spec.rb
ADDED
@@ -0,0 +1,102 @@
require 'spec_helper'
require 'fakefs/spec_helpers'

describe Tripleloop::RDFWriter do
  class ExampleNs < RDF::Vocabulary("http://example.com/resources/"); end
  class SampleGraph < RDF::Vocabulary("http://example.com/graphs/"); end

  let(:triples) {[
    [ExampleNs.my_resource, RDF::DC11.title, "My test resource"],
    [ExampleNs.my_resource, RDF::DC11.author, "sample author"]
  ]}

  let(:triples_as_rdf) {[
    RDF::Statement(:subject => ExampleNs.my_resource,
                   :predicate => RDF::DC11.title,
                   :object => "My test resource"),

    RDF::Statement(:subject => ExampleNs.my_resource,
                   :predicate => RDF::DC11.author,
                   :object => "sample author")
  ]}

  let(:quads) {[
    [ExampleNs.my_resource, RDF::DOAP.homepage, ExampleNs.my_resource, SampleGraph.projects],
    [ExampleNs.my_resource, RDF::DOAP.mailing_list, RDF::URI("mailto://example-list@mailman.example.com"), SampleGraph.projects]
  ]}

  let(:quads_as_rdf) {[
    RDF::Statement(:subject => ExampleNs.my_resource,
                   :predicate => RDF::DOAP.homepage,
                   :object => ExampleNs.my_resource,
                   :context => SampleGraph.projects),

    RDF::Statement(:subject => ExampleNs.my_resource,
                   :predicate => RDF::DOAP.mailing_list,
                   :object => RDF::URI("mailto://example-list@mailman.example.com"),
                   :context => SampleGraph.projects)
  ]}

  let(:statements) {{
    :triples => triples,
    :quads => quads
  }}

  let(:options) {{}}

  let(:rdf_writer) {
    Tripleloop::RDFWriter.new(statements, options)
  }

  describe "#statements" do
    it "returns 3 items long arrays as RDF triples" do
      rdf_writer.statements[:triples].should eq(triples_as_rdf)
    end

    it "returns 4 items long arrays as RDF quads" do
      rdf_writer.statements[:quads].should eq(quads_as_rdf)
      rdf_writer.statements[:quads].each do |statement|
        statement.context.should eq(SampleGraph.projects)
      end
    end
  end

  describe "#write" do
    include FakeFS::SpecHelpers

    before do
      FakeFS.activate!
      rdf_writer.write
    end

    context "when a dataset path is not supplied in the options" do
      it "saves files in the standard 'datasets' folder" do
        File.directory?("datasets").should be_true
      end
    end

    context "when a dataset path is supplied" do
      let(:options) {{
        :dataset_path => Pathname.new("test-path")
      }}

      it "exports quads as .nq files" do
        File.read("test-path/quads.nq").split(/\s*\.\n/).should eq([
          "<#{ExampleNs.my_resource}> <#{RDF::DOAP.homepage}> <#{ExampleNs.my_resource}> <#{SampleGraph.projects}>",
          "<#{ExampleNs.my_resource}> <#{RDF::DOAP.mailing_list}> <mailto://example-list@mailman.example.com> <#{SampleGraph.projects}>",
        ])
      end

      it "exports triples as .nt files" do
        File.read("test-path/triples.nt").split(/\s*\.\n/).should eq([
          "<#{ExampleNs.my_resource}> <#{RDF::DC11.title}> \"My test resource\"",
          "<#{ExampleNs.my_resource}> <#{RDF::DC11.author}> \"sample author\"",
        ])
      end
    end

    after do
      FakeFS.deactivate!
    end
  end
end
data/spec/tripleloop/util_spec.rb
ADDED
@@ -0,0 +1,113 @@
require 'spec_helper'

describe Tripleloop::Util do
  subject { Tripleloop::Util }

  describe ".with_nested_fetch" do
    context "when the supplied argument is a hash" do
      it "extends it with the NestedFetch module" do
        subject.with_nested_fetch({}).should respond_to(:get_in)
      end
    end

    context "when the supplied argument is an array" do
      it "extends it with the NestedFetch module" do
        subject.with_nested_fetch([]).should respond_to(:get_in)
      end
    end

    context "when the supplied argument is not enumerable" do
      it "returns the supplied argument" do
        subject.with_nested_fetch(Object.new).should_not respond_to(:get_in)
      end
    end
  end

  describe ".module" do
    module Test
      module Foo
        class Bar; end
      end
      class Baz; end
    end

    context "when the supplied object's class is within a nested namespace" do
      it "returns the parent module as a constant" do
        subject.module(Test::Foo::Bar.new).should eq(Test::Foo)
        subject.module(Test::Baz.new).should eq(Test)
      end
    end

    context "when the supplied object's class is not nested within a namespace" do
      it "returns the Kernel constant" do
        subject.module(Object.new).should eq(Kernel)
      end
    end
  end

  describe Tripleloop::Util::NestedFetch do
    describe "#get_in" do
      context "when object is a hash" do
        subject { Tripleloop::Util.with_nested_fetch({
          :path => {
            :to => {
              :value => :ok
            }
          }
        })}

        it "returns the value corresponding to the supplied path" do
          subject.get_in(:path, :to, :value).should eq(:ok)
        end

        it "returns nothing when the corresponding value cannot be found" do
          subject.get_in(:wrong, :path).should be_nil
        end
      end

      context "when object is an array" do
        subject { Tripleloop::Util.with_nested_fetch([
          [0, 1, 2, [
            [:ok]
          ]]
        ])}

        it "returns the value corresponding to the supplied path" do
          subject.get_in(0, 3, 0, 0).should eq(:ok)
        end

        it "returns nothing when no corresponding value can be found" do
          subject.get_in(0, 3, 1).should be_nil
        end
      end
    end
  end

  describe Tripleloop::Util::String do
    subject { Tripleloop::Util::String }

    describe ".classify" do
      it "turns 'snake case' into 'camel case'" do
        subject.classify("foo_bar_baz").should eq("FooBarBaz")
      end
    end
  end

  describe Tripleloop::Util::Hash do
    subject { {"foo" => 1,
               "bar" => 2,
               "baz" => {"k" => "v"}
              }.extend(Tripleloop::Util::Hash) }

    describe ".symbolize_keys" do
      it "returns a copy of the supplied hash with string keys replaced by symbols" do
        subject.symbolize_keys.should eq({
          :foo => 1,
          :bar => 2,
          :baz => {:k => "v"}
        })
      end
    end
  end
end
metadata
ADDED
@@ -0,0 +1,112 @@
--- !ruby/object:Gem::Specification
name: tripleloop
version: !ruby/object:Gem::Version
  version: 0.0.1
prerelease:
platform: ruby
authors:
- Andrea Fiore
autorequire:
bindir: bin
cert_chain: []
date: 2013-02-22 00:00:00.000000000 Z
dependencies:
- !ruby/object:Gem::Dependency
  name: rdf
  requirement: &12159420 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ! '>='
      - !ruby/object:Gem::Version
        version: '0'
  type: :runtime
  prerelease: false
  version_requirements: *12159420
- !ruby/object:Gem::Dependency
  name: rspec
  requirement: &12277940 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: 2.12.0
  type: :development
  prerelease: false
  version_requirements: *12277940
- !ruby/object:Gem::Dependency
  name: fakefs
  requirement: &12276080 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ~>
      - !ruby/object:Gem::Version
        version: 0.4.0
  type: :development
  prerelease: false
  version_requirements: *12276080
- !ruby/object:Gem::Dependency
  name: pry
  requirement: &12274760 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ! '>='
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: *12274760
- !ruby/object:Gem::Dependency
  name: babosa
  requirement: &12221100 !ruby/object:Gem::Requirement
    none: false
    requirements:
    - - ! '>='
      - !ruby/object:Gem::Version
        version: '0'
  type: :development
  prerelease: false
  version_requirements: *12221100
description: Simple tool for extracting RDF triples from Ruby hashes
email: andrea.giulio.fiore@googlemail.com
executables: []
extensions: []
extra_rdoc_files: []
files:
- README.md
- lib/tripleloop.rb
- lib/tripleloop/document_processor.rb
- lib/tripleloop/extractor.rb
- lib/tripleloop/rdf_writer.rb
- lib/tripleloop/util.rb
- spec/spec_helper.rb
- spec/tripleloop/document_processor_spec.rb
- spec/tripleloop/extractor_spec.rb
- spec/tripleloop/rdf_writer_spec.rb
- spec/tripleloop/util_spec.rb
homepage: http://github.com/afiore/tripleloop
licenses: []
post_install_message:
rdoc_options:
- --charset=UTF-8
require_paths:
- lib
required_ruby_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ! '>='
    - !ruby/object:Gem::Version
      version: '0'
required_rubygems_version: !ruby/object:Gem::Requirement
  none: false
  requirements:
  - - ! '>='
    - !ruby/object:Gem::Version
      version: '0'
requirements: []
rubyforge_project: tripleloop
rubygems_version: 1.8.17
signing_key:
specification_version: 2
summary: Simple tool for extracting RDF triples from Ruby hashes
test_files: []
has_rdoc:
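Going by the gemspec above, pulling the released version into a project would look roughly like this (a sketch; per the metadata, `rdf` is the only runtime dependency and is installed automatically):

```ruby
# Gemfile
source 'https://rubygems.org'

gem 'tripleloop', '0.0.1'
```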