dbd 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/.rspec +2 -0
- data/.rvmrc +1 -0
- data/.travis.yml +10 -0
- data/Gemfile +8 -0
- data/Guardfile +7 -0
- data/LICENSE.txt +22 -0
- data/README.md +97 -0
- data/Rakefile +1 -0
- data/dbd.gemspec +30 -0
- data/docs/rationale.md +17 -0
- data/docs/stories/001_create_a_fact.txt +15 -0
- data/docs/stories/002_create_a_facts_collection.txt +14 -0
- data/docs/stories/003_create_a_fact_origin.txt +15 -0
- data/docs/stories/004_create_fact_origins_collection.txt +8 -0
- data/docs/stories/005_CSV_export_the_graph.txt +18 -0
- data/docs/stories/006_refactor_fact_origin_to_provenance_fact.txt +20 -0
- data/docs/stories/007_rename_property_to_predicate.txt +6 -0
- data/docs/stories/008_testing_different_ruby_versions.txt +7 -0
- data/docs/stories/009_build_and_store_resources_with_provenance.txt +38 -0
- data/docs/stories/010_provenance_fact_properties_from_provenance_ontology.txt +10 -0
- data/docs/test.rb +32 -0
- data/lib/dbd.rb +13 -0
- data/lib/dbd/errors.rb +11 -0
- data/lib/dbd/fact.rb +182 -0
- data/lib/dbd/fact/collection.rb +60 -0
- data/lib/dbd/fact/id.rb +19 -0
- data/lib/dbd/fact/subject.rb +21 -0
- data/lib/dbd/graph.rb +47 -0
- data/lib/dbd/helpers/ordered_set_collection.rb +86 -0
- data/lib/dbd/helpers/uuid.rb +33 -0
- data/lib/dbd/provenance_fact.rb +76 -0
- data/lib/dbd/provenance_resource.rb +54 -0
- data/lib/dbd/rdf.rb +9 -0
- data/lib/dbd/repo.rb +8 -0
- data/lib/dbd/repo/neo4j_repo.rb +4 -0
- data/lib/dbd/repo/neo4j_repo/base.rb +55 -0
- data/lib/dbd/resource.rb +117 -0
- data/lib/dbd/version.rb +3 -0
- data/spec/factories/fact.rb +76 -0
- data/spec/factories/provenance_fact.rb +34 -0
- data/spec/factories/provenance_resource.rb +16 -0
- data/spec/factories/resource.rb +17 -0
- data/spec/lib/dbd/fact/collection_spec.rb +236 -0
- data/spec/lib/dbd/fact/id_spec.rb +19 -0
- data/spec/lib/dbd/fact/subject_spec.rb +19 -0
- data/spec/lib/dbd/fact_spec.rb +217 -0
- data/spec/lib/dbd/graph_spec.rb +214 -0
- data/spec/lib/dbd/helpers/ordered_set_collection_spec.rb +88 -0
- data/spec/lib/dbd/helpers/uuid_spec.rb +15 -0
- data/spec/lib/dbd/provenance_fact_spec.rb +108 -0
- data/spec/lib/dbd/provenance_resource_spec.rb +77 -0
- data/spec/lib/dbd/rdf_base_spec.rb +39 -0
- data/spec/lib/dbd/repo/neo4j_repo/base_spec.rb +85 -0
- data/spec/lib/dbd/repo/neo4j_repo/performance_spec.rb +40 -0
- data/spec/lib/dbd/resource_spec.rb +166 -0
- data/spec/spec_helper.rb +19 -0
- metadata +272 -0
@@ -0,0 +1,76 @@
|
|
1
|
+
module Dbd
  ##
  # ProvenanceFact
  #
  # ProvenanceFact is derived from Fact and behaves very similarly.
  #
  # The ProvenanceFacts with the same subject form a ProvenanceResource and
  # this is used as the target for the provenance_subject of a Fact.
  #
  # The provenance_subject of a ProvenanceFact itself is empty, so the
  # usage of provenance_subject is not recursive on this level (this
  # allows efficient single pass loading in an underlying database).
  #
  # In the serialisation of the fact stream, the presence or absence of a
  # provenance_subject marks the difference between a (base) Fact and a
  # ProvenanceFact.
  #
  # The predicates in a ProvenanceFact would typically come from a defined
  # "provenance ontology". An experimental example of a provenance ontology
  # is built-up on https://data.vandenabeele.com/ontologies/provenance.
  class ProvenanceFact < Fact

    ##
    # Builds a new ProvenanceFact.
    #
    # @param [Hash{Symbol => Object}] options
    # @option options [Fact::Subject] :subject (new_subject) Optional: the subject for the ProvenanceFact
    # @option options [String] :predicate Required: the predicate for the ProvenanceFact
    # @option options [String] :object Required: the object for the ProvenanceFact
    # @raise [ProvenanceError] if options carries a (truthy) :provenance_subject
    def initialize(options)
      # Reject a provenance_subject before handing the options to Fact.
      validate_provenance_subject(options)
      super
    end

    ##
    # Executes the required update in used_provenance_subjects.
    #
    # For a ProvenanceFact, there is no provenance_subject, so it is
    # pointless to mark it in the used_provenance_subjects hash.
    #
    # @param [Object] _h ignored (kept so the signature matches the override in super)
    def update_used_provenance_subjects(_h)
      # Do nothing (override the behaviour from super).
    end

    ##
    # Validates the presence or absence of provenance_subject.
    #
    # Here, in the derived ProvenanceFact, it must not be present.
    #
    # @param [#nil?] provenance_subject
    # @return [Boolean] true only when provenance_subject is nil
    def provenance_subject_valid?(provenance_subject)
      provenance_subject.nil?
    end

    ##
    # Builds a duplicate with the subject set.
    #
    # The predicate and object are copied from this fact.
    #
    # @param [Subject] subject_arg
    # @return [ProvenanceFact] the duplicate fact
    def dup_with_subject(subject_arg)
      self.class.new(
        subject: subject_arg, # from arg
        predicate: predicate,
        object: object)
    end

  private

    ##
    # Validate that provenance_subject is not set here.
    #
    # @param [Hash{Symbol => Object}] options the options given to initialize
    # @raise [ProvenanceError] if options[:provenance_subject] is truthy
    def validate_provenance_subject(options)
      return unless options[:provenance_subject]
      raise ProvenanceError,
        "provenance_subject must not be set on a ProvenanceFact"
    end

  end
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
module Dbd
  ##
  # A ProvenanceResource is derived from a Resource, specifically
  # for a Provenance (it does not have and does not need a
  # provenance_subject).
  class ProvenanceResource < Resource

    ##
    # Build a new ProvenanceResource.
    #
    # The subject can be either given as an argument or a new (random)
    # subject is automatically set (see Resource for details).
    #
    # A provenance_subject may not be given here.
    #
    # @param [Hash{Symbol => Object}] options
    # @option options [Fact::Subject] :subject (new_subject) Optional: the subject for the resource
    # @raise [ProvenanceError] if options carries a (truthy) :provenance_subject
    def initialize(options = {})
      super
    end

    ##
    # Add a ProvenanceFact.
    #
    # * if it has no subject, the subject is set in a duplicate provenance_fact
    # * if it has the same subject as the resource, added unchanged.
    # * if it has a different subject, a SubjectError is raised.
    def <<(provenance_fact)
      # TODO: check the type of the provenance_fact (ProvenanceFact)
      super
    end

  private

    ##
    # Should not be called in ProvenanceResource subclass.
    #
    # @raise [RuntimeError] always
    def provenance_subject
      raise RuntimeError, "provenance_subject should not be called in ProvenanceResource."
    end

    ##
    # Validate that provenance_subject is not set here.
    #
    # @raise [ProvenanceError] if @provenance_subject is truthy
    def validate_provenance_subject
      return unless @provenance_subject
      raise ProvenanceError,
        "provenance_subject must not be set on a ProvenanceResource"
    end

    ##
    # Check provenance_subject, which should be nil here.
    #
    # @param [ProvenanceFact] provenance_fact
    # @return [ProvenanceFact] with validated nil on provenance_subject
    # @raise [ProvenanceError] if the fact has a (truthy) provenance_subject
    def check_or_set_provenance(provenance_fact)
      if provenance_fact.provenance_subject
        raise ProvenanceError,
          "a fact added to a ProvenanceResource must not have a provenance_subject"
      end
      provenance_fact
    end

  end
end
|
data/lib/dbd/rdf.rb
ADDED
data/lib/dbd/repo.rb
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
module Dbd
  module Repo
    module Neo4jRepo
      ##
      # To allow some tests on the Neo4j Neography gem.
      #
      # Every method except load_node forwards its arguments unchanged to
      # the Neography::Rest client built in initialize and returns whatever
      # that client returns.
      class Base

        ##
        # Builds the wrapper around a fresh Neography::Rest client.
        def initialize
          @neo = Neography::Rest.new
        end

        # Forwards to the client's create_node.
        def create_node(hash)
          neo.create_node(hash)
        end

        # Forwards all arguments to the client's batch.
        def batch(*args)
          neo.batch(*args)
        end

        # Forwards to the client's create_relationship (argument order kept).
        def create_relationship(p, s, o)
          neo.create_relationship(p, s, o)
        end

        # Forwards to the client's list_node_indexes.
        def list_node_indexes
          neo.list_node_indexes
        end

        # Forwards to the client's create_node_index.
        def create_node_index(name, type, provider)
          neo.create_node_index(name, type, provider)
        end

        # Forwards to the client's add_node_to_index.
        def add_node_to_index(index, key, value, node)
          neo.add_node_to_index(index, key, value, node)
        end

        # Forwards to the client's get_node_index.
        def get_node_index(index, key, value)
          neo.get_node_index(index, key, value)
        end

        # Forwards to the client's execute_query.
        def execute_query(query_string)
          neo.execute_query(query_string)
        end

        # Forwards to the client's get_root.
        def get_root
          neo.get_root
        end

        # Loads a node through Neography::Node.load; unlike the other
        # methods this does not go through the stored client.
        def load_node(uri)
          Neography::Node.load(uri)
        end

      private

        # The Neography::Rest client created in initialize.
        attr_reader :neo

      end
    end
  end
end
|
data/lib/dbd/resource.rb
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
require 'dbd/helpers/ordered_set_collection'
|
2
|
+
|
3
|
+
module Dbd
  ##
  # A Resource is a collection of facts that have the same subject.
  #
  # In the real world this is mainly an "instance" about which all
  # facts are giving information (e.g. a conference, a person, a
  # bicycle, ...). More generally this can also be used to describe
  # classes and other concepts in the software system.
  #
  # A new (random) subject is generated for a resource. In Dbd,
  # a subject is a random uuid (like an oid), not a meaningful URI
  # as it is in RDF.
  #
  # A provenance_subject is a required field in the options hash.
  # Practically, first a ProvenanceResource will be created and the
  # subject of that will be used as provenance_subject for the
  # Resources that are associated with it.
  #
  # During build-up of a Fact, the subject and the provenance_subject
  # can be nil. These will then be set in a local duplicate when the
  # Fact is added (with '<<') to a resource.
  class Resource

    include Helpers::OrderedSetCollection

    attr_reader :subject

    ##
    # @return [Fact::Subject] a new (random) Resource subject
    def self.new_subject
      Fact.new_subject
    end

    ##
    # Build a new resource.
    #
    # By default, a new (random) subject is generated for a resource.
    # Optionally, an explicit subject can be given in the options parameter
    # (this is best created with the new_subject class method for forward
    # compatibility).
    #
    # The provenance_subject argument is required. This will typically be
    # taken from an earlier created ProvenanceResource.
    #
    # @param [Hash{Symbol => Object}] options
    # @option options [Fact::Subject] :provenance_subject (required) the subject of the provenance resource for this resource
    # @option options [Fact::Subject] :subject (new_subject) Optional: the subject for the resource
    # @raise [ProvenanceError] if :provenance_subject is missing (nil)
    def initialize(options)
      @subject = options[:subject] || self.class.new_subject
      @provenance_subject = options[:provenance_subject]
      validate_provenance_subject
      # Explicit empty parentheses: do not forward options up the ancestor chain.
      super()
    end

    ##
    # Add a fact.
    #
    # * if it has no subject, the subject is set in a duplicate fact
    # * if it has the same subject as the resource, added unchanged.
    # * if it has a different subject, a SubjectError is raised.
    # * if it has no provenance_subject, the provenance_subject is set in a duplicate fact
    # * if it has the same provenance_subject as the resource, added unchanged.
    # * if it has a different provenance_subject, a ProvenanceError is raised.
    def <<(fact)
      # TODO: check the type of the fact (Fact)
      super(check_or_set_subject_and_provenance(fact))
    end

    ##
    # Getter for provenance_subject.
    #
    # Will be overridden in the ProvenanceResource subclass.
    def provenance_subject
      @provenance_subject
    end

  private

    # Runs the subject check first and the provenance check on its result.
    def check_or_set_subject_and_provenance(element)
      with_subject = check_or_set_subject(element)
      check_or_set_provenance(with_subject)
    end

    # Returns the element itself when its subject matches, or a duplicate
    # carrying this resource's subject when the element has none.
    #
    # @raise [SubjectError] if the element has a different subject
    def check_or_set_subject(element)
      return element.dup_with_subject(@subject) unless element.subject
      return element if element.subject == @subject

      raise SubjectError,
        "self.subject is #{subject} and element.subject is #{element.subject}"
    end

    # Returns the element itself when its provenance_subject matches, or a
    # duplicate carrying this resource's provenance_subject when it has none.
    #
    # This will be overridden in the ProvenanceResource subclass.
    #
    # @raise [ProvenanceError] if the element has a different provenance_subject
    def check_or_set_provenance(element)
      return element.dup_with_provenance_subject(@provenance_subject) unless element.provenance_subject
      return element if element.provenance_subject == @provenance_subject

      raise ProvenanceError,
        "self.provenance_subject is #{provenance_subject} and element.provenance_subject is #{element.provenance_subject}"
    end

    # Validate that a provenance_subject was given.
    #
    # @raise [ProvenanceError] if @provenance_subject is nil
    def validate_provenance_subject
      return unless @provenance_subject.nil?
      raise ProvenanceError,
        "a provenance_subject is required for a Resource"
    end

  end
end
|
data/lib/dbd/version.rb
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
module Factories
  # Spec factories that build ::Dbd::Fact instances with fixed predicates
  # and objects, plus small collections of such facts.
  module Fact
    class << self

      # The class whose instances this factory builds.
      def factory_for
        ::Dbd::Fact
      end

      # A new subject, as produced by the factory class.
      def new_subject
        factory_for.new_subject
      end

      # A fact without a subject (object "Gandhi").
      def fact_1(provenance_subject = nil)
        factory_for.new(
          provenance_subject: provenance_subject,
          predicate: "http://example.org/test/name",
          object: "Gandhi")
      end

      # A fact with a freshly generated subject (object "Mandela").
      def fact_2_with_subject(provenance_subject = nil)
        factory_for.new(
          provenance_subject: provenance_subject,
          subject: new_subject,
          predicate: "http://example.org/test/name",
          object: "Mandela")
      end

      # A fact with a freshly generated subject (object "King").
      def fact_3_with_subject(provenance_subject = nil)
        factory_for.new(
          provenance_subject: provenance_subject,
          subject: new_subject,
          predicate: "http://example.org/test/name",
          object: "King")
      end

      # A fact with the given subject (object "Aung San Suu Kyi").
      def data_fact(provenance_subject = nil, subject = nil)
        factory_for.new(
          provenance_subject: provenance_subject,
          subject: subject,
          predicate: "http://example.org/test/name",
          object: "Aung San Suu Kyi")
      end

      # A fact with the given subject (object "European Union").
      def data_fact_EU(provenance_subject = nil, subject = nil)
        factory_for.new(
          provenance_subject: provenance_subject,
          subject: subject,
          predicate: "http://example.org/test/name",
          object: "European Union")
      end

    end

    # Factories for objects that behave as a ::Dbd::Fact::Collection.
    module Collection
      class << self

        # A plain Object extended with ::Dbd::Fact::Collection, with the
        # module's initialize invoked on it.
        def factory_for_instance
          Object.new.tap do |instance|
            instance.extend(::Dbd::Fact::Collection)
            instance.send(:initialize)
          end
        end

        # A collection holding fact_2 and fact_3, in that order.
        def fact_2_3(provenance_subject)
          fact_collection = factory_for_instance
          fact_collection << Fact.fact_2_with_subject(provenance_subject)
          fact_collection << Fact.fact_3_with_subject(provenance_subject)
          fact_collection
        end

        # A collection holding the context, created_by and original_source
        # provenance facts for the given subject, in that order.
        def provenance_facts(subject)
          provenance_facts = factory_for_instance
          provenance_facts << ProvenanceFact.context(subject)
          provenance_facts << ProvenanceFact.created_by(subject)
          provenance_facts << ProvenanceFact.original_source(subject)
          provenance_facts
        end

      end
    end
  end
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Factories
  # Spec factories that build ::Dbd::ProvenanceFact instances using
  # predicates from the experimental provenance ontology.
  module ProvenanceFact
    class << self

      # The class whose instances this factory builds.
      def factory_for
        ::Dbd::ProvenanceFact
      end

      # A new subject, as produced by the factory class.
      def new_subject
        factory_for.new_subject
      end

      # A "context" provenance fact with object "public".
      def context(subject = nil)
        factory_for.new(
          subject: subject,
          predicate: "https://data.vandenabeele.com/ontologies/provenance#context",
          object: "public")
      end

      # A "created_by" provenance fact with object "peter_v".
      def created_by(subject = nil)
        factory_for.new(
          subject: subject,
          predicate: "https://data.vandenabeele.com/ontologies/provenance#created_by",
          object: "peter_v")
      end

      # An "original_source" provenance fact whose object contains a
      # comma, a newline and a double quote.
      def original_source(subject = nil)
        factory_for.new(
          subject: subject,
          predicate: "https://data.vandenabeele.com/ontologies/provenance#original_source",
          object: "this has a comma , a newline \n and a double quote \"")
      end

    end
  end
end
|