dbd 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/HISTORY.txt +13 -0
- data/README.md +11 -12
- data/dbd.gemspec +1 -1
- data/docs/stories/009_build_and_store_resources_with_provenance.txt +6 -3
- data/docs/stories/010_time_class.txt +23 -0
- data/docs/stories/011_store_resource_in_graph.txt +10 -0
- data/docs/stories/{010_provenance_fact_properties_from_provenance_ontology.txt → 012_provenance_fact_properties_from_provenance_ontology.txt} +1 -1
- data/docs/test.rb +12 -2
- data/lib/dbd.rb +1 -0
- data/lib/dbd/fact.rb +47 -44
- data/lib/dbd/fact/collection.rb +2 -3
- data/lib/dbd/graph.rb +13 -14
- data/lib/dbd/provenance_fact.rb +4 -4
- data/lib/dbd/provenance_resource.rb +5 -4
- data/lib/dbd/resource.rb +19 -40
- data/lib/dbd/time_stamp.rb +105 -0
- data/lib/dbd/version.rb +1 -1
- data/spec/lib/dbd/fact/collection_spec.rb +8 -6
- data/spec/lib/dbd/fact_spec.rb +38 -52
- data/spec/lib/dbd/graph_spec.rb +57 -14
- data/spec/lib/dbd/performance_spec.rb +41 -0
- data/spec/lib/dbd/provenance_fact_spec.rb +18 -20
- data/spec/lib/dbd/provenance_resource_spec.rb +7 -5
- data/spec/lib/dbd/resource_spec.rb +27 -29
- data/spec/lib/dbd/time_stamp_spec.rb +110 -0
- metadata +16 -34
@@ -0,0 +1,105 @@
|
|
1
|
+
module Dbd
|
2
|
+
|
3
|
+
##
|
4
|
+
# TimeStamp
|
5
|
+
#
|
6
|
+
# Each Fact has a time_stamp with a granularity of 1 ns. The small
|
7
|
+
# granularity is essential to allow enough "density" of Facts in a
|
8
|
+
# large fact stream. Since all Facts need to have a strictly
|
9
|
+
# monotonically increasing time_stamp, this causes a limitation of
|
10
|
+
# max 1_000_000_000 Facts per second in a fact stream.
|
11
|
+
#
|
12
|
+
# A second reason for a fine grained granularity of the time_stamp
|
13
|
+
# is to reduce the chance (but not to zero) for collisions between
|
14
|
+
# Facts when 2 (or more) fact streams with overlapping time ranges
|
15
|
+
# need to be merged. But, collisions are always possible and need
|
16
|
+
# to be handled (since this can be expensive, we need to avoid them).
|
17
|
+
#
|
18
|
+
# A practical problem with calculating a "randomized" time_stamp is
|
19
|
+
# that the system reports a Wall clock with a granularity of 1 us on
|
20
|
+
# MRI Ruby and only 1 ms on JRuby (see JSR 310). To solve this problem,
|
21
|
+
# some nifty tricks are needed to create more "randomized" time_stamps,
|
22
|
+
# while still guaranteeing the strictly monotonic increase in an
|
23
|
+
# unpredictable fact stream.
|
24
|
+
#
|
25
|
+
# Performance measurements show a typical 30 - 60 us delay between the
|
26
|
+
# consecutive created facts (on MRI and JRuby), so a randomization of
|
27
|
+
# e.g. 1 - 999 ns should not cause fundamental problems for the density
|
28
|
+
# of the facts (even if computers speed up a factor of 30 or an
|
29
|
+
# implementation in a faster language). Still this is an ad-hoc
|
30
|
+
# optimization at creation time and can be optimized without breaking
|
31
|
+
# the specification of the fact stream.
|
32
|
+
#
|
33
|
+
# A time_stamp does not need to represent the exact time of the
|
34
|
+
# creation of the fact, it only has to increase strictly monotonically
|
35
|
+
# in a fact stream.
|
36
|
+
class TimeStamp
|
37
|
+
|
38
|
+
attr_reader :time
|
39
|
+
|
40
|
+
##
|
41
|
+
# Builds a new TimeStamp.
|
42
|
+
#
|
43
|
+
# @param [Hash{Symbol => Object}] options
|
44
|
+
# @option options [Time] :time (Time.now) force the time to this value
|
45
|
+
# @option options [TimeStamp] :larger_than (void) time_stamp must be larger than this
|
46
|
+
def initialize(options={})
|
47
|
+
@time = options[:time] || new_time(options[:larger_than])
|
48
|
+
end
|
49
|
+
|
50
|
+
##
|
51
|
+
# regexp for the nanosecond granularity and in UTC
|
52
|
+
#
|
53
|
+
# Can be used to validate input strings or in tests.
|
54
|
+
def self.to_s_regexp
|
55
|
+
/\d{4}-\d\d-\d\d \d\d:\d\d:\d\d\.\d{9} UTC/
|
56
|
+
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
def new_time(larger_than)
|
61
|
+
max_with_nil(Time.now.utc, (larger_than && larger_than.time)) + random_offset
|
62
|
+
end
|
63
|
+
|
64
|
+
def random_offset
|
65
|
+
Rational("#{1+rand(999)}/1_000_000_000")
|
66
|
+
end
|
67
|
+
|
68
|
+
public
|
69
|
+
|
70
|
+
##
|
71
|
+
# to a nanosecond granularity and in UTC
|
72
|
+
def to_s
|
73
|
+
@time.strftime('%F %T.%N %Z')
|
74
|
+
end
|
75
|
+
|
76
|
+
def ==(other)
|
77
|
+
@time == other.time
|
78
|
+
end
|
79
|
+
|
80
|
+
def hash
|
81
|
+
@time.hash
|
82
|
+
end
|
83
|
+
|
84
|
+
def >(other)
|
85
|
+
@time > other.time
|
86
|
+
end
|
87
|
+
|
88
|
+
def <(other)
|
89
|
+
@time < other.time
|
90
|
+
end
|
91
|
+
|
92
|
+
def <=(other)
|
93
|
+
@time <= other.time
|
94
|
+
end
|
95
|
+
|
96
|
+
def +(seconds)
|
97
|
+
TimeStamp.new(time: (@time + seconds))
|
98
|
+
end
|
99
|
+
|
100
|
+
def -(other)
|
101
|
+
@time - other.time
|
102
|
+
end
|
103
|
+
|
104
|
+
end
|
105
|
+
end
|
data/lib/dbd/version.rb
CHANGED
@@ -98,8 +98,8 @@ module Dbd
|
|
98
98
|
|
99
99
|
describe "validate that only 'newer' elements are added" do
|
100
100
|
before(:each) do
|
101
|
-
fact_2_with_subject.stub(:time_stamp).and_return(Time.new(2013,05,9,12,0,0))
|
102
|
-
fact_3_with_subject.stub(:time_stamp).and_return(Time.new(2013,05,9,12,0,1))
|
101
|
+
fact_2_with_subject.stub(:time_stamp).and_return(TimeStamp.new(time: Time.new(2013,05,9,12,0,0)))
|
102
|
+
fact_3_with_subject.stub(:time_stamp).and_return(TimeStamp.new(time: Time.new(2013,05,9,12,0,1)))
|
103
103
|
end
|
104
104
|
|
105
105
|
it "adding an element with a newer time_stamp succeeds" do
|
@@ -171,14 +171,16 @@ module Dbd
|
|
171
171
|
end
|
172
172
|
end
|
173
173
|
|
174
|
-
describe "validate that facts
|
174
|
+
describe "validate that facts do not have errors when loading in the Fact::Collection" do
|
175
175
|
it "succeeds with a fact from factory" do
|
176
176
|
subject << fact_2_with_subject # should_not raise_error
|
177
177
|
end
|
178
178
|
|
179
|
-
it "raises FactError when fact.
|
180
|
-
provenance_fact_context.stub(:
|
181
|
-
lambda { subject << provenance_fact_context } . should raise_error
|
179
|
+
it "raises FactError with message when fact.errors has errors" do
|
180
|
+
provenance_fact_context.stub(:errors).and_return(["Error 1", "Error 2"])
|
181
|
+
lambda { subject << provenance_fact_context } . should raise_error(
|
182
|
+
FactError,
|
183
|
+
"Error 1, Error 2.")
|
182
184
|
end
|
183
185
|
end
|
184
186
|
|
data/spec/lib/dbd/fact_spec.rb
CHANGED
@@ -58,43 +58,55 @@ module Dbd
|
|
58
58
|
end
|
59
59
|
end
|
60
60
|
|
61
|
-
describe "time_stamp" do
|
62
|
-
it "can be set after creation" do
|
63
|
-
fact_1.time_stamp = Time.now
|
64
|
-
end
|
61
|
+
describe "time_stamp=" do
|
65
62
|
|
66
|
-
it "
|
67
|
-
|
68
|
-
fact_1.time_stamp = time_now
|
69
|
-
fact_1.time_stamp.should == time_now
|
63
|
+
it "checks the type (too easy to try to give a Time arg" do
|
64
|
+
lambda { fact_1.time_stamp = Time.now } . should raise_error(ArgumentError)
|
70
65
|
end
|
71
66
|
|
72
|
-
|
73
|
-
time_now = Time.now
|
74
|
-
fact_1.time_stamp = time_now + 0.000_000_002
|
75
|
-
fact_1.time_stamp.should > time_now
|
76
|
-
end
|
67
|
+
describe "set_once" do
|
77
68
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
69
|
+
let(:time_stamp_now) { TimeStamp.new }
|
70
|
+
|
71
|
+
it "can be set when nil" do
|
72
|
+
fact_1.time_stamp = time_stamp_now
|
73
|
+
fact_1.time_stamp.should == time_stamp_now
|
74
|
+
end
|
75
|
+
|
76
|
+
describe "setting it two times" do
|
77
|
+
it "with a different value raises a SetOnceError" do
|
78
|
+
fact_1.time_stamp = time_stamp_now
|
79
|
+
lambda { fact_1.time_stamp = (time_stamp_now+1) } . should raise_error SetOnceError
|
80
|
+
end
|
81
|
+
end
|
82
82
|
end
|
83
83
|
end
|
84
84
|
|
85
|
-
describe "
|
86
|
-
it "the factory
|
87
|
-
fact_2_with_subject.should
|
85
|
+
describe "errors" do
|
86
|
+
it "the factory has no errors" do
|
87
|
+
fact_2_with_subject.errors.should be_empty
|
88
88
|
end
|
89
89
|
|
90
|
-
|
91
|
-
|
92
|
-
|
90
|
+
describe "without provenance_subject" do
|
91
|
+
|
92
|
+
before(:each) do
|
93
|
+
fact_2_with_subject.stub(:provenance_subject).and_return(nil)
|
94
|
+
end
|
95
|
+
|
96
|
+
it "errors returns an array with 1 error message" do
|
97
|
+
fact_2_with_subject.errors.single.should match(/Provenance subject is missing/)
|
98
|
+
end
|
93
99
|
end
|
94
100
|
|
95
|
-
|
96
|
-
|
97
|
-
|
101
|
+
describe "without subject" do
|
102
|
+
|
103
|
+
before(:each) do
|
104
|
+
fact_2_with_subject.stub(:subject).and_return(nil)
|
105
|
+
end
|
106
|
+
|
107
|
+
it "errors returns an array with an errorm message" do
|
108
|
+
fact_2_with_subject.errors.single.should match(/Subject is missing/)
|
109
|
+
end
|
98
110
|
end
|
99
111
|
end
|
100
112
|
|
@@ -124,32 +136,6 @@ module Dbd
|
|
124
136
|
end
|
125
137
|
end
|
126
138
|
|
127
|
-
describe "#dup_with_subject" do
|
128
|
-
|
129
|
-
let (:new_fact) do
|
130
|
-
fact_1.dup_with_subject(subject)
|
131
|
-
end
|
132
|
-
|
133
|
-
it "is a different instance" do
|
134
|
-
new_fact.should_not be_equal(fact_1)
|
135
|
-
end
|
136
|
-
|
137
|
-
it "is from the same class" do
|
138
|
-
new_fact.should be_a(fact_1.class)
|
139
|
-
end
|
140
|
-
|
141
|
-
it "has copied over the other attributes except :id, :time_stamp" do
|
142
|
-
(fact_1.class.attributes - [:id, :time_stamp, :subject]).each do |attr|
|
143
|
-
new_fact.send(attr).should == fact_1.send(attr)
|
144
|
-
end
|
145
|
-
end
|
146
|
-
|
147
|
-
it "has set the subject to the Resource subject" do
|
148
|
-
fact_1.subject.should_not == new_fact.subject # double check
|
149
|
-
new_fact.subject.should == subject
|
150
|
-
end
|
151
|
-
end
|
152
|
-
|
153
139
|
describe "update_used_provenance_subjects" do
|
154
140
|
it "sets the value for provenance_subject to true for a fact" do
|
155
141
|
h = {}
|
data/spec/lib/dbd/graph_spec.rb
CHANGED
@@ -48,21 +48,64 @@ module Dbd
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
describe "sets the time_stamp and adds
|
52
|
-
|
53
|
-
|
51
|
+
describe "sets the time_stamp and adds a random time (1..999 nanoseconds) if needed" do
|
52
|
+
|
53
|
+
# NOTE: reduced the far_future from 2500 to 2250 as a workaround for
|
54
|
+
# http://jira.codehaus.org/browse/JRUBY-7095
|
55
|
+
let(:far_future) { TimeStamp.new(time: Time.new(2250,1,1,12,0,0).utc) }
|
56
|
+
|
57
|
+
it "don't touch the time_stamp if already set" do
|
58
|
+
data_fact.time_stamp = far_future
|
54
59
|
subject << data_fact
|
55
|
-
subject.first.time_stamp.should
|
60
|
+
subject.first.time_stamp.should == far_future
|
56
61
|
end
|
57
62
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
63
|
+
describe "sets the time_stamp if not yet set" do
|
64
|
+
|
65
|
+
let(:near_future) { Time.now.utc + 100}
|
66
|
+
let(:fake_time_stamp) { TimeStamp.new(time: near_future) }
|
67
|
+
|
68
|
+
before(:each) do
|
69
|
+
# get this before setting the stub
|
70
|
+
fake_time_stamp # get this before setting the stub
|
71
|
+
end
|
72
|
+
|
73
|
+
it "sets it (to TimeStamp.new)" do
|
74
|
+
TimeStamp.stub(:new).and_return(fake_time_stamp)
|
75
|
+
data_fact.time_stamp.should be_nil # assert pre-condition
|
76
|
+
subject << data_fact
|
77
|
+
subject.first.time_stamp.should == fake_time_stamp
|
78
|
+
end
|
79
|
+
|
80
|
+
it "sends a slightly higher time_stamp than newest_time_stamp if Time.now <= newest_time_stamp" do
|
81
|
+
subject.stub(:newest_time_stamp).and_return(fake_time_stamp)
|
82
|
+
subject << data_fact
|
83
|
+
subject.first.time_stamp.should > fake_time_stamp
|
84
|
+
(subject.first.time_stamp - fake_time_stamp).should < Rational('1/1000_000') # 1 us
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
describe "a ProvenanceResource and a Resource" do
|
90
|
+
|
91
|
+
let(:provenance_resource) { Factories::ProvenanceResource.provenance_resource }
|
92
|
+
let(:resource) { Factories::Resource.facts_resource(provenance_resource.subject) }
|
93
|
+
|
94
|
+
it "does not fail" do
|
95
|
+
subject << provenance_resource
|
96
|
+
end
|
97
|
+
|
98
|
+
it "Adds the facts from the provenance_resource to the graph" do
|
99
|
+
subject << provenance_resource
|
100
|
+
subject.size.should == 2
|
101
|
+
end
|
102
|
+
|
103
|
+
it "Adds the facts from the provenance_resource and the resource to the graph" do
|
104
|
+
subject << provenance_resource
|
105
|
+
subject << resource
|
106
|
+
subject.size.should == 4
|
107
|
+
subject.first.should be_a(ProvenanceFact)
|
108
|
+
subject.last.class.should == Fact
|
66
109
|
end
|
67
110
|
end
|
68
111
|
end
|
@@ -107,7 +150,7 @@ module Dbd
|
|
107
150
|
end
|
108
151
|
|
109
152
|
it "has time_stamp as second value" do
|
110
|
-
first_line.split(',')[1].should match(
|
153
|
+
first_line.split(',')[1].should match(TimeStamp.to_s_regexp)
|
111
154
|
end
|
112
155
|
|
113
156
|
it "has an empty third value (signature of a provenance_fact)" do
|
@@ -174,7 +217,7 @@ module Dbd
|
|
174
217
|
end
|
175
218
|
|
176
219
|
it "has time_stamp as second value" do
|
177
|
-
first_line.split(',')[1].should match(
|
220
|
+
first_line.split(',')[1].should match(TimeStamp.to_s_regexp)
|
178
221
|
end
|
179
222
|
|
180
223
|
it "has provenance_fact_1.subject as third value" do
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'benchmark'
|
3
|
+
|
4
|
+
module Dbd
|
5
|
+
describe "performance" do
|
6
|
+
|
7
|
+
include Benchmark
|
8
|
+
|
9
|
+
def new_subject
|
10
|
+
Fact.new_subject
|
11
|
+
end
|
12
|
+
|
13
|
+
let(:provenance_fact_1) { Factories::ProvenanceFact.context(new_subject) }
|
14
|
+
|
15
|
+
NUMBER_OF_FACTS = 10_000
|
16
|
+
|
17
|
+
describe "#{NUMBER_OF_FACTS} facts" do
|
18
|
+
it "reports and checks the used time" do
|
19
|
+
graph = Graph.new
|
20
|
+
graph << provenance_fact_1
|
21
|
+
# Rehearsal
|
22
|
+
NUMBER_OF_FACTS.times do |counter|
|
23
|
+
data_fact = Factories::Fact.data_fact(provenance_fact_1, new_subject)
|
24
|
+
graph << data_fact
|
25
|
+
end
|
26
|
+
# Actual
|
27
|
+
start = Time.now
|
28
|
+
NUMBER_OF_FACTS.times do |counter|
|
29
|
+
data_fact = Factories::Fact.data_fact(provenance_fact_1, new_subject)
|
30
|
+
graph << data_fact
|
31
|
+
end
|
32
|
+
duration = Time.now - start
|
33
|
+
puts "\nDuration for inserting #{NUMBER_OF_FACTS} facts in the in-memory graph was #{duration*1000_000/NUMBER_OF_FACTS} us PER FACT"
|
34
|
+
graph.size.should == 2 * NUMBER_OF_FACTS + 1
|
35
|
+
duration.should < 0.000_15 * NUMBER_OF_FACTS
|
36
|
+
# typ. 37 us on Mac Ruby 2.0.0
|
37
|
+
# typ. 60 us on Mac jruby 1.7.3
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -48,34 +48,32 @@ module Dbd
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
describe "
|
52
|
-
it "the factory isi valid?" do
|
53
|
-
provenance_fact_1.should be_valid
|
54
|
-
end
|
51
|
+
describe "errors" do
|
55
52
|
|
56
|
-
it "
|
57
|
-
provenance_fact_1.
|
58
|
-
provenance_fact_1.should_not be_valid
|
53
|
+
it "the factory has no errors" do
|
54
|
+
provenance_fact_1.errors.should be_empty
|
59
55
|
end
|
60
56
|
|
61
|
-
|
62
|
-
provenance_fact_1.stub(:subject).and_return(nil)
|
63
|
-
provenance_fact_1.should_not be_valid
|
64
|
-
end
|
65
|
-
end
|
57
|
+
describe "with a provenance_subject" do
|
66
58
|
|
67
|
-
|
59
|
+
before(:each) do
|
60
|
+
provenance_fact_1.stub(:provenance_subject).and_return(subject)
|
61
|
+
end
|
68
62
|
|
69
|
-
|
70
|
-
|
63
|
+
it "errors returns an array with 1 error message" do
|
64
|
+
provenance_fact_1.errors.single.should match(/Provenance subject should not be present in Provenance Fact/)
|
65
|
+
end
|
71
66
|
end
|
72
67
|
|
73
|
-
|
74
|
-
|
75
|
-
|
68
|
+
describe "without subject" do
|
69
|
+
|
70
|
+
before(:each) do
|
71
|
+
provenance_fact_1.stub(:subject).and_return(nil)
|
72
|
+
end
|
76
73
|
|
77
|
-
|
78
|
-
|
74
|
+
it "errors returns an array with an error message" do
|
75
|
+
provenance_fact_1.errors.single.should match(/Subject is missing/)
|
76
|
+
end
|
79
77
|
end
|
80
78
|
end
|
81
79
|
|
@@ -47,17 +47,19 @@ module Dbd
|
|
47
47
|
let(:fact_1) { Factories::Fact.fact_1(provenance_resource_subject) }
|
48
48
|
|
49
49
|
describe "data facts" do
|
50
|
-
it "with correct subject" do
|
50
|
+
it "with correct subject it works" do
|
51
51
|
provenance_resource << provenance_fact_context_with_correct_subject
|
52
52
|
provenance_resource.first.subject.should == provenance_resource_subject
|
53
53
|
end
|
54
54
|
|
55
|
-
it "with incorrect subject it
|
55
|
+
it "with incorrect subject it raises SubjectError" do
|
56
56
|
lambda { provenance_resource << provenance_fact_context_with_incorrect_subject } .
|
57
|
-
should raise_error
|
57
|
+
should raise_error SetOnceError,
|
58
|
+
"Value of subject was #{provenance_fact_context_with_incorrect_subject.subject}, " \
|
59
|
+
"trying to set it to #{provenance_resource.subject}"
|
58
60
|
end
|
59
61
|
|
60
|
-
it "with nil subject" do
|
62
|
+
it "with nil subject it sets the subject" do
|
61
63
|
provenance_resource << provenance_fact_context
|
62
64
|
provenance_resource.first.subject.should == provenance_resource_subject
|
63
65
|
end
|
@@ -67,7 +69,7 @@ module Dbd
|
|
67
69
|
provenance_resource.first.provenance_subject.should be_nil
|
68
70
|
end
|
69
71
|
|
70
|
-
it "with incorrect
|
72
|
+
it "with incorrect provenance_subject it raises ProvenanceError" do
|
71
73
|
lambda { provenance_resource << fact_1 } .
|
72
74
|
should raise_error ProvenanceError
|
73
75
|
end
|