dbd 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/HISTORY.txt +13 -0
- data/README.md +11 -12
- data/dbd.gemspec +1 -1
- data/docs/stories/009_build_and_store_resources_with_provenance.txt +6 -3
- data/docs/stories/010_time_class.txt +23 -0
- data/docs/stories/011_store_resource_in_graph.txt +10 -0
- data/docs/stories/{010_provenance_fact_properties_from_provenance_ontology.txt → 012_provenance_fact_properties_from_provenance_ontology.txt} +1 -1
- data/docs/test.rb +12 -2
- data/lib/dbd.rb +1 -0
- data/lib/dbd/fact.rb +47 -44
- data/lib/dbd/fact/collection.rb +2 -3
- data/lib/dbd/graph.rb +13 -14
- data/lib/dbd/provenance_fact.rb +4 -4
- data/lib/dbd/provenance_resource.rb +5 -4
- data/lib/dbd/resource.rb +19 -40
- data/lib/dbd/time_stamp.rb +105 -0
- data/lib/dbd/version.rb +1 -1
- data/spec/lib/dbd/fact/collection_spec.rb +8 -6
- data/spec/lib/dbd/fact_spec.rb +38 -52
- data/spec/lib/dbd/graph_spec.rb +57 -14
- data/spec/lib/dbd/performance_spec.rb +41 -0
- data/spec/lib/dbd/provenance_fact_spec.rb +18 -20
- data/spec/lib/dbd/provenance_resource_spec.rb +7 -5
- data/spec/lib/dbd/resource_spec.rb +27 -29
- data/spec/lib/dbd/time_stamp_spec.rb +110 -0
- metadata +16 -34
@@ -0,0 +1,105 @@
|
|
1
|
+
module Dbd
|
2
|
+
|
3
|
+
##
|
4
|
+
# TimeStamp
|
5
|
+
#
|
6
|
+
# Each Fact has a time_stamp with a granularity of 1 ns. The small
|
7
|
+
# granularity is essential to allow enough "density" of Facts in a
|
8
|
+
# large fact stream. Since all Facts need to have a strictly
|
9
|
+
# monotonically increasing time_stamp, this causes a limitation of
|
10
|
+
# max 1_000_000_000 Facts per second in a fact stream.
|
11
|
+
#
|
12
|
+
# A second reason for a fine grained granularity of the time_stamp
|
13
|
+
# is to reduce the chance (but not to zero) for collisions between
|
14
|
+
# Facts when 2 (or more) fact streams with overlapping time ranges
|
15
|
+
# need to be merged. But, collisions are always possible and need
|
16
|
+
# to be handled (since this can be expensive, we need to avoid them).
|
17
|
+
#
|
18
|
+
# A practical problem with calculating a "randomized" time_stamp is
|
19
|
+
# that the system reports a Wall clock with a granularity of 1 us on
|
20
|
+
# MRI Ruby and only 1 ms on JRuby (see JSR 310). To solve this problem,
|
21
|
+
# some nifty tricks are needed to create more "randomized" time_stamps,
|
22
|
+
# while still guaranteeing the strictly monotonic increase in an
|
23
|
+
# unpredictable fact stream.
|
24
|
+
#
|
25
|
+
# Performance measurements show a typical 30 - 60 us delay between the
|
26
|
+
# consecutively created facts (on MRI and JRuby), so a randomization of
|
27
|
+
# e.g. 1 - 999 ns should not cause fundamental problems for the density
|
28
|
+
# of the facts (even if computers speed up by a factor of 30 or with an
|
29
|
+
# implementation in a faster language). Still this is an ad-hoc
|
30
|
+
# optimization at creation time and can be optimized without breaking
|
31
|
+
# the specification of the fact stream.
|
32
|
+
#
|
33
|
+
# A time_stamp does not need to represent the exact time of the
|
34
|
+
# creation of the fact; it only has to increase strictly monotonically
|
35
|
+
# in a fact stream.
|
36
|
+
class TimeStamp
|
37
|
+
|
38
|
+
attr_reader :time
|
39
|
+
|
40
|
+
##
|
41
|
+
# Builds a new TimeStamp.
|
42
|
+
#
|
43
|
+
# @param [Hash{Symbol => Object}] options
|
44
|
+
# @option options [Time] :time (Time.now) force the time to this value
|
45
|
+
# @option options [TimeStamp] :larger_than (void) time_stamp must be larger than this
|
46
|
+
def initialize(options={})
|
47
|
+
@time = options[:time] || new_time(options[:larger_than])
|
48
|
+
end
|
49
|
+
|
50
|
+
##
|
51
|
+
# regexp for the nanosecond granularity and in UTC
|
52
|
+
#
|
53
|
+
# Can be used to validate input strings or in tests.
|
54
|
+
def self.to_s_regexp
|
55
|
+
/\d{4}-\d\d-\d\d \d\d:\d\d:\d\d\.\d{9} UTC/
|
56
|
+
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
def new_time(larger_than)
|
61
|
+
max_with_nil(Time.now.utc, (larger_than && larger_than.time)) + random_offset
|
62
|
+
end
|
63
|
+
|
64
|
+
def random_offset
|
65
|
+
Rational("#{1+rand(999)}/1_000_000_000")
|
66
|
+
end
|
67
|
+
|
68
|
+
public
|
69
|
+
|
70
|
+
##
|
71
|
+
# to a nanosecond granularity and in UTC
|
72
|
+
def to_s
|
73
|
+
@time.strftime('%F %T.%N %Z')
|
74
|
+
end
|
75
|
+
|
76
|
+
def ==(other)
|
77
|
+
@time == other.time
|
78
|
+
end
|
79
|
+
|
80
|
+
def hash
|
81
|
+
@time.hash
|
82
|
+
end
|
83
|
+
|
84
|
+
def >(other)
|
85
|
+
@time > other.time
|
86
|
+
end
|
87
|
+
|
88
|
+
def <(other)
|
89
|
+
@time < other.time
|
90
|
+
end
|
91
|
+
|
92
|
+
def <=(other)
|
93
|
+
@time <= other.time
|
94
|
+
end
|
95
|
+
|
96
|
+
def +(seconds)
|
97
|
+
TimeStamp.new(time: (@time + seconds))
|
98
|
+
end
|
99
|
+
|
100
|
+
def -(other)
|
101
|
+
@time - other.time
|
102
|
+
end
|
103
|
+
|
104
|
+
end
|
105
|
+
end
|
data/lib/dbd/version.rb
CHANGED
@@ -98,8 +98,8 @@ module Dbd
|
|
98
98
|
|
99
99
|
describe "validate that only 'newer' elements are added" do
|
100
100
|
before(:each) do
|
101
|
-
fact_2_with_subject.stub(:time_stamp).and_return(Time.new(2013,05,9,12,0,0))
|
102
|
-
fact_3_with_subject.stub(:time_stamp).and_return(Time.new(2013,05,9,12,0,1))
|
101
|
+
fact_2_with_subject.stub(:time_stamp).and_return(TimeStamp.new(time: Time.new(2013,05,9,12,0,0)))
|
102
|
+
fact_3_with_subject.stub(:time_stamp).and_return(TimeStamp.new(time: Time.new(2013,05,9,12,0,1)))
|
103
103
|
end
|
104
104
|
|
105
105
|
it "adding an element with a newer time_stamp succeeds" do
|
@@ -171,14 +171,16 @@ module Dbd
|
|
171
171
|
end
|
172
172
|
end
|
173
173
|
|
174
|
-
describe "validate that facts
|
174
|
+
describe "validate that facts do not have errors when loading in the Fact::Collection" do
|
175
175
|
it "succeeds with a fact from factory" do
|
176
176
|
subject << fact_2_with_subject # should_not raise_error
|
177
177
|
end
|
178
178
|
|
179
|
-
it "raises FactError when fact.
|
180
|
-
provenance_fact_context.stub(:
|
181
|
-
lambda { subject << provenance_fact_context } . should raise_error
|
179
|
+
it "raises FactError with message when fact.errors has errors" do
|
180
|
+
provenance_fact_context.stub(:errors).and_return(["Error 1", "Error 2"])
|
181
|
+
lambda { subject << provenance_fact_context } . should raise_error(
|
182
|
+
FactError,
|
183
|
+
"Error 1, Error 2.")
|
182
184
|
end
|
183
185
|
end
|
184
186
|
|
data/spec/lib/dbd/fact_spec.rb
CHANGED
@@ -58,43 +58,55 @@ module Dbd
|
|
58
58
|
end
|
59
59
|
end
|
60
60
|
|
61
|
-
describe "time_stamp" do
|
62
|
-
it "can be set after creation" do
|
63
|
-
fact_1.time_stamp = Time.now
|
64
|
-
end
|
61
|
+
describe "time_stamp=" do
|
65
62
|
|
66
|
-
it "
|
67
|
-
|
68
|
-
fact_1.time_stamp = time_now
|
69
|
-
fact_1.time_stamp.should == time_now
|
63
|
+
it "checks the type (too easy to try to give a Time arg" do
|
64
|
+
lambda { fact_1.time_stamp = Time.now } . should raise_error(ArgumentError)
|
70
65
|
end
|
71
66
|
|
72
|
-
|
73
|
-
time_now = Time.now
|
74
|
-
fact_1.time_stamp = time_now + 0.000_000_002
|
75
|
-
fact_1.time_stamp.should > time_now
|
76
|
-
end
|
67
|
+
describe "set_once" do
|
77
68
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
69
|
+
let(:time_stamp_now) { TimeStamp.new }
|
70
|
+
|
71
|
+
it "can be set when nil" do
|
72
|
+
fact_1.time_stamp = time_stamp_now
|
73
|
+
fact_1.time_stamp.should == time_stamp_now
|
74
|
+
end
|
75
|
+
|
76
|
+
describe "setting it two times" do
|
77
|
+
it "with a different value raises a SetOnceError" do
|
78
|
+
fact_1.time_stamp = time_stamp_now
|
79
|
+
lambda { fact_1.time_stamp = (time_stamp_now+1) } . should raise_error SetOnceError
|
80
|
+
end
|
81
|
+
end
|
82
82
|
end
|
83
83
|
end
|
84
84
|
|
85
|
-
describe "
|
86
|
-
it "the factory
|
87
|
-
fact_2_with_subject.should
|
85
|
+
describe "errors" do
|
86
|
+
it "the factory has no errors" do
|
87
|
+
fact_2_with_subject.errors.should be_empty
|
88
88
|
end
|
89
89
|
|
90
|
-
|
91
|
-
|
92
|
-
|
90
|
+
describe "without provenance_subject" do
|
91
|
+
|
92
|
+
before(:each) do
|
93
|
+
fact_2_with_subject.stub(:provenance_subject).and_return(nil)
|
94
|
+
end
|
95
|
+
|
96
|
+
it "errors returns an array with 1 error message" do
|
97
|
+
fact_2_with_subject.errors.single.should match(/Provenance subject is missing/)
|
98
|
+
end
|
93
99
|
end
|
94
100
|
|
95
|
-
|
96
|
-
|
97
|
-
|
101
|
+
describe "without subject" do
|
102
|
+
|
103
|
+
before(:each) do
|
104
|
+
fact_2_with_subject.stub(:subject).and_return(nil)
|
105
|
+
end
|
106
|
+
|
107
|
+
it "errors returns an array with an errorm message" do
|
108
|
+
fact_2_with_subject.errors.single.should match(/Subject is missing/)
|
109
|
+
end
|
98
110
|
end
|
99
111
|
end
|
100
112
|
|
@@ -124,32 +136,6 @@ module Dbd
|
|
124
136
|
end
|
125
137
|
end
|
126
138
|
|
127
|
-
describe "#dup_with_subject" do
|
128
|
-
|
129
|
-
let (:new_fact) do
|
130
|
-
fact_1.dup_with_subject(subject)
|
131
|
-
end
|
132
|
-
|
133
|
-
it "is a different instance" do
|
134
|
-
new_fact.should_not be_equal(fact_1)
|
135
|
-
end
|
136
|
-
|
137
|
-
it "is from the same class" do
|
138
|
-
new_fact.should be_a(fact_1.class)
|
139
|
-
end
|
140
|
-
|
141
|
-
it "has copied over the other attributes except :id, :time_stamp" do
|
142
|
-
(fact_1.class.attributes - [:id, :time_stamp, :subject]).each do |attr|
|
143
|
-
new_fact.send(attr).should == fact_1.send(attr)
|
144
|
-
end
|
145
|
-
end
|
146
|
-
|
147
|
-
it "has set the subject to the Resource subject" do
|
148
|
-
fact_1.subject.should_not == new_fact.subject # double check
|
149
|
-
new_fact.subject.should == subject
|
150
|
-
end
|
151
|
-
end
|
152
|
-
|
153
139
|
describe "update_used_provenance_subjects" do
|
154
140
|
it "sets the value for provenance_subject to true for a fact" do
|
155
141
|
h = {}
|
data/spec/lib/dbd/graph_spec.rb
CHANGED
@@ -48,21 +48,64 @@ module Dbd
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
describe "sets the time_stamp and adds
|
52
|
-
|
53
|
-
|
51
|
+
describe "sets the time_stamp and adds a random time (1..999 nanoseconds) if needed" do
|
52
|
+
|
53
|
+
# NOTE: reduced the far_future from 2500 to 2250 as work around for
|
54
|
+
# http://jira.codehaus.org/browse/JRUBY-7095
|
55
|
+
let(:far_future) { TimeStamp.new(time: Time.new(2250,1,1,12,0,0).utc) }
|
56
|
+
|
57
|
+
it "don't touch the time_stamp if already set" do
|
58
|
+
data_fact.time_stamp = far_future
|
54
59
|
subject << data_fact
|
55
|
-
subject.first.time_stamp.should
|
60
|
+
subject.first.time_stamp.should == far_future
|
56
61
|
end
|
57
62
|
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
63
|
+
describe "sets the time_stamp if not yet set" do
|
64
|
+
|
65
|
+
let(:near_future) { Time.now.utc + 100}
|
66
|
+
let(:fake_time_stamp) { TimeStamp.new(time: near_future) }
|
67
|
+
|
68
|
+
before(:each) do
|
69
|
+
# get this before setting the stub
|
70
|
+
fake_time_stamp # get this before setting the stub
|
71
|
+
end
|
72
|
+
|
73
|
+
it "sets it (to TimeStamp.new)" do
|
74
|
+
TimeStamp.stub(:new).and_return(fake_time_stamp)
|
75
|
+
data_fact.time_stamp.should be_nil # assert pre-condition
|
76
|
+
subject << data_fact
|
77
|
+
subject.first.time_stamp.should == fake_time_stamp
|
78
|
+
end
|
79
|
+
|
80
|
+
it "sends a slightly higher time_stamp than newest_time_stamp if Time.now <= newest_time_stamp" do
|
81
|
+
subject.stub(:newest_time_stamp).and_return(fake_time_stamp)
|
82
|
+
subject << data_fact
|
83
|
+
subject.first.time_stamp.should > fake_time_stamp
|
84
|
+
(subject.first.time_stamp - fake_time_stamp).should < Rational('1/1000_000') # 1 us
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
describe "a ProvenanceResource and a Resource" do
|
90
|
+
|
91
|
+
let(:provenance_resource) { Factories::ProvenanceResource.provenance_resource }
|
92
|
+
let(:resource) { Factories::Resource.facts_resource(provenance_resource.subject) }
|
93
|
+
|
94
|
+
it "does not fail" do
|
95
|
+
subject << provenance_resource
|
96
|
+
end
|
97
|
+
|
98
|
+
it "Adds the facts from the provenance_resource to the graph" do
|
99
|
+
subject << provenance_resource
|
100
|
+
subject.size.should == 2
|
101
|
+
end
|
102
|
+
|
103
|
+
it "Adds the facts from the provenance_resource and the resource to the graph" do
|
104
|
+
subject << provenance_resource
|
105
|
+
subject << resource
|
106
|
+
subject.size.should == 4
|
107
|
+
subject.first.should be_a(ProvenanceFact)
|
108
|
+
subject.last.class.should == Fact
|
66
109
|
end
|
67
110
|
end
|
68
111
|
end
|
@@ -107,7 +150,7 @@ module Dbd
|
|
107
150
|
end
|
108
151
|
|
109
152
|
it "has time_stamp as second value" do
|
110
|
-
first_line.split(',')[1].should match(
|
153
|
+
first_line.split(',')[1].should match(TimeStamp.to_s_regexp)
|
111
154
|
end
|
112
155
|
|
113
156
|
it "has an empty third value (signature of a provenance_fact)" do
|
@@ -174,7 +217,7 @@ module Dbd
|
|
174
217
|
end
|
175
218
|
|
176
219
|
it "has time_stamp as second value" do
|
177
|
-
first_line.split(',')[1].should match(
|
220
|
+
first_line.split(',')[1].should match(TimeStamp.to_s_regexp)
|
178
221
|
end
|
179
222
|
|
180
223
|
it "has provenance_fact_1.subject as third value" do
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
require 'benchmark'
|
3
|
+
|
4
|
+
module Dbd
|
5
|
+
describe "performance" do
|
6
|
+
|
7
|
+
include Benchmark
|
8
|
+
|
9
|
+
def new_subject
|
10
|
+
Fact.new_subject
|
11
|
+
end
|
12
|
+
|
13
|
+
let(:provenance_fact_1) { Factories::ProvenanceFact.context(new_subject) }
|
14
|
+
|
15
|
+
NUMBER_OF_FACTS = 10_000
|
16
|
+
|
17
|
+
describe "#{NUMBER_OF_FACTS} facts" do
|
18
|
+
it "reports and checks the used time" do
|
19
|
+
graph = Graph.new
|
20
|
+
graph << provenance_fact_1
|
21
|
+
# Rehearsal
|
22
|
+
NUMBER_OF_FACTS.times do |counter|
|
23
|
+
data_fact = Factories::Fact.data_fact(provenance_fact_1, new_subject)
|
24
|
+
graph << data_fact
|
25
|
+
end
|
26
|
+
# Actual
|
27
|
+
start = Time.now
|
28
|
+
NUMBER_OF_FACTS.times do |counter|
|
29
|
+
data_fact = Factories::Fact.data_fact(provenance_fact_1, new_subject)
|
30
|
+
graph << data_fact
|
31
|
+
end
|
32
|
+
duration = Time.now - start
|
33
|
+
puts "\nDuration for inserting #{NUMBER_OF_FACTS} facts in the in-memory graph was #{duration*1000_000/NUMBER_OF_FACTS} us PER FACT"
|
34
|
+
graph.size.should == 2 * NUMBER_OF_FACTS + 1
|
35
|
+
duration.should < 0.000_15 * NUMBER_OF_FACTS
|
36
|
+
# typ. 37 us on Mac Ruby 2.0.0
|
37
|
+
# typ. 60 us on Mac jruby 1.7.3
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -48,34 +48,32 @@ module Dbd
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
describe "
|
52
|
-
it "the factory isi valid?" do
|
53
|
-
provenance_fact_1.should be_valid
|
54
|
-
end
|
51
|
+
describe "errors" do
|
55
52
|
|
56
|
-
it "
|
57
|
-
provenance_fact_1.
|
58
|
-
provenance_fact_1.should_not be_valid
|
53
|
+
it "the factory has no errors" do
|
54
|
+
provenance_fact_1.errors.should be_empty
|
59
55
|
end
|
60
56
|
|
61
|
-
|
62
|
-
provenance_fact_1.stub(:subject).and_return(nil)
|
63
|
-
provenance_fact_1.should_not be_valid
|
64
|
-
end
|
65
|
-
end
|
57
|
+
describe "with a provenance_subject" do
|
66
58
|
|
67
|
-
|
59
|
+
before(:each) do
|
60
|
+
provenance_fact_1.stub(:provenance_subject).and_return(subject)
|
61
|
+
end
|
68
62
|
|
69
|
-
|
70
|
-
|
63
|
+
it "errors returns an array with 1 error message" do
|
64
|
+
provenance_fact_1.errors.single.should match(/Provenance subject should not be present in Provenance Fact/)
|
65
|
+
end
|
71
66
|
end
|
72
67
|
|
73
|
-
|
74
|
-
|
75
|
-
|
68
|
+
describe "without subject" do
|
69
|
+
|
70
|
+
before(:each) do
|
71
|
+
provenance_fact_1.stub(:subject).and_return(nil)
|
72
|
+
end
|
76
73
|
|
77
|
-
|
78
|
-
|
74
|
+
it "errors returns an array with an error message" do
|
75
|
+
provenance_fact_1.errors.single.should match(/Subject is missing/)
|
76
|
+
end
|
79
77
|
end
|
80
78
|
end
|
81
79
|
|
@@ -47,17 +47,19 @@ module Dbd
|
|
47
47
|
let(:fact_1) { Factories::Fact.fact_1(provenance_resource_subject) }
|
48
48
|
|
49
49
|
describe "data facts" do
|
50
|
-
it "with correct subject" do
|
50
|
+
it "with correct subject it works" do
|
51
51
|
provenance_resource << provenance_fact_context_with_correct_subject
|
52
52
|
provenance_resource.first.subject.should == provenance_resource_subject
|
53
53
|
end
|
54
54
|
|
55
|
-
it "with incorrect subject it
|
55
|
+
it "with incorrect subject it raises SubjectError" do
|
56
56
|
lambda { provenance_resource << provenance_fact_context_with_incorrect_subject } .
|
57
|
-
should raise_error
|
57
|
+
should raise_error SetOnceError,
|
58
|
+
"Value of subject was #{provenance_fact_context_with_incorrect_subject.subject}, " \
|
59
|
+
"trying to set it to #{provenance_resource.subject}"
|
58
60
|
end
|
59
61
|
|
60
|
-
it "with nil subject" do
|
62
|
+
it "with nil subject it sets the subject" do
|
61
63
|
provenance_resource << provenance_fact_context
|
62
64
|
provenance_resource.first.subject.should == provenance_resource_subject
|
63
65
|
end
|
@@ -67,7 +69,7 @@ module Dbd
|
|
67
69
|
provenance_resource.first.provenance_subject.should be_nil
|
68
70
|
end
|
69
71
|
|
70
|
-
it "with incorrect
|
72
|
+
it "with incorrect provenance_subject it raises ProvenanceError" do
|
71
73
|
lambda { provenance_resource << fact_1 } .
|
72
74
|
should raise_error ProvenanceError
|
73
75
|
end
|