strokedb 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. data/CONTRIBUTORS +7 -0
  2. data/CREDITS +13 -0
  3. data/README +44 -0
  4. data/bin/sdbc +2 -0
  5. data/lib/config/config.rb +161 -0
  6. data/lib/data_structures/inverted_list.rb +297 -0
  7. data/lib/data_structures/point_query.rb +24 -0
  8. data/lib/data_structures/skiplist.rb +302 -0
  9. data/lib/document/associations.rb +107 -0
  10. data/lib/document/callback.rb +11 -0
  11. data/lib/document/coercions.rb +57 -0
  12. data/lib/document/delete.rb +28 -0
  13. data/lib/document/document.rb +684 -0
  14. data/lib/document/meta.rb +261 -0
  15. data/lib/document/slot.rb +199 -0
  16. data/lib/document/util.rb +27 -0
  17. data/lib/document/validations.rb +704 -0
  18. data/lib/document/versions.rb +106 -0
  19. data/lib/document/virtualize.rb +82 -0
  20. data/lib/init.rb +57 -0
  21. data/lib/stores/chainable_storage.rb +57 -0
  22. data/lib/stores/inverted_list_index/inverted_list_file_storage.rb +56 -0
  23. data/lib/stores/inverted_list_index/inverted_list_index.rb +49 -0
  24. data/lib/stores/remote_store.rb +172 -0
  25. data/lib/stores/skiplist_store/chunk.rb +119 -0
  26. data/lib/stores/skiplist_store/chunk_storage.rb +21 -0
  27. data/lib/stores/skiplist_store/file_chunk_storage.rb +44 -0
  28. data/lib/stores/skiplist_store/memory_chunk_storage.rb +37 -0
  29. data/lib/stores/skiplist_store/skiplist_store.rb +217 -0
  30. data/lib/stores/store.rb +5 -0
  31. data/lib/sync/chain_sync.rb +38 -0
  32. data/lib/sync/diff.rb +126 -0
  33. data/lib/sync/lamport_timestamp.rb +81 -0
  34. data/lib/sync/store_sync.rb +79 -0
  35. data/lib/sync/stroke_diff/array.rb +102 -0
  36. data/lib/sync/stroke_diff/default.rb +21 -0
  37. data/lib/sync/stroke_diff/hash.rb +186 -0
  38. data/lib/sync/stroke_diff/string.rb +116 -0
  39. data/lib/sync/stroke_diff/stroke_diff.rb +9 -0
  40. data/lib/util/blankslate.rb +42 -0
  41. data/lib/util/ext/blank.rb +50 -0
  42. data/lib/util/ext/enumerable.rb +36 -0
  43. data/lib/util/ext/fixnum.rb +16 -0
  44. data/lib/util/ext/hash.rb +22 -0
  45. data/lib/util/ext/object.rb +8 -0
  46. data/lib/util/ext/string.rb +35 -0
  47. data/lib/util/inflect.rb +217 -0
  48. data/lib/util/java_util.rb +9 -0
  49. data/lib/util/lazy_array.rb +54 -0
  50. data/lib/util/lazy_mapping_array.rb +64 -0
  51. data/lib/util/lazy_mapping_hash.rb +46 -0
  52. data/lib/util/serialization.rb +29 -0
  53. data/lib/util/trigger_partition.rb +136 -0
  54. data/lib/util/util.rb +38 -0
  55. data/lib/util/xml.rb +6 -0
  56. data/lib/view/view.rb +55 -0
  57. data/script/console +70 -0
  58. data/strokedb.rb +75 -0
  59. metadata +148 -0
data/CONTRIBUTORS ADDED
@@ -0,0 +1,7 @@
1
+ Adrian Madrid <aemadrid@gmail.com>
2
+ Aman Gupta <aman@tmm1.net>
3
+ Claudio Perez Gamayo <crossblaim@gmail.com>
4
+ elliottcable.name <strokedb@elliottcable.com>
5
+ Joshua Miller <elefantstn@gmail.com>
6
+ Oleg Dashevskii <be9@be9.ru>
7
+ Michael Klishin (novemberain.com) <michael.s.klishin@gmail.com>
data/CREDITS ADDED
@@ -0,0 +1,13 @@
1
+
2
+ MANY THANKS TO
3
+ (for knowledge and aspiration)
4
+
5
+ 1) Andrew S. Tanenbaum, for book "Distributed Systems".
6
+ 2) William Pugh for skiplists.
7
+ 3) Xin Dong & Alon Halevy for the great article "Indexing Dataspaces".
8
+ 4) Linus Torvalds & Junio C. Hamano for the Git version control system.
9
+ 5) Damien Katz for the CouchDB.
10
+ 6) Yukihiro Matsumoto for the Ruby programming language.
11
+ 7) Dr. Leslie Lamport for timestamps and signatures.
12
+ 8) Victor Sovetov for years of talking with Yurii about databases and metaframes.
13
+ 9) Konstantin Olenin for talks with Oleg about distributed systems and algorithms.
data/README ADDED
@@ -0,0 +1,44 @@
1
+ StrokeDB is a distributed document-oriented database engine.
2
+ Main features are complete decentralization, JSON object format,
3
+ metadocuments, integration with runtime (it is just a ruby library).
4
+
5
+ = Installing StrokeDB
6
+
7
+ === Requirements
8
+
9
+ You need to install a few gems in order to run StrokeDB:
10
+
11
+ $ sudo gem install diff-lcs uuidtools json
12
+
13
+ *Note*: There is no need for <tt>uuidtools</tt> if you use JRuby
14
+
15
+ === Getting source code
16
+
17
+ $ git clone git://gitorious.org/strokedb/mainline.git strokedb
18
+ or
19
+ $ git clone http://git.gitorious.org/strokedb/mainline.git strokedb
20
+
21
+ === Installing rubygem
22
+
23
+ There is no gem for StrokeDB (yet). As soon as we release 0.1, it will become available this way, too.
24
+
25
+ === <i>(Optional) Running test suite</i>
26
+
27
+ $ cd strokedb/strokedb-ruby
28
+ $ rake ci
29
+ $ rake jci # for jruby, jruby should be in PATH
30
+
31
+
32
+ = Starting points
33
+
34
+ One of the most important concepts of StrokeDB is a StrokeDB::Document.
35
+
36
+
37
+ = Some benchmarks
38
+
39
+ $ rake bench
40
+
41
+ = AUTHORS
42
+
43
+ * Yurii Rashkovskii <yrashk@issuesdone.com>
44
+ * Oleg Andreev <oleganza@gmail.com>
data/bin/sdbc ADDED
@@ -0,0 +1,2 @@
1
+ #! /usr/bin/env ruby
2
+ load File.dirname(__FILE__) + '/../script/console'
@@ -0,0 +1,161 @@
1
module StrokeDB
  # Errors raised in the process of configuration.
  #
  # NOTE(review): these inherit from Exception rather than StandardError, so a
  # bare `rescue` will not catch them. Callers may depend on that, so the
  # superclass is kept as-is — confirm before changing.
  class UnknownStorageTypeError < Exception; end
  class UnknownIndexTypeError < Exception; end
  class UnknownStoreTypeError < Exception; end

  class Config
    #
    # Load config from a JSON file, optionally making it the default one.
    #
    def Config.load(filename, default = false)
      build(JSON.parse(IO.read(filename)).merge(:default => default))
    end

    #
    # Build the config from given options.
    #
    # Supported options are:
    #
    #   :default        - if set to true, config becomes the default one.
    #   :storages       - must be an array of storage types.
    #                     Appropriate storages will be initialized and chained
    #                     together. Defaults to [:memory_chunk, :file_chunk]
    #   :index_storages - index storages. Defaults to [:inverted_list_file].
    #   :index          - index type. Defaults to :inverted_list.
    #   :base_path      - if set, specifies the path for storages. Otherwise,
    #                     current directory is used.
    #   :store          - store type to use. Defaults to :skiplist.
    #   :store_options  - options passed to the created store
    #
    def Config.build(opts = {})
      opts = opts.stringify_keys

      config = new(opts['default'])
      storages = opts['storages'] || [:memory_chunk, :file_chunk]

      base_path = opts['base_path'] || './'

      # Registers a storage keyed by its own type name, rooted under base_path.
      add_storage = lambda do |name|
        config.add_storage(name, name, :path => File.join(base_path, name.to_s))
      end

      ### setup document storages ###

      initialized_storages = storages.map(&add_storage)
      config.chain(*storages) if storages.size >= 2

      # The next storage in the chain is authoritative for the previous one.
      initialized_storages.each_consecutive_pair do |cur, nxt|
        cur.authoritative_source = nxt
      end

      ### setup index storages and indexes ###

      index_storages = opts['index_storages'] || [:inverted_list_file]
      index_storages.each(&add_storage)

      config.add_index(:default, opts['index'] || :inverted_list, index_storages.first)

      config.add_store(:default, opts['store'] || :skiplist,
        { :storage => storages.first }.merge(opts['store_options'] || {}))

      ### save config ###

      config.build_config = opts.except('default')

      FileUtils.mkdir_p base_path
      File.open(File.join(base_path, 'config'), "w+") do |file|
        file.write config.build_config.to_json
      end

      config
    end

    attr_accessor :build_config
    attr_reader :storages, :indexes, :stores

    def initialize(default = false)
      @storages, @indexes, @stores = {}, {}, {}

      ::StrokeDB.default_config = self if default
    end

    # Look a component up by key; storages take precedence over indexes.
    def [](name)
      @storages[name] || @indexes[name]
    end

    def add_storage(key, type, *args)
      @storages[key] = constantize(:storage, type).new(*args)
    end

    # Chain two or more registered storages together, in the given key order.
    def chain_storages(*args)
      raise ArgumentError, "Not enough storages to chain storages" unless args.size >= 2

      args.map { |x| @storages[x] || raise("Missing storage #{x}") }.each_consecutive_pair do |cur, nxt|
        cur.add_chained_storage! nxt
      end
    end

    alias :chain :chain_storages

    def add_index(key, type, storage_key, store_key = nil)
      @indexes[key] = constantize(:index, type).new(@storages[storage_key])
      @indexes[key].document_store = @stores[store_key] if store_key
    end

    def add_store(key, type, options = {})
      options[:storage] = @storages[options[:storage] || :default]
      raise "Missing storage for store #{key}" unless options[:storage]

      # BUGFIX: the original `options[:index] ||= @indexes[options[:index] || :default]`
      # never resolved an explicitly supplied index *key* into an index
      # instance — the truthy symbol short-circuited ||=, so the store
      # received a bare symbol. Resolve registered keys (or the default when
      # none is given); any other value is passed through untouched.
      if options[:index].nil? || @indexes.key?(options[:index])
        options[:index] = @indexes[options[:index] || :default]
      end

      store_instance = constantize(:store, type).new(options)

      options[:index].document_store = store_instance if options[:index]

      @stores[key] = store_instance
    end

    private

    # Resolves a (kind, type) pair such as (:storage, :memory_chunk) to the
    # constant StrokeDB::MemoryChunkStorage.
    # Raises Unknown<Kind>TypeError when the constant cannot be found.
    def constantize(kind, type)
      StrokeDB.const_get type_fullname(kind, type)
    rescue
      exception = StrokeDB.const_get("Unknown#{kind.to_s.camelize}TypeError")
      raise exception, "Unable to load #{kind} type #{type}"
    end

    # "<Type><Kind>", e.g. type_fullname(:storage, :file_chunk) => "FileChunkStorage".
    # (The original declared these parameters in the reverse of the order they
    # are passed from #constantize, which made the method read as if it built
    # "<Kind><Type>"; renamed for clarity — the produced string is unchanged.)
    def type_fullname(kind, type)
      "#{type.to_s.camelize}#{kind.to_s.camelize}"
    end
  end

  class << self
    # Makes StrokeDB.default_config live in thread-local storage.
    def use_perthread_default_config!
      class << self
        def default_config
          Thread.current['StrokeDB.default_config']
        end
        def default_config=(config)
          Thread.current['StrokeDB.default_config'] = config
        end
      end
    end

    # Makes StrokeDB.default_config a single process-wide global.
    def use_global_default_config!
      class << self
        def default_config
          $strokedb_default_config
        end
        def default_config=(config)
          $strokedb_default_config = config
        end
      end
    end
  end
end

StrokeDB.use_perthread_default_config!
@@ -0,0 +1,297 @@
1
module StrokeDB
  # An inverted index built on a skip list. Attribute/value pairs are encoded
  # into flat string keys ("value SEPARATOR attribute TERMINATOR"), and each
  # key holds the array of data items inserted under it.
  class InvertedList
    include Enumerable

    SEPARATOR = "\x01"
    TERMINATOR = "\x02"

    attr_accessor :default, :head, :tail, :cut_level

    def initialize(cut_level = nil)
      @cut_level = cut_level
      @head = HeadNode.new
      @tail = TailNode.new
      @head.forward[0] = @tail
    end

    # Indexes +data+ under every attribute/value pair in +slots+.
    def insert(slots, data, __cheaters_level = nil)
      slots.each do |key, value|
        composite = value.to_s + SEPARATOR + key.to_s + TERMINATOR
        insert_attribute(composite, data, __cheaters_level)
      end
    end

    # Inserts +value+ under the flat +key+. Returns self, or — when a chunk
    # cut occurs — the [current_chunk, next_chunk] pair from new_chunks!.
    def insert_attribute(key, value, __cheaters_level = nil)
      @size_cache = nil
      preds = Array.new(@head.level)
      node = @head
      # Walk down the levels, remembering the rightmost predecessor per level.
      @head.level.downto(1) do |lvl|
        node = node.forward[lvl - 1] while node.forward[lvl - 1] < key
        preds[lvl - 1] = node
      end
      node = node.forward[0]
      if node.key == key
        # Key already present: just accumulate the value.
        node.values.push value
      else
        level = __cheaters_level || random_level
        level = 1 if empty?
        if level > @head.level
          # New node is taller than the head: the head itself is the
          # predecessor on the extra levels.
          (@head.level + 1).upto(level) { |lvl| preds[lvl - 1] = @head }
        end

        fresh = Node.new(level, key, value)

        if cut?(level, preds[0])
          return new_chunks!(fresh, preds)
        else
          level.times do |i|
            fresh.forward[i] = preds[i].forward[i] || @tail
            preds[i].forward[i] = fresh
          end
        end
      end
      self
    end

    # Removes +data+ from every attribute/value pair in +slots+.
    def delete(slots, data)
      slots.each do |key, value|
        composite = value.to_s + SEPARATOR + key.to_s + TERMINATOR
        delete_attribute(composite, data)
      end
    end

    # Removes +value+ from the values stored under +key+. Returns +value+
    # when the key exists, nil otherwise. Note: the node itself is kept even
    # when its value list becomes empty (matches historical behavior).
    def delete_attribute(key, value)
      @size_cache = nil
      preds = Array.new(@head.level)
      node = @head
      @head.level.downto(1) do |lvl|
        node = node.forward[lvl - 1] while node.forward[lvl - 1] < key
        preds[lvl - 1] = node
      end
      node = node.forward[0]
      return nil unless node.key == key
      node.values.delete value
      value
    end

    # Finders

    # Intersects the value sets for every slot of the query; returns a Set.
    def find(*args)
      query = PointQuery.new(*args)
      total = Set.new
      first_pass = true
      query.slots.each do |key, value|
        composite = value.to_s + SEPARATOR + key.to_s + TERMINATOR
        node = find_node(composite)
        matches = node ? node.values : []
        total = first_pass ? matches.to_set : (total & matches)
        first_pass = false
      end
      total
    end

    # Exact lookup by flat key. With a block, the block decides whether the
    # candidate node's key matches the requested one.
    def find_node(key)
      node = @head
      @head.level.downto(1) do |lvl|
        node = node.forward[lvl - 1] while node.forward[lvl - 1] < key
      end
      node = node.forward[0]
      return (node.key && yield(node.key, key) ? node : nil) if block_given?
      return node if node.key == key
      nil
    end

    def first_node
      @head.forward[0]
    end

    # Node count, cached until the next insert/delete.
    def size
      @size_cache ||= inject(0) { |count, _| count + 1 }
    end

    def empty?
      @head.forward[0] == @tail
    end

    # Returns a string representation of the Skiplist.
    def to_s
      "#<#{self.class.name} " +
        [@head.to_s, map { |node| node.to_s }, @tail.to_s].flatten.join(', ') +
        ">"
    end

    def to_s_levels
      "#<#{self.class.name}:levels " +
        [@head.to_s, map { |node| node.level.to_s }, @tail.to_s].flatten.join(', ') +
        ">"
    end

    def debug_dump
      dump = ""
      each do |node|
        dump << "#{node.key.inspect}: #{node.values.inspect}\n"
      end
      dump
    end

    # Yields every data node (head and tail excluded) in key order.
    def each
      node = @head.forward[0]
      until TailNode === node
        yield node
        node = node.forward[0]
      end
    end

    private

    # 1/E is a fastest search value
    PROBABILITY = 1 / Math::E
    MAX_LEVEL = 32

    # Geometric level distribution, capped at MAX_LEVEL.
    def random_level
      level = 1
      level += 1 while rand < PROBABILITY && level < MAX_LEVEL
      level
    end

    # A cut happens only when a cut_level is configured, the list is
    # non-empty, the new node is tall enough, and it would not be the
    # very first node of the chunk.
    def cut?(level, prev)
      @cut_level && !empty? && level >= @cut_level && prev != @head
    end

    # Splits the list at +newnode+, producing a second chunk.
    #
    # Transposed picture:
    #
    #   head level 8:    - - - - - - - -
    #   update.size 8:   - - - - - - - -
    #   ...
    #   newnode.level 5: - - - - -
    #   cut level 3:     - - -
    #   regular node:    -
    #   regular node:    - -
    #   ...
    #   tail node:       T T T T T T T T
    #
    # Steps:
    #   0) tail1 = TailNode.new; list2 = new chunk
    #   1) newnode takes over the forward refs recorded in update
    #   2) every update entry now points at tail1 (closing this chunk)
    #   3) list2's head points at newnode on all of its levels
    #   4) tail1.next_list = list2 (tail doubles as a proxy to the next chunk)
    #
    # NOTE(review): this builds a Skiplist (not an InvertedList) and reads
    # @default, which this class never assigns — presumably carried over from
    # skiplist.rb; confirm before relying on chunk cutting here.
    def new_chunks!(newnode, update)
      list2 = Skiplist.new({}, @default, @cut_level)
      tail1 = TailNode.new

      newnode.level.times do |i|
        # '|| @tail' because update[i] may be a head of a lower level
        # without a forward ref to the tail.
        newnode.forward[i] = update[i].forward[i] || @tail
        list2.head.forward[i] = newnode
      end
      @head.level.times do |i|
        update[i].forward[i] = tail1
      end
      tail1.next_list = list2
      # return the current chunk and the next chunk
      return self, list2
    end

    class Node
      attr_accessor :key, :values, :forward
      attr_accessor :_serialized_index

      def initialize(level, key, value)
        @key = key
        @values = [value]
        @forward = Array.new(level)
      end

      # Called when the node is thrown out of the list (node.value is read
      # immediately afterwards). Intentionally a no-op.
      def free(list)
      end

      def level
        @forward.size
      end

      def <(key)
        @key < key
      end

      def <=(key)
        @key <= key
      end

      def next
        forward[0]
      end

      def to_s
        "[#{level}]#{@key}: #{@values.inspect}"
      end
    end

    class HeadNode < Node
      def initialize
        super 1, nil, nil
      end

      # The head sorts before every key.
      def <(key)
        true
      end

      def <=(key)
        true
      end

      def to_s
        "head(#{level})"
      end
    end

    # Also a proxy to the next chunk (see new_chunks!).
    class TailNode < Node
      attr_accessor :next_list

      def initialize
        super 1, nil, nil
      end

      # The tail sorts after every key.
      def <(key)
        false
      end

      def <=(key)
        false
      end

      def to_s
        "tail(#{level})"
      end
    end

    # Debug helper: with a block, collects trace lines and dumps them only if
    # the block raises; without one, prints msg when ENV['DEBUG'] is set.
    def debug(msg)
      if block_given?
        begin
          out = []
          out << "\n\n---- Start of #{msg} -----"
          yield(out)
          return
        rescue => e
          puts out.join("\n")
          puts "---- End of #{msg}: exception! -----"
          puts e
          puts e.backtrace.join("\n") rescue nil
          puts "----"
          raise e
        end
      else
        puts "IL DEBUG: #{msg}" if ENV['DEBUG']
      end
    end

    def debug_header
      puts "\n==========================================\n" if ENV['DEBUG']
    end
  end
end
@@ -0,0 +1,24 @@
1
module StrokeDB
  # PointQuery is used to perform navigation to a single multidimensional
  # point. The initializer accepts a hash of slots; slot values may be:
  #
  #   "string"          scalar string value
  #   3.1415 (numeric)  numeric value
  #   :L                lowest value
  #   :H                highest value
  #
  # Example:
  #   PointQuery.new(:meta   => 'Article',
  #                  :author => 'Oleg Andreev',
  #                  :date   => :last)
  #
  # NOTE(review): the example uses :last, which is not among the documented
  # special values (:L/:H) — confirm which spelling is canonical.
  class PointQuery
    attr_reader :slots

    # Normalizes every key and value through #to_optimized_raw and stores
    # the result in the +slots+ hash.
    def initialize(slots)
      @slots = {}
      slots.each_pair do |k, v|
        @slots[k.to_optimized_raw] = v.to_optimized_raw
      end
    end
  end
end