rethinkdb 1.2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/sequence.rb ADDED
@@ -0,0 +1,349 @@
1
+ # Copyright 2010-2012 RethinkDB, all rights reserved.
2
+ module RethinkDB
3
+ # A "Sequence" is either a JSON array or a stream. The functions in
4
+ # this module may be invoked as instance methods of both JSON_Expression and
5
+ # Stream_Expression, but you will get a runtime error if you invoke
6
+ # them on a JSON_Expression that turns out not to be an array.
7
+ module Sequence
8
+ # For each element of the sequence, execute 1 or more write queries (to
9
+ # execute more than 1, yield a list of write queries in the block). For
10
+ # example:
11
+ # table.for_each{|row| [table2.get(row[:id]).delete, table3.insert(row)]}
12
+ # will, for each row in <b>+table+</b>, delete the row that shares its id
13
+ # in <b>+table2+</b> and insert the row into <b>+table3+</b>.
14
def for_each
  S.with_var do |var_name, var|
    # The block may hand back a single write query or an array of them;
    # normalize to an array so both cases are validated the same way.
    writes = yield(var)
    writes = [writes] unless writes.class == Array
    writes.each do |query|
      next if query.class == Write_Query
      raise TypeError, "Foreach requires query #{query.inspect} to be a write query."
    end
    Write_Query.new [:foreach, self, var_name, writes]
  end
end
25
+
26
+ # Filter the sequence based on a predicate. The provided block should take a
27
+ # single variable, an element of the sequence, and return either <b>+true+</b> if
28
+ # it should be in the resulting sequence or <b>+false+</b> otherwise. For example:
29
+ # table.filter {|row| row[:id] < 5}
30
+ # Alternatively, you may provide an object as an argument, in which case the
31
+ # <b>+filter+</b> will match JSON objects which match the provided object's
32
+ # attributes. For example, if we have a table <b>+people+</b>, the
33
+ # following are equivalent:
34
+ # people.filter{|row| row[:name].eq('Bob') & row[:age].eq(50)}
35
+ # people.filter({:name => 'Bob', :age => 50})
36
+ # Note that the values of attributes may themselves be queries. For
37
+ # instance, here is a query that matches anyone whose age is double their height:
38
+ # people.filter({:age => r.mul(2, 3)})
39
def filter(obj=nil)
  unless obj
    # Block form: bind a fresh variable to the current element and wrap
    # whatever the block returns as the filter predicate.
    return S.with_var { |var_name, var|
      self.class.new [:call, [:filter, var_name, S.r(yield(var))], [self]]
    }
  end

  raise ArgumentError,"Filter: Not a hash: #{obj.inspect}." if obj.class != Hash

  # Hash form: every key/value pair becomes an equality test on the row, and
  # all of the tests must hold. Delegates to the block form above.
  self.filter do |row|
    conditions = obj.map { |attr, expected| row.getattr(attr).eq(S.r(expected)) }
    JSON_Expression.new [:call, [:all], conditions]
  end
end
51
+
52
+ # Map a function over the sequence, then concatenate the results together. The
53
+ # provided block should take a single variable, an element in the sequence, and
54
+ # return a list of elements to include in the resulting sequence. If you have a
55
+ # table <b>+table+</b>, the following are all equivalent:
56
+ # table.concat_map {|row| [row[:id], row[:id]*2]}
57
+ # table.map{|row| [row[:id], row[:id]*2]}.reduce([]){|a,b| r.union(a,b)}
58
def concat_map
  S.with_var do |var_name, var|
    # The block's result (a list per element) is wrapped into a query term.
    mapped = S.r(yield(var))
    self.class.new [:call, [:concatmap, var_name, mapped], [self]]
  end
end
62
+
63
+ # Gets all rows with keys between <b>+start_key+</b> and
64
+ # <b>+end_key+</b> (inclusive). You may also optionally specify the name of
65
+ # the attribute to use as your key (<b>+keyname+</b>), but note that your
66
+ # table must be indexed by that attribute. Either <b>+start_key+</b> or
67
+ # <b>+end_key+</b> may be nil, in which case that side of the range is
68
+ # unbounded. For example, if we have a table <b>+table+</b>, these are
69
+ # equivalent:
70
+ # r.between(table, 3, 7)
71
+ # table.filter{|row| (row[:id] >= 3) & (row[:id] <= 7)}
72
+ # as are these:
73
+ # table.between(nil,7,:index)
74
+ # table.filter{|row| row[:index] <= 7}
75
def between(start_key, end_key, keyname=:id)
  # A missing (nil) bound is replaced by S.skip, leaving that side open.
  lower, upper = [start_key, end_key].map { |key| S.r(key || S.skip) }
  self.class.new [:call, [:between, keyname, lower, upper], [self]]
end
80
+
81
+ # Map a function over a sequence. The provided block should take
82
+ # a single variable, an element of the sequence, and return an
83
+ # element of the resulting sequence. For example:
84
+ # table.map {|row| row[:id]}
85
def map
  S.with_var do |var_name, var|
    # The block's result for the bound element becomes the mapped term.
    mapped = S.r(yield(var))
    self.class.new [:call, [:map, var_name, mapped], [self]]
  end
end
89
+
90
+ # For each element of a sequence, picks out the specified
91
+ # attributes from the object and returns only those. If the input
92
+ # is not an array, fails when the query is run. The following are
93
+ # equivalent:
94
+ # r([{:a => 1, :b => 1, :c => 1},
95
+ # {:a => 2, :b => 2, :c => 2}]).pluck('a', 'b')
96
+ # r([{:a => 1, :b => 1}, {:a => 2, :b => 2}])
97
def pluck(*args)
  # Apply +pick+ with the given attribute names to every element.
  self.map do |element|
    element.pick(*args)
  end
end
100
+
101
+ # For each element of a sequence, picks out the specified
102
+ # attributes from the object and returns the residual object. If
103
+ # the input is not an array, fails when the query is run. The
104
+ # following are equivalent:
105
+ # r([{:a => 1, :b => 1, :c => 1},
106
+ # {:a => 2, :b => 2, :c => 2}]).without('a', 'b')
107
+ # r([{:c => 1}, {:c => 2}])
108
def without(*args)
  # Apply +unpick+ with the given attribute names to every element.
  self.map do |element|
    element.unpick(*args)
  end
end
111
+
112
+ # Order a sequence of objects by one or more attributes. For
113
+ # example, to sort first by name and then by social security
114
+ # number for the table <b>+people+</b>, you could do:
115
+ # people.order_by(:name, :ssn)
116
+ # In place of an attribute name, you may provide a tuple of an attribute
117
+ # name and a boolean specifying whether to sort in ascending order (which is
118
+ # the default). For example:
119
+ # people.order_by([:name, false], :ssn)
120
+ # will sort first by name in descending order, and then by ssn in ascending
121
+ # order.
122
def order_by(*orderings)
  # A bare attribute name is shorthand for [name, true] (ascending).
  normalized = orderings.map do |ordering|
    ordering.class == Array ? ordering : [ordering, true]
  end
  self.class.new [:call, [:orderby, *normalized], [self]]
end
126
+
127
+ # Reduce a function over the sequence. Note that unlike Ruby's reduce, you
128
+ # cannot omit the base case. The block you provide should take two
129
+ # arguments, just like Ruby's reduce. For example, if we have a table
130
+ # <b>+table+</b>, the following will add up the <b>+count+</b> attribute of
131
+ # all the rows:
132
+ # table.map{|row| row[:count]}.reduce(0){|a,b| a+b}
133
+ # <b>NOTE:</b> unlike Ruby's reduce, this reduce only works on
134
+ # sequences with elements of the same type as the base case. For
135
+ # example, the following is incorrect:
136
+ # table.reduce(0){|a,b| a + b[:count]} # INCORRECT
137
+ # because the base case is a number but the sequence contains
138
+ # objects. RQL reduce has this limitation so that it can be
139
+ # distributed across shards efficiently.
140
def reduce(base)
  # Two fresh variables stand for the accumulator and the current element.
  S.with_var do |acc_name, acc|
    S.with_var do |elt_name, elt|
      reducer = [:reduce, S.r(base), acc_name, elt_name, S.r(yield(acc, elt))]
      JSON_Expression.new [:call, reducer, [self]]
    end
  end
end
147
+
148
+ # This one is a little complicated. The logic is as follows:
149
+ # 1. Use <b>+grouping+</b> to sort the elements into groups. <b>+grouping+</b> should be a callable that takes one argument, the current element of the sequence, and returns a JSON expression representing its group.
150
+ # 2. Map <b>+mapping+</b> over each of the groups. Mapping should be a callable that behaves the same as the block passed to Sequence#map.
151
+ # 3. Reduce the groups with <b>+base+</b> and <b>+reduction+</b>. Base should be the base term of the reduction, and <b>+reduction+</b> should be a callable that behaves the same as the block passed to Sequence#reduce.
152
+ #
153
+ # For example, the following are equivalent:
154
+ # table.grouped_map_reduce(lambda {|row| row[:id] % 4},
155
+ # lambda {|row| row[:id]},
156
+ # 0,
157
+ # lambda {|a,b| a+b})
158
+ # r([0,1,2,3]).map {|n|
159
+ # table.filter{|row| row[:id].eq(n)}.map{|row| row[:id]}.reduce(0){|a,b| a+b}
160
+ # }
161
+ # Groupedmapreduce is more efficient than the second form because
162
+ # it only has to traverse <b>+table+</b> once.
163
def grouped_map_reduce(grouping, mapping, base, reduction)
  # Grouping and mapping are both one-argument callables; each is applied to
  # a fresh variable and recorded as [variable name, resulting term].
  unary_term = lambda do |callable|
    S.with_var { |var_name, var| [var_name, S.r(callable.call(var))] }
  end
  grouping_term = unary_term.call(grouping)
  mapping_term = unary_term.call(mapping)

  # The reduction takes two fresh variables and is stored with its base.
  reduction_term = S.with_var do |acc_name, acc|
    S.with_var do |elt_name, elt|
      [S.r(base), acc_name, elt_name, S.r(reduction.call(acc, elt))]
    end
  end

  terms = [:groupedmapreduce, grouping_term, mapping_term, reduction_term]
  JSON_Expression.new [:call, terms, [self]]
end
174
+
175
+ # Group a sequence by one or more attributes and return some data about each
176
+ # group. For example, if you have a table <b>+people+</b>:
177
+ # people.group_by(:name, :town, r.count).filter{|row| row[:reduction] > 1}
178
+ # Will find all cases where two people in the same town share a name, and
179
+ # return a list of those name/town pairs along with the number of people who
180
+ # share that name in that town. You can find a list of builtin data
181
+ # collectors at Data_Collectors (which will also show you how to
182
+ # define your own).
183
def group_by(*args)
  raise ArgumentError,"group_by requires at least one argument" if args.length < 1

  # The last argument is the options hash (the data collector); everything
  # before it names the attributes to group on.
  *attrs, opts = args
  S.check_opts(opts, [:mapping, :base, :reduction, :finalizer])

  # The mapping defaults to the identity function when none is supplied.
  mapping = opts.has_key?(:mapping) ? opts[:mapping] : lambda { |row| row }
  unless opts.has_key?(:base) && opts.has_key?(:reduction)
    raise TypeError, "Group by requires a reduction and base to be specified"
  end

  grouping = lambda { |row| attrs.map { |attr| row[attr] } }
  grouped = self.grouped_map_reduce(grouping, mapping, opts[:base], opts[:reduction])

  # An optional finalizer post-processes each group's :reduction value.
  finalizer = opts[:finalizer]
  return grouped unless finalizer
  grouped.map { |group| group.merge({:reduction => finalizer.call(group[:reduction])}) }
end
200
+
201
+ # Gets one or more elements from the sequence, much like [] in Ruby.
202
+ # The following are all equivalent:
203
+ # r([1,2,3])
204
+ # r([0,1,2,3])[1...4]
205
+ # r([0,1,2,3])[1..3]
206
+ # r([0,1,2,3])[1..-1]
207
+ # As are:
208
+ # r(1)
209
+ # r([0,1,2])[1]
210
+ # And:
211
+ # r(2)
212
+ # r({:a => 2})[:a]
213
+ # <b>NOTE:</b> If you are slicing an array, you can provide any negative index you
214
+ # want, but if you're slicing a stream then for efficiency reasons the only
215
+ # allowable negative index is '-1', and you must be using a closed range
216
+ # ('..', not '...').
217
def [](ind)
  # Dispatch with Module#=== (case/when) instead of comparing class hashes
  # against Fixnum: the Fixnum constant no longer exists on Ruby >= 3.2, so
  # the old dispatch raised NameError on every call there. Integer covers
  # what used to be Fixnum.
  case ind
  when Integer
    JSON_Expression.new [:call, [:nth], [self, RQL.expr(ind)]]
  when Range
    # The slice term takes an exclusive upper bound. An inclusive range that
    # ends at -1 means "through the last element", encoded as a nil bound;
    # any other inclusive end is shifted by one.
    stop = if ind.exclude_end? then ind.end
           elsif ind.end == -1 then nil
           else ind.end + 1
           end
    # Each bound is wrapped exactly once (the start used to be wrapped twice).
    self.class.new [:call, [:slice], [self, RQL.expr(ind.begin), RQL.expr(stop)]]
  else
    raise ArgumentError, "RQL_Query#[] can't handle #{ind.inspect}."
  end
end
230
+
231
+ # Return at most <b>+n+</b> elements from the sequence. The
232
+ # following are equivalent:
233
+ # r([1,2,3])
234
+ # r([1,2,3,4]).limit(3)
235
+ # r([1,2,3,4])[0...3]
236
def limit(n)
  # Exclusive range: elements 0 up to (but not including) n.
  self[0...n]
end
237
+
238
+ # Skip the first <b>+n+</b> elements of the sequence. The following are equivalent:
239
+ # r([2,3,4])
240
+ # r([1,2,3,4]).skip(1)
241
+ # r([1,2,3,4])[1..-1]
242
def skip(n)
  # Inclusive range ending at -1: everything from index n onward.
  self[n..-1]
end
243
+
244
+ # Removes duplicate values from the sequence (similar to the *nix
245
+ # <b>+uniq+</b> function). Does not work for sequences of
246
+ # compound data types like objects or arrays, but in the case of
247
+ # objects (e.g. rows of a table), you may provide an attribute and
248
+ # it will first map the selector for that attribute over the
249
+ # sequence. If we have a table <b>+table+</b>, the following are
250
+ # equivalent:
251
+ # table.map{|row| row[:id]}.distinct
252
+ # table.distinct(:id)
253
+ # As are:
254
+ # r([1,2,3])
255
+ # r([1,2,3,1]).distinct
256
+ # And:
257
+ # r([1,2])
258
+ # r([{:x => 1}, {:x => 2}, {:x => 1}]).distinct(:x)
259
def distinct(attr=nil)
  # With an attribute, first project each row onto it, then deduplicate.
  return self.map { |row| row[attr] }.distinct if attr
  self.class.new [:call, [:distinct], [self]]
end
264
+
265
+ # Get the length of the sequence. If we have a table
266
+ # <b>+table+</b> with at least 5 elements, the following are
267
+ # equivalent:
268
+ # table[0...5].count
269
+ # r([1,2,3,4,5]).count
270
def count
  # The result is always a JSON expression, whatever the receiver's class.
  JSON_Expression.new [:call, [:count], [self]]
end
271
+
272
+ # Get element <b>+n+</b> of the sequence. For example, the following are
273
+ # equivalent:
274
+ # r(2)
275
+ # r([0,1,2,3]).nth(2)
276
+ # (Note the 0-indexing.)
277
def nth(n)
  # The index is wrapped with S.r so it may be a plain value or a query.
  index = S.r(n)
  JSON_Expression.new [:call, [:nth], [self, index]]
end
280
+
281
+ # A normal inner join. Takes as an argument the table to join with and a
282
+ # block. The block you provide should accept two rows and return
283
+ # <b>+true+</b> if they should be joined or <b>+false+</b> otherwise. For
284
+ # example:
285
+ # table1.inner_join(table2) {|row1, row2| row1[:attr1] > row2[:attr2]}
286
+ # Note that we don't merge the two tables when you do this. The output will
287
+ # be a list of objects like:
288
+ # {'left' => ..., 'right' => ...}
289
+ # You can use Sequence#zip to get back a list of merged rows.
290
def inner_join(other)
  # Nested concat_map: every (left, right) pair is tested with the block and
  # contributes a one-element list when it matches, an empty list otherwise.
  self.concat_map do |left_row|
    other.concat_map do |right_row|
      matched = yield(left_row, right_row)
      RQL.branch(matched, [{:left => left_row, :right => right_row}], [])
    end
  end
end
297
+
298
+
299
+ # A normal outer join. Takes as an argument the table to join with and a
300
+ # block. The block you provide should accept two rows and return
301
+ # <b>+true+</b> if they should be joined or <b>+false+</b> otherwise. For
302
+ # example:
303
+ # table1.outer_join(table2) {|row1, row2| row1[:attr1] > row2[:attr2]}
304
+ # Note that we don't merge the two tables when you do this. The output will
305
+ # be a list of objects like:
306
+ # {'left' => ..., 'right' => ...}
307
+ # You can use Sequence#zip to get back a list of merged rows.
308
def outer_join(other)
  S.with_var do |matches_name, matches|
    self.concat_map do |left_row|
      # Collect every right-hand row that joins with this left row, converted
      # to an array so it can be bound to a variable.
      joined = other.concat_map { |right_row|
        RQL.branch(yield(left_row, right_row),
                   [{:left => left_row, :right => right_row}],
                   [])
      }.to_array

      # Emit the matches if there are any; otherwise keep the left row alone.
      RQL.let({matches_name => joined}) do
        RQL.branch(matches.count() > 0, matches, [{:left => left_row}])
      end
    end
  end
end
320
+
321
+ # A special case of Sequence#inner_join that is guaranteed to run in
322
+ # O(n*log(n)) time. It does equality comparison between <b>+leftattr+</b> of
323
+ # the invoking stream and the primary key of the <b>+other+</b> stream. For
324
+ # example, the following are equivalent (if <b>+id+</b> is the primary key
325
+ # of <b>+table2+</b>):
326
+ # table1.eq_join(:a, table2)
327
+ # table1.inner_join(table2) {|row1, row2| r.eq row1[:a],row2[:id]}
328
def eq_join(leftattr, other)
  S.with_var do |match_name, match|
    self.concat_map do |row|
      # Look up the row of +other+ keyed by this row's +leftattr+ value and
      # bind it; a nil lookup result means there is nothing to join with.
      lookup = {match_name => other.get(row[leftattr])}
      RQL.let(lookup) do
        RQL.branch(match.ne(nil), [{:left => row, :right => match}], [])
      end
    end
  end
end
337
+
338
+ # Take the output of Sequence#inner_join, Sequence#outer_join, or
339
+ # Sequence#eq_join and merge the results together. The following are
340
+ # equivalent:
341
+ # table1.eq_join(:id, table2).zip
342
+ # table1.eq_join(:id, table2).map{|obj| obj['left'].merge(obj['right'])}
343
def zip
  self.map do |pair|
    # Rows without a 'right' side (possible after an outer join) pass through
    # as just their 'left' object; otherwise the two sides are merged.
    has_right = pair.contains('right')
    merged = pair['left'].merge(pair['right'])
    left_only = pair['left']
    RQL.branch(has_right, merged, left_only)
  end
end
348
+ end
349
+ end
data/lib/streams.rb ADDED
@@ -0,0 +1,101 @@
1
+ # Copyright 2010-2012 RethinkDB, all rights reserved.
2
+ module RethinkDB
3
+ # A lazy sequence of rows, e.g. what we get when reading from a table.
4
+ # Includes the Sequence module, so look there for the methods.
5
class Stream_Expression
  # Turn this stream into an array. THINK CAREFULLY BEFORE DOING THIS.
  # An array can do more than a stream (e.g., it can be stored in a
  # variable), but only because arrays are held in memory. For a big
  # stream (e.g. one reading a giant table) that has serious performance
  # implications. Also, returning an array instead of a stream from a
  # query sends the whole thing over the network at once rather than
  # lazily consuming chunks. If you have a table <b>+table+</b> with at
  # least 3 elements, the following are equivalent:
  #   r[[1,1,1]]
  #   table.limit(3).map{1}.stream_to_array
  def stream_to_array
    JSON_Expression.new [:call, [:stream_to_array], [self]]
  end
end
19
+
20
+ # A special case of Stream_Expression that you can write to. You
21
+ # will get a Multi_Row_Selection from most operations that access
22
+ # tables. For example, consider the following two queries:
23
+ # q1 = table.filter{|row| row[:id] < 5}
24
+ # q2 = table.map{|row| row[:id]}
25
+ # The first query simply accesses some elements of a table, while the
26
+ # second query does some work to produce a new stream.
27
+ # Correspondingly, the first query returns a Multi_Row_Selection
28
+ # while the second query returns a Stream_Expression. So:
29
+ # q1.delete
30
+ # is a legal query that will delete everything with <b>+id+</b> less
31
+ # than 5 in <b>+table+</b>. But:
32
+ # q2.delete
33
+ # will raise an error.
34
+ class Multi_Row_Selection < Stream_Expression
35
+ attr_accessor :opts
36
def initialize(body, context=nil, opts=nil) # :nodoc:
  super(body, context)

  # Explicitly supplied options always win.
  if opts
    @opts = opts
    return
  end

  # Otherwise, when this selection is a call whose first argument is itself
  # a Multi_Row_Selection, carry that selection's options forward.
  return unless @body[0] == :call
  source = @body[2] && @body[2][0]
  @opts = source.opts if source.kind_of?(Multi_Row_Selection)
end
44
+
45
def raise_if_outdated # :nodoc:
  # Nothing to check unless options are present and flag :use_outdated.
  return unless @opts && @opts[:use_outdated]
  raise RuntimeError, "Cannot write to outdated table."
end
50
+
51
+ # Delete all of the selected rows. For example, if we have
52
+ # a table <b>+table+</b>:
53
+ # table.filter{|row| row[:id] < 5}.delete
54
+ # will delete everything with <b>+id+</b> less than 5 in <b>+table+</b>.
55
def delete
  # Refuse to build a write against a selection flagged as outdated.
  raise_if_outdated
  Write_Query.new([:delete, self])
end
59
+
60
+ # Update all of the selected rows. For example, if we have a table <b>+table+</b>:
61
+ # table.filter{|row| row[:id] < 5}.update{|row| {:score => row[:score]*2}}
62
+ # will double the score of everything with <b>+id+</b> less than 5.
63
+ # If the object returned in <b>+update+</b> has attributes
64
+ # which are not present in the original row, those attributes will
65
+ # still be added to the new row.
66
+ #
67
+ # If you want to do a non-atomic update, you should pass
68
+ # <b>+:non_atomic+</b> as the optional variant:
69
+ # table.update(:non_atomic){|row| r.js("...")}
70
+ # You need to do a non-atomic update when the block provided to update can't
71
+ # be proved deterministic (e.g. if it contains javascript or reads from
72
+ # another table).
73
def update(variant=nil)
  # Refuse to build a write against a selection flagged as outdated.
  raise_if_outdated
  write = S.with_var do |var_name, var|
    Write_Query.new [:update, self, [var_name, S.r(yield(var))]]
  end
  # The optional variant (e.g. :non_atomic) is applied to the finished query.
  write.apply_variant(variant)
end
79
+
80
+ # Replace all of the selected rows. Unlike <b>+update+</b>, must return the
81
+ # new row rather than an object containing attributes to be updated (may be
82
+ # combined with RQL::merge to mimic <b>+update+</b>'s behavior).
83
+ # May also return <b>+nil+</b> to delete the row. For example, if we have a
84
+ # table <b>+table+</b>, then:
85
+ # table.replace{|row| r.if(row[:id] < 5, nil, row)}
86
+ # will delete everything with id less than 5, but leave the other rows untouched.
87
+ #
88
+ # If you want to do a non-atomic replace, you should pass
89
+ # <b>+:non_atomic+</b> as the optional variant:
90
+ # table.replace(:non_atomic){|row| r.js("...")}
91
+ # You need to do a non-atomic replace when the block provided to replace can't
92
+ # be proved deterministic (e.g. if it contains javascript or reads from
93
+ # another table).
94
def replace(variant=nil)
  # Refuse to build a write against a selection flagged as outdated.
  raise_if_outdated
  write = S.with_var do |var_name, var|
    Write_Query.new [:replace, self, [var_name, S.r(yield(var))]]
  end
  # The optional variant (e.g. :non_atomic) is applied to the finished query.
  write.apply_variant(variant)
end
100
+ end
101
+ end