rethinkdb 1.2.0.1

data/lib/sequence.rb ADDED
@@ -0,0 +1,349 @@
+ # Copyright 2010-2012 RethinkDB, all rights reserved.
+ module RethinkDB
+   # A "Sequence" is either a JSON array or a stream. The functions in
+   # this module may be invoked as instance methods of both JSON_Expression and
+   # Stream_Expression, but you will get a runtime error if you invoke
+   # them on a JSON_Expression that turns out not to be an array.
+   module Sequence
+     # For each element of the sequence, execute 1 or more write queries (to
+     # execute more than 1, yield a list of write queries in the block). For
+     # example:
+     #   table.for_each{|row| [table2.get(row[:id]).delete, table3.insert(row)]}
+     # will, for each row in <b>+table+</b>, delete the row that shares its id
+     # in <b>+table2+</b> and insert the row into <b>+table3+</b>.
+     def for_each
+       S.with_var { |vname,v|
+         queries = yield(v)
+         queries = [queries] if queries.class != Array
+         queries.each{|q|
+           if q.class != Write_Query
+             raise TypeError, "Foreach requires query #{q.inspect} to be a write query."
+           end}
+         Write_Query.new [:foreach, self, vname, queries]
+       }
+     end
+
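(Editor's note, not part of the gem source: if the block yields a single write query, no array is needed. A minimal sketch, assuming hypothetical table handles +logs+ and +archive+:)
    logs.for_each{|row| archive.insert(row)}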
+     # Filter the sequence based on a predicate. The provided block should take a
+     # single variable, an element of the sequence, and return either <b>+true+</b> if
+     # it should be in the resulting sequence or <b>+false+</b> otherwise. For example:
+     #   table.filter {|row| row[:id] < 5}
+     # Alternatively, you may provide an object as an argument, in which case the
+     # <b>+filter+</b> will match JSON objects which match the provided object's
+     # attributes. For example, if we have a table <b>+people+</b>, the
+     # following are equivalent:
+     #   people.filter{|row| row[:name].eq('Bob') & row[:age].eq(50)}
+     #   people.filter({:name => 'Bob', :age => 50})
+     # Note that the values of attributes may themselves be queries. For
+     # instance, the following matches anyone whose age equals the result of
+     # the query <b>+r.mul(2, 3)+</b> (that is, 6):
+     #   people.filter({:age => r.mul(2, 3)})
+     def filter(obj=nil)
+       if obj
+         if obj.class == Hash then self.filter { |row|
+             JSON_Expression.new [:call, [:all], obj.map{|kv|
+               row.getattr(kv[0]).eq(S.r(kv[1]))}]}
+         else raise ArgumentError,"Filter: Not a hash: #{obj.inspect}."
+         end
+       else
+         S.with_var{|vname,v|
+           self.class.new [:call, [:filter, vname, S.r(yield(v))], [self]]}
+       end
+     end
+
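(Editor's sketch of a query-valued attribute in the hash form, built only from methods documented in this file; +people+ is hypothetical. The value is itself an RQL expression, so this matches rows whose +age+ equals 30:)
    people.filter({:age => r([20, 30, 40]).nth(1)})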
+     # Map a function over the sequence, then concatenate the results together. The
+     # provided block should take a single variable, an element in the sequence, and
+     # return a list of elements to include in the resulting sequence. If you have a
+     # table <b>+table+</b>, the following are all equivalent:
+     #   table.concat_map {|row| [row[:id], row[:id]*2]}
+     #   table.map{|row| [row[:id], row[:id]*2]}.reduce([]){|a,b| r.union(a,b)}
+     def concat_map
+       S.with_var { |vname,v|
+         self.class.new [:call, [:concatmap, vname, S.r(yield(v))], [self]]}
+     end
+
+     # Gets all rows with keys between <b>+start_key+</b> and
+     # <b>+end_key+</b> (inclusive). You may also optionally specify the name of
+     # the attribute to use as your key (<b>+keyname+</b>), but note that your
+     # table must be indexed by that attribute. Either <b>+start_key+</b> or
+     # <b>+end_key+</b> may be nil, in which case that side of the range is
+     # unbounded. For example, if we have a table <b>+table+</b>, these are
+     # equivalent:
+     #   r.between(table, 3, 7)
+     #   table.filter{|row| (row[:id] >= 3) & (row[:id] <= 7)}
+     # as are these:
+     #   table.between(nil,7,:index)
+     #   table.filter{|row| row[:index] <= 7}
+     def between(start_key, end_key, keyname=:id)
+       start_key = S.r(start_key || S.skip)
+       end_key = S.r(end_key || S.skip)
+       self.class.new [:call, [:between, keyname, start_key, end_key], [self]]
+     end
+
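(Editor's sketch: the lower bound may be left open in the same way, so these should be equivalent for a table +table+ keyed on +id+:)
    table.between(3, nil)
    table.filter{|row| row[:id] >= 3}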
+     # Map a function over a sequence. The provided block should take
+     # a single variable, an element of the sequence, and return an
+     # element of the resulting sequence. For example:
+     #   table.map {|row| row[:id]}
+     def map
+       S.with_var{|vname,v|
+         self.class.new [:call, [:map, vname, S.r(yield(v))], [self]]}
+     end
+
+     # For each element of a sequence, picks out the specified
+     # attributes from the object and returns only those. If the input
+     # is not an array, fails when the query is run. The following are
+     # equivalent:
+     #   r([{:a => 1, :b => 1, :c => 1},
+     #      {:a => 2, :b => 2, :c => 2}]).pluck('a', 'b')
+     #   r([{:a => 1, :b => 1}, {:a => 2, :b => 2}])
+     def pluck(*args)
+       self.map {|x| x.pick(*args)}
+     end
+
+     # For each element of a sequence, picks out the specified
+     # attributes from the object and returns the residual object. If
+     # the input is not an array, fails when the query is run. The
+     # following are equivalent:
+     #   r([{:a => 1, :b => 1, :c => 1},
+     #      {:a => 2, :b => 2, :c => 2}]).without('a', 'b')
+     #   r([{:c => 1}, {:c => 2}])
+     def without(*args)
+       self.map {|x| x.unpick(*args)}
+     end
+
+     # Order a sequence of objects by one or more attributes. For
+     # example, to sort first by name and then by social security
+     # number for the table <b>+people+</b>, you could do:
+     #   people.order_by(:name, :ssn)
+     # In place of an attribute name, you may provide a tuple of an attribute
+     # name and a boolean specifying whether to sort in ascending order (which is
+     # the default). For example:
+     #   people.order_by([:name, false], :ssn)
+     # will sort first by name in descending order, and then by ssn in ascending
+     # order.
+     def order_by(*orderings)
+       orderings.map!{|x| x.class == Array ? x : [x, true]}
+       self.class.new [:call, [:orderby, *orderings], [self]]
+     end
+
+     # Reduce a function over the sequence. Note that unlike Ruby's reduce, you
+     # cannot omit the base case. The block you provide should take two
+     # arguments, just like Ruby's reduce. For example, if we have a table
+     # <b>+table+</b>, the following will add up the <b>+count+</b> attribute of
+     # all the rows:
+     #   table.map{|row| row[:count]}.reduce(0){|a,b| a+b}
+     # <b>NOTE:</b> unlike Ruby's reduce, this reduce only works on
+     # sequences with elements of the same type as the base case. For
+     # example, the following is incorrect:
+     #   table.reduce(0){|a,b| a + b[:count]} # INCORRECT
+     # because the base case is a number but the sequence contains
+     # objects. RQL reduce has this limitation so that it can be
+     # distributed across shards efficiently.
+     def reduce(base)
+       S.with_var { |aname,a|
+         S.with_var { |bname,b|
+           JSON_Expression.new [:call,
+                                [:reduce, S.r(base), aname, bname, S.r(yield(a,b))],
+                                [self]]}}
+     end
+
+     # This one is a little complicated. The logic is as follows:
+     # 1. Use <b>+grouping+</b> to sort the elements into groups. <b>+grouping+</b> should be a callable that takes one argument, the current element of the sequence, and returns a JSON expression representing its group.
+     # 2. Map <b>+mapping+</b> over each of the groups. Mapping should be a callable that behaves the same as the block passed to Sequence#map.
+     # 3. Reduce the groups with <b>+base+</b> and <b>+reduction+</b>. Base should be the base term of the reduction, and <b>+reduction+</b> should be a callable that behaves the same as the block passed to Sequence#reduce.
+     #
+     # For example, the following are equivalent:
+     #   table.grouped_map_reduce(lambda {|row| row[:id] % 4},
+     #                            lambda {|row| row[:id]},
+     #                            0,
+     #                            lambda {|a,b| a+b})
+     #   r([0,1,2,3]).map {|n|
+     #     table.filter{|row| row[:id].eq(n)}.map{|row| row[:id]}.reduce(0){|a,b| a+b}
+     #   }
+     # <b>+grouped_map_reduce+</b> is more efficient than the second form because
+     # it only has to traverse <b>+table+</b> once.
+     def grouped_map_reduce(grouping, mapping, base, reduction)
+       grouping_term = S.with_var{|vname,v| [vname, S.r(grouping.call(v))]}
+       mapping_term = S.with_var{|vname,v| [vname, S.r(mapping.call(v))]}
+       reduction_term = S.with_var {|aname, a| S.with_var {|bname, b|
+         [S.r(base), aname, bname, S.r(reduction.call(a, b))]}}
+       JSON_Expression.new [:call, [:groupedmapreduce,
+                                    grouping_term,
+                                    mapping_term,
+                                    reduction_term],
+                            [self]]
+     end
+
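(Editor's sketch of a per-group count, assuming a hypothetical table +orders+ with a +category+ attribute: each row maps to 1 and the groups are summed:)
    orders.grouped_map_reduce(lambda {|row| row[:category]},
                              lambda {|row| 1},
                              0,
                              lambda {|a,b| a+b})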
+     # Group a sequence by one or more attributes and return some data about each
+     # group. For example, if you have a table <b>+people+</b>:
+     #   people.group_by(:name, :town, r.count).filter{|row| row[:reduction] > 1}
+     # will find all cases where two people in the same town share a name, and
+     # return a list of those name/town pairs along with the number of people who
+     # share that name in that town. You can find a list of builtin data
+     # collectors at Data_Collectors (which will also show you how to
+     # define your own).
+     def group_by(*args)
+       raise ArgumentError,"group_by requires at least one argument" if args.length < 1
+       attrs, opts = args[0..-2], args[-1]
+       S.check_opts(opts, [:mapping, :base, :reduction, :finalizer])
+       map = opts.has_key?(:mapping) ? opts[:mapping] : lambda {|row| row}
+       if !opts.has_key?(:base) || !opts.has_key?(:reduction)
+         raise TypeError, "Group by requires a reduction and base to be specified"
+       end
+       base = opts[:base]
+       reduction = opts[:reduction]
+
+       gmr = self.grouped_map_reduce(lambda{|r| attrs.map{|a| r[a]}}, map, base, reduction)
+       if (f = opts[:finalizer])
+         gmr = gmr.map{|group| group.merge({:reduction => f.call(group[:reduction])})}
+       end
+       return gmr
+     end
+
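(Editor's note: as the implementation above shows, the final argument is an options hash with :mapping, :base, :reduction, and an optional :finalizer, which is what builtin collectors such as r.count expand to. A hand-rolled collector that sums the +age+ attribute per town might look like this sketch, with +people+ and +age+ hypothetical:)
    people.group_by(:town, {:mapping   => lambda {|row| row[:age]},
                            :base      => 0,
                            :reduction => lambda {|a,b| a+b}})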
+     # Gets one or more elements from the sequence, much like [] in Ruby.
+     # The following are all equivalent:
+     #   r([1,2,3])
+     #   r([0,1,2,3])[1...4]
+     #   r([0,1,2,3])[1..3]
+     #   r([0,1,2,3])[1..-1]
+     # As are:
+     #   r(1)
+     #   r([0,1,2])[1]
+     # And:
+     #   r(2)
+     #   r({:a => 2})[:a]
+     # <b>NOTE:</b> If you are slicing an array, you can provide any negative index you
+     # want, but if you're slicing a stream then for efficiency reasons the only
+     # allowable negative index is '-1', and you must be using a closed range
+     # ('..', not '...').
+     def [](ind)
+       case ind.class.hash
+       when Fixnum.hash then
+         JSON_Expression.new [:call, [:nth], [self, RQL.expr(ind)]]
+       when Range.hash then
+         b = RQL.expr(ind.begin)
+         if ind.exclude_end? then e = ind.end
+         else e = (ind.end == -1 ? nil : RQL.expr(ind.end+1))
+         end
+         self.class.new [:call, [:slice], [self, RQL.expr(b), RQL.expr(e)]]
+       else raise ArgumentError, "RQL_Query#[] can't handle #{ind.inspect}."
+       end
+     end
+
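(Editor's sketch of the stream restriction described in the NOTE above: +table+ is a stream, so only a closed range ending at -1 is accepted, while an in-memory array can take any negative index:)
    table[3..-1]
    r([0,1,2,3,4])[1..-2]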
+     # Return at most <b>+n+</b> elements from the sequence. The
+     # following are equivalent:
+     #   r([1,2,3])
+     #   r([1,2,3,4]).limit(3)
+     #   r([1,2,3,4])[0...3]
+     def limit(n); self[0...n]; end
+
+     # Skip the first <b>+n+</b> elements of the sequence. The following are equivalent:
+     #   r([2,3,4])
+     #   r([1,2,3,4]).skip(1)
+     #   r([1,2,3,4])[1..-1]
+     def skip(n); self[n..-1]; end
+
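(Editor's sketch: the two compose naturally for pagination. Assuming a hypothetical table +posts+ ordered by +id+, this skips the first 20 rows and keeps the next 10:)
    posts.order_by(:id).skip(20).limit(10)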
+     # Removes duplicate values from the sequence (similar to the *nix
+     # <b>+uniq+</b> function). Does not work for sequences of
+     # compound data types like objects or arrays, but in the case of
+     # objects (e.g. rows of a table), you may provide an attribute and
+     # it will first map the selector for that attribute over the
+     # sequence. If we have a table <b>+table+</b>, the following are
+     # equivalent:
+     #   table.map{|row| row[:id]}.distinct
+     #   table.distinct(:id)
+     # As are:
+     #   r([1,2,3])
+     #   r([1,2,3,1]).distinct
+     # And:
+     #   r([1,2])
+     #   r([{:x => 1}, {:x => 2}, {:x => 1}]).distinct(:x)
+     def distinct(attr=nil);
+       if attr then self.map{|row| row[attr]}.distinct
+       else self.class.new [:call, [:distinct], [self]];
+       end
+     end
+
+     # Get the length of the sequence. If we have a table
+     # <b>+table+</b> with at least 5 elements, the following are
+     # equivalent:
+     #   table[0...5].count
+     #   r([1,2,3,4,5]).count
+     def count(); JSON_Expression.new [:call, [:count], [self]]; end
+
+     # Get element <b>+n+</b> of the sequence. For example, the following are
+     # equivalent:
+     #   r(2)
+     #   r([0,1,2,3]).nth(2)
+     # (Note the 0-indexing.)
+     def nth(n)
+       JSON_Expression.new [:call, [:nth], [self, S.r(n)]]
+     end
+
+     # A normal inner join. Takes as an argument the table to join with and a
+     # block. The block you provide should accept two rows and return
+     # <b>+true+</b> if they should be joined or <b>+false+</b> otherwise. For
+     # example:
+     #   table1.inner_join(table2) {|row1, row2| row1[:attr1] > row2[:attr2]}
+     # Note that we don't merge the two tables when you do this. The output will
+     # be a list of objects like:
+     #   {'left' => ..., 'right' => ...}
+     # You can use Sequence#zip to get back a list of merged rows.
+     def inner_join(other)
+       self.concat_map {|row|
+         other.concat_map {|row2|
+           RQL.branch(yield(row, row2), [{:left => row, :right => row2}], [])
+         }
+       }
+     end
+
+
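(Editor's sketch combining inner_join with Sequence#zip to get flat, merged rows; +employees+ and +departments+ are hypothetical tables:)
    employees.inner_join(departments) {|e, d| e[:dept_id].eq(d[:id])}.zip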
+     # A normal outer join. Takes as an argument the table to join with and a
+     # block. The block you provide should accept two rows and return
+     # <b>+true+</b> if they should be joined or <b>+false+</b> otherwise. For
+     # example:
+     #   table1.outer_join(table2) {|row1, row2| row1[:attr1] > row2[:attr2]}
+     # Note that we don't merge the two tables when you do this. The output will
+     # be a list of objects like:
+     #   {'left' => ..., 'right' => ...}
+     # You can use Sequence#zip to get back a list of merged rows.
+     def outer_join(other)
+       S.with_var {|vname, v|
+         self.concat_map {|row|
+           RQL.let({vname => other.concat_map {|row2|
+                      RQL.branch(yield(row, row2),
+                                 [{:left => row, :right => row2}],
+                                 [])}.to_array}) {
+             RQL.branch(v.count() > 0, v, [{:left => row}])
+           }
+         }
+       }
+     end
+
+     # A special case of Sequence#inner_join that is guaranteed to run in
+     # O(n*log(n)) time. It does equality comparison between <b>+leftattr+</b> of
+     # the invoking stream and the primary key of the <b>+other+</b> stream. For
+     # example, the following are equivalent (if <b>+id+</b> is the primary key
+     # of <b>+table2+</b>):
+     #   table1.eq_join(:a, table2)
+     #   table1.inner_join(table2) {|row1, row2| r.eq row1[:a],row2[:id]}
+     def eq_join(leftattr, other)
+       S.with_var {|vname, v|
+         self.concat_map {|row|
+           RQL.let({vname => other.get(row[leftattr])}) {
+             RQL.branch(v.ne(nil), [{:left => row, :right => v}], [])
+           }
+         }
+       }
+     end
+
+     # Take the output of Sequence#inner_join, Sequence#outer_join, or
+     # Sequence#eq_join and merge the results together. The following are
+     # equivalent:
+     #   table1.eq_join(:id, table2).zip
+     #   table1.eq_join(:id, table2).map{|obj| obj['left'].merge(obj['right'])}
+     def zip
+       self.map {|row|
+         RQL.branch(row.contains('right'), row['left'].merge(row['right']), row['left'])
+       }
+     end
+   end
+ end
data/lib/streams.rb ADDED
@@ -0,0 +1,101 @@
+ # Copyright 2010-2012 RethinkDB, all rights reserved.
+ module RethinkDB
+   # A lazy sequence of rows, e.g. what we get when reading from a table.
+   # Includes the Sequence module, so look there for the methods.
+   class Stream_Expression
+     # Convert a stream into an array. THINK CAREFULLY BEFORE DOING
+     # THIS. You can do more with an array than you can with a stream
+     # (e.g., you can store an array in a variable), but that's because
+     # arrays are stored in memory. If your stream is big (e.g. you're
+     # reading a giant table), that has serious performance
+     # implications. Also, if you return an array instead of a stream
+     # from a query, the whole thing gets sent over the network at once
+     # instead of lazily consuming chunks. If you have a table <b>+table+</b> with at
+     # least 3 elements, the following are equivalent:
+     #   r[[1,1,1]]
+     #   table.limit(3).map{1}.stream_to_array
+     def stream_to_array(); JSON_Expression.new [:call, [:stream_to_array], [self]]; end
+   end
+
+   # A special case of Stream_Expression that you can write to. You
+   # will get a Multi_Row_Selection from most operations that access
+   # tables. For example, consider the following two queries:
+   #   q1 = table.filter{|row| row[:id] < 5}
+   #   q2 = table.map{|row| row[:id]}
+   # The first query simply accesses some elements of a table, while the
+   # second query does some work to produce a new stream.
+   # Correspondingly, the first query returns a Multi_Row_Selection
+   # while the second query returns a Stream_Expression. So:
+   #   q1.delete
+   # is a legal query that will delete everything with <b>+id+</b> less
+   # than 5 in <b>+table+</b>. But:
+   #   q2.delete
+   # will raise an error.
+   class Multi_Row_Selection < Stream_Expression
+     attr_accessor :opts
+     def initialize(body, context=nil, opts=nil) # :nodoc:
+       super(body, context)
+       if opts
+         @opts = opts
+       elsif @body[0] == :call and @body[2] and @body[2][0].kind_of? Multi_Row_Selection
+         @opts = @body[2][0].opts
+       end
+     end
+
+     def raise_if_outdated # :nodoc:
+       if @opts and @opts[:use_outdated]
+         raise RuntimeError, "Cannot write to outdated table."
+       end
+     end
+
+     # Delete all of the selected rows. For example, if we have
+     # a table <b>+table+</b>:
+     #   table.filter{|row| row[:id] < 5}.delete
+     # will delete everything with <b>+id+</b> less than 5 in <b>+table+</b>.
+     def delete
+       raise_if_outdated
+       Write_Query.new [:delete, self]
+     end
+
+     # Update all of the selected rows. For example, if we have a table <b>+table+</b>:
+     #   table.filter{|row| row[:id] < 5}.update{|row| {:score => row[:score]*2}}
+     # will double the score of everything with <b>+id+</b> less than 5.
+     # If the object returned in <b>+update+</b> has attributes
+     # which are not present in the original row, those attributes will
+     # still be added to the new row.
+     #
+     # If you want to do a non-atomic update, you should pass
+     # <b>+:non_atomic+</b> as the optional variant:
+     #   table.update(:non_atomic){|row| r.js("...")}
+     # You need to do a non-atomic update when the block provided to update can't
+     # be proved deterministic (e.g. if it contains javascript or reads from
+     # another table).
+     def update(variant=nil)
+       raise_if_outdated
+       S.with_var {|vname,v|
+         Write_Query.new [:update, self, [vname, S.r(yield(v))]]
+       }.apply_variant(variant)
+     end
+
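(Editor's sketch of an update that reads from a second table and therefore needs the :non_atomic variant; +users+ and +scores+ are hypothetical tables sharing an +id+ primary key:)
    users.update(:non_atomic){|row| {:score => scores.get(row[:id])[:score]}}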
+     # Replace all of the selected rows. Unlike <b>+update+</b>, the block must return the
+     # new row rather than an object containing attributes to be updated (may be
+     # combined with RQL::merge to mimic <b>+update+</b>'s behavior).
+     # May also return <b>+nil+</b> to delete the row. For example, if we have a
+     # table <b>+table+</b>, then:
+     #   table.replace{|row| r.if(row[:id] < 5, nil, row)}
+     # will delete everything with id less than 5, but leave the other rows untouched.
+     #
+     # If you want to do a non-atomic replace, you should pass
+     # <b>+:non_atomic+</b> as the optional variant:
+     #   table.replace(:non_atomic){|row| r.js("...")}
+     # You need to do a non-atomic replace when the block provided to replace can't
+     # be proved deterministic (e.g. if it contains javascript or reads from
+     # another table).
+     def replace(variant=nil)
+       raise_if_outdated
+       S.with_var {|vname,v|
+         Write_Query.new [:replace, self, [vname, S.r(yield(v))]]
+       }.apply_variant(variant)
+     end
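(Editor's sketch of mimicking +update+ with replace, as the comment above suggests: merge the new attributes into the existing row rather than returning a bare attribute object. r({...}) is used to build the merged-in object explicitly, and the attribute names are hypothetical:)
    table.filter{|row| row[:id] < 5}.replace{|row| row.merge(r({:archived => true}))}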
+   end
+ end