cassie 1.0.0.beta.17 → 1.0.0.beta.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +4 -4
  2. data/bin/cassie +40 -15
  3. data/lib/cassie/configuration/templates/cassandra.yml +1 -2
  4. data/lib/cassie/connection.rb +3 -3
  5. data/lib/cassie/connection_handler/cluster.rb +1 -1
  6. data/lib/cassie/connection_handler/sessions.rb +1 -1
  7. data/lib/cassie/connection_handler.rb +4 -2
  8. data/lib/cassie/definition.rb +8 -0
  9. data/lib/cassie/modification.rb +9 -0
  10. data/lib/cassie/query.rb +3 -15
  11. data/lib/cassie/statements/README.md +710 -0
  12. data/lib/cassie/statements/core.rb +23 -0
  13. data/lib/cassie/{queries/statement/batches.rb → statements/execution/batched_fetching.rb} +7 -11
  14. data/lib/cassie/{queries/statement → statements/execution}/callbacks.rb +1 -1
  15. data/lib/cassie/{queries/statement → statements/execution}/consistency.rb +1 -1
  16. data/lib/cassie/statements/execution/deserialization.rb +13 -0
  17. data/lib/cassie/{queries/statement → statements/execution}/fetching.rb +10 -21
  18. data/lib/cassie/{queries/instrumentation/execution.rb → statements/execution/instrumentation.rb} +6 -2
  19. data/lib/cassie/statements/execution/partition_linking/cursoring_policy.rb +13 -0
  20. data/lib/cassie/statements/execution/partition_linking/policy_methods.rb +128 -0
  21. data/lib/cassie/statements/execution/partition_linking/simple_policy.rb +19 -0
  22. data/lib/cassie/statements/execution/partition_linking.rb +49 -0
  23. data/lib/cassie/statements/execution/peeking.rb +28 -0
  24. data/lib/cassie/statements/execution/results/core.rb +11 -0
  25. data/lib/cassie/statements/execution/results/cursored_result.rb +26 -0
  26. data/lib/cassie/statements/execution/results/instrumentation.rb +18 -0
  27. data/lib/cassie/statements/execution/results/modification.rb +12 -0
  28. data/lib/cassie/statements/execution/results/modification_result.rb +8 -0
  29. data/lib/cassie/statements/execution/results/peeking.rb +42 -0
  30. data/lib/cassie/statements/execution/results/peeking_result.rb +10 -0
  31. data/lib/cassie/statements/execution/results/query_result.rb +9 -0
  32. data/lib/cassie/statements/execution/results/querying.rb +66 -0
  33. data/lib/cassie/statements/execution/results/result.rb +23 -0
  34. data/lib/cassie/statements/execution/results.rb +12 -0
  35. data/lib/cassie/statements/execution.rb +69 -0
  36. data/lib/cassie/statements/instrumenting.rb +6 -0
  37. data/lib/cassie/{queries/logging/building_resources_event.rb → statements/logging/deserialize_event.rb} +4 -4
  38. data/lib/cassie/statements/logging/deserialize_subscriber.rb +19 -0
  39. data/lib/cassie/{queries/logging/cql_execution_event.rb → statements/logging/execute_event.rb} +8 -4
  40. data/lib/cassie/statements/logging/execute_subscriber.rb +19 -0
  41. data/lib/cassie/statements/logging.rb +12 -0
  42. data/lib/cassie/statements/modification.rb +14 -0
  43. data/lib/cassie/statements/query.rb +12 -0
  44. data/lib/cassie/statements/statement/assignment.rb +51 -0
  45. data/lib/cassie/statements/statement/assignments.rb +87 -0
  46. data/lib/cassie/{queries → statements}/statement/conditions.rb +1 -3
  47. data/lib/cassie/{queries → statements}/statement/deleting.rb +15 -12
  48. data/lib/cassie/{queries → statements}/statement/inserting.rb +13 -10
  49. data/lib/cassie/statements/statement/limiting.rb +89 -0
  50. data/lib/cassie/{queries → statements}/statement/mapping.rb +21 -41
  51. data/lib/cassie/{queries → statements}/statement/ordering.rb +1 -1
  52. data/lib/cassie/statements/statement/pagination/cursors.rb +112 -0
  53. data/lib/cassie/statements/statement/pagination.rb +19 -0
  54. data/lib/cassie/{queries → statements}/statement/preparation/cache.rb +1 -1
  55. data/lib/cassie/{queries → statements}/statement/preparation.rb +4 -5
  56. data/lib/cassie/statements/statement/relation.rb +68 -0
  57. data/lib/cassie/statements/statement/relations.rb +93 -0
  58. data/lib/cassie/statements/statement/selection.rb +86 -0
  59. data/lib/cassie/{queries → statements}/statement/updating.rb +9 -10
  60. data/lib/cassie/{queries → statements}/statement.rb +10 -20
  61. data/lib/cassie/statements.rb +9 -0
  62. data/lib/cassie/testing/fake/definition.rb +11 -0
  63. data/lib/cassie/testing/fake/modification.rb +11 -0
  64. data/lib/cassie/testing/fake/result.rb +15 -3
  65. data/lib/cassie/testing.rb +2 -0
  66. data/lib/cassie.rb +2 -0
  67. metadata +57 -34
  68. data/lib/cassie/queries/README.md +0 -458
  69. data/lib/cassie/queries/instrumentation/loading.rb +0 -15
  70. data/lib/cassie/queries/instrumentation.rb +0 -18
  71. data/lib/cassie/queries/logging/subscription.rb +0 -24
  72. data/lib/cassie/queries/logging.rb +0 -21
  73. data/lib/cassie/queries/statement/assignment.rb +0 -36
  74. data/lib/cassie/queries/statement/assignments.rb +0 -67
  75. data/lib/cassie/queries/statement/execution.rb +0 -45
  76. data/lib/cassie/queries/statement/limiting.rb +0 -36
  77. data/lib/cassie/queries/statement/loading.rb +0 -24
  78. data/lib/cassie/queries/statement/pagination/cursors.rb +0 -168
  79. data/lib/cassie/queries/statement/pagination/page_size.rb +0 -7
  80. data/lib/cassie/queries/statement/pagination.rb +0 -37
  81. data/lib/cassie/queries/statement/relation.rb +0 -74
  82. data/lib/cassie/queries/statement/relations.rb +0 -66
  83. data/lib/cassie/queries/statement/selection.rb +0 -63
@@ -0,0 +1,710 @@
1
+ # Cassie Queries
2
+
3
+ `cassie` query classes aim to provide a query interface that is
4
+
5
+ * Easy to use
6
+ * Easy to understand (and thus maintain)
7
+ * Easy to test
8
+ * Compatible with a data mapper and/or repository design pattern
9
+
10
+ ### Usage
11
+
12
+ You might expect to see class methods allowing queries to be built like such:
13
+
14
+ ```ruby
15
+ Cassie.insert(:users_by_username,
16
+ "id = #{some_id}",
17
+ username: some_username)
18
+ ```
19
+ or
20
+ ```
21
+ Cassie.select_from(:table)
22
+ .where(id: some_id)
23
+ .where(username: some_username)
24
+ ```
25
+
26
+ Queries defined on the fly like this tend to create debt for an application in the long term. They:
27
+ * create gaps in test coverage
28
+ * lack clear documentation
29
+ * resist refactoring
30
+
31
+ Application queries represent distinct application behavior. `cassie` queries are designed to help create query classes that are reusable, testable and maintainable (so you can sleep better at night).
32
+
33
+ ```ruby
34
+ # Some user model
35
+ user = User.new(username: username)
36
+
37
+ MyInsertionQuery.new(user: user).execute
38
+ ```
39
+ <pre><b>
40
+ (1.2ms) INSERT INTO users_by_username (id, username) VALUES (?, ?); [["uuid()", "eprothro"]]
41
+ </b></pre>
42
+
43
+ ```ruby
44
+ class MyInsertionQuery < Cassie::Modification
45
+
46
+ insert_into :users_by_username
47
+
48
+ set :id
49
+ set :username
50
+
51
+ def id
52
+ Cassandra::TimeUuid::Generator.new.now
53
+ end
54
+ end
55
+ ```
56
+
57
+ CQL algebra is less complex than with SQL. So, rather than introducing a query abstraction layer (e.g. something like [arel](https://github.com/rails/arel)), `cassie` queries provide a lightweight CQL DSL to codify your CQL queries.
58
+
59
+ ```sql
60
+ SELECT *
61
+ FROM posts_by_author_category
62
+ WHERE author_id = ?
63
+ AND category = ?
64
+ LIMIT 30;
65
+ ```
66
+ ```ruby
67
+ select_from :posts_by_author_category
68
+ where :author_id, :eq
69
+ where :category, :eq
70
+ limit 30
71
+ ```
72
+
73
+ This maintains the clarity of CQL, allowing code to be expressive, but still use additional features without having to get crazy with string manipulation.
74
+
75
+ #### Query Classes
76
+
77
+ CQL statements are used for 3 different kinds of queries:
78
+ * data definition (e.g. `ALTER`, `CREATE TABLE`, etc.)
79
+ * data modification (e.g. `INSERT`, `UPDATE`, `DELETE`)
80
+ * data query (e.g. `SELECT`)
81
+
82
+ Cassie provides 3 base classes for these 3 kinds of queries. Subclass `Cassie::Definition`, `Cassie::Modification`, and `Cassie::Query` to define your application query classes.
83
+
84
+ ##### `Cassie::Definition`
85
+ Only includes the core functionality for statement execution:
86
+ * connection methods (`session`, `keyspace`)
87
+ * `execute` method
88
+ * `result` attribute, populated by execution
89
+ * instrumentation and logging of execution
90
+
91
+ Typical use of a `Definition` subclass would be for a static DDL query. Override the `statement` method, returning a CQL statement (`String` or `Cassandra::Statements`) that will be executed with the `Cassandra` driver.
92
+
93
+ ##### `Cassie::Modification`
94
+ Includes core functionality for prepared statement execution.
95
+
96
+ * Adds DSL for `insert_into`, `update`, and `delete_from` statement types
97
+ * Adds support for automatically mapping values for assignments from a domain object
98
+
99
+ ##### `Cassie::Query`
100
+ Includes core functionality for prepared statement execution.
101
+
102
+ * Adds DSL for `select_from` statement type
103
+ * Adds `fetch` and `fetch_first` methods for executing and getting results in combination
104
+ * Adds support for deserializing domain objects from Cassandra rows
105
+ * Adds support for paging through results with cursors
106
+ * Adds support for fetching large data sets in memory-efficient batches
107
+
108
+
109
+ #### Relations (`where` clauses)
110
+
111
+ ```ruby
112
+ select_from :posts_by_author
113
+
114
+ where :user_id, :eq
115
+ ```
116
+
117
+ Defining a CQL relation (the `where`) in a cassie query class creates a setter and getter for that relation. This allows the value for the term to be set for a particular query instance.
118
+
119
+ ```ruby
120
+ query.user_id = 123
121
+ query.fetch
122
+ => [#<Struct user_id=123, id="some post id">]
123
+ ```
124
+
125
+ <pre><b>
126
+ (2.9ms) SELECT * FROM posts_by_author WHERE user_id = ? LIMIT 1; [[123]]
127
+ </b></pre>
128
+
129
+ These methods are defined as simple attr_accessors. The underlying instance values can be treated as such.
130
+
131
+ ```ruby
132
+ select_from :posts_by_author
133
+
134
+ where :user_id, :eq
135
+
136
+ def author=(user)
137
+ @user_id = user.id
138
+ end
139
+ ```
140
+
141
+ ```ruby
142
+ query.author = User.new(id: 123)
143
+ query.fetch
144
+ => [#<Struct user_id=123, id="some post id">]
145
+ ```
146
+
147
+ <pre><b>
148
+ (2.9ms) SELECT * FROM posts_by_author WHERE user_id = ? LIMIT 1; [[123]]
149
+ </b></pre>
150
+
151
+ A different name can be defined for the value's setter/getter:
152
+
153
+ ```ruby
154
+ select_from :posts_by_author
155
+
156
+ where :user_id, :eq, value: :author_id
157
+ ```
158
+
159
+ ```ruby
160
+ query.author_id = 123
161
+ query.fetch
162
+ => [#<Struct user_id=123, id="some post id">]
163
+ ```
164
+
165
+ <pre><b>
166
+ (2.9ms) SELECT * FROM posts_by_author WHERE user_id = ? LIMIT 1; [[123]]
167
+ </b></pre>
168
+
169
+ Relations can be conditionally evaluated:
170
+
171
+ ```ruby
172
+ select_from :posts_by_author_category
173
+
174
+ where :author_id, :eq
175
+ where :category, :eq, if: :filter_by_category?
176
+
177
+ def filter_by_category?
178
+ #true or false, as makes sense for your query
179
+ end
180
+ ```
181
+ This can be overdone; it's recommended that one query class be in charge of one kind of query. Avoid query classes that can do too much!
182
+
183
+
184
+ #### Column Selection (`select`)
185
+
186
+ ```ruby
187
+ select_from :posts_by_author do |t|
188
+ t.select :post_id
189
+ t.select writetime(:post_id)
190
+ end
191
+ ```
192
+ which is the same as
193
+ ```ruby
194
+ select_from :posts_by_author
195
+
196
+ select :post_id
197
+ select writetime(:post_id)
198
+ ```
199
+
200
+ `count`, `write_time` (also aliased as `writetime`), and `ttl` selector helpers are available.
201
+
202
+ ```ruby
203
+ select_from :posts_by_author
204
+
205
+ select count
206
+ ```
207
+ ```
208
+ => SELECT COUNT(*) FROM posts_by_author;
209
+ ```
210
+ ```ruby
211
+ select_from :posts_by_author
212
+
213
+ select :id
214
+ select ttl(:popular)
215
+ select writetime(:popular), as: :created_at
216
+ ```
217
+ ```
218
+ => SELECT id, TTL(popular), WRITETIME(popular) AS created_at FROM posts_by_author;
219
+ ```
220
+
221
+ #### Values and Assignments (`set`)
222
+
223
+ Set values (for inserts) and assignments (for updates) with the same `set` method. Similar to relations defined with `where`, assignments provide simple getters and setters.
224
+
225
+ ```ruby
226
+ class InsertUserQuery < Cassie::Modification
227
+
228
+ insert_into :users_by_id
229
+
230
+ set :id
231
+ set :username
232
+ end
233
+ ```
234
+
235
+ ```ruby
236
+ class UpdateUsernameQuery < Cassie::Modification
237
+
238
+ update :users_by_id
239
+
240
+ set :username
241
+
242
+ where :id, :eq
243
+ end
244
+ ```
245
+ ```ruby
246
+ query = UpdateUserQuery.new(id: current_user.id)
247
+ query.username = 'eprothro'
248
+ query.execute
249
+ => true
250
+ ```
251
+
252
+ Mapping assignment values from a domain object is supported.
253
+
254
+ ```ruby
255
+ class UpdateUserQuery < Cassie::Modification
256
+
257
+ update :users_by_id do |q|
258
+ q.set :phone
259
+ q.set :email
260
+ q.set :address
261
+ q.set :username
262
+ end
263
+
264
+ where :id, :eq
265
+
266
+ map_from :user
267
+ ```
268
+
269
+ This allows a domain object to be set for the modification object and have assignment values retrieved from that object.
270
+
271
+ ```ruby
272
+ user
273
+ => #<User:0x007ff8895ce660 @id=6539, @phone="+15555555555", @email="etp@example.com", @address=nil, @username= "etp">
274
+ UpdateUserQuery.new(user: user).execute
275
+ ```
276
+
277
+ <pre><b>
278
+ (1.2ms) UPDATE users_by_id (phone, email, address, username) VALUES (?, ?, ?, ?) WHERE id = ?; [["+15555555555", "etp@example.com", nil, "etp", 6539]]
279
+ </b></pre>
280
+
281
+ This mapping is done in a way akin to delegation, so the behavior can be changed easily for one or more accessors by overriding the getter.
282
+
283
+ ```
284
+ class UpdateUserQuery < Cassie::Modification
285
+
286
+ update :users_by_id do |q|
287
+ q.set :phone
288
+ q.set :email
289
+ q.set :address
290
+ q.set :username
291
+ end
292
+
293
+ where :id, :eq
294
+
295
+ map_from :user
296
+
297
+ def username
298
+ user.username.downcase
299
+ end
300
+ ```
301
+ ```ruby
302
+ user
303
+ => #<User:0x007ff8895ce660 @id=6539, @phone="+15555555555", @email="etp@example.com", @address=nil, @username= "ETP">
304
+ UpdateUserQuery.new(user: user).execute
305
+ ```
306
+
307
+ <pre><b>
308
+ (1.2ms) UPDATE users_by_id (phone, email, address, username) VALUES (?, ?, ?, ?) WHERE id = ?; [["+15555555555", "etp@example.com", nil, "etp", 6539]]
309
+ </b></pre>
310
+
311
+ The above examples use positional terms (e.g. the term is '?' in the statement). The assignment's term can be defined explicitly.
312
+
313
+ ```ruby
314
+ insert_into :posts
315
+
316
+ set :id, term: "now()"
317
+ ```
318
+
319
+ ```ruby
320
+ insert_into :posts
321
+
322
+ set :published_at, "toTimestamp(now())"
323
+ ```
324
+
325
+ A value will be fetched and placed as an argument in the statement if the provided term includes a positional marker ('?').
326
+
327
+ ```ruby
328
+ select :posts
329
+
330
+ where :id, :gteq, term: "minTimeuuid(?)", value: :window_min_timestamp
331
+
332
+ def window_min_timestamp
333
+ '2013-02-02 10:00+0000'
334
+ end
335
+ ```
336
+
337
+ > Note: The `term` option should be used with care. Using it inappropriately could result in inefficient use of prepared statements, and/or leave you potentially vulnerable to injection attacks.
338
+
339
+ #### Consistency configuration
340
+
341
+ The [consistency level](http://datastax.github.io/ruby-driver/v2.1.6/api/cassandra/#consistencies-constant) for a query is determined by your `Cassie::configuration` by default, falling back to the `Cassandra` default if none is given.
342
+
343
+ ```ruby
344
+ Cassie.configuration[:consistency]
345
+ => nil
346
+
347
+ Cassie.cluster.instance_variable_get(:@execution_options).consistency
348
+ => :one
349
+ ```
350
+
351
+ Cassie queries allow for a consistency level defined on the object, subclass, then base class levels. If one is found, it will override the `Cassandra` default when the query is executed.
352
+
353
+ ```ruby
354
+ select_from :posts_by_author_category
355
+
356
+ where :author_id, :eq
357
+ where :category, :eq, if: :filter_by_category?
358
+
359
+ def filter_by_category?
360
+ #true or false, as makes sense for your query
361
+ end
362
+
363
+ def consistency
364
+ #dynamically determine a query object's consistency level
365
+ if filter_by_category?
366
+ :quorum
367
+ else
368
+ super
369
+ end
370
+ end
371
+ ```
372
+
373
+ ```ruby
374
+ select_from :posts_by_author_category
375
+
376
+ where :author_id, :eq
377
+ where :category, :eq
378
+
379
+ consistency :quorum
380
+ ```
381
+
382
+ ```ruby
383
+ # lib/tasks/interesting_task.rake
384
+ require_relative "interesting_worker"
385
+
386
+ task :interesting_task do
387
+ Cassie::Query.consistency = :all
388
+
389
+ InterestingWorker.new.perform
390
+ end
391
+ ```
392
+
393
+ #### Finders
394
+
395
+ To avoid confusion with ruby `Enumerable#find` and Rails' specific `find` functionality, Cassie::Query opts to use `fetch` and explicit `fetch_first` or `fetch_first!` methods.
396
+
397
+ ##### `fetch`
398
+
399
+ Executes the query; returns an enumeration of results.
400
+
401
+ ```
402
+ UsersByResourceQuery.new.fetch(resource: some_resource).to_a
403
+ => [#<User id=:123, username=:eprothro>, #<User id=:456, username=:tenderlove>]
404
+ ```
405
+
406
+ ##### `fetch_first` and `fetch_first!`
407
+
408
+ Executes the query, temporarily limited to 1 result; returns a single result. Bang version raises if no result is found.
409
+
410
+ ```
411
+ UsersByUsernameQuery.new.fetch_first(username: "eprothro").username
412
+ => "eprothro"
413
+ ```
414
+
415
+ ```
416
+ UsersByUsernameQuery.new.fetch_first(username: "ActiveRecord")
417
+ => nil
418
+ ```
419
+
420
+ ```
421
+ UsersByUsernameQuery.new.fetch_first!(username: "active record").username
422
+ Cassie::Statements::RecordNotFound: CQL row does not exist
423
+ ```
424
+
425
+ ##### BatchedFetching
426
+
427
+ Similar to [Rails BatchedFetching](http://guides.rubyonrails.org/v4.2/active_record_querying.html#retrieving-multiple-objects-in-batches), Cassie allows efficient batching of `SELECT` queries.
428
+
429
+ ###### `fetch_each`
430
+
431
+ ```
432
+ UsersQuery.new.fetch_each do |user|
433
+ # only 1000 queried and loaded at a time
434
+ end
435
+ ```
436
+
437
+ ```
438
+ UsersQuery.new.fetch_each(batch_size: 500) do |user|
439
+ # only 500 queried and loaded at a time
440
+ end
441
+ ```
442
+
443
+ ```
444
+ UsersQuery.new.fetch_each.with_index do |user, index|
445
+ # Enumerator chaining without a block
446
+ end
447
+ ```
448
+
449
+ ###### `fetch_in_batches`
450
+
451
+ ```
452
+ UsersQuery.new.fetch_in_batches do |users_array|
453
+ # only 1000 queried and loaded at a time
454
+ end
455
+ ```
456
+
457
+ ```
458
+ UsersQuery.new.fetch_in_batches(batch_size: 500) do |users_array|
459
+ # only 500 queried and loaded at a time
460
+ end
461
+ ```
462
+
463
+ ```
464
+ UsersQuery.new.fetch_in_batches.with_index do |group, index|
465
+ # Enumerator chaining without a block
466
+ end
467
+ ```
468
+
469
+ #### Deserialization
470
+
471
+ For Selection Queries, records are deserialized as anonymous structs by default. Each field returned from the database will have an accessor.
472
+
473
+ ```ruby
474
+ UsersByUsernameQuery.new.fetch(username: "eprothro")
475
+ #=> [#<Struct id=:123, username=:eprothro>]
476
+
477
+ UsersByUsernameQuery.new.fetch_first(username: "eprothro").username
478
+ => "eprothro"
479
+ ```
480
+
481
+ Most applications will want to override `build_result` to construct more useful domain objects:
482
+
483
+ ```
484
+ class UsersByUsernameQuery < Cassie::Query
485
+
486
+ select_from :users_by_username
487
+
488
+ where :username, :eq
489
+
490
+ def build_result(row)
491
+ User.new(row)
492
+ end
493
+ end
494
+ ```
495
+
496
+ ```ruby
497
+ UsersByUsernameQuery.new.fetch_first(username: "eprothro")
498
+ => #<User:0x007fedec219cd8 @id=123, @username="eprothro">
499
+ ```
500
+
501
+ `build_results` may be overridden as well to define completely custom processing of the rows that come back from Cassandra.
502
+
503
+ #### Cursored paging
504
+
505
+ Read about [cursored pagination](https://www.google.com/webhp?q=cursored%20paging#safe=off&q=cursor+paging) if unfamiliar with the concept and how it optimizes paging through frequently updated data sets and I/O bandwidth.
506
+
507
+ ```ruby
508
+ class MyPagedQuery < Cassie::Query
509
+
510
+ select_from :events_by_user
511
+
512
+ where :user_id, :eq
513
+
514
+ max_cursor :event_id
515
+ since_cursor :event_id
516
+ end
517
+ ```
518
+
519
+ ```ruby
520
+ # Imagine a set of id's 100 decreasing to 1
521
+ # where the client already has 1-50 in memory.
522
+
523
+ q = MyPagedQuery.new(page_size: 25, user: current_user)
524
+
525
+ # fetch 100 - 76
526
+ page_1 = q.fetch(max_event_id: nil, since_event_id: 50)
527
+ q.next_max_event_id
528
+ # => 75
529
+
530
+ # fetch 75 - 51
531
+ page_2 = q.fetch(max_event_id: q.next_max_event_id, since_event_id: 50)
532
+ q.next_max_event_id
533
+ # => nil
534
+ ```
535
+
536
+ The `cursor_by` helper can be used as shorthand for defining these relations for which you wish to use cursors. The page size can be defined on the class:
537
+ ```ruby
538
+ class MyPagedQuery < Cassie::Query
539
+
540
+ select_from :events_by_user
541
+
542
+ where :user_id, :eq
543
+
544
+ cursor_by :event_id
545
+
546
+ page_size 25
547
+ end
548
+ ```
549
+
550
+ > Note: the `page_size` class and instance setters are simply convenience aliases for associated `limit` methods.
551
+
552
+ #### Synthetic partitioning
553
+
554
+ Managing partition size is critical with a Cassandra physical layer.
555
+
556
+ When a partition defined by the conventional partition key may grow larger than [recommended](https://docs.datastax.com/en/landing_page/doc/landing_page/planning/planningPartitionSize.html), adding a synthetic partition key is one viable strategy to implement.
557
+ This synthetic partition key splits the entire conceptual partition into multiple logical / physical partitions.
558
+
559
+ A logical model with synthetic partitioning:
560
+ ```
561
+ +------------------+
562
+ | records_by_owner |
563
+ +------------------+
564
+ | owner_id K |
565
+ | bucket K |
566
+ | record C↑ |
567
+ | ... |
568
+ +------------------+
569
+ ```
570
+
571
+ Visualizing partitions with synthetic partitioning:
572
+ ```
573
+ +------------------------------------------------------+
574
+ || owner_id_1 || record | record | ... | record |
575
+ || bucket 0 || 1 | 2 | | 100,000 |
576
+ +------------------------------------------------------+
577
+
578
+ +------------------------------------------------------+
579
+ || owner_id_1 || record | record | ... | record |
580
+ || bucket 1 || 100,001 | 100,002 | | 200,000 |
581
+ +------------------------------------------------------+
582
+ ```
583
+
584
+ Cassie Queries provide support for selecting data sets that span these physical partitions (e.g. {99,990..100,090}).
585
+
586
+ Set up partition linking to accomplish this:
587
+
588
+ ```ruby
589
+ class RecordsByOwnerQuery < Cassie::Query
590
+ attr_accessor :min_record, :owner
591
+
592
+ select_from :records_by_owner
593
+
594
+ where :owner_id, :eq
595
+ where :bucket, :eq
596
+ where :record, :gteq, value: :min_record
597
+
598
+ limit 100
599
+
600
+ link_partitions :bucket, :ascending, [0, :last_bucket]
601
+
602
+ def owner_id
603
+ owner.id
604
+ end
605
+
606
+ def bucket
607
+ 1
608
+ end
609
+
610
+ protected
611
+
612
+ def last_bucket
613
+ owner.buckets
614
+ end
615
+ end
616
+ ```
617
+ ```
618
+ RecordsByOwnerQuery.new(owner: owner, min_record: 99,990).fetch.map(&:record)
619
+ (2.9ms) SELECT * FROM records_by_owner WHERE owner_id = ? AND bucket = ? AND record >= ? LIMIT 100; [123, 0, 99990]
620
+ (2.9ms) SELECT * FROM records_by_owner WHERE owner_id = ? AND bucket = ? AND record >= ? LIMIT 100; [123, 1, 99990]
621
+ => [99990, 99991, ..., 100089, 100090]
622
+ ```
623
+
624
+ The first partition queried is defined within the query class (bucket 0). The linking policy handles recognizing the end of the first partition has been reached, issuing the second query that switches to the second partition (bucket 1), and combining the results from both queries.
625
+
626
+ By default, this works for ascending and descending orderings when paging in the same order as the clustering order; it also works with cursoring.
627
+
628
+ Custom policies can be defined by setting `Query.partition_linker` for more complex schemas. See the `SimplePolicy` source for an example.
629
+
630
+ #### Prepared statements
631
+
632
+ A `Cassie::Query` will use prepared statements by default, caching prepared statements across all Cassie::Query objects, keyed by the bound CQL string.
633
+
634
+ To not use prepared statements for a particular query, disable the `.prepare` class option.
635
+
636
+ ```ruby
637
+ class MySpecialQuery < Cassie::Query
638
+
639
+ select_from :users_by_some_value do
640
+ where :bucket
641
+ where :some_value, :in
642
+ end
643
+
644
+ # the length of `some_values` that will be passed in
645
+ # is highly variable, so we don't want to incur the
646
+ # cost of preparing a statement for each unique length
647
+ self.prepare = false
648
+ end
649
+ ```
650
+
651
+ ```ruby
652
+ query = MySpecialQuery.new
653
+
654
+ # will not prepare statement
655
+ set_1 = query.fetch([1, 2, 3])
656
+ # will not prepare statement
657
+ set_2 = query.fetch([7, 8, 9, 10, 11, 12])
658
+ ```
659
+
660
+ #### Unbound statements
661
+
662
+ Cassie Query features are built around bound statements. However, `#statement` can be overridden to return anything that a `Cassandra::Session` can execute, such as an unbound statement.
663
+
664
+ ```ruby
665
+ class MySafeQuery < Cassie::Definition
666
+ def statement
667
+ "ALTER TABLE foo ADD some_column timeuuid static;"
668
+ end
669
+ end
670
+ ```
671
+
672
+ > Note: unbound queries may be vulnerable to injection attacks.
673
+
674
+ #### Logging
675
+
676
+ Cassie Query objects use the Cassie logger unless overridden. This logs to STDOUT by default. Set any log stream you wish.
677
+
678
+ ```ruby
679
+ Cassie.logger = my_app.config.logger
680
+ ```
681
+
682
+ Set the log level to `debug` in order to log execution details.
683
+
684
+ ```ruby
685
+ Cassie::Query.logger.level = Logger::DEBUG
686
+ ```
687
+
688
+ #### Execution Time
689
+
690
+ Cassie Queries instrument execution time as `cassie.cql.execution` and log a debug message.
691
+
692
+ ```ruby
693
+ SelectUserByUsernameQuery.new('some_user').execute
694
+ (5.5ms) SELECT * FROM users_by_username WHERE username = ? LIMIT 1; ["some_user"] [LOCAL_ONE]
695
+ ```
696
+ This measures the time to build the CQL query (statement and bindings), transmit the query to the cassandra coordinator, receive the result from the cassandra coordinator, and have the cassandra ruby driver build the ruby representation of the results. It does not include the time it takes for the Cassie Query to build its resource objects.
697
+
698
+ #### Result Deserialization
699
+
700
+ Cassie Queries instrument row deserialization as `cassie.deserialize` and log a debug message.
701
+
702
+ ```ruby
703
+ SelectUserByUsernameQuery.new('some_user').fetch_first
704
+ (5.5ms) SELECT * FROM users_by_username WHERE username = ? LIMIT 1; ["some_user"] [LOCAL_ONE]
705
+ (0.2ms) 1 result deserialized from Cassandra rows
706
+ ```
707
+
708
+ This measures the time it takes Cassie to build the results (e.g. your domain objects) and is in addition to the execution time.
709
+
710
+ > total fetch time = `cassie.cql.execution` time + `cassie.deserialize` time