alf 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. data/CHANGELOG.md +64 -0
  2. data/Gemfile.lock +4 -4
  3. data/README.md +257 -171
  4. data/TODO.md +4 -4
  5. data/alf.gemspec +3 -3
  6. data/alf.noespec +11 -6
  7. data/examples/pseudo-with.alf +7 -0
  8. data/examples/runall.sh +2 -2
  9. data/examples/unwrap.alf +4 -0
  10. data/examples/wrap.alf +2 -0
  11. data/lib/alf/relation.rb +118 -0
  12. data/lib/alf/version.rb +1 -1
  13. data/lib/alf.rb +320 -169
  14. data/spec/integration/src/test_minus.alf +5 -0
  15. data/spec/integration/src/test_project.alf +9 -0
  16. data/spec/{alf_spec.rb → integration/test_alf.rb} +8 -21
  17. data/spec/integration/test_alf_specs.rb +37 -0
  18. data/spec/{examples_spec.rb → integration/test_examples.rb} +1 -1
  19. data/spec/spec_helper.rb +19 -1
  20. data/spec/unit/environment/examples/suppliers.rash +5 -0
  21. data/spec/{environment/explicit_spec.rb → unit/environment/test_explicit.rb} +0 -0
  22. data/spec/{environment/folder_spec.rb → unit/environment/test_folder.rb} +1 -1
  23. data/spec/{operator → unit/operator}/non_relational/compact/buffer_based.rb +0 -0
  24. data/spec/{operator/non_relational/compact/sort_based_spec.rb → unit/operator/non_relational/compact/test_sort_based.rb} +0 -0
  25. data/spec/{operator/non_relational/autonum_spec.rb → unit/operator/non_relational/test_autonum.rb} +0 -0
  26. data/spec/{operator/non_relational/clip_spec.rb → unit/operator/non_relational/test_clip.rb} +0 -0
  27. data/spec/{operator/non_relational/compact_spec.rb → unit/operator/non_relational/test_compact.rb} +0 -0
  28. data/spec/{operator/non_relational/defaults_spec.rb → unit/operator/non_relational/test_defaults.rb} +0 -0
  29. data/spec/{operator/non_relational/sort_spec.rb → unit/operator/non_relational/test_sort.rb} +0 -0
  30. data/spec/{operator/relational/join/hash_based_spec.rb → unit/operator/relational/join/test_hash_based.rb} +0 -0
  31. data/spec/unit/operator/relational/summarize/test_hash_based.rb +38 -0
  32. data/spec/{operator/relational/summarize/sort_based_spec.rb → unit/operator/relational/summarize/test_sort_based.rb} +0 -0
  33. data/spec/{operator/relational/extend_spec.rb → unit/operator/relational/test_extend.rb} +0 -0
  34. data/spec/{operator/relational/group_spec.rb → unit/operator/relational/test_group.rb} +3 -2
  35. data/spec/{operator/relational/intersect_spec.rb → unit/operator/relational/test_intersect.rb} +0 -0
  36. data/spec/unit/operator/relational/test_join.rb +36 -0
  37. data/spec/{operator/relational/minus_spec.rb → unit/operator/relational/test_minus.rb} +0 -0
  38. data/spec/{operator/relational/project_spec.rb → unit/operator/relational/test_project.rb} +0 -0
  39. data/spec/{operator/relational/quota_spec.rb → unit/operator/relational/test_quota.rb} +0 -0
  40. data/spec/{operator/relational/rename_spec.rb → unit/operator/relational/test_rename.rb} +0 -0
  41. data/spec/{operator/relational/restrict_spec.rb → unit/operator/relational/test_restrict.rb} +0 -0
  42. data/spec/unit/operator/relational/test_summarize.rb +64 -0
  43. data/spec/{operator/relational/ungroup_spec.rb → unit/operator/relational/test_ungroup.rb} +0 -0
  44. data/spec/{operator/relational/union_spec.rb → unit/operator/relational/test_union.rb} +0 -0
  45. data/spec/{operator/relational/unnest_spec.rb → unit/operator/relational/test_unwrap.rb} +5 -5
  46. data/spec/{operator/relational/nest_spec.rb → unit/operator/relational/test_wrap.rb} +5 -5
  47. data/spec/{operator/command_methods_spec.rb → unit/operator/test_command_methods.rb} +0 -0
  48. data/spec/unit/operator/test_non_relational.rb +18 -0
  49. data/spec/unit/operator/test_relational.rb +27 -0
  50. data/spec/{reader → unit/reader}/input.rb +0 -0
  51. data/spec/unit/reader/test_alf_file.rb +27 -0
  52. data/spec/{reader/rash_spec.rb → unit/reader/test_rash.rb} +0 -0
  53. data/spec/unit/relation/test_coerce.rb +53 -0
  54. data/spec/unit/relation/test_inspect.rb +20 -0
  55. data/spec/unit/relation/test_relops.rb +46 -0
  56. data/spec/{renderer/text/cell_spec.rb → unit/renderer/text/test_cell.rb} +0 -0
  57. data/spec/{renderer/text/row_spec.rb → unit/renderer/text/test_row.rb} +0 -0
  58. data/spec/{renderer/text/table_spec.rb → unit/renderer/text/test_table.rb} +0 -0
  59. data/spec/{aggregator_spec.rb → unit/test_aggregator.rb} +6 -6
  60. data/spec/{assumptions_spec.rb → unit/test_assumptions.rb} +0 -0
  61. data/spec/{lispy_spec.rb → unit/test_lispy.rb} +0 -0
  62. data/spec/unit/test_operator.rb +16 -0
  63. data/spec/{reader_spec.rb → unit/test_reader.rb} +4 -0
  64. data/spec/unit/test_relation.rb +40 -0
  65. data/spec/{renderer_spec.rb → unit/test_renderer.rb} +0 -0
  66. data/spec/{tools/ordering_key_spec.rb → unit/tools/test_ordering_key.rb} +0 -0
  67. data/spec/{tools/projection_key_spec.rb → unit/tools/test_projection_key.rb} +0 -0
  68. data/spec/{tools/tools_spec.rb → unit/tools/test_tools.rb} +0 -0
  69. data/spec/{tools/tuple_handle_spec.rb → unit/tools/test_tuple_handle.rb} +0 -0
  70. data/tasks/clean.rake +3 -0
  71. data/tasks/spec_test.rake +1 -1
  72. metadata +143 -114
  73. data/examples/nest.alf +0 -2
  74. data/examples/unnest.alf +0 -4
  75. data/examples/with.alf +0 -23
  76. data/spec/operator/relational/summarize_spec.rb +0 -41
  77. data/spec/reader/alf_file_spec.rb +0 -15
data/README.md CHANGED
@@ -1,38 +1,64 @@
1
- # Alf - Classy data-manipulation dressed in a DSL (+ commandline)
1
+ # Alf - Relational Algebra at your fingertips (version 0.9.1)
2
+
3
+ ## Description
4
+
5
+ ### What & Why
6
+
7
+ Alf brings the relational algebra both in Shell and in Ruby. In Shell, because
8
+ manipulating any relation-like data source should be as straightforward as a
9
+ one-liner. In Ruby, because I've never understood why programming languages
10
+ provide data structures like arrays, hashes, sets, trees and graphs but not
11
+ _relations_... Let's stop the segregation ;-)
12
+
13
+ ### Install
2
14
 
3
15
  % [sudo] gem install alf
4
16
  % alf --help
5
17
 
6
- ## Links
18
+ ### Links
7
19
 
8
20
  * {http://rubydoc.info/github/blambeau/alf/master/frames} (read this file there!)
9
21
  * {http://github.com/blambeau/alf} (source code)
10
22
  * {http://revision-zero.org} (author's blog)
11
23
 
12
- ## Description
24
+ ### Quick overview
13
25
 
14
26
  Alf is a commandline tool and Ruby library to manipulate data with all the power
15
27
  of a truly relational algebra approach. Objectives behind Alf are manifold:
16
28
 
17
- * Pragmatically, Alf aims at being a useful commandline executable for
18
- manipulating csv files, database records, or whatever looks like a (physical
19
- representation of a) relation. See 'alf --help' for the list of available
20
- commands and implemented relational operators.
29
+ * Pragmatically, Alf aims at being a useful commandline executable for manipulating
30
+ relational-like data: database records, csv files, or **whatever can be interpreted
31
+ as (the physical encoding of) a relation**. See 'alf --help' for the list of
32
+ available commands and implemented relational operators.
21
33
 
22
34
  % alf restrict suppliers -- "city == 'London'" | alf join cities
23
35
 
24
36
  * Alf is also a 100% Ruby relational algebra implementation shipped with a simple
25
37
  to use, powerful, functional DSL for compiling and evaluating relational queries.
26
- Alf is not limited to simple scalar values, but admit values of arbitrary
38
+ Alf is not limited to simple scalar values, but admits values of arbitrary
27
39
  complexity (under a few requirements about their implementation, see next
28
40
  section). See 'alf --help' as well as .alf files in the examples directory
29
41
  for syntactic examples.
30
42
 
31
- Alf.lispy.compile{
43
+ Alf.lispy.evaluate {
32
44
  (join (restrict :suppliers, lambda{ city == 'London' }), :cities)
33
45
  }
34
-
35
- * Alf is also an educational tool, that I've written to draw people's attention
46
+
47
+ In addition to this functional syntax, Alf comes bundled with an in-memory
48
+ Relation data structure that provides an object-oriented way of manipulating
49
+ relations in simplest cases:
50
+
51
+ suppliers = Alf::Relation[
52
+ {:sid => 'S1', :name => 'Smith', :status => 20, :city => 'London'},
53
+ {:sid => 'S2', :name => 'Jones', :status => 10, :city => 'Paris'},
54
+ {:sid => 'S3', :name => 'Blake', :status => 30, :city => 'Paris'},
55
+ {:sid => 'S4', :name => 'Clark', :status => 20, :city => 'London'},
56
+ {:sid => 'S5', :name => 'Adams', :status => 30, :city => 'Athens'},
57
+ ]
58
+ cities = ...
59
+ puts suppliers.restrict(lambda{ city == 'London' }).join(cities)
60
+
61
+ * Alf is also an educational tool, that I've written to draw people's attention
36
62
  about the ill-known relational theory (and ill-represented by SQL). The tool
37
63
  is largely inspired from TUTORIAL D, the tutorial language of Chris Date and
38
64
  Hugh Darwen in their books, more specifically in
@@ -41,9 +67,9 @@ of a truly relational algebra approach. Objectives behind Alf are manifold:
41
67
  there (Alf is neither a relational _database_, nor a relational _language_).
42
68
  I hope that people (especially talented developers) will be sufficiently
43
69
  enticed by features shown here to open that book, read it more deeply, and
44
- implement new stuff around Date & Darwen vision. Have a look at the result of
45
- the following query for things that you'll never ever have in SQL (see also
46
- 'alf help quota', 'alf help nest', 'alf help group', ...):
70
+ implement new stuff around Date & Darwen's vision. Have a look at the result of
71
+ the following query for the kind of things that you'll never ever have in SQL
72
+ (see also 'alf help quota', 'alf help wrap', 'alf help group', ...):
47
73
 
48
74
  % alf --text summarize supplies --by=sid -- total "sum(:qty)" -- which "group(:pid)"
49
75
 
@@ -74,7 +100,7 @@ Familiar? Skip. Otherwise, read on.
74
100
  ### The example database
75
101
 
76
102
  This README file shows a lot of examples built on top of the following suppliers
77
- & parts database (almost identical to the original version in C.J. Date database
103
+ & parts database (almost identical to the original version in C. J. Date's database
78
104
  books). By default, the alf command line is wired to this embedded example. All
79
105
  examples shown here should therefore work immediately, if you want to reproduce
80
106
  them!
@@ -103,7 +129,7 @@ them!
103
129
  +-------------------------------------+-------------------------------------------------+-------------------------+------------------------+
104
130
 
105
131
  Many people think that relational databases are necessarily 'flat', that they are
106
- necessarily limited to simply scalar values in two dimension tables. This is
132
+ necessarily limited to simple scalar values put in two dimension tables. This is
107
133
  wrong; most SQL databases are indeed 'flat', but _relations_ (in the mathematical
108
134
  sense of the relational theory) are not! Look, **the example above is a relation!**;
109
135
  that 'contains' other relations as particular values, which, in turn, could
@@ -115,15 +141,16 @@ that 'contains' other relations as particular values, which, in turn, could
115
141
  To understand what is a relation exactly, one needs to remember elementary
116
142
  notions of set theory and the concepts of _type_ and _value_.
117
143
 
118
- * A _type_ is a finite set of values; it is non particularly ordered and, being
119
- a set, it does never contains two values which are considered equal.
144
+ * A _type_ is a finite set of values; it is not particularly ordered and, being
145
+ a set, it never contains two values which are equal (any type is necessarily
146
+ accompanied with an equality operator, denoted here by '==').
120
147
 
121
148
  * A _value_ is **immutable** (you cannot 'change' a value, in any way), has no
122
149
  localization in time and space, and is always typed (that is, it is always
123
150
  accompanied by some identification of the type it belongs to).
124
151
 
125
152
  As you can see, _type_ and _value_ are not the same concepts as _class_ and
126
- _object_, with which you are probably familiar with. Alf considers that the
153
+ _object_, which you are probably more familiar with. Alf considers that the
127
154
  latter are _implementations_ of the former. Alf assumes _valid_ implementations
128
155
  (equality and hash methods must be correct) and _valid_ usage (objects used for
129
156
  representing values are kept immutable in practice). Alf _assumes_ this, but
@@ -157,11 +184,11 @@ can have them inside relations!
157
184
  pairs. Moreover, it does not contain two attributes with the same name and is
158
185
  **not particularly ordered**. Also, **a tuple is a _value_, and is therefore
159
186
  immutable**. Last, but not least, a tuple **does not admit nulls/nils**. Tuples
160
- in Alf are simply implemented with ruby hashes, taken as tuples implementations.
187
+ in Alf are simply implemented with ruby hashes, taken as tuple implementations.
161
188
  Not all hashes are valid tuple implementations, of course (those containing nil
162
189
  are not, for example). Alf _assumes_ valid tuples, but does not _enforce_ this
163
190
  precondition. It's up to you to use Alf the right way! No support is or will
164
- ever be provided for ordering tuple attributes. Howeber, as hashes are ordered
191
+ ever be provided for ordering tuple attributes. However, as hashes are ordered
165
192
  in Ruby 1.9, Alf implements a best effort strategy to keep a friendly ordering
166
193
  when rendering tuples and relations. This is a very good practical reason for
167
194
  migrating to ruby 1.9 if not already done!
@@ -173,15 +200,18 @@ can have them inside relations!
173
200
  particularly ordered**. Moreover, all tuples of a relation must have the same
174
201
  _heading_, that is, the same set of attribute (name, type) pairs. Also, **a
175
202
  relation is a _value_, is therefore immutable** and **does not admit null/nil**.
176
- Alf being mainly an implementation of relational algebra (see section below)
177
- it loosely considers any Iterator of tuples as a potentially valid relation
178
- implementation (see later).
203
+
204
+ Alf is mainly an implementation of relational algebra (see section below). The
205
+ implemented operators consider any Iterator of tuples as a potentially valid
206
+ operand. In addition Alf provides a Relation ruby class, that acts as an
207
+ in-memory data structure that provides an Object-Oriented API to call operators
208
+ (see "Interfacing Alf in Ruby" below).
179
209
 
180
210
  ### Relational Algebra
181
211
 
182
- In classical algebra, you can do computations like <code>(5 + 2) - 3</code>. In
183
- relational algebra, you can do similar things on relations. Alf uses an infix,
184
- functional programming-oriented syntax for algebra expressions:
212
+ In classical algebra, you can make computations like <code>(5 + 2) - 3</code>.
213
+ In relational algebra, you can do similar things on relations. Alf uses an
214
+ infix, functional programming-oriented syntax for algebra expressions:
185
215
 
186
216
  (minus (union :suppliers, xxx), yyy)
187
217
 
@@ -201,28 +231,66 @@ you want! The same query, in shell:
201
231
  The Third Manifesto defines a series of prescriptions, proscriptions and very
202
232
  strong suggestions for designing a truly relational _language_, called a _D_,
203
233
  as an alternative to SQL for managing relational databases. This is far beyond
204
- our objective with Alf, as we don't look at database aspects at all (persistence,
205
- transactions, and so on.) and don't actually define a programming language either
206
- (only a small functional ruby DSL).
234
+ my objective with Alf, as it does not touch on database issues at all (persistence,
235
+ transactions, and so on) and does not actually define a programming language (only
236
+ a small functional ruby DSL).
207
237
 
208
238
  Alf must simply be interpreted as a ruby library implementing (a variant of)
209
- Date's and Darwen relational algebra. This library is designed as a set of operator
239
+ Date and Darwen's relational algebra. This library is designed as a set of operator
210
240
  implementations, that work as tuple iterators taking other tuple iterators as
211
241
  input. Under the pre-condition that you provide them _valid_ tuple iterators as
212
242
  input (no duplicates, no nil, + other preconditions on an operator basis), the
213
243
  result is a valid iterator as well. Unless explicitly stated otherwise, any
214
244
  behavior observed when not respecting these preconditions, even an interesting
215
- behavior, is not guaranteed and can change with tiny version changes (see section
216
- about versioning policy at the end of this file).
245
+ behavior, is not guaranteed and might change with tiny version changes (see
246
+ section about versioning policy at the end of this file).
247
+
248
+ ### The command line utility
249
+
250
+ #
251
+ # Provided that suppliers and cities are valid relation representations
252
+ # [something similar]
253
+ #
254
+ % alf restrict suppliers -- "city == 'London'" | alf join cities
255
+
256
+ # the resulting stream is a valid relation representation in the output
257
+ # stream format that you have selected (.rash by default). It can therefore
258
+ # be piped to another alf shell invocation, or saved to a file and re-read
259
+ # later (under the assumption that input and output data formats match, of
260
+ # course). [Something similar about responsibility and bug].
217
261
 
218
- ### In ruby
262
+ If you take a look at .alf example files, you'll find functional ruby expressions
263
+ like the following (called Lispy expressions):
264
+
265
+ % cat examples/minus.alf
266
+
267
+ # Give all suppliers, except those living in Paris
268
+ (minus :suppliers,
269
+ (restrict :suppliers, lambda{ city == 'Paris' }))
270
+
271
+ # This is a contrived example for illustrating minus, as the
272
+ # following is equivalent
273
+ (restrict :suppliers, lambda{ city != 'Paris' })
274
+
275
+ You can simply execute such expressions with the alf command line itself (the
276
+ three following invocations return the same result):
277
+
278
+ % alf examples/minus.alf | alf show
279
+ % alf show minus
280
+ % alf -e "(restrict :suppliers, lambda{ city != 'Paris' })" | alf show
281
+
282
+ Symbols are magically resolved from the environment, which is wired to the
283
+ examples by default. See the dedicated sections below to update this behavior
284
+ to your needs.
285
+
286
+ ### The algebra compiler
219
287
 
220
288
  #
221
289
  # Provided that :suppliers and :cities are valid relation representations
222
290
  # (under the responsibility shared by you and the Reader and Environment
223
291
  # subclasses you use -- see later), then,
224
292
  #
225
- op = Alf.lispy.compile{
293
+ op = Alf.lispy.compile {
226
294
  (join (restrict :suppliers, lambda{ city == 'London' }), :cities)
227
295
  }
228
296
 
@@ -231,19 +299,128 @@ about versioning policy at the end of this file).
231
299
  # of any other expression. This is under Alf's responsibility, and any
232
300
  # failure must be considered a bug!
233
301
 
234
- ### In shell
302
+ ### The Relation data structure
235
303
 
236
- #
237
- # Provided that suppliers and cities are valid relation representations
238
- # [something similar]
239
- #
240
- % alf restrict suppliers -- "city == 'London'" | alf join cities
304
+ In addition, Alf is bundled with an in-memory Relation data structure that
305
+ provides a more abstract API for manipulating relations in simple cases (the
306
+ rules are the same about pre and post-conditions):
241
307
 
242
- # the resulting stream is a valid relation representation in the output
243
- # stream format that you have selected (.rash by default). It can therefore
244
- # be piped to another alf shell invocation, or saved to a file and re-read
245
- # later (under the assumption that input and output data formats match, or
246
- # course). [Something similar about responsibility and bug].
308
+ # The query above can be done as follows. Note that relations are always
309
+ # loaded in memory here!
310
+ suppliers = Alf::Relation[ ... ]
311
+ cities = Alf::Relation[ ... ]
312
+ suppliers.restrict(lambda{ city == 'London' }).
313
+ join(cities)
314
+ # => Alf::Relation[ ... ]
315
+
316
+ All relational operators have an instance method equivalent on the Alf::Relation
317
+ class. Semantically, the receiver object is simply the first operand of the
318
+ functional call, as illustrated above.
319
+
320
+ ### Where do relations come from?
321
+
322
+ Relation literals can simply be written as follows:
323
+
324
+ suppliers = Alf::Relation[
325
+ {:sid => 'S1', :name => 'Smith', :status => 20, :city => 'London'},
326
+ {:sid => 'S2', :name => 'Jones', :status => 10, :city => 'Paris'},
327
+ {:sid => 'S3', :name => 'Blake', :status => 30, :city => 'Paris'},
328
+ {:sid => 'S4', :name => 'Clark', :status => 20, :city => 'London'},
329
+ {:sid => 'S5', :name => 'Adams', :status => 30, :city => 'Athens'},
330
+ ]
331
+
332
+ Environment classes serve datasets (see later) that always have a to_rel method
333
+ for obtaining in-memory relations:
334
+
335
+ env = Alf::Environment.examples
336
+ env.dataset(:suppliers).to_rel
337
+ # => Alf::Relation[ ... ]
338
+
339
+ Compiled expressions always have a to_rel method that allows obtaining an
340
+ in-memory relation:
341
+
342
+ op = Alf.lispy.compile {
343
+ (join (restrict :suppliers, lambda{ city == 'London' }), :cities)
344
+ }
345
+ op.to_rel
346
+ # => Alf::Relation[...]
347
+
348
+ Lispy provides an 'evaluate' method which is precisely equivalent to the chain
349
+ above. Therefore:
350
+
351
+ rel = Alf.lispy.evaluate {
352
+ (join (restrict :suppliers, lambda{ city == 'London' }), :cities)
353
+ }
354
+ # => Alf::Relation[...]
355
+
356
+ ### Algebra is closed under its operators!
357
+
358
+ Of course, from the closure property of a relational algebra (that states that
359
+ operators work on relations and return relations), you can use a sub expression
360
+ *every time* a relational operand is expected, every time:
361
+
362
+ # Compute the total qty supplied in each country together with the subset
363
+ # of products shipped there. Only consider suppliers that have a status
364
+ # greater than 10, however.
365
+ (summarize \
366
+ (join \
367
+ (join (restrict :suppliers, lambda{ status > 10 }),
368
+ :supplies),
369
+ :cities),
370
+ [:country],
371
+ :which => Agg::group(:pid),
372
+ :total => Agg::sum{ qty })
373
+
374
+ Of course, complex queries quickly become unreadable that way. But you can always
375
+ split complex tasks into simpler ones:
376
+
377
+ kept_suppliers = (restrict :suppliers, lambda{ status > 10 })
378
+ with_countries = (join kept_suppliers, :cities)
379
+ supplying = (join with_countries, :supplies)
380
+ (summarize supplying,
381
+ [:country],
382
+ :which => Agg::group(:pid),
383
+ :total => Agg::sum{ qty })
384
+
385
+ And here is the result !
386
+
387
+ +----------+----------+--------+
388
+ | :country | :which | :total |
389
+ +----------+----------+--------+
390
+ | England | +------+ | 2200 |
391
+ | | | :pid | | |
392
+ | | +------+ | |
393
+ | | | P1 | | |
394
+ | | | P2 | | |
395
+ | | | P3 | | |
396
+ | | | P4 | | |
397
+ | | | P5 | | |
398
+ | | | P6 | | |
399
+ | | +------+ | |
400
+ | France | +------+ | 200 |
401
+ | | | :pid | | |
402
+ | | +------+ | |
403
+ | | | P2 | | |
404
+ | | +------+ | |
405
+ +----------+----------+--------+
406
+
407
+ ### Reference API
408
+
409
+ For now, the Ruby API is documented in the commandline help itself (a cheatsheet
410
+ or something will be provided as soon as possible). For example, you'll find the
411
+ allowed syntaxes for RESTRICT as follows:
412
+
413
+ % alf help restrict
414
+
415
+ ...
416
+ API & EXAMPLE
417
+
418
+ # Restrict to suppliers with status greater than 20
419
+ (restrict :suppliers, lambda{ status > 20 })
420
+
421
+ # Restrict to suppliers that live in London
422
+ (restrict :suppliers, lambda{ city == 'London' })
423
+ ...
247
424
 
248
425
  ### Coping with non-relational data sources (nil, duplicates, etc.)
249
426
 
@@ -271,7 +448,7 @@ is null/nil, but it won't probably fail if any other attribute is nil.
271
448
 
272
449
  This best-effort strategy is not enough, and strictly speaking, must be considered
273
450
  unsound (for example, it strongly hurts optimization possibilities). Therefore,
274
- we strongly encourage you to go a step further: **if relational operators want
451
+ I strongly encourage you to go a step further: **if relational operators want
275
452
  true relations as input, please, give them!**. For this, Alf also provides a few
276
453
  non-relational operators in addition to relational ones. Those operators must be
277
454
  interpreted as "pre-relational" operators, in the sense that they help obtaining
@@ -281,13 +458,12 @@ You'll find,
281
458
 
282
459
  * <code>alf autonum</code> -- ensure no duplicates by generating a unique attribute
283
460
  * <code>alf compact</code> -- brute-force duplicates removal
284
- * <code>alf defaults</code> -- replace nulls/nil by valid values, on an attribute
285
- basis
461
+ * <code>alf defaults</code> -- replace nulls/nil by valid values, on an attribute basis
286
462
 
287
463
  Play the game, it's easy!
288
464
 
289
465
  - _Give id, name and status of suppliers whose status is greater than 10_
290
- - Hey man, we don't know supplier's status for all of them! What about the others?
466
+ - Hey man, we don't know the status for all suppliers! What about these cases?
291
467
  - _Ignore them_
292
468
  - No problem dude!
293
469
 
@@ -311,16 +487,16 @@ analyzing the whole query expression in the light of a catalog of typed
311
487
  operators. This way, a tool can check that a query is statically valid, i.e.
312
488
  that it respects operator preconditions. While this approach has the major
313
489
  advantage of allowing strong optimizations, it also has a few drawbacks (as
314
- knowing the heading of used datasources in advance) and is difficult to mary
315
- with dynamically-typed languages like Ruby. Therefore, Alf takes another approach,
316
- which is similar to duck-typing. In essence, this approach can be summarized as
317
- follows:
490
+ the need to know the heading of used datasources in advance) and is difficult to
491
+ marry with dynamically-typed languages like Ruby. Therefore, Alf takes another
492
+ approach, which is similar to duck-typing. In essence, this approach can be
493
+ summarized as follows:
318
494
 
319
- - _You have the responsibility of ensuring that the evaluation of your query
320
- will succeed and will return valid results_
495
+ - _You have the responsibility of not violating operators' preconditions. If you
496
+ honor them, Alf has the responsibility of returning correct results._
321
497
  - No problem dude!
322
498
 
323
- ## Getting started in shell
499
+ ## More about the shell command line
324
500
 
325
501
  % alf --help
326
502
 
@@ -440,104 +616,7 @@ Also, mimicing the ruby executable, the following invocation is also possible:
440
616
  where the argument is a relational expression in Alf's Lispy dialect, which
441
617
  is detailed in the next section.
442
618
 
443
- ## Lispy expressions
444
-
445
- If you take a look at .alf example files, you'll find functional ruby expressions
446
- like the following:
447
-
448
- % cat examples/minus.alf
449
-
450
- # Give all suppliers, except those living in Paris
451
- (minus :suppliers,
452
- (restrict :suppliers, lambda{ city == 'Paris' }))
453
-
454
- # This is a contrived example for illustrating minus, as the
455
- # following is equivalent
456
- (restrict :suppliers, lambda{ city != 'Paris' })
457
-
458
- You can simply execute such expressions with the alf command line itself (the
459
- three following invocations return the same result):
460
-
461
- % alf examples/minus.alf | alf show
462
- % alf show minus
463
- % alf -e "(restrict :suppliers, lambda{ city != 'Paris' })" | alf show
464
-
465
- Symbols are magically resolved from the environment, which is wired to the
466
- examples by default. See the dedicated sections below to update this behavior
467
- to your needs.
468
-
469
- ### Algebra is closed under its operators!
470
-
471
- Of course, from the closure property of a relational algebra (that states that
472
- operators works on relations and return relations), you can use a sub expression
473
- *everytime* a relational operand is expected, everytime:
474
-
475
- # Compute the total qty supplied in each country together with the subset
476
- # of products shipped there. Only consider suppliers that have a status
477
- # greater than 10, however.
478
- (summarize \
479
- (join \
480
- (join (restrict :suppliers, lambda{ status > 10 }),
481
- :supplies),
482
- :cities),
483
- [:country],
484
- :which => Agg::group(:pid),
485
- :total => Agg::sum{ qty })
486
-
487
- Of course, complex queries quickly become unreadable that way. But you can always
488
- split complex tasks in more simple ones using _with_:
489
-
490
- with( :kept_suppliers => (restrict :suppliers, lambda{ status > 10 }),
491
- :with_countries => (join :kept_suppliers, :cities),
492
- :supplying => (join :with_countries, :supplies) ) do
493
- (summarize :supplying,
494
- [:country],
495
- :which => Agg::group(:pid),
496
- :total => Agg::sum{ qty })
497
- end
498
-
499
- And here is the result !
500
-
501
- +----------+----------+--------+
502
- | :country | :which | :total |
503
- +----------+----------+--------+
504
- | England | +------+ | 2200 |
505
- | | | :pid | | |
506
- | | +------+ | |
507
- | | | P1 | | |
508
- | | | P2 | | |
509
- | | | P3 | | |
510
- | | | P4 | | |
511
- | | | P5 | | |
512
- | | | P6 | | |
513
- | | +------+ | |
514
- | France | +------+ | 200 |
515
- | | | :pid | | |
516
- | | +------+ | |
517
- | | | P2 | | |
518
- | | +------+ | |
519
- +----------+----------+--------+
520
-
521
-
522
- ### Going further
523
-
524
- For now, the Ruby API is documented in the commandline help itself (a cheatsheet
525
- or something will be provided as soon as possible). For example, you'll find the
526
- allowed syntaxes for RESTRICT as follows:
527
-
528
- % alf help restrict
529
-
530
- ...
531
- API & EXAMPLE
532
-
533
- # Restrict to suppliers with status greater than 20
534
- (restrict :suppliers, lambda{ status > 20 })
535
-
536
- # Restrict to suppliers that live in London
537
- (restrict :suppliers, lambda{ city == 'London' })
538
- ...
539
-
540
- ## Interfacing Alf in Ruby
619
+ ## More about Alf in Ruby
541
620
 
542
621
  ### Calling commands 'ala' shell
543
622
 
@@ -560,11 +639,10 @@ If this kind of API is not sufficiently expressive for you, you'll have to learn
560
639
  the APIs deeper, and use the Lispy functional style that Alf provides, which can
561
640
  be compiled and used as explained in the next section.
562
641
 
563
- ### Compiling lispy expressions
642
+ ### Compiler vs. Relation data structure
564
643
 
565
- If you want to use Alf in ruby directly (that is, not in shell or by executing
566
- .alf files), you can simply compile expressions and use resulting operators as
567
- follows:
644
+ The compiler allows you to manipulate algebra expressions. Just obtain a Lispy
645
+ instance on an environment and you're ready:
568
646
 
569
647
  #
570
648
  # Expressions can simply be compiled as illustrated below. We use the
@@ -572,29 +650,37 @@ follows:
572
650
  # available environments.
573
651
  #
574
652
  lispy = Alf.lispy(Alf::Environment.examples)
575
- op = lispy.compile do
653
+ london_suppliers = lispy.compile do
576
654
  (restrict :suppliers, lambda{ city == 'London' })
577
655
  end
578
656
 
579
657
  #
580
- # Returned _op_ is an enumerable of ruby hashes. Provided that datasets
658
+ # Returned operator is an enumerable of ruby hashes. Provided that datasets
581
659
  # offered by the environment (:suppliers here) can be enumerated more than
582
660
  # once, the operator may be used multiple times and is even thread safe!
583
661
  #
584
- op.each do |tuple|
662
+ london_suppliers.each do |tuple|
585
663
  # tuple is a ruby Hash
586
664
  end
587
665
 
588
666
  #
589
667
  # Now, maybe you want to reuse london_suppliers in a larger query, for example
590
- # by projecting on the city attribute... Here is how with expressions
591
- # can be handled in that case
668
+ # by projecting on the city attribute... Here is how this can be
669
+ # done:
592
670
  #
593
- projection = lispy.with(:kept_suppliers => op) do
594
- (project :kept_suppliers, [:city])
595
- end
671
+ projection = (project london_suppliers, [:city])
672
+
673
+ Note that the examples above manipulate algebra operators, not relations per se.
674
+ This means that equality and other such operators, that operate on relation
675
+ _values_, do not operate correctly here:
676
+
677
+ projection == Alf::Relation[{:city => 'London'}]
678
+ # => nil
679
+
680
+ In contrast, you can use such operators when operating on true relation values:
596
681
 
597
- ## Going further
682
+ projection.to_rel == Alf::Relation[{:city => 'London'}]
683
+ # => true
598
684
 
599
685
  ### Using/Implementing other Environments
600
686
 
@@ -681,7 +767,7 @@ following template for contributions in lib/alf/renderer
681
767
 
682
768
  ## Related Work & Tools
683
769
 
684
- - You should certainly have a look at the Third Manifesto website: http://www.thethirdmanifesto.com/
770
+ - You should certainly have a look at the Third Manifesto website: {http://www.thethirdmanifesto.com/}
685
771
  - Why not read the {http://www.dcs.warwick.ac.uk/~hugh/TTM/DBE-Chapter01.pdf
686
772
  third manifesto paper} itself?
687
773
  - Also have a look at {http://www.dcs.warwick.ac.uk/~hugh/TTM/Projects.html other
@@ -713,8 +799,8 @@ your needs and I'll see what I can do!
713
799
  ### Internals -- Tribute to Sinatra
714
800
 
715
801
  Alf's code style is heavily inspired by what I've found in Sinatra when looking
716
- at its internals a few month ago. Alf, as Sinatra, is mostly implemented in a
717
- single file, lib/alf.rb. Everything is there except additional contributions
802
+ at its internals a few months ago. Alf, like Sinatra, is mostly implemented in a
803
+ single file, lib/alf.rb. Everything is there except specific third-party contributions
718
804
  (in lib/alf/...). You'll need an editor or IDE that supports code folding/unfolding.
719
805
  Then, follow the guide:
720
806
 
data/TODO.md CHANGED
@@ -4,17 +4,17 @@
4
4
  (rename :suppliers, [:name, :city], :suffix => "_sup")
5
5
  (rename :suppliers, [:name, :city], lambda{|name| name.upcase})
6
6
 
7
- * NEST: provide a multi-nesting ability?
7
+ * WRAP: provide a multi-wrapping ability?
8
8
 
9
- (nest (nest :supplies, [:a, :b], :x), [:x, :c], :y)
10
- => (nest :supplies, :x => [:a, :b], :y => [:x, :c])
9
+ (wrap (wrap :supplies, [:a, :b], :x), [:x, :c], :y)
10
+ => (wrap :supplies, :x => [:a, :b], :y => [:x, :c])
11
11
 
12
12
  But this would only work with Ruby 1.9 as the hash order would be important
13
13
  as such
14
14
 
15
15
  * GROUP: provide a multi-grouping ability?
16
16
 
17
- Similar to nest, with same limitation.
17
+ Similar to wrap, with same limitation.
18
18
 
19
19
  * Add PIVOT and UNPIVOT operators
20
20
 
data/alf.gemspec CHANGED
@@ -21,13 +21,13 @@ Gem::Specification.new do |s|
21
21
  # A short summary of this gem
22
22
  #
23
23
  # This is displayed in `gem list -d`.
24
- s.summary = "Classy data-manipulation dressed in a DSL (+ commandline)"
24
+ s.summary = "Relational Algebra at your fingertips"
25
25
 
26
26
  # A long description of this gem (required)
27
27
  #
28
28
  # The description should be more detailed than the summary. For example,
29
29
  # you might wish to copy the entire README into the description.
30
- s.description = "Alf is a commandline tool and Ruby library to manipulate data with all the \npower of a truly relational algebra approach. "
30
+ s.description = "Alf brings the relational algebra both in Shell and in Ruby. In Shell, because \nmanipulating any relation-like data source should be as straightforward as a \none-liner. In Ruby, because I've never understood why programming languages \nprovide data structures like arrays, hashes, sets, trees and graphs but not \n_relations_... Let's stop the segregation ;-)"
31
31
 
32
32
  # The URL of this gem home page (optional)
33
33
  s.homepage = "http://rubydoc.info/github/blambeau/alf/master/frames"
@@ -123,7 +123,7 @@ Gem::Specification.new do |s|
123
123
  # One call to add_development_dependency('gem_name', 'gem version requirement')
124
124
  # for each development dependency. These gems are required for developers
125
125
  #
126
- s.add_development_dependency("rake", "~> 0.8.7")
126
+ s.add_development_dependency("rake", "~> 0.9.2")
127
127
  s.add_development_dependency("bundler", "~> 1.0")
128
128
  s.add_development_dependency("rspec", "~> 2.6.0")
129
129
  s.add_development_dependency("yard", "~> 0.7.2")