alf 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +64 -0
- data/Gemfile.lock +4 -4
- data/README.md +257 -171
- data/TODO.md +4 -4
- data/alf.gemspec +3 -3
- data/alf.noespec +11 -6
- data/examples/pseudo-with.alf +7 -0
- data/examples/runall.sh +2 -2
- data/examples/unwrap.alf +4 -0
- data/examples/wrap.alf +2 -0
- data/lib/alf/relation.rb +118 -0
- data/lib/alf/version.rb +1 -1
- data/lib/alf.rb +320 -169
- data/spec/integration/src/test_minus.alf +5 -0
- data/spec/integration/src/test_project.alf +9 -0
- data/spec/{alf_spec.rb → integration/test_alf.rb} +8 -21
- data/spec/integration/test_alf_specs.rb +37 -0
- data/spec/{examples_spec.rb → integration/test_examples.rb} +1 -1
- data/spec/spec_helper.rb +19 -1
- data/spec/unit/environment/examples/suppliers.rash +5 -0
- data/spec/{environment/explicit_spec.rb → unit/environment/test_explicit.rb} +0 -0
- data/spec/{environment/folder_spec.rb → unit/environment/test_folder.rb} +1 -1
- data/spec/{operator → unit/operator}/non_relational/compact/buffer_based.rb +0 -0
- data/spec/{operator/non_relational/compact/sort_based_spec.rb → unit/operator/non_relational/compact/test_sort_based.rb} +0 -0
- data/spec/{operator/non_relational/autonum_spec.rb → unit/operator/non_relational/test_autonum.rb} +0 -0
- data/spec/{operator/non_relational/clip_spec.rb → unit/operator/non_relational/test_clip.rb} +0 -0
- data/spec/{operator/non_relational/compact_spec.rb → unit/operator/non_relational/test_compact.rb} +0 -0
- data/spec/{operator/non_relational/defaults_spec.rb → unit/operator/non_relational/test_defaults.rb} +0 -0
- data/spec/{operator/non_relational/sort_spec.rb → unit/operator/non_relational/test_sort.rb} +0 -0
- data/spec/{operator/relational/join/hash_based_spec.rb → unit/operator/relational/join/test_hash_based.rb} +0 -0
- data/spec/unit/operator/relational/summarize/test_hash_based.rb +38 -0
- data/spec/{operator/relational/summarize/sort_based_spec.rb → unit/operator/relational/summarize/test_sort_based.rb} +0 -0
- data/spec/{operator/relational/extend_spec.rb → unit/operator/relational/test_extend.rb} +0 -0
- data/spec/{operator/relational/group_spec.rb → unit/operator/relational/test_group.rb} +3 -2
- data/spec/{operator/relational/intersect_spec.rb → unit/operator/relational/test_intersect.rb} +0 -0
- data/spec/unit/operator/relational/test_join.rb +36 -0
- data/spec/{operator/relational/minus_spec.rb → unit/operator/relational/test_minus.rb} +0 -0
- data/spec/{operator/relational/project_spec.rb → unit/operator/relational/test_project.rb} +0 -0
- data/spec/{operator/relational/quota_spec.rb → unit/operator/relational/test_quota.rb} +0 -0
- data/spec/{operator/relational/rename_spec.rb → unit/operator/relational/test_rename.rb} +0 -0
- data/spec/{operator/relational/restrict_spec.rb → unit/operator/relational/test_restrict.rb} +0 -0
- data/spec/unit/operator/relational/test_summarize.rb +64 -0
- data/spec/{operator/relational/ungroup_spec.rb → unit/operator/relational/test_ungroup.rb} +0 -0
- data/spec/{operator/relational/union_spec.rb → unit/operator/relational/test_union.rb} +0 -0
- data/spec/{operator/relational/unnest_spec.rb → unit/operator/relational/test_unwrap.rb} +5 -5
- data/spec/{operator/relational/nest_spec.rb → unit/operator/relational/test_wrap.rb} +5 -5
- data/spec/{operator/command_methods_spec.rb → unit/operator/test_command_methods.rb} +0 -0
- data/spec/unit/operator/test_non_relational.rb +18 -0
- data/spec/unit/operator/test_relational.rb +27 -0
- data/spec/{reader → unit/reader}/input.rb +0 -0
- data/spec/unit/reader/test_alf_file.rb +27 -0
- data/spec/{reader/rash_spec.rb → unit/reader/test_rash.rb} +0 -0
- data/spec/unit/relation/test_coerce.rb +53 -0
- data/spec/unit/relation/test_inspect.rb +20 -0
- data/spec/unit/relation/test_relops.rb +46 -0
- data/spec/{renderer/text/cell_spec.rb → unit/renderer/text/test_cell.rb} +0 -0
- data/spec/{renderer/text/row_spec.rb → unit/renderer/text/test_row.rb} +0 -0
- data/spec/{renderer/text/table_spec.rb → unit/renderer/text/test_table.rb} +0 -0
- data/spec/{aggregator_spec.rb → unit/test_aggregator.rb} +6 -6
- data/spec/{assumptions_spec.rb → unit/test_assumptions.rb} +0 -0
- data/spec/{lispy_spec.rb → unit/test_lispy.rb} +0 -0
- data/spec/unit/test_operator.rb +16 -0
- data/spec/{reader_spec.rb → unit/test_reader.rb} +4 -0
- data/spec/unit/test_relation.rb +40 -0
- data/spec/{renderer_spec.rb → unit/test_renderer.rb} +0 -0
- data/spec/{tools/ordering_key_spec.rb → unit/tools/test_ordering_key.rb} +0 -0
- data/spec/{tools/projection_key_spec.rb → unit/tools/test_projection_key.rb} +0 -0
- data/spec/{tools/tools_spec.rb → unit/tools/test_tools.rb} +0 -0
- data/spec/{tools/tuple_handle_spec.rb → unit/tools/test_tuple_handle.rb} +0 -0
- data/tasks/clean.rake +3 -0
- data/tasks/spec_test.rake +1 -1
- metadata +143 -114
- data/examples/nest.alf +0 -2
- data/examples/unnest.alf +0 -4
- data/examples/with.alf +0 -23
- data/spec/operator/relational/summarize_spec.rb +0 -41
- data/spec/reader/alf_file_spec.rb +0 -15
data/README.md
CHANGED
@@ -1,38 +1,64 @@
|
|
1
|
-
# Alf -
|
1
|
+
# Alf - Relational Algebra at your fingertips (version 0.9.1)
|
2
|
+
|
3
|
+
## Description
|
4
|
+
|
5
|
+
### What & Why
|
6
|
+
|
7
|
+
Alf brings the relational algebra both in Shell and in Ruby. In Shell, because
|
8
|
+
manipulating any relation-like data source should be as straightforward as a
|
9
|
+
one-liner. In Ruby, because I've never understood why programming languages
|
10
|
+
provide data structures like arrays, hashes, sets, trees and graphs but not
|
11
|
+
_relations_... Let's stop the segregation ;-)
|
12
|
+
|
13
|
+
### Install
|
2
14
|
|
3
15
|
% [sudo] gem install alf
|
4
16
|
% alf --help
|
5
17
|
|
6
|
-
|
18
|
+
### Links
|
7
19
|
|
8
20
|
* {http://rubydoc.info/github/blambeau/alf/master/frames} (read this file there!)
|
9
21
|
* {http://github.com/blambeau/alf} (source code)
|
10
22
|
* {http://revision-zero.org} (author's blog)
|
11
23
|
|
12
|
-
|
24
|
+
### Quick overview
|
13
25
|
|
14
26
|
Alf is a commandline tool and Ruby library to manipulate data with all the power
|
15
27
|
of a truly relational algebra approach. Objectives behind Alf are manifold:
|
16
28
|
|
17
|
-
* Pragmatically, Alf aims at being a useful commandline executable for
|
18
|
-
|
19
|
-
|
20
|
-
commands and implemented relational operators.
|
29
|
+
* Pragmatically, Alf aims at being a useful commandline executable for manipulating
|
30
|
+
relational-like data: database records, csv files, or **whatever can be interpreted
|
31
|
+
as (the physical encoding of) a relation**. See 'alf --help' for the list of
|
32
|
+
available commands and implemented relational operators.
|
21
33
|
|
22
34
|
% alf restrict suppliers -- "city == 'London'" | alf join cities
|
23
35
|
|
24
36
|
* Alf is also a 100% Ruby relational algebra implementation shipped with a simple
|
25
37
|
to use, powerful, functional DSL for compiling and evaluating relational queries.
|
26
|
-
Alf is not limited to simple scalar values, but
|
38
|
+
Alf is not limited to simple scalar values, but admits values of arbitrary
|
27
39
|
complexity (under a few requirements about their implementation, see next
|
28
40
|
section). See 'alf --help' as well as .alf files in the examples directory
|
29
41
|
for syntactic examples.
|
30
42
|
|
31
|
-
Alf.lispy.
|
43
|
+
Alf.lispy.evaluate {
|
32
44
|
(join (restrict :suppliers, lambda{ city == 'London' }), :cities)
|
33
45
|
}
|
34
|
-
|
35
|
-
|
46
|
+
|
47
|
+
In addition to this functional syntax, Alf comes bundled with an in-memory
|
48
|
+
Relation data structure that provides an object-oriented way of manipulating
|
49
|
+
relations in simplest cases:
|
50
|
+
|
51
|
+
suppliers = Alf::Relation[
|
52
|
+
{:sid => 'S1', :name => 'Smith', :status => 20, :city => 'London'},
|
53
|
+
{:sid => 'S2', :name => 'Jones', :status => 10, :city => 'Paris'},
|
54
|
+
{:sid => 'S3', :name => 'Blake', :status => 30, :city => 'Paris'},
|
55
|
+
{:sid => 'S4', :name => 'Clark', :status => 20, :city => 'London'},
|
56
|
+
{:sid => 'S5', :name => 'Adams', :status => 30, :city => 'Athens'},
|
57
|
+
]
|
58
|
+
cities = ...
|
59
|
+
puts suppliers.restrict(lambda{ city == 'London' }).join(cities)
|
60
|
+
|
61
|
+
* Alf is also an educational tool, that I've written to draw people's attention
|
36
62
|
to the little-known relational theory (and ill-represented by SQL). The tool
|
37
63
|
is largely inspired by TUTORIAL D, the tutorial language of Chris Date and
|
38
64
|
Hugh Darwen in their books, more specifically in
|
@@ -41,9 +67,9 @@ of a truly relational algebra approach. Objectives behind Alf are manifold:
|
|
41
67
|
there (Alf is neither a relational _database_, nor a relational _language_).
|
42
68
|
I hope that people (especially talented developers) will be sufficiently
|
43
69
|
enticed by features shown here to open that book, read it more deeply, and
|
44
|
-
implement new stuff around Date & Darwen vision. Have a look at the result of
|
45
|
-
the following query for things that you'll never ever have in SQL
|
46
|
-
'alf help quota', 'alf help
|
70
|
+
implement new stuff around Date & Darwen's vision. Have a look at the result of
|
71
|
+
the following query for the kind of things that you'll never ever have in SQL
|
72
|
+
(see also 'alf help quota', 'alf help wrap', 'alf help group', ...):
|
47
73
|
|
48
74
|
% alf --text summarize supplies --by=sid -- total "sum(:qty)" -- which "group(:pid)"
|
49
75
|
|
@@ -74,7 +100,7 @@ Familiar? Skip. Otherwise, read on.
|
|
74
100
|
### The example database
|
75
101
|
|
76
102
|
This README file shows a lot of examples built on top of the following suppliers
|
77
|
-
& parts database (almost identical to the original version in C.J. Date database
|
103
|
+
& parts database (almost identical to the original version in C. J. Date's database
|
78
104
|
books). By default, the alf command line is wired to this embedded example. All
|
79
105
|
examples shown here should therefore work immediately, if you want to reproduce
|
80
106
|
them!
|
@@ -103,7 +129,7 @@ them!
|
|
103
129
|
+-------------------------------------+-------------------------------------------------+-------------------------+------------------------+
|
104
130
|
|
105
131
|
Many people think that relational databases are necessarily 'flat', that they are
|
106
|
-
necessarily limited to
|
132
|
+
necessarily limited to simple scalar values put in two-dimensional tables. This is
|
107
133
|
wrong; most SQL databases are indeed 'flat', but _relations_ (in the mathematical
|
108
134
|
sense of the relational theory) are not! Look, **the example above is a relation!**;
|
109
135
|
that 'contains' other relations as particular values, which, in turn, could
|
@@ -115,15 +141,16 @@ that 'contains' other relations as particular values, which, in turn, could
|
|
115
141
|
To understand what a relation is exactly, one needs to remember elementary
|
116
142
|
notions of set theory and the concepts of _type_ and _value_.
|
117
143
|
|
118
|
-
* A _type_ is a finite set of values; it is
|
119
|
-
a set, it does never
|
144
|
+
* A _type_ is a finite set of values; it is not particularly ordered and, being
|
145
|
+
a set, it never contains two values which are equal (any type is necessarily
|
146
|
+
accompanied with an equality operator, denoted here by '==').
|
120
147
|
|
121
148
|
* A _value_ is **immutable** (you cannot 'change' a value, in any way), has no
|
122
149
|
localization in time and space, and is always typed (that is, it is always
|
123
150
|
accompanied by some identification of the type it belongs to).
|
124
151
|
|
125
152
|
As you can see, _type_ and _value_ are not the same concepts as _class_ and
|
126
|
-
_object_,
|
153
|
+
_object_, which you are probably more familiar with. Alf considers that the
|
127
154
|
latter are _implementations_ of the former. Alf assumes _valid_ implementations
|
128
155
|
(equality and hash methods must be correct) and _valid_ usage (objects used for
|
129
156
|
representing values are kept immutable in practice). Alf _assumes_ this, but
|
@@ -157,11 +184,11 @@ can have them inside relations!
|
|
157
184
|
pairs. Moreover, it does not contain two attributes with the same name and is
|
158
185
|
**not particularly ordered**. Also, **a tuple is a _value_, and is therefore
|
159
186
|
immutable**. Last, but not least, a tuple **does not admit nulls/nils**. Tuples
|
160
|
-
in Alf are simply implemented with ruby hashes, taken as
|
187
|
+
in Alf are simply implemented with ruby hashes, taken as tuple implementations.
|
161
188
|
Not all hashes are valid tuple implementations, of course (those containing nil
|
162
189
|
are not, for example). Alf _assumes_ valid tuples, but does not _enforce_ this
|
163
190
|
precondition. It's up to you to use Alf the right way! No support is or will
|
164
|
-
ever be provided for ordering tuple attributes.
|
191
|
+
ever be provided for ordering tuple attributes. However, as hashes are ordered
|
165
192
|
in Ruby 1.9, Alf implements a best effort strategy to keep a friendly ordering
|
166
193
|
when rendering tuples and relations. This is a very good practical reason for
|
167
194
|
migrating to ruby 1.9 if not already done!
|
@@ -173,15 +200,18 @@ can have them inside relations!
|
|
173
200
|
particularly ordered**. Moreover, all tuples of a relation must have the same
|
174
201
|
_heading_, that is, the same set of attribute (name, type) pairs. Also, **a
|
175
202
|
relation is a _value_, is therefore immutable** and **does not admit null/nil**.
|
176
|
-
|
177
|
-
|
178
|
-
|
203
|
+
|
204
|
+
Alf is mainly an implementation of relational algebra (see section below). The
|
205
|
+
implemented operators consider any Iterator of tuples as a potentially valid
|
206
|
+
operand. In addition Alf provides a Relation ruby class, that acts as an
|
207
|
+
in-memory data structure that provides an Object-Oriented API to call operators
|
208
|
+
(see "Interfacing Alf in Ruby" below).
|
179
209
|
|
180
210
|
### Relational Algebra
|
181
211
|
|
182
|
-
In classical algebra, you can
|
183
|
-
relational algebra, you can
|
184
|
-
functional programming-oriented syntax for algebra expressions:
|
212
|
+
In classical algebra, you can make computations like <code>(5 + 2) - 3</code>.
|
213
|
+
In relational algebra, you can do similar things on relations. Alf uses a
|
214
|
+
prefix, functional programming-oriented syntax for algebra expressions:
|
185
215
|
|
186
216
|
(minus (union :suppliers, xxx), yyy)
|
187
217
|
|
@@ -201,28 +231,66 @@ you want! The same query, in shell:
|
|
201
231
|
The Third Manifesto defines a series of prescriptions, proscriptions and very
|
202
232
|
strong suggestions for designing a truly relational _language_, called a _D_,
|
203
233
|
as an alternative to SQL for managing relational databases. This is far beyond
|
204
|
-
|
205
|
-
transactions, and so on.) and don't actually define a programming language
|
206
|
-
|
234
|
+
my objective with Alf, as it does not touch on database issues at all (persistence,
|
235
|
+
transactions, and so on) and does not actually define a programming language (only
|
236
|
+
a small functional ruby DSL).
|
207
237
|
|
208
238
|
Alf must simply be interpreted as a ruby library implementing (a variant of)
|
209
|
-
Date
|
239
|
+
Date and Darwen's relational algebra. This library is designed as a set of operator
|
210
240
|
implementations, that work as tuple iterators taking other tuple iterators as
|
211
241
|
input. Under the pre-condition that you provide them _valid_ tuple iterators as
|
212
242
|
input (no duplicates, no nil, + other preconditions on an operator basis), the
|
213
243
|
result is a valid iterator as well. Unless explicitly stated otherwise, any
|
214
244
|
behavior observed when not respecting these preconditions, even an interesting
|
215
|
-
behavior, is not guaranteed and
|
216
|
-
about versioning policy at the end of this file).
|
245
|
+
behavior, is not guaranteed and might change with tiny version changes (see
|
246
|
+
section about versioning policy at the end of this file).
|
247
|
+
|
248
|
+
### The command line utility
|
249
|
+
|
250
|
+
#
|
251
|
+
# Provided that suppliers and cities are valid relation representations
|
252
|
+
# [something similar]
|
253
|
+
#
|
254
|
+
% alf restrict suppliers -- "city == 'London'" | alf join cities
|
255
|
+
|
256
|
+
# the resulting stream is a valid relation representation in the output
|
257
|
+
# stream format that you have selected (.rash by default). It can therefore
|
258
|
+
# be piped to another alf shell invocation, or saved to a file and re-read
|
259
|
+
# later (under the assumption that input and output data formats match, of
|
260
|
+
# course). [Something similar about responsibility and bug].
|
217
261
|
|
218
|
-
|
262
|
+
If you take a look at .alf example files, you'll find functional ruby expressions
|
263
|
+
like the following (called Lispy expressions):
|
264
|
+
|
265
|
+
% cat examples/minus.alf
|
266
|
+
|
267
|
+
# Give all suppliers, except those living in Paris
|
268
|
+
(minus :suppliers,
|
269
|
+
(restrict :suppliers, lambda{ city == 'Paris' }))
|
270
|
+
|
271
|
+
# This is a contrived example for illustrating minus, as the
|
272
|
+
# following is equivalent
|
273
|
+
(restrict :suppliers, lambda{ city != 'Paris' })
|
274
|
+
|
275
|
+
You can simply execute such expressions with the alf command line itself (the
|
276
|
+
three following invocations return the same result):
|
277
|
+
|
278
|
+
% alf examples/minus.alf | alf show
|
279
|
+
% alf show minus
|
280
|
+
% alf -e "(restrict :suppliers, lambda{ city != 'Paris' })" | alf show
|
281
|
+
|
282
|
+
Symbols are magically resolved from the environment, which is wired to the
|
283
|
+
examples by default. See the dedicated sections below to update this behavior
|
284
|
+
to your needs.
|
285
|
+
|
286
|
+
### The algebra compiler
|
219
287
|
|
220
288
|
#
|
221
289
|
# Provided that :suppliers and :cities are valid relation representations
|
222
290
|
# (under the responsibility shared by you and the Reader and Environment
|
223
291
|
# subclasses you use -- see later), then,
|
224
292
|
#
|
225
|
-
op = Alf.lispy.compile{
|
293
|
+
op = Alf.lispy.compile {
|
226
294
|
(join (restrict :suppliers, lambda{ city == 'London' }), :cities)
|
227
295
|
}
|
228
296
|
|
@@ -231,19 +299,128 @@ about versioning policy at the end of this file).
|
|
231
299
|
# of any other expression. This is under Alf's responsibility, and any
|
232
300
|
# failure must be considered a bug!
|
233
301
|
|
234
|
-
###
|
302
|
+
### The Relation data structure
|
235
303
|
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
#
|
240
|
-
% alf restrict suppliers -- "city == 'London'" | alf join cities
|
304
|
+
In addition, Alf is bundled with an in-memory Relation data structure that
|
305
|
+
provides a more abstract API for manipulating relations in simple cases (the
|
306
|
+
rules are the same about pre and post-conditions):
|
241
307
|
|
242
|
-
#
|
243
|
-
#
|
244
|
-
|
245
|
-
|
246
|
-
|
308
|
+
# The query above can be done as follows. Note that relations are always
|
309
|
+
# loaded in memory here!
|
310
|
+
suppliers = Alf::Relation[ ... ]
|
311
|
+
cities = Alf::Relation[ ... ]
|
312
|
+
suppliers.restrict(lambda{ city == 'London' }).
|
313
|
+
join(cities)
|
314
|
+
# => Alf::Relation[ ... ]
|
315
|
+
|
316
|
+
All relational operators have an instance method equivalent on the Alf::Relation
|
317
|
+
class. Semantically, the receiver object is simply the first operand of the
|
318
|
+
functional call, as illustrated above.
|
319
|
+
|
320
|
+
### Where do relations come from?
|
321
|
+
|
322
|
+
Relation literals can simply be written as follows:
|
323
|
+
|
324
|
+
suppliers = Alf::Relation[
|
325
|
+
{:sid => 'S1', :name => 'Smith', :status => 20, :city => 'London'},
|
326
|
+
{:sid => 'S2', :name => 'Jones', :status => 10, :city => 'Paris'},
|
327
|
+
{:sid => 'S3', :name => 'Blake', :status => 30, :city => 'Paris'},
|
328
|
+
{:sid => 'S4', :name => 'Clark', :status => 20, :city => 'London'},
|
329
|
+
{:sid => 'S5', :name => 'Adams', :status => 30, :city => 'Athens'},
|
330
|
+
]
|
331
|
+
|
332
|
+
Environment classes serve datasets (see later) that always have a to_rel method
|
333
|
+
for obtaining in-memory relations:
|
334
|
+
|
335
|
+
env = Alf::Environment.examples
|
336
|
+
env.dataset(:suppliers).to_rel
|
337
|
+
# => Alf::Relation[ ... ]
|
338
|
+
|
339
|
+
Compiled expressions always have a to_rel method that allows obtaining an
|
340
|
+
in-memory relation:
|
341
|
+
|
342
|
+
op = Alf.lispy.compile {
|
343
|
+
(join (restrict :suppliers, lambda{ city == 'London' }), :cities)
|
344
|
+
}
|
345
|
+
op.to_rel
|
346
|
+
# => Alf::Relation[...]
|
347
|
+
|
348
|
+
Lispy provides an 'evaluate' method which is precisely equivalent to the chain
|
349
|
+
above. Therefore:
|
350
|
+
|
351
|
+
rel = Alf.lispy.evaluate {
|
352
|
+
(join (restrict :suppliers, lambda{ city == 'London' }), :cities)
|
353
|
+
}
|
354
|
+
# => Alf::Relation[...]
|
355
|
+
|
356
|
+
### Algebra is closed under its operators!
|
357
|
+
|
358
|
+
Of course, from the closure property of a relational algebra (that states that
|
359
|
+
operators work on relations and return relations), you can use a sub expression
|
360
|
+
*every time* a relational operand is expected, every time:
|
361
|
+
|
362
|
+
# Compute the total qty supplied in each country together with the subset
|
363
|
+
# of products shipped there. Only consider suppliers that have a status
|
364
|
+
# greater than 10, however.
|
365
|
+
(summarize \
|
366
|
+
(join \
|
367
|
+
(join (restrict :suppliers, lambda{ status > 10 }),
|
368
|
+
:supplies),
|
369
|
+
:cities),
|
370
|
+
[:country],
|
371
|
+
:which => Agg::group(:pid),
|
372
|
+
:total => Agg::sum{ qty })
|
373
|
+
|
374
|
+
Of course, complex queries quickly become unreadable that way. But you can always
|
375
|
+
split complex tasks into simpler ones:
|
376
|
+
|
377
|
+
kept_suppliers = (restrict :suppliers, lambda{ status > 10 })
|
378
|
+
with_countries = (join kept_suppliers, :cities)
|
379
|
+
supplying = (join with_countries, :supplies)
|
380
|
+
(summarize supplying,
|
381
|
+
[:country],
|
382
|
+
:which => Agg::group(:pid),
|
383
|
+
:total => Agg::sum{ qty })
|
384
|
+
|
385
|
+
And here is the result !
|
386
|
+
|
387
|
+
+----------+----------+--------+
|
388
|
+
| :country | :which | :total |
|
389
|
+
+----------+----------+--------+
|
390
|
+
| England | +------+ | 2200 |
|
391
|
+
| | | :pid | | |
|
392
|
+
| | +------+ | |
|
393
|
+
| | | P1 | | |
|
394
|
+
| | | P2 | | |
|
395
|
+
| | | P3 | | |
|
396
|
+
| | | P4 | | |
|
397
|
+
| | | P5 | | |
|
398
|
+
| | | P6 | | |
|
399
|
+
| | +------+ | |
|
400
|
+
| France | +------+ | 200 |
|
401
|
+
| | | :pid | | |
|
402
|
+
| | +------+ | |
|
403
|
+
| | | P2 | | |
|
404
|
+
| | +------+ | |
|
405
|
+
+----------+----------+--------+
|
406
|
+
|
407
|
+
### Reference API
|
408
|
+
|
409
|
+
For now, the Ruby API is documented in the commandline help itself (a cheatsheet
|
410
|
+
or something will be provided as soon as possible). For example, you'll find the
|
411
|
+
allowed syntaxes for RESTRICT as follows:
|
412
|
+
|
413
|
+
% alf help restrict
|
414
|
+
|
415
|
+
...
|
416
|
+
API & EXAMPLE
|
417
|
+
|
418
|
+
# Restrict to suppliers with status greater than 20
|
419
|
+
(restrict :suppliers, lambda{ status > 20 })
|
420
|
+
|
421
|
+
# Restrict to suppliers that live in London
|
422
|
+
(restrict :suppliers, lambda{ city == 'London' })
|
423
|
+
...
|
247
424
|
|
248
425
|
### Coping with non-relational data sources (nil, duplicates, etc.)
|
249
426
|
|
@@ -271,7 +448,7 @@ is null/nil, but it won't probably fail if any other attribute is nil.
|
|
271
448
|
|
272
449
|
This best-effort strategy is not enough, and strictly speaking, must be considered
|
273
450
|
unsound (for example, it strongly hurts optimization possibilities). Therefore,
|
274
|
-
|
451
|
+
I strongly encourage you to go a step further: **if relational operators want
|
275
452
|
true relations as input, please, give them!**. For this, Alf also provides a few
|
276
453
|
non-relational operators in addition to relational ones. Those operators must be
|
277
454
|
interpreted as "pre-relational" operators, in the sense that they help obtaining
|
@@ -281,13 +458,12 @@ You'll find,
|
|
281
458
|
|
282
459
|
* <code>alf autonum</code> -- ensure no duplicates by generating a unique attribute
|
283
460
|
* <code>alf compact</code> -- brute-force duplicates removal
|
284
|
-
* <code>alf defaults</code> -- replace nulls/nil by valid values, on an attribute
|
285
|
-
basis
|
461
|
+
* <code>alf defaults</code> -- replace nulls/nil by valid values, on an attribute basis
|
286
462
|
|
287
463
|
Play the game, it's easy!
|
288
464
|
|
289
465
|
- _Give id, name and status of suppliers whose status is greater than 10_
|
290
|
-
- Hey man, we don't know
|
466
|
+
- Hey man, we don't know the status for all suppliers! What about these cases?
|
291
467
|
- _Ignore them_
|
292
468
|
- No problem dude!
|
293
469
|
|
@@ -311,16 +487,16 @@ analyzing the whole query expression in the light of a catalog of typed
|
|
311
487
|
operators. This way, a tool can check that a query is statically valid, i.e.
|
312
488
|
that it respects operator preconditions. While this approach has the major
|
313
489
|
advantage of allowing strong optimizations, it also has a few drawbacks (as
|
314
|
-
|
315
|
-
with dynamically-typed languages like Ruby. Therefore, Alf takes another
|
316
|
-
which is similar to duck-typing. In essence, this approach can be
|
317
|
-
follows:
|
490
|
+
the need to know the heading of used datasources in advance) and is difficult to
|
491
|
+
marry with dynamically-typed languages like Ruby. Therefore, Alf takes another
|
492
|
+
approach, which is similar to duck-typing. In essence, this approach can be
|
493
|
+
summarized as follows:
|
318
494
|
|
319
|
-
- _You have the responsibility of
|
320
|
-
|
495
|
+
- _You have the responsibility of not violating operators' preconditions. If you
|
496
|
+
comply, Alf has the responsibility of returning correct results._
|
321
497
|
- No problem dude!
|
322
498
|
|
323
|
-
##
|
499
|
+
## More about the shell command line
|
324
500
|
|
325
501
|
% alf --help
|
326
502
|
|
@@ -440,104 +616,7 @@ Also, mimicing the ruby executable, the following invocation is also possible:
|
|
440
616
|
where the argument is a relational expression in Alf's Lispy dialect, which
|
441
617
|
is detailed in the next section.
|
442
618
|
|
443
|
-
##
|
444
|
-
|
445
|
-
If you take a look at .alf example files, you'll find functional ruby expressions
|
446
|
-
like the following:
|
447
|
-
|
448
|
-
% cat examples/minus.alf
|
449
|
-
|
450
|
-
# Give all suppliers, except those living in Paris
|
451
|
-
(minus :suppliers,
|
452
|
-
(restrict :suppliers, lambda{ city == 'Paris' }))
|
453
|
-
|
454
|
-
# This is a contrived example for illustrating minus, as the
|
455
|
-
# following is equivalent
|
456
|
-
(restrict :suppliers, lambda{ city != 'Paris' })
|
457
|
-
|
458
|
-
You can simply execute such expressions with the alf command line itself (the
|
459
|
-
three following invocations return the same result):
|
460
|
-
|
461
|
-
% alf examples/minus.alf | alf show
|
462
|
-
% alf show minus
|
463
|
-
% alf -e "(restrict :suppliers, lambda{ city != 'Paris' })" | alf show
|
464
|
-
|
465
|
-
Symbols are magically resolved from the environment, which is wired to the
|
466
|
-
examples by default. See the dedicated sections below to update this behavior
|
467
|
-
to your needs.
|
468
|
-
|
469
|
-
### Algebra is closed under its operators!
|
470
|
-
|
471
|
-
Of course, from the closure property of a relational algebra (that states that
|
472
|
-
operators works on relations and return relations), you can use a sub expression
|
473
|
-
*everytime* a relational operand is expected, everytime:
|
474
|
-
|
475
|
-
# Compute the total qty supplied in each country together with the subset
|
476
|
-
# of products shipped there. Only consider suppliers that have a status
|
477
|
-
# greater than 10, however.
|
478
|
-
(summarize \
|
479
|
-
(join \
|
480
|
-
(join (restrict :suppliers, lambda{ status > 10 }),
|
481
|
-
:supplies),
|
482
|
-
:cities),
|
483
|
-
[:country],
|
484
|
-
:which => Agg::group(:pid),
|
485
|
-
:total => Agg::sum{ qty })
|
486
|
-
|
487
|
-
Of course, complex queries quickly become unreadable that way. But you can always
|
488
|
-
split complex tasks in more simple ones using _with_:
|
489
|
-
|
490
|
-
with( :kept_suppliers => (restrict :suppliers, lambda{ status > 10 }),
|
491
|
-
:with_countries => (join :kept_suppliers, :cities),
|
492
|
-
:supplying => (join :with_countries, :supplies) ) do
|
493
|
-
(summarize :supplying,
|
494
|
-
[:country],
|
495
|
-
:which => Agg::group(:pid),
|
496
|
-
:total => Agg::sum{ qty })
|
497
|
-
end
|
498
|
-
|
499
|
-
And here is the result !
|
500
|
-
|
501
|
-
+----------+----------+--------+
|
502
|
-
| :country | :which | :total |
|
503
|
-
+----------+----------+--------+
|
504
|
-
| England | +------+ | 2200 |
|
505
|
-
| | | :pid | | |
|
506
|
-
| | +------+ | |
|
507
|
-
| | | P1 | | |
|
508
|
-
| | | P2 | | |
|
509
|
-
| | | P3 | | |
|
510
|
-
| | | P4 | | |
|
511
|
-
| | | P5 | | |
|
512
|
-
| | | P6 | | |
|
513
|
-
| | +------+ | |
|
514
|
-
| France | +------+ | 200 |
|
515
|
-
| | | :pid | | |
|
516
|
-
| | +------+ | |
|
517
|
-
| | | P2 | | |
|
518
|
-
| | +------+ | |
|
519
|
-
+----------+----------+--------+
|
520
|
-
|
521
|
-
|
522
|
-
### Going further
|
523
|
-
|
524
|
-
For now, the Ruby API is documented in the commandline help itself (a cheatsheet
|
525
|
-
or something will be provided as soon as possible). For example, you'll find the
|
526
|
-
allowed syntaxes for RESTRICT as follows:
|
527
|
-
|
528
|
-
% alf help restrict
|
529
|
-
|
530
|
-
...
|
531
|
-
API & EXAMPLE
|
532
|
-
|
533
|
-
# Restrict to suppliers with status greater than 20
|
534
|
-
(restrict :suppliers, lambda{ status > 20 })
|
535
|
-
|
536
|
-
# Restrict to suppliers that live in London
|
537
|
-
(restrict :suppliers, lambda{ city == 'London' })
|
538
|
-
...
|
539
|
-
|
540
|
-
## Interfacing Alf in Ruby
|
619
|
+
## More about Alf in Ruby
|
541
620
|
|
542
621
|
### Calling commands 'ala' shell
|
543
622
|
|
@@ -560,11 +639,10 @@ If this kind of API is not sufficiently expressive for you, you'll have to learn
|
|
560
639
|
the APIs deeper, and use the Lispy functional style that Alf provides, which can
|
561
640
|
be compiled and used as explained in the next section.
|
562
641
|
|
563
|
-
###
|
642
|
+
### Compiler vs. Relation data structure
|
564
643
|
|
565
|
-
|
566
|
-
|
567
|
-
follows:
|
644
|
+
The compilers allow you to manipulate algebra expressions. Just obtain a Lispy
|
645
|
+
instance on an environment and you're ready:
|
568
646
|
|
569
647
|
#
|
570
648
|
# Expressions can simply be compiled as illustrated below. We use the
|
@@ -572,29 +650,37 @@ follows:
|
|
572
650
|
# available environments.
|
573
651
|
#
|
574
652
|
lispy = Alf.lispy(Alf::Environment.examples)
|
575
|
-
|
653
|
+
london_suppliers = lispy.compile do
|
576
654
|
(restrict :suppliers, lambda{ city == 'London' })
|
577
655
|
end
|
578
656
|
|
579
657
|
#
|
580
|
-
# Returned
|
658
|
+
# Returned operator is an enumerable of ruby hashes. Provided that datasets
|
581
659
|
# offered by the environment (:suppliers here) can be enumerated more than
|
582
660
|
# once, the operator may be used multiple times and is even thread safe!
|
583
661
|
#
|
584
|
-
|
662
|
+
london_suppliers.each do |tuple|
|
585
663
|
# tuple is a ruby Hash
|
586
664
|
end
|
587
665
|
|
588
666
|
#
|
589
667
|
# Now, maybe you want to reuse london_suppliers in a larger query, for example
|
590
|
-
# by projecting on the city attribute... Here is how
|
591
|
-
#
|
668
|
+
# by projecting on the city attribute... Here is how this can be
|
669
|
+
# done:
|
592
670
|
#
|
593
|
-
projection =
|
594
|
-
|
595
|
-
|
671
|
+
projection = (project london_suppliers, [:city])
|
672
|
+
|
673
|
+
Note that the examples above manipulate algebra operators, not relations per se.
|
674
|
+
This means that equality and other such operators, that operate on relation
|
675
|
+
_values_, do not operate correctly here:
|
676
|
+
|
677
|
+
projection == Alf::Relation[{:city => 'London'}]
|
678
|
+
# => nil
|
679
|
+
|
680
|
+
In contrast, you can use such operators when operating on true relation values:
|
596
681
|
|
597
|
-
|
682
|
+
projection.to_rel == Alf::Relation[{:city => 'London'}]
|
683
|
+
# => true
|
598
684
|
|
599
685
|
### Using/Implementing other Environments
|
600
686
|
|
@@ -681,7 +767,7 @@ following template for contributions in lib/alf/renderer
|
|
681
767
|
|
682
768
|
## Related Work & Tools
|
683
769
|
|
684
|
-
- You should certainly have a look at the Third Manifesto website: http://www.thethirdmanifesto.com/
|
770
|
+
- You should certainly have a look at the Third Manifesto website: {http://www.thethirdmanifesto.com/}
|
685
771
|
- Why not read the {http://www.dcs.warwick.ac.uk/~hugh/TTM/DBE-Chapter01.pdf
|
686
772
|
third manifesto paper} itself?
|
687
773
|
- Also have a look at {http://www.dcs.warwick.ac.uk/~hugh/TTM/Projects.html other
|
@@ -713,8 +799,8 @@ your needs and I'll see what I can do!
|
|
713
799
|
### Internals -- Tribute to Sinatra
|
714
800
|
|
715
801
|
Alf's code style is heavily inspired by what I've found in Sinatra when looking
|
716
|
-
at its internals a few
|
717
|
-
single file, lib/alf.rb. Everything is there except
|
802
|
+
at its internals a few months ago. Alf, like Sinatra, is mostly implemented in a
|
803
|
+
single file, lib/alf.rb. Everything is there except specific third-party contributions
|
718
804
|
(in lib/alf/...). You'll need an editor or IDE that supports code folding/unfolding.
|
719
805
|
Then, follow the guide:
|
720
806
|
|
data/TODO.md
CHANGED
@@ -4,17 +4,17 @@
|
|
4
4
|
(rename :suppliers, [:name, :city], :suffix => "_sup")
|
5
5
|
(rename :suppliers, [:name, :city], lambda{|name| name.upcase})
|
6
6
|
|
7
|
-
*
|
7
|
+
* WRAP: provide a multi-wrapping ability?
|
8
8
|
|
9
|
-
(
|
10
|
-
=> (
|
9
|
+
(wrap (wrap :supplies, [:a, :b], :x), [:x, :c], :y)
|
10
|
+
=> (wrap :supplies, :x => [:a, :b], :y => [:x, :c])
|
11
11
|
|
12
12
|
But this would only work with Ruby 1.9 as the hash order would be important
|
13
13
|
as such
|
14
14
|
|
15
15
|
* GROUP: provide a multi-grouping ability?
|
16
16
|
|
17
|
-
Similar to
|
17
|
+
Similar to wrap, with same limitation.
|
18
18
|
|
19
19
|
* Add PIVOT and UNPIVOT operators
|
20
20
|
|
data/alf.gemspec
CHANGED
@@ -21,13 +21,13 @@ Gem::Specification.new do |s|
|
|
21
21
|
# A short summary of this gem
|
22
22
|
#
|
23
23
|
# This is displayed in `gem list -d`.
|
24
|
-
s.summary = "
|
24
|
+
s.summary = "Relational Algebra at your fingertips"
|
25
25
|
|
26
26
|
# A long description of this gem (required)
|
27
27
|
#
|
28
28
|
# The description should be more detailed than the summary. For example,
|
29
29
|
# you might wish to copy the entire README into the description.
|
30
|
-
s.description = "Alf
|
30
|
+
s.description = "Alf brings the relational algebra both in Shell and in Ruby. In Shell, because \nmanipulating any relation-like data source should be as straightforward as a \none-liner. In Ruby, because I've never understood why programming languages \nprovide data structures like arrays, hashes, sets, trees and graphs but not \n_relations_... Let's stop the segregation ;-)"
|
31
31
|
|
32
32
|
# The URL of this gem home page (optional)
|
33
33
|
s.homepage = "http://rubydoc.info/github/blambeau/alf/master/frames"
|
@@ -123,7 +123,7 @@ Gem::Specification.new do |s|
|
|
123
123
|
# One call to add_development_dependency('gem_name', 'gem version requirement')
|
124
124
|
# for each development dependency. These gems are required for developers
|
125
125
|
#
|
126
|
-
s.add_development_dependency("rake", "~> 0.
|
126
|
+
s.add_development_dependency("rake", "~> 0.9.2")
|
127
127
|
s.add_development_dependency("bundler", "~> 1.0")
|
128
128
|
s.add_development_dependency("rspec", "~> 2.6.0")
|
129
129
|
s.add_development_dependency("yard", "~> 0.7.2")
|