alf 0.9.0 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +64 -0
- data/Gemfile.lock +4 -4
- data/README.md +257 -171
- data/TODO.md +4 -4
- data/alf.gemspec +3 -3
- data/alf.noespec +11 -6
- data/examples/pseudo-with.alf +7 -0
- data/examples/runall.sh +2 -2
- data/examples/unwrap.alf +4 -0
- data/examples/wrap.alf +2 -0
- data/lib/alf/relation.rb +118 -0
- data/lib/alf/version.rb +1 -1
- data/lib/alf.rb +320 -169
- data/spec/integration/src/test_minus.alf +5 -0
- data/spec/integration/src/test_project.alf +9 -0
- data/spec/{alf_spec.rb → integration/test_alf.rb} +8 -21
- data/spec/integration/test_alf_specs.rb +37 -0
- data/spec/{examples_spec.rb → integration/test_examples.rb} +1 -1
- data/spec/spec_helper.rb +19 -1
- data/spec/unit/environment/examples/suppliers.rash +5 -0
- data/spec/{environment/explicit_spec.rb → unit/environment/test_explicit.rb} +0 -0
- data/spec/{environment/folder_spec.rb → unit/environment/test_folder.rb} +1 -1
- data/spec/{operator → unit/operator}/non_relational/compact/buffer_based.rb +0 -0
- data/spec/{operator/non_relational/compact/sort_based_spec.rb → unit/operator/non_relational/compact/test_sort_based.rb} +0 -0
- data/spec/{operator/non_relational/autonum_spec.rb → unit/operator/non_relational/test_autonum.rb} +0 -0
- data/spec/{operator/non_relational/clip_spec.rb → unit/operator/non_relational/test_clip.rb} +0 -0
- data/spec/{operator/non_relational/compact_spec.rb → unit/operator/non_relational/test_compact.rb} +0 -0
- data/spec/{operator/non_relational/defaults_spec.rb → unit/operator/non_relational/test_defaults.rb} +0 -0
- data/spec/{operator/non_relational/sort_spec.rb → unit/operator/non_relational/test_sort.rb} +0 -0
- data/spec/{operator/relational/join/hash_based_spec.rb → unit/operator/relational/join/test_hash_based.rb} +0 -0
- data/spec/unit/operator/relational/summarize/test_hash_based.rb +38 -0
- data/spec/{operator/relational/summarize/sort_based_spec.rb → unit/operator/relational/summarize/test_sort_based.rb} +0 -0
- data/spec/{operator/relational/extend_spec.rb → unit/operator/relational/test_extend.rb} +0 -0
- data/spec/{operator/relational/group_spec.rb → unit/operator/relational/test_group.rb} +3 -2
- data/spec/{operator/relational/intersect_spec.rb → unit/operator/relational/test_intersect.rb} +0 -0
- data/spec/unit/operator/relational/test_join.rb +36 -0
- data/spec/{operator/relational/minus_spec.rb → unit/operator/relational/test_minus.rb} +0 -0
- data/spec/{operator/relational/project_spec.rb → unit/operator/relational/test_project.rb} +0 -0
- data/spec/{operator/relational/quota_spec.rb → unit/operator/relational/test_quota.rb} +0 -0
- data/spec/{operator/relational/rename_spec.rb → unit/operator/relational/test_rename.rb} +0 -0
- data/spec/{operator/relational/restrict_spec.rb → unit/operator/relational/test_restrict.rb} +0 -0
- data/spec/unit/operator/relational/test_summarize.rb +64 -0
- data/spec/{operator/relational/ungroup_spec.rb → unit/operator/relational/test_ungroup.rb} +0 -0
- data/spec/{operator/relational/union_spec.rb → unit/operator/relational/test_union.rb} +0 -0
- data/spec/{operator/relational/unnest_spec.rb → unit/operator/relational/test_unwrap.rb} +5 -5
- data/spec/{operator/relational/nest_spec.rb → unit/operator/relational/test_wrap.rb} +5 -5
- data/spec/{operator/command_methods_spec.rb → unit/operator/test_command_methods.rb} +0 -0
- data/spec/unit/operator/test_non_relational.rb +18 -0
- data/spec/unit/operator/test_relational.rb +27 -0
- data/spec/{reader → unit/reader}/input.rb +0 -0
- data/spec/unit/reader/test_alf_file.rb +27 -0
- data/spec/{reader/rash_spec.rb → unit/reader/test_rash.rb} +0 -0
- data/spec/unit/relation/test_coerce.rb +53 -0
- data/spec/unit/relation/test_inspect.rb +20 -0
- data/spec/unit/relation/test_relops.rb +46 -0
- data/spec/{renderer/text/cell_spec.rb → unit/renderer/text/test_cell.rb} +0 -0
- data/spec/{renderer/text/row_spec.rb → unit/renderer/text/test_row.rb} +0 -0
- data/spec/{renderer/text/table_spec.rb → unit/renderer/text/test_table.rb} +0 -0
- data/spec/{aggregator_spec.rb → unit/test_aggregator.rb} +6 -6
- data/spec/{assumptions_spec.rb → unit/test_assumptions.rb} +0 -0
- data/spec/{lispy_spec.rb → unit/test_lispy.rb} +0 -0
- data/spec/unit/test_operator.rb +16 -0
- data/spec/{reader_spec.rb → unit/test_reader.rb} +4 -0
- data/spec/unit/test_relation.rb +40 -0
- data/spec/{renderer_spec.rb → unit/test_renderer.rb} +0 -0
- data/spec/{tools/ordering_key_spec.rb → unit/tools/test_ordering_key.rb} +0 -0
- data/spec/{tools/projection_key_spec.rb → unit/tools/test_projection_key.rb} +0 -0
- data/spec/{tools/tools_spec.rb → unit/tools/test_tools.rb} +0 -0
- data/spec/{tools/tuple_handle_spec.rb → unit/tools/test_tuple_handle.rb} +0 -0
- data/tasks/clean.rake +3 -0
- data/tasks/spec_test.rake +1 -1
- metadata +143 -114
- data/examples/nest.alf +0 -2
- data/examples/unnest.alf +0 -4
- data/examples/with.alf +0 -23
- data/spec/operator/relational/summarize_spec.rb +0 -41
- data/spec/reader/alf_file_spec.rb +0 -15
data/README.md
CHANGED
|
@@ -1,38 +1,64 @@
|
|
|
1
|
-
# Alf -
|
|
1
|
+
# Alf - Relational Algebra at your fingertips (version 0.9.1)
|
|
2
|
+
|
|
3
|
+
## Description
|
|
4
|
+
|
|
5
|
+
### What & Why
|
|
6
|
+
|
|
7
|
+
Alf brings the relational algebra both in Shell and in Ruby. In Shell, because
|
|
8
|
+
manipulating any relation-like data source should be as straightforward as a
|
|
9
|
+
one-liner. In Ruby, because I've never understood why programming languages
|
|
10
|
+
provide data structures like arrays, hashes, sets, trees and graphs but not
|
|
11
|
+
_relations_... Let's stop the segregation ;-)
|
|
12
|
+
|
|
13
|
+
### Install
|
|
2
14
|
|
|
3
15
|
% [sudo] gem install alf
|
|
4
16
|
% alf --help
|
|
5
17
|
|
|
6
|
-
|
|
18
|
+
### Links
|
|
7
19
|
|
|
8
20
|
* {http://rubydoc.info/github/blambeau/alf/master/frames} (read this file there!)
|
|
9
21
|
* {http://github.com/blambeau/alf} (source code)
|
|
10
22
|
* {http://revision-zero.org} (author's blog)
|
|
11
23
|
|
|
12
|
-
|
|
24
|
+
### Quick overview
|
|
13
25
|
|
|
14
26
|
Alf is a commandline tool and Ruby library to manipulate data with all the power
|
|
15
27
|
of a truly relational algebra approach. Objectives behind Alf are manifold:
|
|
16
28
|
|
|
17
|
-
* Pragmatically, Alf aims at being a useful commandline executable for
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
commands and implemented relational operators.
|
|
29
|
+
* Pragmatically, Alf aims at being a useful commandline executable for manipulating
|
|
30
|
+
relational-like data: database records, csv files, or **whatever can be interpreted
|
|
31
|
+
as (the physical encoding of) a relation**. See 'alf --help' for the list of
|
|
32
|
+
available commands and implemented relational operators.
|
|
21
33
|
|
|
22
34
|
% alf restrict suppliers -- "city == 'London'" | alf join cities
|
|
23
35
|
|
|
24
36
|
* Alf is also a 100% Ruby relational algebra implementation shipped with a simple
|
|
25
37
|
to use, powerful, functional DSL for compiling and evaluating relational queries.
|
|
26
|
-
Alf is not limited to simple scalar values, but
|
|
38
|
+
Alf is not limited to simple scalar values, but admits values of arbitrary
|
|
27
39
|
complexity (under a few requirements about their implementation, see next
|
|
28
40
|
section). See 'alf --help' as well as .alf files in the examples directory
|
|
29
41
|
for syntactic examples.
|
|
30
42
|
|
|
31
|
-
Alf.lispy.
|
|
43
|
+
Alf.lispy.evaluate {
|
|
32
44
|
(join (restrict :suppliers, lambda{ city == 'London' }), :cities)
|
|
33
45
|
}
|
|
34
|
-
|
|
35
|
-
|
|
46
|
+
|
|
47
|
+
In addition to this functional syntax, Alf comes bundled with an in-memory
|
|
48
|
+
Relation data structure that provides an object-oriented way of manipulating
|
|
49
|
+
relations in simplest cases:
|
|
50
|
+
|
|
51
|
+
suppliers = Alf::Relation[
|
|
52
|
+
{:sid => 'S1', :name => 'Smith', :status => 20, :city => 'London'},
|
|
53
|
+
{:sid => 'S2', :name => 'Jones', :status => 10, :city => 'Paris'},
|
|
54
|
+
{:sid => 'S3', :name => 'Blake', :status => 30, :city => 'Paris'},
|
|
55
|
+
{:sid => 'S4', :name => 'Clark', :status => 20, :city => 'London'},
|
|
56
|
+
{:sid => 'S5', :name => 'Adams', :status => 30, :city => 'Athens'},
|
|
57
|
+
]
|
|
58
|
+
cities = ...
|
|
59
|
+
puts suppliers.restrict(lambda{ city == 'London' }).join(cities)
|
|
60
|
+
|
|
61
|
+
* Alf is also an educational tool, that I've written to draw people's attention
|
|
36
62
|
to the ill-known relational theory (and ill-represented by SQL). The tool
|
|
37
63
|
is largely inspired from TUTORIAL D, the tutorial language of Chris Date and
|
|
38
64
|
Hugh Darwen in their books, more specifically in
|
|
@@ -41,9 +67,9 @@ of a truly relational algebra approach. Objectives behind Alf are manifold:
|
|
|
41
67
|
there (Alf is neither a relational _database_, nor a relational _language_).
|
|
42
68
|
I hope that people (especially talented developers) will be sufficiently
|
|
43
69
|
enticed by features shown here to open that book, read it more deeply, and
|
|
44
|
-
implement new stuff around Date & Darwen vision. Have a look at the result of
|
|
45
|
-
the following query for things that you'll never ever have in SQL
|
|
46
|
-
'alf help quota', 'alf help
|
|
70
|
+
implement new stuff around Date & Darwen's vision. Have a look at the result of
|
|
71
|
+
the following query for the kind of things that you'll never ever have in SQL
|
|
72
|
+
(see also 'alf help quota', 'alf help wrap', 'alf help group', ...):
|
|
47
73
|
|
|
48
74
|
% alf --text summarize supplies --by=sid -- total "sum(:qty)" -- which "group(:pid)"
|
|
49
75
|
|
|
@@ -74,7 +100,7 @@ Familiar? Skip. Otherwise, read on.
|
|
|
74
100
|
### The example database
|
|
75
101
|
|
|
76
102
|
This README file shows a lot of examples built on top of the following suppliers
|
|
77
|
-
& parts database (almost identical to the original version in C.J. Date database
|
|
103
|
+
& parts database (almost identical to the original version in C. J. Date's database
|
|
78
104
|
books). By default, the alf command line is wired to this embedded example. All
|
|
79
105
|
examples shown here should therefore work immediately, if you want to reproduce
|
|
80
106
|
them!
|
|
@@ -103,7 +129,7 @@ them!
|
|
|
103
129
|
+-------------------------------------+-------------------------------------------------+-------------------------+------------------------+
|
|
104
130
|
|
|
105
131
|
Many people think that relational databases are necessarily 'flat', that they are
|
|
106
|
-
necessarily limited to
|
|
132
|
+
necessarily limited to simple scalar values put in two-dimensional tables. This is
|
|
107
133
|
wrong; most SQL databases are indeed 'flat', but _relations_ (in the mathematical
|
|
108
134
|
sense of the relational theory) are not! Look, **the example above is a relation!**;
|
|
109
135
|
that 'contains' other relations as particular values, which, in turn, could
|
|
@@ -115,15 +141,16 @@ that 'contains' other relations as particular values, which, in turn, could
|
|
|
115
141
|
To understand what a relation is exactly, one needs to remember elementary
|
|
116
142
|
notions of set theory and the concepts of _type_ and _value_.
|
|
117
143
|
|
|
118
|
-
* A _type_ is a finite set of values; it is
|
|
119
|
-
a set, it does never
|
|
144
|
+
* A _type_ is a finite set of values; it is not particularly ordered and, being
|
|
145
|
+
a set, it never contains two values which are equal (any type is necessarily
|
|
146
|
+
accompanied with an equality operator, denoted here by '==').
|
|
120
147
|
|
|
121
148
|
* A _value_ is **immutable** (you cannot 'change' a value, in any way), has no
|
|
122
149
|
localization in time and space, and is always typed (that is, it is always
|
|
123
150
|
accompanied by some identification of the type it belongs to).
|
|
124
151
|
|
|
125
152
|
As you can see, _type_ and _value_ are not the same concepts as _class_ and
|
|
126
|
-
_object_,
|
|
153
|
+
_object_, which you are probably more familiar with. Alf considers that the
|
|
127
154
|
latter are _implementations_ of the former. Alf assumes _valid_ implementations
|
|
128
155
|
(equality and hash methods must be correct) and _valid_ usage (objects used for
|
|
129
156
|
representing values are kept immutable in practice). Alf _assumes_ this, but
|
|
@@ -157,11 +184,11 @@ can have them inside relations!
|
|
|
157
184
|
pairs. Moreover, it does not contain two attributes with the same name and is
|
|
158
185
|
**not particularly ordered**. Also, **a tuple is a _value_, and is therefore
|
|
159
186
|
immutable**. Last, but not least, a tuple **does not admit nulls/nils**. Tuples
|
|
160
|
-
in Alf are simply implemented with ruby hashes, taken as
|
|
187
|
+
in Alf are simply implemented with ruby hashes, taken as tuple implementations.
|
|
161
188
|
Not all hashes are valid tuple implementations, of course (those containing nil
|
|
162
189
|
are not, for example). Alf _assumes_ valid tuples, but does not _enforce_ this
|
|
163
190
|
precondition. It's up to you to use Alf the right way! No support is or will
|
|
164
|
-
ever be provided for ordering tuple attributes.
|
|
191
|
+
ever be provided for ordering tuple attributes. However, as hashes are ordered
|
|
165
192
|
in Ruby 1.9, Alf implements a best effort strategy to keep a friendly ordering
|
|
166
193
|
when rendering tuples and relations. This is a very good practical reason for
|
|
167
194
|
migrating to ruby 1.9 if not already done!
|
|
@@ -173,15 +200,18 @@ can have them inside relations!
|
|
|
173
200
|
particularly ordered**. Moreover, all tuples of a relation must have the same
|
|
174
201
|
_heading_, that is, the same set of attribute (name, type) pairs. Also, **a
|
|
175
202
|
relation is a _value_, is therefore immutable** and **does not admit null/nil**.
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
203
|
+
|
|
204
|
+
Alf is mainly an implementation of relational algebra (see section below). The
|
|
205
|
+
implemented operators consider any Iterator of tuples as a potentially valid
|
|
206
|
+
operand. In addition Alf provides a Relation ruby class, that acts as an
|
|
207
|
+
in-memory data structure that provides an Object-Oriented API to call operators
|
|
208
|
+
(see "Interfacing Alf in Ruby" below).
|
|
179
209
|
|
|
180
210
|
### Relational Algebra
|
|
181
211
|
|
|
182
|
-
In classical algebra, you can
|
|
183
|
-
relational algebra, you can
|
|
184
|
-
functional programming-oriented syntax for algebra expressions:
|
|
212
|
+
In classical algebra, you can make computations like <code>(5 + 2) - 3</code>.
|
|
213
|
+
In relational algebra, you can make similar things on relations. Alf uses an
|
|
214
|
+
prefix, functional programming-oriented syntax for algebra expressions:
|
|
185
215
|
|
|
186
216
|
(minus (union :suppliers, xxx), yyy)
|
|
187
217
|
|
|
@@ -201,28 +231,66 @@ you want! The same query, in shell:
|
|
|
201
231
|
The Third Manifesto defines a series of prescriptions, proscriptions and very
|
|
202
232
|
strong suggestions for designing a truly relational _language_, called a _D_,
|
|
203
233
|
as an alternative to SQL for managing relational databases. This is far behind
|
|
204
|
-
|
|
205
|
-
transactions, and so on.) and don't actually define a programming language
|
|
206
|
-
|
|
234
|
+
my objective with Alf, as it does not touch on database issues at all (persistence,
|
|
235
|
+
transactions, and so on) and doesn't actually define a programming language (only
|
|
236
|
+
a small functional ruby DSL).
|
|
207
237
|
|
|
208
238
|
Alf must simply be interpreted as a ruby library implementing (a variant of)
|
|
209
|
-
Date
|
|
239
|
+
Date and Darwen's relational algebra. This library is designed as a set of operator
|
|
210
240
|
implementations, that work as tuple iterators taking other tuple iterators as
|
|
211
241
|
input. Under the pre-condition that you provide them _valid_ tuple iterators as
|
|
212
242
|
input (no duplicates, no nil, + other preconditions on an operator basis), the
|
|
213
243
|
result is a valid iterator as well. Unless explicitly stated otherwise, any
|
|
214
244
|
behavior observed when not respecting these preconditions, even an interesting
|
|
215
|
-
behavior, is not guaranteed and
|
|
216
|
-
about versioning policy at the end of this file).
|
|
245
|
+
behavior, is not guaranteed and might change with tiny version changes (see
|
|
246
|
+
section about versioning policy at the end of this file).
|
|
247
|
+
|
|
248
|
+
### The command line utility
|
|
249
|
+
|
|
250
|
+
#
|
|
251
|
+
# Provided that suppliers and cities are valid relation representations
|
|
252
|
+
# [something similar]
|
|
253
|
+
#
|
|
254
|
+
% alf restrict suppliers -- "city == 'London'" | alf join cities
|
|
255
|
+
|
|
256
|
+
# the resulting stream is a valid relation representation in the output
|
|
257
|
+
# stream format that you have selected (.rash by default). It can therefore
|
|
258
|
+
# be piped to another alf shell invocation, or saved to a file and re-read
|
|
259
|
+
# later (under the assumption that input and output data formats match, of
|
|
260
|
+
# course). [Something similar about responsibility and bug].
|
|
217
261
|
|
|
218
|
-
|
|
262
|
+
If you take a look at .alf example files, you'll find functional ruby expressions
|
|
263
|
+
like the following (called Lispy expressions):
|
|
264
|
+
|
|
265
|
+
% cat examples/minus.alf
|
|
266
|
+
|
|
267
|
+
# Give all suppliers, except those living in Paris
|
|
268
|
+
(minus :suppliers,
|
|
269
|
+
(restrict :suppliers, lambda{ city == 'Paris' }))
|
|
270
|
+
|
|
271
|
+
# This is a contrived example for illustrating minus, as the
|
|
272
|
+
# following is equivalent
|
|
273
|
+
(restrict :suppliers, lambda{ city != 'Paris' })
|
|
274
|
+
|
|
275
|
+
You can simply execute such expressions with the alf command line itself (the
|
|
276
|
+
three following invocations return the same result):
|
|
277
|
+
|
|
278
|
+
% alf examples/minus.alf | alf show
|
|
279
|
+
% alf show minus
|
|
280
|
+
% alf -e "(restrict :suppliers, lambda{ city != 'Paris' })" | alf show
|
|
281
|
+
|
|
282
|
+
Symbols are magically resolved from the environment, which is wired to the
|
|
283
|
+
examples by default. See the dedicated sections below to update this behavior
|
|
284
|
+
to your needs.
|
|
285
|
+
|
|
286
|
+
### The algebra compiler
|
|
219
287
|
|
|
220
288
|
#
|
|
221
289
|
# Provided that :suppliers and :cities are valid relation representations
|
|
222
290
|
# (under the responsibility shared by you and the Reader and Environment
|
|
223
291
|
# subclasses you use -- see later), then,
|
|
224
292
|
#
|
|
225
|
-
op = Alf.lispy.compile{
|
|
293
|
+
op = Alf.lispy.compile {
|
|
226
294
|
(join (restrict :suppliers, lambda{ city == 'London' }), :cities)
|
|
227
295
|
}
|
|
228
296
|
|
|
@@ -231,19 +299,128 @@ about versioning policy at the end of this file).
|
|
|
231
299
|
# of any other expression. This is under Alf's responsibility, and any
|
|
232
300
|
# failure must be considered a bug!
|
|
233
301
|
|
|
234
|
-
###
|
|
302
|
+
### The Relation data structure
|
|
235
303
|
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
#
|
|
240
|
-
% alf restrict suppliers -- "city == 'London'" | alf join cities
|
|
304
|
+
In addition, Alf is bundled with an in-memory Relation data structure that
|
|
305
|
+
provides a more abstract API for manipulating relations in simple cases (the
|
|
306
|
+
rules are the same about pre and post-conditions):
|
|
241
307
|
|
|
242
|
-
#
|
|
243
|
-
#
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
308
|
+
# The query above can be done as follows. Note that relations are always
|
|
309
|
+
# loaded in memory here!
|
|
310
|
+
suppliers = Alf::Relation[ ... ]
|
|
311
|
+
cities = Alf::Relation[ ... ]
|
|
312
|
+
suppliers.restrict(lambda{ city == 'London' }).
|
|
313
|
+
join(cities)
|
|
314
|
+
# => Alf::Relation[ ... ]
|
|
315
|
+
|
|
316
|
+
All relational operators have an instance method equivalent on the Alf::Relation
|
|
317
|
+
class. Semantically, the receiver object is simply the first operand of the
|
|
318
|
+
functional call, as illustrated above.
|
|
319
|
+
|
|
320
|
+
### Where do relations come from?
|
|
321
|
+
|
|
322
|
+
Relation literals can simply be written as follows:
|
|
323
|
+
|
|
324
|
+
suppliers = Alf::Relation[
|
|
325
|
+
{:sid => 'S1', :name => 'Smith', :status => 20, :city => 'London'},
|
|
326
|
+
{:sid => 'S2', :name => 'Jones', :status => 10, :city => 'Paris'},
|
|
327
|
+
{:sid => 'S3', :name => 'Blake', :status => 30, :city => 'Paris'},
|
|
328
|
+
{:sid => 'S4', :name => 'Clark', :status => 20, :city => 'London'},
|
|
329
|
+
{:sid => 'S5', :name => 'Adams', :status => 30, :city => 'Athens'},
|
|
330
|
+
]
|
|
331
|
+
|
|
332
|
+
Environment classes serve datasets (see later) that always have a to_rel method
|
|
333
|
+
for obtaining in-memory relations:
|
|
334
|
+
|
|
335
|
+
env = Alf::Environment.examples
|
|
336
|
+
env.dataset(:suppliers).to_rel
|
|
337
|
+
# => Alf::Relation[ ... ]
|
|
338
|
+
|
|
339
|
+
Compiled expressions always have a to_rel method that allows obtaining an
|
|
340
|
+
in-memory relation:
|
|
341
|
+
|
|
342
|
+
op = Alf.lispy.compile {
|
|
343
|
+
(join (restrict :suppliers, lambda{ city == 'London' }), :cities)
|
|
344
|
+
}
|
|
345
|
+
op.to_rel
|
|
346
|
+
# => Alf::Relation[...]
|
|
347
|
+
|
|
348
|
+
Lispy provides an 'evaluate' method which is precisely equivalent to the chain
|
|
349
|
+
above. Therefore:
|
|
350
|
+
|
|
351
|
+
rel = Alf.lispy.evaluate {
|
|
352
|
+
(join (restrict :suppliers, lambda{ city == 'London' }), :cities)
|
|
353
|
+
}
|
|
354
|
+
# => Alf::Relation[...]
|
|
355
|
+
|
|
356
|
+
### Algebra is closed under its operators!
|
|
357
|
+
|
|
358
|
+
Of course, from the closure property of a relational algebra (that states that
|
|
359
|
+
operators work on relations and return relations), you can use a sub expression
|
|
360
|
+
*every time* a relational operand is expected:
|
|
361
|
+
|
|
362
|
+
# Compute the total qty supplied in each country together with the subset
|
|
363
|
+
# of products shipped there. Only consider suppliers that have a status
|
|
364
|
+
# greater than 10, however.
|
|
365
|
+
(summarize \
|
|
366
|
+
(join \
|
|
367
|
+
(join (restrict :suppliers, lambda{ status > 10 }),
|
|
368
|
+
:supplies),
|
|
369
|
+
:cities),
|
|
370
|
+
[:country],
|
|
371
|
+
:which => Agg::group(:pid),
|
|
372
|
+
:total => Agg::sum{ qty })
|
|
373
|
+
|
|
374
|
+
Of course, complex queries quickly become unreadable that way. But you can always
|
|
375
|
+
split complex tasks into simpler ones:
|
|
376
|
+
|
|
377
|
+
kept_suppliers = (restrict :suppliers, lambda{ status > 10 })
|
|
378
|
+
with_countries = (join kept_suppliers, :cities)
|
|
379
|
+
supplying = (join with_countries, :supplies)
|
|
380
|
+
(summarize supplying,
|
|
381
|
+
[:country],
|
|
382
|
+
:which => Agg::group(:pid),
|
|
383
|
+
:total => Agg::sum{ qty })
|
|
384
|
+
|
|
385
|
+
And here is the result !
|
|
386
|
+
|
|
387
|
+
+----------+----------+--------+
|
|
388
|
+
| :country | :which | :total |
|
|
389
|
+
+----------+----------+--------+
|
|
390
|
+
| England | +------+ | 2200 |
|
|
391
|
+
| | | :pid | | |
|
|
392
|
+
| | +------+ | |
|
|
393
|
+
| | | P1 | | |
|
|
394
|
+
| | | P2 | | |
|
|
395
|
+
| | | P3 | | |
|
|
396
|
+
| | | P4 | | |
|
|
397
|
+
| | | P5 | | |
|
|
398
|
+
| | | P6 | | |
|
|
399
|
+
| | +------+ | |
|
|
400
|
+
| France | +------+ | 200 |
|
|
401
|
+
| | | :pid | | |
|
|
402
|
+
| | +------+ | |
|
|
403
|
+
| | | P2 | | |
|
|
404
|
+
| | +------+ | |
|
|
405
|
+
+----------+----------+--------+
|
|
406
|
+
|
|
407
|
+
### Reference API
|
|
408
|
+
|
|
409
|
+
For now, the Ruby API is documented in the commandline help itself (a cheatsheet
|
|
410
|
+
or something will be provided as soon as possible). For example, you'll find the
|
|
411
|
+
allowed syntaxes for RESTRICT as follows:
|
|
412
|
+
|
|
413
|
+
% alf help restrict
|
|
414
|
+
|
|
415
|
+
...
|
|
416
|
+
API & EXAMPLE
|
|
417
|
+
|
|
418
|
+
# Restrict to suppliers with status greater than 20
|
|
419
|
+
(restrict :suppliers, lambda{ status > 20 })
|
|
420
|
+
|
|
421
|
+
# Restrict to suppliers that live in London
|
|
422
|
+
(restrict :suppliers, lambda{ city == 'London' })
|
|
423
|
+
...
|
|
247
424
|
|
|
248
425
|
### Coping with non-relational data sources (nil, duplicates, etc.)
|
|
249
426
|
|
|
@@ -271,7 +448,7 @@ is null/nil, but it won't probably fail if any other attribute is nil.
|
|
|
271
448
|
|
|
272
449
|
This best-effort strategy is not enough, and strictly speaking, must be considered
|
|
273
450
|
unsound (for example, it strongly hurts optimization possibilities). Therefore,
|
|
274
|
-
|
|
451
|
+
I strongly encourage you to go a step further: **if relational operators want
|
|
275
452
|
true relations as input, please, give them!**. For this, Alf also provides a few
|
|
276
453
|
non-relational operators in addition to relational ones. Those operators must be
|
|
277
454
|
interpreted as "pre-relational" operators, in the sense that they help obtaining
|
|
@@ -281,13 +458,12 @@ You'll find,
|
|
|
281
458
|
|
|
282
459
|
* <code>alf autonum</code> -- ensure no duplicates by generating a unique attribute
|
|
283
460
|
* <code>alf compact</code> -- brute-force duplicates removal
|
|
284
|
-
* <code>alf defaults</code> -- replace nulls/nil by valid values, on an attribute
|
|
285
|
-
basis
|
|
461
|
+
* <code>alf defaults</code> -- replace nulls/nil by valid values, on an attribute basis
|
|
286
462
|
|
|
287
463
|
Play the game, it's easy!
|
|
288
464
|
|
|
289
465
|
- _Give id, name and status of suppliers whose status is greater than 10_
|
|
290
|
-
- Hey man, we don't know
|
|
466
|
+
- Hey man, we don't know the status for all suppliers! What about these cases?
|
|
291
467
|
- _Ignore them_
|
|
292
468
|
- No problem dude!
|
|
293
469
|
|
|
@@ -311,16 +487,16 @@ analyzing the whole query expression in the light of a catalog of typed
|
|
|
311
487
|
operators. This way, a tool can check that a query is statically valid, i.e.
|
|
312
488
|
that it respects operator preconditions. While this approach has the major
|
|
313
489
|
advantage of allowing strong optimizations, it also has a few drawbacks (as
|
|
314
|
-
|
|
315
|
-
with dynamically-typed languages like Ruby. Therefore, Alf takes another
|
|
316
|
-
which is similar to duck-typing. In essence, this approach can be
|
|
317
|
-
follows:
|
|
490
|
+
the need to know the heading of used datasources in advance) and is difficult to
|
|
491
|
+
marry with dynamically-typed languages like Ruby. Therefore, Alf takes another
|
|
492
|
+
approach, which is similar to duck-typing. In essence, this approach can be
|
|
493
|
+
summarized as follows:
|
|
318
494
|
|
|
319
|
-
- _You have the responsibility of
|
|
320
|
-
|
|
495
|
+
- _You have the responsibility of not violating operators' preconditions. If you
|
|
496
|
+
honor them, Alf has the responsibility of returning correct results._
|
|
321
497
|
- No problem dude!
|
|
322
498
|
|
|
323
|
-
##
|
|
499
|
+
## More about the shell command line
|
|
324
500
|
|
|
325
501
|
% alf --help
|
|
326
502
|
|
|
@@ -440,104 +616,7 @@ Also, mimicing the ruby executable, the following invocation is also possible:
|
|
|
440
616
|
where the argument is a relational expression in Alf's Lispy dialect, which
|
|
441
617
|
is detailed in the next section.
|
|
442
618
|
|
|
443
|
-
##
|
|
444
|
-
|
|
445
|
-
If you take a look at .alf example files, you'll find functional ruby expressions
|
|
446
|
-
like the following:
|
|
447
|
-
|
|
448
|
-
% cat examples/minus.alf
|
|
449
|
-
|
|
450
|
-
# Give all suppliers, except those living in Paris
|
|
451
|
-
(minus :suppliers,
|
|
452
|
-
(restrict :suppliers, lambda{ city == 'Paris' }))
|
|
453
|
-
|
|
454
|
-
# This is a contrived example for illustrating minus, as the
|
|
455
|
-
# following is equivalent
|
|
456
|
-
(restrict :suppliers, lambda{ city != 'Paris' })
|
|
457
|
-
|
|
458
|
-
You can simply execute such expressions with the alf command line itself (the
|
|
459
|
-
three following invocations return the same result):
|
|
460
|
-
|
|
461
|
-
% alf examples/minus.alf | alf show
|
|
462
|
-
% alf show minus
|
|
463
|
-
% alf -e "(restrict :suppliers, lambda{ city != 'Paris' })" | alf show
|
|
464
|
-
|
|
465
|
-
Symbols are magically resolved from the environment, which is wired to the
|
|
466
|
-
examples by default. See the dedicated sections below to update this behavior
|
|
467
|
-
to your needs.
|
|
468
|
-
|
|
469
|
-
### Algebra is closed under its operators!
|
|
470
|
-
|
|
471
|
-
Of course, from the closure property of a relational algebra (that states that
|
|
472
|
-
operators works on relations and return relations), you can use a sub expression
|
|
473
|
-
*everytime* a relational operand is expected, everytime:
|
|
474
|
-
|
|
475
|
-
# Compute the total qty supplied in each country together with the subset
|
|
476
|
-
# of products shipped there. Only consider suppliers that have a status
|
|
477
|
-
# greater than 10, however.
|
|
478
|
-
(summarize \
|
|
479
|
-
(join \
|
|
480
|
-
(join (restrict :suppliers, lambda{ status > 10 }),
|
|
481
|
-
:supplies),
|
|
482
|
-
:cities),
|
|
483
|
-
[:country],
|
|
484
|
-
:which => Agg::group(:pid),
|
|
485
|
-
:total => Agg::sum{ qty })
|
|
486
|
-
|
|
487
|
-
Of course, complex queries quickly become unreadable that way. But you can always
|
|
488
|
-
split complex tasks in more simple ones using _with_:
|
|
489
|
-
|
|
490
|
-
with( :kept_suppliers => (restrict :suppliers, lambda{ status > 10 }),
|
|
491
|
-
:with_countries => (join :kept_suppliers, :cities),
|
|
492
|
-
:supplying => (join :with_countries, :supplies) ) do
|
|
493
|
-
(summarize :supplying,
|
|
494
|
-
[:country],
|
|
495
|
-
:which => Agg::group(:pid),
|
|
496
|
-
:total => Agg::sum{ qty })
|
|
497
|
-
end
|
|
498
|
-
|
|
499
|
-
And here is the result !
|
|
500
|
-
|
|
501
|
-
+----------+----------+--------+
|
|
502
|
-
| :country | :which | :total |
|
|
503
|
-
+----------+----------+--------+
|
|
504
|
-
| England | +------+ | 2200 |
|
|
505
|
-
| | | :pid | | |
|
|
506
|
-
| | +------+ | |
|
|
507
|
-
| | | P1 | | |
|
|
508
|
-
| | | P2 | | |
|
|
509
|
-
| | | P3 | | |
|
|
510
|
-
| | | P4 | | |
|
|
511
|
-
| | | P5 | | |
|
|
512
|
-
| | | P6 | | |
|
|
513
|
-
| | +------+ | |
|
|
514
|
-
| France | +------+ | 200 |
|
|
515
|
-
| | | :pid | | |
|
|
516
|
-
| | +------+ | |
|
|
517
|
-
| | | P2 | | |
|
|
518
|
-
| | +------+ | |
|
|
519
|
-
+----------+----------+--------+
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
### Going further
|
|
523
|
-
|
|
524
|
-
For now, the Ruby API is documented in the commandline help itself (a cheatsheet
|
|
525
|
-
or something will be provided as soon as possible). For example, you'll find the
|
|
526
|
-
allowed syntaxes for RESTRICT as follows:
|
|
527
|
-
|
|
528
|
-
% alf help restrict
|
|
529
|
-
|
|
530
|
-
...
|
|
531
|
-
API & EXAMPLE
|
|
532
|
-
|
|
533
|
-
# Restrict to suppliers with status greater than 20
|
|
534
|
-
(restrict :suppliers, lambda{ status > 20 })
|
|
535
|
-
|
|
536
|
-
# Restrict to suppliers that live in London
|
|
537
|
-
(restrict :suppliers, lambda{ city == 'London' })
|
|
538
|
-
...
|
|
539
|
-
|
|
540
|
-
## Interfacing Alf in Ruby
|
|
619
|
+
## More about Alf in Ruby
|
|
541
620
|
|
|
542
621
|
### Calling commands 'ala' shell
|
|
543
622
|
|
|
@@ -560,11 +639,10 @@ If this kind of API is not sufficiently expressive for you, you'll have to learn
|
|
|
560
639
|
the APIs deeper, and use the Lispy functional style that Alf provides, which can
|
|
561
640
|
be compiled and used as explained in the next section.
|
|
562
641
|
|
|
563
|
-
###
|
|
642
|
+
### Compiler vs. Relation data structure
|
|
564
643
|
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
follows:
|
|
644
|
+
The compilers allow you to manipulate algebra expressions. Just obtain a Lispy
|
|
645
|
+
instance on an environment and you're ready:
|
|
568
646
|
|
|
569
647
|
#
|
|
570
648
|
# Expressions can simply be compiled as illustrated below. We use the
|
|
@@ -572,29 +650,37 @@ follows:
|
|
|
572
650
|
# available environments.
|
|
573
651
|
#
|
|
574
652
|
lispy = Alf.lispy(Alf::Environment.examples)
|
|
575
|
-
|
|
653
|
+
london_suppliers = lispy.compile do
|
|
576
654
|
(restrict :suppliers, lambda{ city == 'London' })
|
|
577
655
|
end
|
|
578
656
|
|
|
579
657
|
#
|
|
580
|
-
# Returned
|
|
658
|
+
# The returned operator is an enumerable of Ruby hashes. Provided that datasets
|
|
581
659
|
# offered by the environment (:suppliers here) can be enumerated more than
|
|
582
660
|
# once, the operator may be used multiple times and is even thread safe!
|
|
583
661
|
#
|
|
584
|
-
|
|
662
|
+
london_suppliers.each do |tuple|
|
|
585
663
|
# tuple is a ruby Hash
|
|
586
664
|
end
|
|
587
665
|
|
|
588
666
|
#
|
|
589
667
|
# Now, maybe you want to reuse this operator in a larger query, for example
|
|
590
|
-
# by projecting on the city attribute... Here is how
|
|
591
|
-
#
|
|
668
|
+
# by projecting on the city attribute... Here is how this can be
|
|
669
|
+
# done:
|
|
592
670
|
#
|
|
593
|
-
projection =
|
|
594
|
-
|
|
595
|
-
|
|
671
|
+
projection = (project london_suppliers, [:city])
|
|
672
|
+
|
|
673
|
+
Note that the examples above manipulate algebra operators, not relations per se.
|
|
674
|
+
This means that equality and other such operators, that operate on relation
|
|
675
|
+
_values_, do not operate correctly here:
|
|
676
|
+
|
|
677
|
+
projection == Alf::Relation[{:city => 'London'}]
|
|
678
|
+
# => nil
|
|
679
|
+
|
|
680
|
+
In contrast, you can use such operators when operating on true relation values:
|
|
596
681
|
|
|
597
|
-
|
|
682
|
+
projection.to_rel == Alf::Relation[{:city => 'London'}]
|
|
683
|
+
# => true
|
|
598
684
|
|
|
599
685
|
### Using/Implementing other Environments
|
|
600
686
|
|
|
@@ -681,7 +767,7 @@ following template for contributions in lib/alf/renderer
|
|
|
681
767
|
|
|
682
768
|
## Related Work & Tools
|
|
683
769
|
|
|
684
|
-
- You should certainly have a look at the Third Manifesto website: http://www.thethirdmanifesto.com/
|
|
770
|
+
- You should certainly have a look at the Third Manifesto website: {http://www.thethirdmanifesto.com/}
|
|
685
771
|
- Why not read the {http://www.dcs.warwick.ac.uk/~hugh/TTM/DBE-Chapter01.pdf
|
|
686
772
|
third manifesto paper} itself?
|
|
687
773
|
- Also have a look at {http://www.dcs.warwick.ac.uk/~hugh/TTM/Projects.html other
|
|
@@ -713,8 +799,8 @@ your needs and I'll see what I can do!
|
|
|
713
799
|
### Internals -- Tribute to Sinatra
|
|
714
800
|
|
|
715
801
|
Alf's code style is heavily inspired by what I found in Sinatra when looking
|
|
716
|
-
at its internals a few
|
|
717
|
-
single file, lib/alf.rb. Everything is there except
|
|
802
|
+
at its internals a few months ago. Alf, like Sinatra, is mostly implemented in a
|
|
803
|
+
single file, lib/alf.rb. Everything is there except specific third-party contributions
|
|
718
804
|
(in lib/alf/...). You'll need an editor or IDE that supports code folding/unfolding.
|
|
719
805
|
Then, follow the guide:
|
|
720
806
|
|
data/TODO.md
CHANGED
|
@@ -4,17 +4,17 @@
|
|
|
4
4
|
(rename :suppliers, [:name, :city], :suffix => "_sup")
|
|
5
5
|
(rename :suppliers, [:name, :city], lambda{|name| name.upcase})
|
|
6
6
|
|
|
7
|
-
*
|
|
7
|
+
* WRAP: provide a multi-wrapping ability?
|
|
8
8
|
|
|
9
|
-
(
|
|
10
|
-
=> (
|
|
9
|
+
(wrap (wrap :supplies, [:a, :b], :x), [:x, :c], :y)
|
|
10
|
+
=> (wrap :supplies, :x => [:a, :b], :y => [:x, :c])
|
|
11
11
|
|
|
12
12
|
But this would only work with Ruby 1.9 as the hash order would be important
|
|
13
13
|
as such
|
|
14
14
|
|
|
15
15
|
* GROUP: provide a multi-grouping ability?
|
|
16
16
|
|
|
17
|
-
Similar to
|
|
17
|
+
Similar to wrap, with same limitation.
|
|
18
18
|
|
|
19
19
|
* Add PIVOT and UNPIVOT operators
|
|
20
20
|
|
data/alf.gemspec
CHANGED
|
@@ -21,13 +21,13 @@ Gem::Specification.new do |s|
|
|
|
21
21
|
# A short summary of this gem
|
|
22
22
|
#
|
|
23
23
|
# This is displayed in `gem list -d`.
|
|
24
|
-
s.summary = "
|
|
24
|
+
s.summary = "Relational Algebra at your fingertips"
|
|
25
25
|
|
|
26
26
|
# A long description of this gem (required)
|
|
27
27
|
#
|
|
28
28
|
# The description should be more detailed than the summary. For example,
|
|
29
29
|
# you might wish to copy the entire README into the description.
|
|
30
|
-
s.description = "Alf
|
|
30
|
+
s.description = "Alf brings the relational algebra both in Shell and in Ruby. In Shell, because \nmanipulating any relation-like data source should be as straightforward as a \none-liner. In Ruby, because I've never understood why programming languages \nprovide data structures like arrays, hashes, sets, trees and graphs but not \n_relations_... Let's stop the segregation ;-)"
|
|
31
31
|
|
|
32
32
|
# The URL of this gem home page (optional)
|
|
33
33
|
s.homepage = "http://rubydoc.info/github/blambeau/alf/master/frames"
|
|
@@ -123,7 +123,7 @@ Gem::Specification.new do |s|
|
|
|
123
123
|
# One call to add_development_dependency('gem_name', 'gem version requirement')
|
|
124
124
|
# for each development dependency. These gems are required for developers
|
|
125
125
|
#
|
|
126
|
-
s.add_development_dependency("rake", "~> 0.
|
|
126
|
+
s.add_development_dependency("rake", "~> 0.9.2")
|
|
127
127
|
s.add_development_dependency("bundler", "~> 1.0")
|
|
128
128
|
s.add_development_dependency("rspec", "~> 2.6.0")
|
|
129
129
|
s.add_development_dependency("yard", "~> 0.7.2")
|