query-composer 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,200 @@
1
+ # A simple example that demonstrates the use of Query::Composer in a
2
+ # library reporting system. Given a data model that includes sets of
3
+ # libraries, topics, books, and patrons, and permits books to be lent
4
+ # from a library to a patron on a given date, this script builds and
5
+ # executes a query that shows how many books from a given set of topics
6
+ # and libraries each patron borrowed during a given period of time, and
7
+ # compares it to the corresponding period of the previous month.
8
+
9
+ require 'active_record'
10
+ require 'query/composer'
11
+ require 'query/base'
12
+
13
+ # connect to the DB: the example runs against an in-memory SQLite database, so it needs no external database setup
14
+ ActiveRecord::Base.establish_connection(
15
+ adapter: "sqlite3",
16
+ database: ":memory:"  # SQLite's in-memory database name
17
+ )
18
+
19
+ # generate the schema: five tables (libraries, topics, patrons, books, lendings) backing the models defined further down
20
+ ActiveRecord::Schema.verbose = false  # turn off verbose output while the schema is defined
21
+ ActiveRecord::Schema.define do
22
+ create_table :libraries do |t|
23
+ t.string :name
24
+ end
25
+
26
+ create_table :topics do |t|
27
+ t.string :name
28
+ end
29
+
30
+ create_table :patrons do |t|
31
+ t.string :name
32
+ end
33
+
34
+ create_table :books do |t|  # each book belongs to one library and one topic
35
+ t.string :name
36
+ t.integer :library_id  # joined against libraries.id by the :books_set component
37
+ t.integer :topic_id  # joined against topics.id by the :books_set component
38
+ end
39
+
40
+ create_table :lendings do |t|  # one row per book lent to a patron
41
+ t.integer :book_id
42
+ t.integer :patron_id  # the report groups and counts lendings by this column
43
+ t.date :created_at  # lending date; the report filters on this column
44
+ end
45
+ end
46
+
47
+ # populate the database with fixed sample rows, inserted with raw SQL; explicit ids are used so books can reference libraries and topics, and lendings can reference books and patrons
48
+
49
+ ActiveRecord::Base.connection.execute <<-SQL  # 2 libraries
50
+ INSERT INTO libraries (id, name)
51
+ VALUES (1, 'Gotham'),
52
+ (2, 'Hogwarts')
53
+ SQL
54
+
55
+ ActiveRecord::Base.connection.execute <<-SQL  # 4 topics
56
+ INSERT INTO topics (id, name)
57
+ VALUES (1, 'Warts'),
58
+ (2, 'Seaweed'),
59
+ (3, 'Dryer Lint'),
60
+ (4, 'Sitcoms')
61
+ SQL
62
+
63
+ ActiveRecord::Base.connection.execute <<-SQL  # 6 patrons
64
+ INSERT INTO patrons (id, name)
65
+ VALUES (1, 'Harry'),
66
+ (2, 'Mary'),
67
+ (3, 'Larry'),
68
+ (4, 'Carry'),
69
+ (5, 'Terry'),
70
+ (6, 'Cheri')
71
+ SQL
72
+
73
+ ActiveRecord::Base.connection.execute <<-SQL  # 10 books, each assigned to a library and a topic
74
+ INSERT INTO books (id, name, library_id, topic_id)
75
+ VALUES (1, 'Odd Growths', 1, 1),
76
+ (2, 'Nose Accessories', 2, 1),
77
+ (3, 'Health Foods', 1, 2),
78
+ (4, 'Green', 1, 2),
79
+ (5, 'Slimy Things', 1, 2),
80
+ (6, 'Household Chores', 2, 3),
81
+ (7, 'Starting Fires', 2, 3),
82
+ (8, 'Laughter', 1, 4),
83
+ (9, 'Funny People', 1, 4),
84
+ (10, 'Silliness', 2, 4)
85
+ SQL
86
+
87
+ ActiveRecord::Base.connection.execute <<-SQL  # 20 lendings: every book is lent once in January 2016 and once in February 2016
88
+ INSERT INTO lendings (book_id, patron_id, created_at)
89
+ VALUES (1, 1, '2016-01-01'),
90
+ (2, 2, '2016-01-04'),
91
+ (3, 3, '2016-01-05'),
92
+ (4, 4, '2016-01-06'),
93
+ (5, 4, '2016-01-08'),
94
+ (6, 4, '2016-01-08'),
95
+ (7, 5, '2016-01-11'),
96
+ (8, 6, '2016-01-12'),
97
+ (9, 6, '2016-01-14'),
98
+ (10, 6, '2016-01-15'),
99
+ (1, 6, '2016-02-01'),
100
+ (2, 6, '2016-02-04'),
101
+ (3, 5, '2016-02-05'),
102
+ (4, 5, '2016-02-06'),
103
+ (5, 4, '2016-02-08'),
104
+ (6, 3, '2016-02-08'),
105
+ (7, 3, '2016-02-11'),
106
+ (8, 2, '2016-02-12'),
107
+ (9, 2, '2016-02-14'),
108
+ (10, 1, '2016-02-15')
109
+ SQL
110
+
# ActiveRecord models for the five tables of the example schema.

# A library holds many books.
class Library < ActiveRecord::Base
  has_many :books
end

# A topic classifies many books.
class Topic < ActiveRecord::Base
  has_many :books
end

# A patron borrows books; each borrowing is recorded as a lending.
class Patron < ActiveRecord::Base
  has_many :lendings
  has_many :books, through: :lendings
end

# A book sits in one library, covers one topic, and can be lent many times.
class Book < ActiveRecord::Base
  belongs_to :library
  belongs_to :topic
  has_many :lendings
  has_many :patrons, through: :lendings
end

# A lending links one patron to one book (the table also stores the date
# in its created_at column).
class Lending < ActiveRecord::Base
  belongs_to :patron
  belongs_to :book
end
137
+
# Construct the reporting query.
#
# Each component is registered under a name; the parameter names of a
# component's block name the other components it depends on. Nothing is
# evaluated here -- the blocks run when the composer builds the query.
composer = Query::Composer.new do |c|
  # Independent components: the sets of libraries, topics and patrons that
  # the report is restricted to.
  c.use(:libraries_set) { Library.where(id: [ 1, 2 ]) }
  c.use(:topics_set) { Topic.where(id: [ 1, 2, 3, 4 ]) }
  c.use(:patrons_set) { Patron.all }

  # Ids of the books that belong to one of the selected libraries and to
  # one of the selected topics.
  c.use(:books_set) do |libraries_set, topics_set|
    book_table = Book.arel_table
    in_library = book_table[:library_id].eq(libraries_set[:id])
    on_topic = book_table[:topic_id].eq(topics_set[:id])

    Query::Base.new(book_table).
      project(book_table[:id]).
      join(libraries_set).on(in_library).
      join(topics_set).on(on_topic)
  end

  # Per-patron lending counts for the current period...
  c.use(:current_set) do |books_set|
    lendings_set(books_set, '2016-02-01', '2016-02-15')
  end

  # ...and for the matching period one month earlier.
  c.use(:prior_set) do |books_set|
    lendings_set(books_set, '2016-01-01', '2016-01-15')
  end

  # Root component: each patron with the current and prior totals. The
  # current period is a plain join, so only patrons that have a row in
  # current_set are listed; the prior period is an outer join, so
  # prior_total may be NULL.
  c.use(:combined_set) do |patrons_set, current_set, prior_set|
    columns = [
      patrons_set[Arel.star],
      current_set[:total].as("current_total"),
      prior_set[:total].as("prior_total")
    ]
    current_match = current_set[:patron_id].eq(patrons_set[:id])
    prior_match = prior_set[:patron_id].eq(patrons_set[:id])

    Query::Base.new(patrons_set).
      project(*columns).
      join(current_set).on(current_match).
      join(prior_set, Arel::Nodes::OuterJoin).on(prior_match)
  end
end
175
+
# Builds the query that counts lendings per patron.
#
# books_set - the table-like object for the :books_set component; only
#             lendings whose book_id matches one of its ids are counted.
# from_date - first day of the period (inclusive).
# to_date   - last day of the period (inclusive).
#
# Returns a Query::Base projecting patron_id and the number of matching
# lendings (aliased "total"), grouped by patron.
def lendings_set(books_set, from_date, to_date)
  lending_table = Lending.arel_table
  borrower = lending_table[:patron_id]
  total = borrower.count.as("total")
  period = from_date..to_date

  # Query::Base forwards these calls to its select manager and returns
  # itself, so issuing them one at a time equals one long chain.
  query = Query::Base.new(lending_table)
  query.project(borrower, total)
  query.join(books_set).on(lending_table[:book_id].eq(books_set[:id]))
  query.where(lending_table[:created_at].between(period))
  query.group(borrower)
end
189
+
# Build the composed query, show its SQL, then run it and print one line
# per patron with the current and prior lending totals.
report_sql = composer.build(:combined_set).to_sql
puts "---- SQL ----", report_sql

puts
puts format("%-6s | %3s | %5s", "Patron", "Now", "Prior")
puts format("%-6s-+-%3s-+-%5s-", "-" * 6, "-" * 3, "-" * 5)

Patron.find_by_sql(report_sql).each do |row|
  # prior_total comes from an outer join and may be nil; show it as 0
  prior = row.prior_total || 0
  puts format("%-6s | %3d | %5d", row.name, row.current_total, prior)
end
@@ -0,0 +1,55 @@
1
+ require 'arel'
2
+
module Query
  # A thin, chainable wrapper around an Arel::SelectManager.
  #
  # A Query::Base is created for a primary table and exposes the wrapped
  # select manager through #arel. Any method the wrapper does not define
  # itself is forwarded to the select manager, and the wrapper (not the
  # value returned by Arel) is handed back, so calls can be chained:
  #
  #   Query::Base.new(:books).project(...).join(...).on(...)
  class Base
    attr_reader :primary_table, :arel

    # primary - an Arel::Table, an Arel::Nodes::TableAlias, or a value that
    #           Arel::Table.new accepts (it is wrapped in a new table).
    # args    - passed through to #_configure untouched.
    def initialize(primary, *args)
      @primary_table = _make_table(primary)

      @arel = Arel::SelectManager.new(ActiveRecord::Base).
        from(@primary_table)

      _configure(*args)
    end

    # Replaces the current projections with the given ones.
    # Returns self.
    def reproject(*projections)
      arel.projections = projections
      self
    end

    # Adds a projection of every column of the primary table.
    # Returns self.
    def all
      arel.project(primary_table[Arel.star])
      self
    end

    # Returns whatever the select manager's own #as returns, NOT the
    # Query object. This has to be defined explicitly because the generic
    # delegation in #method_missing always returns self.
    def as(name)
      arel.as(name)
    end

    # Returns the SQL produced by the wrapped select manager.
    def to_sql
      arel.to_sql
    end

    alias to_s to_sql

    # Forwards any other message to the select manager and returns self so
    # that calls can be chained. `send` is used, so private methods of the
    # select manager stay reachable, as before. A message the select
    # manager does not understand raises NoMethodError for this object.
    def method_missing(sym, *args, &block)
      return super unless arel.respond_to?(sym, true)

      arel.send(sym, *args, &block)
      self
    end

    # Keeps #respond_to? (and #method) truthful about the messages that
    # #method_missing forwards to the select manager.
    def respond_to_missing?(sym, include_private = false)
      arel.respond_to?(sym, include_private) || super
    end

    # Returns `value` itself when it is already an Arel::Table or an
    # Arel::Nodes::TableAlias; otherwise wraps it in a new Arel::Table.
    def _make_table(value)
      case value
      when Arel::Table, Arel::Nodes::TableAlias then value
      else Arel::Table.new(value)
      end
    end

    # Hook called at the end of #initialize with the extra constructor
    # arguments. Overridden by subclasses for per-query configuration;
    # does nothing here.
    def _configure(*args)
    end
  end
end
@@ -0,0 +1,315 @@
1
+ require 'arel'
2
+
module Query

  # A class for composing queries into large, complicated reporting
  # monstrosities that return data for trends, histograms, and all
  # kinds of other things.
  #
  # The idea is that you first create a composer object:
  #
  #   q = Query::Composer.new
  #
  # Then, you tell the composer about a few queries:
  #
  #   q.use(:entities) { User.all }
  #   q.use(:companies) { Company.all }
  #
  # These queries are *independent*, in that they have no dependencies.
  # But we can add some queries now that depend on those. We declare
  # another query, giving it one or more parameters. Those parameter
  # names must match the identifiers of queries given to the composer.
  # Here, we have a query that is dependent on the "entities" and
  # "companies" queries, above.
  #
  #   q.use(:entities_with_extra) do |entities, companies|
  #     team_table = Arel::Table.new(:teams)
  #
  #     Arel::SelectManager.new(ActiveRecord::Base).
  #       from(entities).
  #       project(
  #         entities[Arel.star],
  #         team_table[:name].as('team_name'),
  #         companies[:name].as('company_name')).
  #       join(team_table).
  #         on(team_table[:id].eq(entities[:team_id])).
  #       join(companies).
  #         on(companies[:id].eq(entities[:company_id]))
  #   end
  #
  # After you've defined a bunch of these queries, you should have
  # one of them (and ONLY one of them) that nothing else depends on.
  # This is the "root" query--the one that returns the data set you're
  # looking for. The composer can now do its job and accumulate and
  # aggregate all those queries together, by calling the #build method
  # with the identifier for the root query you want to build.
  #
  #   query = q.build(:some_query_identifier)
  #
  # By default, this will create a query with each component represented
  # as derived tables (nested subqueries):
  #
  #   SELECT "a".*,
  #          "b"."name" AS "company_name",
  #          "c"."name" AS "team_name"
  #   FROM (
  #     SELECT "users".* FROM "users"
  #   ) a
  #   INNER JOIN (
  #     SELECT "companies".* FROM "companies"
  #   ) b
  #   ON "b"."id" = "a"."company_id"
  #   INNER JOIN (
  #     SELECT "teams".* FROM "teams"
  #   ) c
  #   ON "c"."id" = "a"."team_id"
  #   WHERE ...
  #
  # If you would rather use CTEs (Common Table Expressions, or "with"
  # queries), you can pass ":use_cte => true" to #build to generate the
  # following:
  #
  #   WITH
  #     "a" AS (SELECT "users".* FROM "users"),
  #     "b" AS (SELECT "companies".* FROM "companies"),
  #     "c" AS (
  #       SELECT "a".*,
  #              "teams"."name" as "team_name",
  #              "b"."name" as "company_name"
  #       FROM "a"
  #       INNER JOIN "teams"
  #         ON "teams"."id" = "a"."team_id"
  #       INNER JOIN "b"
  #         ON "b".id = "a"."company_id")
  #     ...
  #   SELECT ...
  #   FROM ...
  #
  # Be aware, though, that some DBMS's (like Postgres) do not optimize
  # CTE's, and so the resulting queries may be very inefficient.
  #
  # If you don't want the short, opaque identifiers to be used as
  # aliases, you can pass ":use_aliases => false" to #build:
  #
  #   query = q.build(:entities_with_extra, :use_aliases => false)
  #
  # That way, the query identifiers themselves will be used as the
  # query aliases.

  class Composer
    class Error < RuntimeError; end
    class UnknownQuery < Error; end
    class CircularDependency < Error; end
    class InvalidQuery < Error; end

    @@prefer_cte = false
    @@prefer_aliases = true

    class <<self
      def prefer_cte?
        @@prefer_cte
      end

      # By default, the composer generates queries that use derived
      # tables. If you'd rather default to CTE's,
      # set Query::Composer.prefer_cte to true.
      def prefer_cte=(preference)
        @@prefer_cte = preference
      end

      def prefer_aliases?
        @@prefer_aliases
      end

      # By default, the composer generates queries that use shortened
      # names as aliases for the full names of the components. If you'd
      # rather use the full names instead of aliases,
      # set Query::Composer.prefer_aliases to false.
      def prefer_aliases=(preference)
        @@prefer_aliases = preference
      end
    end

    # Create an empty query object. If a block is given, the query
    # object will be yielded to it.
    def initialize
      @parts = {}
      yield self if block_given?
    end

    # Indicate that the named identifier should be defined by the given
    # block. The names used for the parameters of the block are significant,
    # and must exactly match the identifiers of other elements in the
    # query.
    #
    # The block should return an object that responds to #to_sql, for use
    # in composing the larger reporting query. If the return value of the
    # block responds to :arel, the result of that method is used instead.
    #
    # The block is not called here; it is called when a query is built.
    # Returns self.
    def use(name, &definition)
      @parts[name] = definition
      self
    end

    # Aliases the given query component with the new name. This can be
    # useful for redefining an existing component, where you still
    # want to retain the old definition.
    #
    #   composer.use(:source) { Something.all }
    #   composer.alias(:old_source, :source)
    #   composer.use(:source) { |old_source| ... }
    #
    # Returns self.
    def alias(new_name, name)
      @parts[new_name] = @parts[name]
      self
    end

    # Removes the named component from the composer. Returns self.
    def delete(name)
      @parts.delete(name)
      self
    end

    # Return an Arel object representing the query starting at the
    # component named `root`. Supported options are:
    #
    # * :use_cte (false) - the query should use common table expressions.
    #   If false, the query will use derived tables, instead.
    # * :use_aliases (true) - the query will use short, opaque identifiers
    #   for aliases. If false, the query will use the full dependency
    #   names to identify the elements.
    #
    # The defaults shown are the class-wide preferences, unless they have
    # been changed via Query::Composer.prefer_cte= / .prefer_aliases=.
    #
    # Raises UnknownQuery if `root` or anything it (transitively) depends
    # on has no definition, CircularDependency if the components depend on
    # each other in a loop, and InvalidQuery if a component's block returns
    # something that responds to neither #arel nor #to_sql.
    def build(root, options={})
      deps = _resolve(root)
      aliases = _alias_queries(deps, options)

      if _use_cte?(options)
        _query_with_cte(root, deps, aliases)
      else
        _query_with_derived_table(root, deps, aliases)
      end
    end

    # True if CTE's should be used: the :use_cte option when given,
    # otherwise the class-wide preference.
    def _use_cte?(options)
      options.fetch(:use_cte, self.class.prefer_cte?)
    end

    # True if short aliases should be used: the :use_aliases option when
    # given, otherwise the class-wide preference.
    def _use_aliases?(options)
      options.fetch(:use_aliases, self.class.prefer_aliases?)
    end

    # Builds an Arel object using derived tables.
    def _query_with_derived_table(root, deps, aliases)
      queries = {}

      # `deps` lists dependencies before their dependents, so every
      # component finds the (aliased) queries it needs already in place.
      deps.each do |name|
        queries[name] = _invoke(name, queries).as(aliases[name].name)
      end

      _invoke(root, queries)
    end

    # Builds an Arel object using common table expressions.
    def _query_with_cte(root, deps, aliases)
      query = _invoke(root, aliases)

      components = deps.map do |name|
        Arel::Nodes::As.new(aliases[name], _invoke(name, aliases))
      end

      query.with(*components) if components.any?
      query
    end

    # Invokes the named dependency, using the given aliases mapping: each
    # parameter of the component's block receives the entry of `aliases`
    # stored under the parameter's name.
    #
    # Raises UnknownQuery if the component has no definition, and
    # InvalidQuery if its block returns an unusable value.
    def _invoke(name, aliases)
      block = @parts[name]
      raise UnknownQuery, "`#{name}`" unless block

      params = block.parameters.map { |(_, pname)| aliases[pname] }
      result = block.call(*params)

      if result.respond_to?(:arel)
        result.arel
      elsif result.respond_to?(:to_sql)
        result
      else
        raise InvalidQuery, "query elements must quack like #arel or #to_sql (`#{name}` returned #{result.class})"
      end
    end

    # Ensure that all referenced dependencies exist in the graph.
    # Otherwise, raise Query::Composer::UnknownQuery. A component that was
    # registered without a block (or aliased from a name that did not
    # exist) has no definition and counts as unknown, too.
    #
    # Returns the names of the direct dependencies of `name`.
    def _validate_dependencies!(name)
      definition = @parts[name]
      raise UnknownQuery, "`#{name}`" unless definition

      definition.parameters.map do |(_, pname)|
        unless @parts[pname]
          raise UnknownQuery, "`#{pname}` referenced by `#{name}`"
        end

        pname
      end
    end

    # Resolves the tree of dependent components by traversing the graph
    # starting at `root`. Returns an array of identifiers where elements
    # later in the list depend on zero or more elements earlier in the
    # list. The resulting list includes only the dependencies of the
    # `root` element, but not the `root` element itself.
    def _resolve(root)
      _resolve2(root).flatten.uniq - [root]
    end

    # This is a utility function, used only by #_resolve. It recursively
    # traverses the tree, depth-first, and returns a "tree" (array of
    # recursively nested arrays) representing the graph at root. The
    # root of each subtree is at the end of the corresponding array.
    #
    #   [ [ [:a], [:b], :c ], [ [:d], [:e], :f ], :root ]
    def _resolve2(root, dependents=[])
      deps = _validate_dependencies!(root)
      return [ root ] if deps.empty?

      # Circular dependency exists if anything in the dependents
      # (that which depends on root) exists in root's own dependency
      # list

      dependents = [ root, *dependents ]
      overlap = deps & dependents
      if overlap.any?
        raise CircularDependency, "#{root} -> #{overlap.join(', ')}"
      end

      deps.map { |dep| _resolve2(dep, dependents) } << root
    end

    # Build a mapping of dependency names, to Arel::Table objects. The
    # Arel::Table names will use opaque, short identifiers ("a", "b", etc.),
    # unless the :use_aliases option is false, when the dependency names
    # themselves will be used.
    def _alias_queries(deps, options={})
      use_aliases = _use_aliases?(options)
      current_alias = "a"

      deps.each_with_object({}) do |key, aliases|
        if use_aliases
          aliases[key] = Arel::Table.new(current_alias)
          current_alias = current_alias.succ
        else
          aliases[key] = Arel::Table.new(key)
        end
      end
    end
  end
end