data_task 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,430 @@
1
+ require 'pg'
2
+ require_relative '../data'
3
+ require_relative 'support/transactions'
4
+ require_relative 'support/booleans'
5
+ require_relative 'support/connection_persistence'
6
+
7
+ module Rake
8
+ module DataTask
9
+
10
+ class Postgres < Db
11
+
12
+ @connections = {}
13
+ extend ConnectionPersistence
14
+
15
+ include StandardBooleans
16
+ include StandardTransactions
17
+
18
# Connect to a PostgreSQL database.
#
# If this class has already connected to the same host, port, and database with the same
# username, that connection is re-used for this instance instead of opening a new one.
# Tracking is set up afterwards unless the tracking table already exists.
#
# @param [Hash] options the connection parameters
# @option options [String] 'host' the server hostname or IP address (default 'localhost')
# @option options [Integer] 'port' the server port number (default 5432)
# @option options [String] 'database' the database name
# @option options [String] 'username' the name of the database user to connect as
# @option options [String] 'password' the database user's password (default '')
# @return [Postgres] an instance of this adapter
def initialize(options)
  host = options['host'] || 'localhost'
  port = options['port'] || 5432
  database = options['database']
  username = options['username']

  # connections are looked up by these options only; the password is not part of the key
  conn_options = { :host => host, :port => port, :database => database, :username => username }
  @connection = self.class.persisted_connection(conn_options)

  if @connection.nil?
    # nothing to reuse: create and persist a new connection
    @connection = PG::Connection.new(host, port, nil, nil, database, username, options['password'] || '')
    @connection.set_notice_processor do |msg|
      # server notices are logged on one line, at error level when they report an error
      if msg =~ /^ERROR:/
        LOG.error('psql') { msg.gsub(/\n/, '; ') }
      else
        LOG.info('psql') { msg.gsub(/\n/, '; ') }
      end
    end
    self.class.persist_connection(@connection, conn_options)
  end

  # set up tracking if it isn't set up already
  set_up_tracking unless tracking_tables?
end
67
+
68
# Wrap a name in a Data object bound to this adapter.
#
# @param name the name handed to Data.new
# @return [Data] the result of Data.new(name, self)
def [](name)
  Data.new(name, self)
end
71
+
72
# Column definitions for the tracking table, adapted for PostgreSQL.
#
# Takes the definitions returned by super, upcases every enumerated value, then overrides the
# table relation type and the data type of the time column.
#
# @return [Hash] the adjusted column definitions (the object returned by super, modified in place)
def table_tracker_columns
  columns = super

  # upcase all enum'd column values because system tables store them in upcase
  columns.each_value do |definition|
    next unless definition.key?(:values)
    enumerated = definition[:values]
    enumerated.each { |name, value| enumerated[name] = value.upcase }
  end

  columns[:relation_type][:values][:table] = 'BASE TABLE'
  columns[:time][:data_type] = :'timestamp with time zone'
  columns
end
89
+
90
# Run SQL on the current connection and return the resulting rows.
#
# When the database rejects the statement, the error is logged, a rollback is issued on the
# connection, and the original error is re-raised. The rollback is sent straight to the
# connection rather than through this method, so a rollback that fails too (for example on a
# dead connection) is logged once instead of recursing without bound.
#
# @param [String] sql the statement to run
# @return [Array<Array>] the rows of the result, as returned by the result's #values
# @raise [PG::Error] when the database reports an error for the statement
def execute(sql)
  connect if @connection.nil?

  begin
    @connection.exec(sql).values
  rescue PG::Error => e
    if e.is_a?(PG::UndefinedTable)
      # a missing tracking table gets a more helpful message than the raw server error
      if /ERROR: relation "(last_operations|.*\.last_operations)" does not exist/ =~ e.message
        LOG.error "Tracking is not set up in this schema. Set up tracking in this schema first."
      end
    else
      LOG.info e.message.chomp
    end

    begin
      @connection.exec('rollback;')
    rescue PG::Error => rollback_error
      # best effort only: the caller needs to see the original failure, not this one
      LOG.info rollback_error.message.chomp
    end

    raise e
  end
end
114
+
115
# @return [Boolean] whether data named TABLE_TRACKER_NAME exists, per data_exists?
def tracking_tables?
  data_exists? TABLE_TRACKER_NAME
end
118
+
119
# Create the tracking table, dropping any existing one first.
#
# @param [Hash] options
# @option options [String] :search_path the search path to work under (defaults to the current
#   search path, comma-joined)
def set_up_tracking(options = {})
  tear_down_tracking options

  schemas = options[:search_path] || search_path.join(',')
  with_search_path(schemas) do
    columns_sql = table_tracker_columns.map { |name, definition| "#{name} #{definition[:data_type]}" }.join(', ')
    # the trailing false asks create_table not to track the tracking table itself
    create_table TABLE_TRACKER_NAME, nil, " (#{columns_sql})", false
  end
end
132
+
133
# Drop the tracking table.
#
# @param [Hash] options
# @option options [String] :search_path the search path to work under (defaults to the current
#   search path, comma-joined)
def tear_down_tracking(options = {})
  schemas = options[:search_path]
  schemas ||= search_path.join(',')
  with_search_path(schemas) { drop_table TABLE_TRACKER_NAME }
end
139
+
140
# Empty the tracking table without dropping it.
#
# The statement is issued directly: truncate_table returns early when asked to truncate the
# tracking table, so delegating to it would leave every tracked operation in place.
#
# @param [Hash] options
# @option options [String] :search_path the search path to work under (defaults to the current
#   search path, comma-joined)
def reset_tracking(options = {})
  target_search_path = options[:search_path] || search_path.join(',')
  with_search_path(target_search_path) do
    execute "truncate table #{TABLE_TRACKER_NAME}"
  end
end
146
+
147
# Look up the most recent tracked operation time for a table.
#
# @param [String] qualified_table_name a table name, optionally qualified with its schema
# @return the value Sql.get_single_time extracts from the query result, or nil when the name
#   is unqualified and first_schema_for finds no schema holding the table
def table_mtime(qualified_table_name)
  schema_name, table_name = parse_schema_and_table_name(qualified_table_name)
  schema_name ||= first_schema_for(table_name)

  # without a schema the statements below would be malformed ("set search_path to " and
  # "from .<tracker>"), so report "no modification time" instead of raising a syntax error
  return nil if schema_name.nil?

  with_search_path(schema_name) do
    rows = execute <<-EOSQL
      select max(time)
      from #{schema_name}.#{TABLE_TRACKER_NAME}
      where relation_name = '#{table_name}'
    EOSQL
    Sql.get_single_time(rows)
  end
end
161
+
162
+ alias_method :data_mtime, :table_mtime
163
+
164
# Truncate a table and record the truncation in the tracking table.
#
# Does nothing when table_name is the tracking table (compared case-insensitively).
#
# @param [String] table_name the table to truncate
def truncate_table(table_name)
  unless table_name.casecmp(TABLE_TRACKER_NAME) == 0
    execute "truncate table #{table_name}"
    track_truncate table_name
  end
end
169
+
170
+ alias_method :truncate_data, :truncate_table
171
+
172
# Drop a table (with cascade) if it exists, then remove its tracking entries.
#
# The tracking step is skipped when the dropped table is the tracking table itself.
#
# @param [String] table_name the table to drop
def drop_table(table_name)
  execute "drop table if exists #{table_name} cascade"
  track_drop table_name unless table_name.casecmp(TABLE_TRACKER_NAME) == 0
end
177
+
178
# Remove the tracking entries for a dropped table.
#
# The tracking table is looked up in the dropped table's schema when table_name is
# schema-qualified. Nothing happens when that tracking table does not exist.
#
# @param [String] table_name the dropped table, optionally qualified with its schema
def track_drop(table_name)
  schema_name, bare_table_name = parse_schema_and_table_name(table_name)

  tracker = TABLE_TRACKER_NAME
  tracker = "#{schema_name}.#{TABLE_TRACKER_NAME}" unless schema_name.nil?
  return unless table_exists?(tracker)

  execute <<-EOSQL
    delete from #{tracker}
    where
      relation_name = '#{bare_table_name}' and
      relation_type = '#{relation_type_values[:table]}'
  EOSQL
end
191
+
192
+ alias_method :drop_data, :drop_table
193
+
194
# @param [String] table_name the table to look for, optionally qualified with its schema
# @param [Hash] options passed through to relation_exists?
# @return [Boolean] whether relation_exists? finds a :table relation with this name
def table_exists?(table_name, options = {})
  relation_exists?(table_name, :table, options)
end
197
+
198
+ alias_method :data_exists?, :table_exists?
199
+
200
# @param [String] view_name the view to look for, optionally qualified with its schema
# @param [Hash] options passed through to relation_exists?
# @return [Boolean] whether relation_exists? finds a :view relation with this name
def view_exists?(view_name, options = {})
  relation_exists?(view_name, :view, options)
end
203
+
204
# Drop and re-create a table, optionally setting up tracking for it.
#
# @param [String] table_name the table to create
# @param [String, nil] data_definition SQL placed after "as" to populate the table; omitted when nil
# @param [String] column_definitions SQL placed directly after the table name
# @param [Boolean] track_table whether to create tracking rules and record the creation
def create_table(table_name, data_definition, column_definitions, track_table=true)
  drop_table table_name

  as_clause = data_definition.nil? ? nil : "as #{data_definition}"
  execute <<-EOSQL
    create table #{table_name} #{column_definitions}
    #{as_clause}
  EOSQL

  return unless track_table
  create_tracking_rules(table_name)
  track_creation table_name, 0
end
215
+
216
+ alias_method :create_data, :create_table
217
+
218
# Drop and re-create a view.
#
# @param [String] view_name the view to create
# @param [String] view_definition the SQL placed after "as"
def create_view(view_name, view_definition)
  drop_view view_name

  view_sql = <<-EOSQL
    create view #{view_name} as
      #{view_definition}
  EOSQL
  execute view_sql
end
225
+
226
# Drop a view (with cascade) if it exists.
#
# @param [String] view_name the view to drop
def drop_view(view_name)
  execute("drop view if exists #{view_name} cascade")
end
229
+
230
# List the operations that can be tracked, split by which side handles them.
#
# @return [Hash] :by_db holds the operations returned by operations_supported_by_db; :by_app
#   holds :truncate and :create minus anything already in :by_db
def operations_supported
  db_operations = operations_supported_by_db
  app_operations = [:truncate, :create] - db_operations
  { :by_db => db_operations, :by_app => app_operations }
end
236
+
237
# Run a block with the connection's search path temporarily set to the given schemas.
#
# The previous search path is restored afterwards, including when the block raises, so a
# failure inside the block cannot leave the connection pointing at the wrong schemas.
#
# @param [String, Array<String>] schemas the schema name(s) to use while the block runs
# @return the value returned by the block
def with_search_path(schemas)
  original_search_path = search_path
  execute "set search_path to #{Array(schemas).join(',')}"
  begin
    yield
  ensure
    execute "set search_path to #{original_search_path.join(',')}"
  end
end
244
+
245
# Run a block with the connection's role temporarily set to the given role.
#
# The previous role is restored afterwards, including when the block raises, so a failure
# inside the block cannot leave the connection running as the wrong role.
#
# @param [String] role the role to assume while the block runs
# @return the value returned by the block
def with_role(role)
  original_role = current_user
  execute "set role #{role}"
  begin
    yield
  ensure
    execute "set role #{original_role}"
  end
end
252
+
253
+
254
+
255
+ private
256
+
257
# @return [Array<Symbol>] the operations tracked by the database, which for this adapter are
#   the ones returned by operations_supported_by_db_rules
def operations_supported_by_db
  operations_supported_by_db_rules
end
260
+
261
# @return [Array<Symbol>] the operations for which create_tracking_rules creates a rule
def operations_supported_by_db_rules
  [:update, :insert, :delete]
end
264
+
265
# Split a table name qualified with a schema name into separate strings for schema and
# table names.
#
# @param [String] table_name a table name, optionally qualified as "schema.table"
# @return [Array(String, String)] the schema name and table name, separately. If table_name
#   is not qualified with a schema name, the result is [nil, table_name].
# @raise [RuntimeError] if table_name contains more than one '.', or has nothing before or
#   after the '.'
def parse_schema_and_table_name(table_name)
  return [nil, table_name] unless table_name.include?('.')

  # the -1 limit keeps trailing empty strings, so "schema." is seen as two parts
  parts = table_name.split('.', -1)

  if parts.length > 2
    raise "Invalid relation reference #{table_name} (only one '.' is allowed)"
  end

  if parts.any?(&:empty?)
    raise "Invalid relation reference #{table_name} (schema and table names must not be empty)"
  end

  parts
end
280
+
281
# Read the connection's current search path.
#
# The current user is only looked up when the search path actually starts with the "$user"
# placeholder, which saves a round trip on every other call.
#
# @return [Array<String>] the ordered schema names in the search path, downcased
def search_path
  schemas = execute("show search_path").first.first.split(',').map(&:strip)

  # the default search path begins with a symbolic reference to the current username;
  # replace it with the resolved username when a schema of that name exists, else drop it
  if schemas.first == '"$user"'
    username = current_user
    user_schema_rows = execute <<-EOSQL
      select 1
      from information_schema.schemata
      where schema_name = '#{username}'
    EOSQL

    remaining = schemas[1..-1]
    if user_schema_rows.nil? || user_schema_rows.first.nil?
      schemas = remaining
    else
      schemas = [username] + remaining
    end
  end

  schemas.map(&:downcase)
end
304
+
305
# @return [String] the name of the current database user, as reported by "select current_user"
def current_user
  rows = execute("select current_user")
  rows.first.first
end
309
+
310
# Find the schema that a table name resolves to.
#
# Schemas are ranked by their position in the search path; schemas outside the search path
# rank last. Table names are compared case-insensitively but literally, so an underscore in a
# name only matches an underscore (with ilike it would match any single character).
#
# @param [String] table_name the table to look for
# @return [String, nil] the name of the first schema in the search path containing
#   table_name, or nil when table_exists? reports that the table does not exist
def first_schema_for(table_name)
  return unless table_exists?(table_name)
  _schema_name, unqualified_table_name = parse_schema_and_table_name(table_name)

  search_order_whens = search_path.each_with_index.map do |schema, index|
    "when table_schema = '#{schema}' then #{index + 1}"
  end

  # a CASE expression needs at least one WHEN branch, so an empty search path ranks every
  # schema equally instead of producing invalid SQL
  search_order_sql =
    if search_order_whens.empty?
      "'NaN'::float"
    else
      "case #{search_order_whens.join(' ')} else 'NaN'::float end"
    end

  rows = execute <<-EOSQL
    select
      table_schema,
      search_order
    from (
      select
        table_schema,
        table_name,
        #{search_order_sql} as search_order
      from information_schema.tables
      where lower(table_name) = lower('#{unqualified_table_name}')
    ) a
    order by search_order
    limit 1
  EOSQL

  first_row = rows.first
  first_row && first_row.first
end
340
+
341
# @param table_name the table the rule belongs to
# @param operation the operation the rule fires on
# @return [String] the table name and the operation joined by an underscore
def rule_name(table_name, operation)
  "#{table_name}_#{operation}"
end
344
+
345
# Create one rule per operation in operations_supported_by_db_rules that records the operation
# in the tracking table whenever it happens to table_name.
#
# Each rule replaces the table's existing tracking entry with a fresh one stamped with
# clock_timestamp(). The tracking table is addressed in the table's schema when table_name is
# schema-qualified.
#
# @param [String] table_name the table to track, optionally qualified with its schema
def create_tracking_rules(table_name)
  schema_name, bare_table_name = parse_schema_and_table_name(table_name)

  tracker = TABLE_TRACKER_NAME
  tracker = "#{schema_name}.#{TABLE_TRACKER_NAME}" unless schema_name.nil?

  operations_supported_by_db_rules.each do |op|
    rule_sql = <<-EOSQL
      create or replace rule "#{rule_name(table_name, op)}" as
        on #{op.to_s} to #{table_name} do also (

          delete from #{tracker} where
            relation_name = '#{bare_table_name}' and
            relation_type = '#{relation_type_values[:table]}'
            ;

          insert into #{tracker} values (
            '#{bare_table_name}',
            '#{relation_type_values[:table]}',
            '#{operation_values[op]}',
            clock_timestamp()
          );

        )
    EOSQL
    execute rule_sql
  end
end
370
+
371
# Record the creation of a table in the tracking table, replacing any existing entry for it.
#
# @param [String] table_name the created table, optionally qualified with its schema
# @param n_tuples accepted but not used by this method
def track_creation(table_name, n_tuples)
  schema_name, bare_table_name = parse_schema_and_table_name(table_name)

  tracker = TABLE_TRACKER_NAME
  tracker = "#{schema_name}.#{TABLE_TRACKER_NAME}" unless schema_name.nil?

  operation = :create
  creation_sql = <<-EOSQL
    delete from #{tracker} where
      relation_name = '#{bare_table_name}' and
      relation_type = '#{relation_type_values[:table]}'
      ;
    insert into #{tracker} values (
      '#{bare_table_name}',
      '#{relation_type_values[:table]}',
      '#{operation_values[operation]}',
      clock_timestamp()
    );
  EOSQL
  execute creation_sql
end
389
+
390
# Mark a table's tracking entry as truncated, stamped with clock_timestamp().
#
# @param [String] table_name the truncated table, optionally qualified with its schema
def track_truncate(table_name)
  schema_name, bare_table_name = parse_schema_and_table_name(table_name)

  tracker = TABLE_TRACKER_NAME
  tracker = "#{schema_name}.#{TABLE_TRACKER_NAME}" unless schema_name.nil?

  truncate_sql = <<-EOSQL
    update #{tracker}
    set
      operation = '#{operation_values[:truncate]}',
      time = clock_timestamp()
    where
      relation_name = '#{bare_table_name}' and
      relation_type = '#{relation_type_values[:table]}'
  EOSQL
  execute truncate_sql
end
404
+
405
# Check whether a relation of the given type exists.
#
# A schema-qualified name is looked up in that schema only, compared case-insensitively but
# literally (with ilike an underscore in the schema name would match any single character).
# An unqualified name is looked up in the schemas on the search path.
#
# @param [String] relation_name the relation to look for, optionally qualified with its schema
# @param [Symbol] relation_type a key of relation_type_values, e.g. :table or :view
# @param [Hash] options accepted but not used by this method
# @return [Boolean] whether at least one matching relation was found
def relation_exists?(relation_name, relation_type, options = {})
  schema_name, unqualified_relation_name = parse_schema_and_table_name(relation_name)

  schema_conditions_sql =
    if schema_name.nil?
      "table_schema in (#{search_path.to_quoted_s})"
    else
      "lower(table_schema) = lower('#{schema_name}')"
    end

  rows = execute <<-EOSQL
    select count(*)
    from information_schema.tables
    where
      table_name = '#{unqualified_relation_name}' and
      table_type = '#{relation_type_values[relation_type]}' and
      #{schema_conditions_sql}
  EOSQL

  Sql.get_single_int(rows) > 0
end
426
+
427
+ end
428
+
429
+ end
430
+ end
@@ -0,0 +1,239 @@
1
+ require 'sqlite3'
2
+ require_relative '../data'
3
+ require_relative 'support/transactions'
4
+ require_relative 'support/booleans'
5
+
6
+ module Rake
7
+ module DataTask
8
+
9
+ class Sqlite < Db
10
+
11
# Connect to an Sqlite database.
#
# Tracking is set up afterwards unless the tracking table already exists.
#
# @param [Hash] options the connection parameters
# @option options [String] 'database' the database name (default 'temp')
# @return [Sqlite] an instance of this adapter
def initialize(options)
  database = options['database'] || 'temp'
  @connection = SQLite3::Database.new(database)

  # set up tracking if it isn't set up already
  set_up_tracking unless tracking_tables?
end
22
+
23
# Run SQL on the current connection.
#
# A SQLite3::SQLException is logged at info level and then re-raised.
#
# @param [String] sql the statement to run
# @return the result of the connection's #execute
def execute(sql)
  connect if @connection.nil?

  begin
    @connection.execute(sql)
  rescue SQLite3::SQLException => error
    LOG.info error.message.chomp
    raise error
  end
end
32
+
33
+ include NumericBooleans
34
+ include StandardTransactions
35
+
36
# @return [Boolean] whether a table named TABLE_TRACKER_NAME exists, per table_exists?
def tracking_tables?
  table_exists? TABLE_TRACKER_NAME
end
39
+
40
# Column definitions for the tracking table, adapted for SQLite.
#
# @return [Hash] the definitions returned by super with the :time entry replaced by one whose
#   data type is SQLite's timestamp
def table_tracker_columns
  sqlite_overrides = { :time => { :data_type => :timestamp } }
  super.merge(sqlite_overrides)
end
46
+
47
# Create the tracking table, dropping any existing one first.
#
# @param [Hash] options passed through to tear_down_tracking
def set_up_tracking(options = {})
  tear_down_tracking options

  columns_sql = table_tracker_columns.map { |name, definition| "#{name} #{definition[:data_type]}" }.join(', ')
  # the trailing false asks create_table not to track the tracking table itself
  create_table TABLE_TRACKER_NAME, nil, " (#{columns_sql})", false
end
54
+
55
# Drop the tracking table.
#
# @param [Hash] options accepted but not used by this method
def tear_down_tracking(options = {})
  drop_table(TABLE_TRACKER_NAME)
end
58
+
59
# Empty the tracking table without dropping it.
#
# The statement is issued directly: truncate_table returns early when asked to truncate the
# tracking table, so delegating to it would leave every tracked operation in place.
#
# @param [Hash] options accepted but not used by this method
def reset_tracking(options = {})
  execute "delete from #{TABLE_TRACKER_NAME}"
end
62
+
63
# Look up the most recent tracked operation time for a table.
#
# @param [String] table_name the table to look up
# @return the value Sql.get_single_time extracts from the query result
def table_mtime(table_name)
  rows = execute <<-EOSQL
    -- assume time is UTC (Sqlite3 default) and add offset for Ruby's Time.parse
    select datetime(max(time)) || ' -0000'
    from #{TABLE_TRACKER_NAME}
    where relation_name = '#{table_name}'
  EOSQL
  Sql.get_single_time(rows)
end
73
+
74
+ alias_method :data_mtime, :table_mtime
75
+
76
# Drop and re-create a table, optionally setting up tracking for it.
#
# @param [String] table_name the table to create
# @param [String, nil] data_definition SQL placed after "as" to populate the table; omitted when nil
# @param [String] column_definitions SQL placed directly after the table name
# @param [Boolean] track_table whether to create tracking triggers and record the creation
def create_table(table_name, data_definition, column_definitions, track_table=true)
  drop_table table_name

  as_clause = data_definition.nil? ? nil : "as #{data_definition}"
  execute <<-EOSQL
    create table #{table_name} #{column_definitions}
    #{as_clause}
  EOSQL

  return unless track_table
  create_tracking_rules(table_name)
  track_creation table_name, 0
end
87
+
88
+ alias_method :create_data, :create_table
89
+
90
# Drop a table if it exists, drop the views that select from it, and remove its tracking
# entries.
#
# Views are found by matching their stored SQL text against "from <table>" and
# "join <table>". The patterns are single-quoted string literals: SQLite reads double-quoted
# text as an identifier first and only falls back to a string as a legacy behavior.
# The tracking step is skipped when the dropped table is the tracking table itself.
#
# @param [String] table_name the table to drop
def drop_table(table_name)
  execute "drop table if exists #{table_name}"

  # manually cascade the drop operation to views for this table
  views_for_dropped_table = execute <<-EOSQL
    select name from sqlite_master
    where
      type = 'view' and (
        -- add trailing space for views without where statements
        sql || ' ' like '% from #{table_name} %' or
        sql like '% join #{table_name} %'
      )
  EOSQL
  views_for_dropped_table.flatten.each { |view_name| drop_view view_name }

  track_drop table_name unless table_name.casecmp(TABLE_TRACKER_NAME) == 0
end
110
+
111
+ alias_method :drop_data, :drop_table
112
+
113
# Drop and re-create a view.
#
# @param [String] view_name the view to create
# @param [String] select_stmt the SQL placed after "as"
def create_view(view_name, select_stmt)
  drop_view(view_name)
  execute("create view #{view_name} as #{select_stmt}")
end
117
+
118
# Drop a view if it exists.
#
# @param [String] view_name the view to drop
def drop_view(view_name)
  execute("drop view if exists #{view_name}")
end
121
+
122
# Remove the tracking entries for a dropped table.
#
# @param [String] table_name the dropped table
def track_drop(table_name)
  delete_sql = <<-EOSQL
    delete from #{TABLE_TRACKER_NAME}
    where
      relation_name = '#{table_name}' and
      relation_type = '#{relation_type_values[:table]}'
  EOSQL
  execute delete_sql
end
130
+
131
# @param [String] table_name the table to look for
# @param [Hash] options passed through to relation_exists?
# @return [Boolean] whether relation_exists? finds a 'table' relation with this name
def table_exists?(table_name, options = {})
  relation_exists? table_name, 'table', options
end
134
+
135
+ alias_method :data_exists?, :table_exists?
136
+
137
# @param [String] table_name the view to look for
# @param [Hash] options passed through to relation_exists?
# @return [Boolean] whether relation_exists? finds a 'view' relation with this name
def view_exists?(table_name, options = {})
  relation_exists? table_name, 'view', options
end
140
+
141
# Delete every row of a table and record the truncation in the tracking table.
#
# Does nothing when table_name is the tracking table (compared case-insensitively).
#
# @param [String] table_name the table to empty
def truncate_table(table_name)
  unless table_name.casecmp(TABLE_TRACKER_NAME) == 0
    execute "delete from #{table_name}"
    track_truncate table_name
  end
end
146
+
147
+ alias_method :truncate_data, :truncate_table
148
+
149
# Mark a table's tracking entry as truncated, stamped with the current time.
#
# @param [String] table_name the truncated table
def track_truncate(table_name)
  truncate_sql = <<-EOSQL
    update #{TABLE_TRACKER_NAME}
    set
      operation = '#{operation_values[:truncate]}',
      -- Sqlite generates times at UTC and stores them without zone information
      time = datetime('now')
    where
      relation_name = '#{table_name}' and
      relation_type = '#{relation_type_values[:table]}'
  EOSQL
  execute truncate_sql
end
161
+
162
# List the operations that can be tracked, split by which side handles them.
#
# @return [Hash] :by_db holds the operations returned by operations_supported_by_db; :by_app
#   holds :truncate and :create
def operations_supported
  app_operations = [:truncate, :create]
  { :by_db => operations_supported_by_db, :by_app => app_operations }
end
168
+
169
# Wrap a name in a Data object bound to this adapter.
#
# @param name the name handed to Data.new
# @return [Data] the result of Data.new(name, self)
def [](name)
  Data.new(name, self)
end
172
+
173
+
174
+
175
+ private
176
+
177
# @return [Array<Symbol>] the operations for which create_tracking_rules creates a trigger
def operations_supported_by_db
  [:update, :insert, :delete]
end
180
+
181
# @param table_name the table the trigger belongs to
# @param operation the operation the trigger fires on
# @return [String] the table name and the operation joined by an underscore
def rule_name(table_name, operation)
  "#{table_name}_#{operation}"
end
184
+
185
# Create one trigger per operation in operations_supported_by_db that updates the table's
# tracking entry (operation and time) after that operation happens to table_name.
#
# @param [String] table_name the table to track
def create_tracking_rules(table_name)
  operations_supported_by_db.each do |op|
    trigger_sql = <<-EOSQL
      create trigger #{rule_name(table_name, op)}
        after #{op.to_s} on #{table_name} begin

          update #{TABLE_TRACKER_NAME}
          set
            operation = '#{operation_values[op]}',
            time = datetime()
          where
            relation_name = '#{table_name}' and
            relation_type = '#{relation_type_values[:table]}'

          ;
        end
    EOSQL
    execute trigger_sql
  end
end
204
+
205
# Record the creation of a table in the tracking table.
#
# Any existing "create" entry for the table is deleted first, then a new entry stamped with
# the current time is inserted. The two statements are executed separately.
#
# @param [String] table_name the created table
# @param n_tuples accepted but not used by this method
def track_creation(table_name, n_tuples)
  operation = :create

  delete_sql = <<-EOSQL
    delete from #{TABLE_TRACKER_NAME} where
      relation_name = '#{table_name}' and
      relation_type = '#{relation_type_values[:table]}' and
      operation = '#{operation_values[operation]}'
      ;
  EOSQL
  execute delete_sql

  insert_sql = <<-EOSQL
    insert into #{TABLE_TRACKER_NAME} values (
      '#{table_name}',
      '#{relation_type_values[:table]}',
      '#{operation_values[operation]}',
      datetime('now')
    );
  EOSQL
  execute insert_sql
end
223
+
224
# Check whether sqlite_master lists a relation with the given name and type.
#
# @param [String] relation_name the relation to look for
# @param [String] relation_type the sqlite_master type to match, e.g. 'table' or 'view'
# @param [Hash] options accepted but not used by this method
# @return [Boolean] whether at least one matching relation was found
def relation_exists?(relation_name, relation_type, options = {})
  rows = execute <<-EOSQL
    select count(*) from sqlite_master
    where
      name = '#{relation_name}' and
      type = '#{relation_type}'
  EOSQL
  Sql.get_single_int(rows) > 0
end
235
+
236
+ end
237
+
238
+ end
239
+ end
@@ -0,0 +1,19 @@
1
module Rake
  module DataTask

    class Db

      # Boolean literals expressed as the strings 'true' and 'false'.
      module StandardBooleans
        # @return [String] the literal used for true
        def truthy_value
          'true'
        end

        # @return [String] the literal used for false
        def falsey_value
          'false'
        end
      end

      # Boolean literals expressed as the integers 1 and 0.
      module NumericBooleans
        # @return [Integer] the literal used for true
        def truthy_value
          1
        end

        # @return [Integer] the literal used for false
        def falsey_value
          0
        end
      end

    end

  end
end
@@ -0,0 +1,18 @@
1
module ConnectionPersistence
  # Adapters for datastores with long-lived connections should re-use existing connections instead
  # of creating redundant connections. This module provides methods for persisting a connection
  # across instances of an adapter and retrieving a persisted connection from the adapter class.
  #
  # Connections are kept in the @connections class instance variable of the class that extends
  # this module. A class may declare an empty @connections = {} itself; if it does not, the
  # hash is created on first use.

  # Retrieve a connection by the hash of options that uniquely identify it.
  #
  # @param [Hash] conn_options the options the connection was persisted under
  # @return the persisted connection, or nil if none was persisted under these options
  def persisted_connection(conn_options)
    @connections ||= {}
    @connections[conn_options]
  end

  # Save a connection and key it by a hash of options that uniquely identify it.
  #
  # @param conn the connection to persist
  # @param [Hash] conn_options the options to persist the connection under
  # @return the persisted connection
  def persist_connection(conn, conn_options)
    @connections ||= {}
    @connections[conn_options] = conn
  end

end