data_task 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,430 @@
1
+ require 'pg'
2
+ require_relative '../data'
3
+ require_relative 'support/transactions'
4
+ require_relative 'support/booleans'
5
+ require_relative 'support/connection_persistence'
6
+
7
+ module Rake
8
+ module DataTask
9
+
10
+ class Postgres < Db
11
+
12
+ @connections = {}
13
+ extend ConnectionPersistence
14
+
15
+ include StandardBooleans
16
+ include StandardTransactions
17
+
18
# Connect to a PostgreSQL database.
#
# Connections are shared: if this class already holds a connection for the same host, port,
# database and username, that connection is reused instead of opening a new one.
#
# @param [Hash] options the connection parameters (string keys)
# @option options [String] 'host' the server hostname or IP address (defaults to 'localhost')
# @option options [Integer] 'port' the server port number (defaults to 5432)
# @option options [String] 'database' the database name
# @option options [String] 'username' the name of the database user to connect as
# @option options [String] 'password' the database user's password (defaults to '')
# @return [Postgres] an instance of this adapter
def initialize(options)
  # the password is deliberately not part of the key used to look up shared connections
  conn_options = {
    :host => options['host'] || 'localhost',
    :port => options['port'] || 5432,
    :database => options['database'],
    :username => options['username']
  }

  @connection = self.class.persisted_connection(conn_options)

  if @connection.nil?
    # nothing to reuse: open a connection and register it for later instances
    @connection = PG::Connection.new(
      conn_options[:host],
      conn_options[:port],
      nil,
      nil,
      conn_options[:database],
      conn_options[:username],
      options['password'] || ''
    )
    @connection.set_notice_processor do |msg|
      # server notices starting with ERROR: are logged as errors, everything else as info
      severity = (msg =~ /^ERROR:/) ? :error : :info
      LOG.public_send(severity, 'psql') { msg.gsub(/\n/,'; ') }
    end
    self.class.persist_connection(@connection, conn_options)
  end

  # set up tracking if it isn't set up already
  set_up_tracking unless tracking_tables?
end
67
+
68
# Look up a named piece of data in this database.
#
# @param name the name handed to Data.new
# @return [Data] a Data object built from name and this adapter
def [](name)
  Data.new name, self
end
71
+
72
# Column definitions for the tracking table, adapted for PostgreSQL.
#
# Starts from the parent class's definitions and, in place:
# * upcases every enumerated value, because system tables store them in upper case,
# * maps the :table relation type to 'BASE TABLE',
# * uses 'timestamp with time zone' as the data type of the time column.
#
# @return [Hash] the adjusted column definitions
def table_tracker_columns
  columns = super

  columns.each_value do |definition|
    next unless definition.key?(:values)

    enum_values = definition[:values]
    # only existing keys are reassigned, so updating while iterating is safe
    enum_values.each_key { |label| enum_values[label] = enum_values[label].upcase }
  end

  columns[:relation_type][:values][:table] = 'BASE TABLE'
  columns[:time][:data_type] = :'timestamp with time zone'
  columns
end
89
+
90
# Run a SQL statement on the shared connection and return its rows.
#
# On any PG error the current transaction is rolled back and the original error is re-raised.
# The rollback is sent directly on the connection rather than through #execute, so a failing
# rollback (e.g. on a dead connection) is logged instead of recursing without bound.
#
# @param [String] sql the statement to run
# @return [Array<Array>] the result rows, as returned by PG::Result#values
# @raise [PG::Error] whatever error the statement raised
def execute(sql)
  connect if @connection.nil?

  begin
    @connection.exec(sql).values
  rescue PG::Error => e
    # PG::Error replaces the legacy top-level PGError constant, which newer pg releases drop
    if e.is_a?(PG::UndefinedTable)
      # \s+ tolerates the variable spacing the server puts after the severity prefix
      if /ERROR:\s+relation "(last_operations|.*\.last_operations)" does not exist/ =~ e.message
        LOG.error "Tracking is not set up in this schema. Set up tracking in this schema first."
      end
    else
      LOG.info e.message.chomp
    end

    begin
      @connection.exec 'rollback;'
    rescue PG::Error => rollback_error
      # keep the original failure as the one the caller sees
      LOG.error "Rollback after failed statement also failed: #{rollback_error.message.chomp}"
    end

    raise e
  end
end
114
+
115
# @return [Boolean] whether the tracking table (TABLE_TRACKER_NAME) exists
def tracking_tables?
  data_exists? TABLE_TRACKER_NAME
end
118
+
119
# (Re)create the tracking table.
#
# Any existing tracking table is dropped first via #tear_down_tracking. The table itself is
# created untracked (last argument to #create_table is false).
#
# @param [Hash] options
# @option options [String] :search_path schemas to use while creating the table; defaults to
#   the current search path
def set_up_tracking(options = {})
  tear_down_tracking options

  schemas = options[:search_path] || search_path.join(',')
  with_search_path(schemas) do
    columns_sql = table_tracker_columns.map { |name, defn| "#{name} #{defn[:data_type]}" }.join(', ')
    create_table TABLE_TRACKER_NAME, nil, " (#{columns_sql})", false
  end
end
132
+
133
# Drop the tracking table.
#
# @param [Hash] options
# @option options [String] :search_path schemas to use while dropping; defaults to the current
#   search path
def tear_down_tracking(options = {})
  schemas = options[:search_path] || search_path.join(',')
  with_search_path(schemas) { drop_table TABLE_TRACKER_NAME }
end
139
+
140
# Remove every row from the tracking table.
#
# The statement is issued directly: #truncate_table returns early for the tracking table, so
# delegating to it would leave the tracking data untouched.
#
# @param [Hash] options
# @option options [String] :search_path schemas to use while truncating; defaults to the
#   current search path
def reset_tracking(options = {})
  target_search_path = options[:search_path] || search_path.join(',')
  with_search_path(target_search_path) do
    execute "truncate table #{TABLE_TRACKER_NAME}"
  end
end
146
+
147
# Look up the most recent tracked time for a table.
#
# @param [String] qualified_table_name the table name, optionally prefixed with "schema."
# @return the value Sql.get_single_time extracts from the max(time) query result
#
# NOTE(review): when the name is unqualified and #first_schema_for finds no schema, the schema
# is nil and the generated SQL is malformed — confirm callers only pass existing tables.
def table_mtime(qualified_table_name)
  schema, table = parse_schema_and_table_name(qualified_table_name)
  # unqualified names resolve to the first schema on the search path holding the table
  schema ||= first_schema_for(table)

  with_search_path(schema) do
    newest = execute <<-EOSQL
      select max(time)
      from #{schema}.#{TABLE_TRACKER_NAME}
      where relation_name = '#{table}'
    EOSQL
    Sql.get_single_time(newest)
  end
end
161
+
162
+ alias_method :data_mtime, :table_mtime
163
+
164
# Truncate a table and record the truncation in the tracking table.
#
# Does nothing at all when asked to truncate the tracking table itself (compared
# case-insensitively).
#
# @param [String] table_name the table to truncate
def truncate_table(table_name)
  is_tracker = table_name.casecmp(TABLE_TRACKER_NAME) == 0

  unless is_tracker
    execute "truncate table #{table_name}"
    track_truncate table_name
  end
end
169
+
170
+ alias_method :truncate_data, :truncate_table
171
+
172
# Drop a table (cascading to dependent objects) and remove its tracking rows.
#
# The tracking table itself is dropped like any other table, but no tracking rows are touched
# for it.
#
# @param [String] table_name the table to drop
def drop_table(table_name)
  execute "drop table if exists #{table_name} cascade"

  is_tracker = table_name.casecmp(TABLE_TRACKER_NAME) == 0
  track_drop table_name unless is_tracker
end
177
+
178
# Delete the tracking rows for a dropped table.
#
# The tracking table is looked up in the same schema as the table when the name is
# schema-qualified. Nothing happens if that tracking table does not exist.
#
# @param [String] table_name the dropped table, optionally prefixed with "schema."
def track_drop(table_name)
  schema, bare_name = parse_schema_and_table_name(table_name)
  tracker = [schema, TABLE_TRACKER_NAME].compact.join('.')

  return unless table_exists?(tracker)

  execute <<-EOSQL
    delete from #{tracker}
    where
      relation_name = '#{bare_name}' and
      relation_type = '#{relation_type_values[:table]}'
  EOSQL
end
191
+
192
+ alias_method :drop_data, :drop_table
193
+
194
# @param [String] table_name the table to look for, optionally prefixed with "schema."
# @param [Hash] options passed through to #relation_exists?
# @return [Boolean] whether a relation of type :table with this name exists
def table_exists?(table_name, options = {})
  relation_exists?(table_name, :table, options)
end
197
+
198
+ alias_method :data_exists?, :table_exists?
199
+
200
# @param [String] view_name the view to look for, optionally prefixed with "schema."
# @param [Hash] options passed through to #relation_exists?
# @return [Boolean] whether a relation of type :view with this name exists
def view_exists?(view_name, options = {})
  relation_exists?(view_name, :view, options)
end
203
+
204
# Create a table, replacing any existing table of the same name.
#
# @param [String] table_name the table to create
# @param [String, nil] data_definition a query whose result populates the table ("as ..."),
#   or nil to create it from column_definitions alone
# @param [String] column_definitions SQL placed directly after the table name
# @param [Boolean] track_table whether to install tracking rules and record the creation
def create_table(table_name, data_definition, column_definitions, track_table=true)
  drop_table table_name

  # nil interpolates as an empty string, leaving a plain "create table" statement
  as_clause = data_definition.nil? ? nil : "as #{data_definition}"
  execute <<-EOSQL
    create table #{table_name} #{column_definitions}
    #{ as_clause }
  EOSQL

  return unless track_table

  create_tracking_rules(table_name)
  track_creation table_name, 0
end
215
+
216
+ alias_method :create_data, :create_table
217
+
218
# Create a view, replacing any existing view of the same name.
#
# @param [String] view_name the view to create
# @param [String] view_definition the query the view is defined as
def create_view(view_name, view_definition)
  drop_view(view_name)
  execute <<-EOSQL
    create view #{view_name} as
    #{view_definition}
  EOSQL
end

# Drop a view if it exists, cascading to dependent objects.
#
# @param [String] view_name the view to drop
def drop_view(view_name)
  execute("drop view if exists #{view_name} cascade")
end
229
+
230
# Which operations are tracked, and by whom.
#
# @return [Hash] :by_db lists the operations from #operations_supported_by_db; :by_app lists
#   :truncate and :create minus any of those
def operations_supported
  db_ops = operations_supported_by_db
  # called a second time, as before, rather than reusing db_ops
  app_ops = [:truncate, :create] - operations_supported_by_db

  { :by_db => db_ops, :by_app => app_ops }
end
236
+
237
# Run a block with a temporary search path.
#
# The previous search path is restored even when the block raises. The connection is shared
# between adapter instances, so a leaked search path would affect later, unrelated statements.
#
# @param [String, Array<String>] schemas the schema(s) to search while the block runs
# @yield the work to perform under the temporary search path
# @return the value of the block
def with_search_path(schemas)
  original_search_path = search_path
  execute "set search_path to #{Array(schemas).join(',')}"

  begin
    yield
  ensure
    execute "set search_path to #{original_search_path.join(',')}"
  end
end
244
+
245
# Run a block as a different database role.
#
# The previous role is restored even when the block raises. The connection is shared between
# adapter instances, so a leaked role would affect later, unrelated statements.
#
# @param [String] role the role to assume while the block runs
# @yield the work to perform as that role
# @return the value of the block
def with_role(role)
  original_role = current_user
  execute "set role #{role}"

  begin
    yield
  ensure
    execute "set role #{original_role}"
  end
end
252
+
253
+
254
+
255
+ private
256
+
257
# @return [Array<Symbol>] the operations tracked inside the database; currently exactly the
#   ones covered by rules
def operations_supported_by_db
  operations_supported_by_db_rules
end

# @return [Array<Symbol>] the operations for which #create_tracking_rules installs a rule
def operations_supported_by_db_rules
  %w[update insert delete].map(&:to_sym)
end
264
+
265
# Split a possibly schema-qualified table name into its schema and table parts.
#
# @param [String] table_name a name of the form "table" or "schema.table"
# @return [Array(String, String)] the schema name and the table name; the schema name is nil
#   when table_name contains no '.'
# @raise [RuntimeError] if table_name contains more than one '.'
def parse_schema_and_table_name(table_name)
  case table_name.count('.')
  when 0
    [nil, table_name]
  when 1
    schema_part, table_part = table_name.split('.')
    [schema_part, table_part]
  else
    raise "Invalid relation reference #{table_name} (only one '.' is allowed)"
  end
end
280
+
281
# Resolve the current search path.
#
# A leading "$user" entry is replaced with the current user name when a schema of that name
# exists, and removed otherwise. All names are returned in lower case.
#
# @return [Array<String>] the ordered schema names in the search path
def search_path
  schemas = execute("show search_path").first.first.split(',').map(&:strip)
  username = current_user

  # the default search path begins with a symbolic reference to the current username
  if schemas.first == '"$user"'
    matching_schemas = execute <<-EOSQL
      select 1
      from information_schema.schemata
      where schema_name = '#{username}'
    EOSQL

    no_user_schema = matching_schemas.nil? || matching_schemas.first.nil?
    remaining = schemas[1..-1]
    schemas = no_user_schema ? remaining : [username] + remaining
  end

  schemas.map(&:downcase)
end
304
+
305
# @return [String] the name of the current database user, as reported by "select current_user"
def current_user
  rows = execute("select current_user")
  rows.first.first
end
309
+
310
# Find the schema a table name resolves to.
#
# Candidate schemas are ranked by their position in the search path; schemas that are not on
# the search path get a rank of NaN.
#
# @param [String] table_name the table to look up
# @return [String, nil] the name of the first schema in the search path containing table_name,
#   or nil if #table_exists? reports that the table does not exist
def first_schema_for(table_name)
  return unless table_exists?(table_name)

  _schema, bare_name = parse_schema_and_table_name(table_name)

  # one "when" branch per search path entry, numbered from 1
  rank_clauses = search_path.each_with_index.map do |schema, position|
    "when table_schema = '#{schema}' then #{position + 1}"
  end

  ranked = execute <<-EOSQL
    select
      table_schema,
      search_order
    from (
      select
        table_schema,
        table_name,
        case
          #{rank_clauses.join(' ')}
          else 'NaN'::float
        end as search_order
      from information_schema.tables
      where table_name ilike '#{bare_name}'
    ) a
    order by search_order
    limit 1
  EOSQL
  ranked.first.first
end
340
+
341
# @param [String] table_name the table the rule is attached to
# @param [Symbol, String] operation the tracked operation
# @return [String] the rule name, "<table_name>_<operation>"
def rule_name(table_name, operation)
  [table_name, operation].join('_')
end
344
+
345
# Install one rule per operation in #operations_supported_by_db_rules on a table.
#
# Each rule replaces the table's row in the tracking table with a fresh row carrying the
# operation and clock_timestamp(). The tracking table is addressed in the table's own schema
# when table_name is schema-qualified.
#
# @param [String] table_name the table to track, optionally prefixed with "schema."
def create_tracking_rules(table_name)
  schema, bare_name = parse_schema_and_table_name(table_name)
  tracker = [schema, TABLE_TRACKER_NAME].compact.join('.')

  operations_supported_by_db_rules.each do |op|
    execute <<-EOSQL
      create or replace rule "#{rule_name(table_name, op)}" as
        on #{op.to_s} to #{table_name} do also (

          delete from #{tracker} where
            relation_name = '#{bare_name}' and
            relation_type = '#{relation_type_values[:table]}'
          ;

          insert into #{tracker} values (
            '#{bare_name}',
            '#{relation_type_values[:table]}',
            '#{operation_values[op]}',
            clock_timestamp()
          );

        )
    EOSQL
  end
end
370
+
371
# Record the creation of a table in the tracking table.
#
# Any existing tracking row for the table is removed before the new row is inserted; both
# statements are sent in a single #execute call.
#
# @param [String] table_name the created table, optionally prefixed with "schema."
# @param n_tuples not used by this adapter
def track_creation(table_name, n_tuples)
  schema, bare_name = parse_schema_and_table_name(table_name)
  tracker = [schema, TABLE_TRACKER_NAME].compact.join('.')
  created = :create

  execute <<-EOSQL
    delete from #{tracker} where
      relation_name = '#{bare_name}' and
      relation_type = '#{relation_type_values[:table]}'
    ;
    insert into #{tracker} values (
      '#{bare_name}',
      '#{relation_type_values[:table]}',
      '#{operation_values[created]}',
      clock_timestamp()
    );
  EOSQL
end
389
+
390
# Record the truncation of a table by updating its row in the tracking table.
#
# @param [String] table_name the truncated table, optionally prefixed with "schema."
def track_truncate(table_name)
  schema, bare_name = parse_schema_and_table_name(table_name)
  tracker = [schema, TABLE_TRACKER_NAME].compact.join('.')

  execute <<-EOSQL
    update #{tracker}
    set
      operation = '#{operation_values[:truncate]}',
      time = clock_timestamp()
    where
      relation_name = '#{bare_name}' and
      relation_type = '#{relation_type_values[:table]}'
  EOSQL
end
404
+
405
# Check information_schema.tables for a relation.
#
# A schema-qualified name is matched against that schema (case-insensitively, via ilike); an
# unqualified name is matched against every schema on the search path. The relation name itself
# is compared with "=".
#
# @param [String] relation_name the relation to look for, optionally prefixed with "schema."
# @param [Symbol] relation_type a key of relation_type_values (e.g. :table, :view)
# @param [Hash] options not used by this adapter
# @return [Boolean] whether at least one matching relation exists
def relation_exists?(relation_name, relation_type, options = {})
  schema, bare_name = parse_schema_and_table_name(relation_name)

  schema_filter =
    if schema.nil?
      "table_schema in (#{search_path.to_quoted_s})"
    else
      "table_schema ilike '#{schema}'"
    end

  counted = execute <<-EOSQL
    select count(*)
    from information_schema.tables
    where
      table_name = '#{bare_name}' and
      table_type = '#{relation_type_values[relation_type]}' and
      #{ schema_filter }
  EOSQL

  Sql.get_single_int(counted) > 0
end
426
+
427
+ end
428
+
429
+ end
430
+ end
@@ -0,0 +1,239 @@
1
+ require 'sqlite3'
2
+ require_relative '../data'
3
+ require_relative 'support/transactions'
4
+ require_relative 'support/booleans'
5
+
6
+ module Rake
7
+ module DataTask
8
+
9
+ class Sqlite < Db
10
+
11
# Connect to an Sqlite database.
#
# @param [Hash] options the connection parameters (string keys)
# @option options [String] 'database' the database name handed to SQLite3::Database.new
#   (defaults to 'temp')
# @return [Sqlite] an instance of this adapter
def initialize(options)
  database = options['database'] || 'temp'
  @connection = SQLite3::Database.new(database)

  # set up tracking if it isn't set up already
  set_up_tracking unless tracking_tables?
end
22
+
23
# Run a SQL statement and return whatever the connection's #execute returns.
#
# SQL errors are logged at info level and then re-raised.
#
# @param [String] sql the statement to run
# @raise [SQLite3::SQLException] if the statement fails
def execute(sql)
  connect if @connection.nil?

  @connection.execute(sql)
rescue SQLite3::SQLException => e
  LOG.info e.message.chomp
  raise e
end
32
+
33
+ include NumericBooleans
34
+ include StandardTransactions
35
+
36
# @return [Boolean] whether the tracking table (TABLE_TRACKER_NAME) exists
def tracking_tables?
  table_exists? TABLE_TRACKER_NAME
end
39
+
40
# Column definitions for the tracking table, adapted for SQLite.
#
# @return [Hash] the parent class's definitions with the whole :time entry replaced by one
#   whose data type is SQLite's timestamp
def table_tracker_columns
  sqlite_time_column = { :time => { :data_type => :timestamp } }
  super.merge(sqlite_time_column)
end
46
+
47
# (Re)create the tracking table.
#
# Any existing tracking table is dropped first via #tear_down_tracking. The table itself is
# created untracked (last argument to #create_table is false).
#
# @param [Hash] options only forwarded to #tear_down_tracking
def set_up_tracking(options = {})
  tear_down_tracking options

  columns_sql = table_tracker_columns.map { |name, defn| "#{name} #{defn[:data_type]}" }.join(', ')
  create_table TABLE_TRACKER_NAME, nil, " (#{columns_sql})", false
end
54
+
55
# Drop the tracking table.
#
# @param [Hash] options not used by this adapter
def tear_down_tracking(options = {})
  drop_table(TABLE_TRACKER_NAME)
end
58
+
59
# Remove every row from the tracking table.
#
# The statement is issued directly: #truncate_table returns early for the tracking table, so
# delegating to it would leave the tracking data untouched.
#
# @param [Hash] options not used by this adapter
def reset_tracking(options = {})
  execute "delete from #{TABLE_TRACKER_NAME}"
end
62
+
63
# Look up the most recent tracked time for a table.
#
# @param [String] table_name the table to look up
# @return the value Sql.get_single_time extracts from the query result
def table_mtime(table_name)
  newest = execute <<-EOSQL
    -- assume time is UTC (Sqlite3 default) and add offset for Ruby's Time.parse
    select datetime(max(time)) || ' -0000'
    from #{TABLE_TRACKER_NAME}
    where relation_name = '#{table_name}'
  EOSQL
  Sql.get_single_time(newest)
end
73
+
74
+ alias_method :data_mtime, :table_mtime
75
+
76
# Create a table, replacing any existing table of the same name.
#
# @param [String] table_name the table to create
# @param [String, nil] data_definition a query whose result populates the table ("as ..."),
#   or nil to create it from column_definitions alone
# @param [String] column_definitions SQL placed directly after the table name
# @param [Boolean] track_table whether to install tracking triggers and record the creation
def create_table(table_name, data_definition, column_definitions, track_table=true)
  drop_table table_name

  # nil interpolates as an empty string, leaving a plain "create table" statement
  as_clause = data_definition.nil? ? nil : "as #{data_definition}"
  execute <<-EOSQL
    create table #{table_name} #{column_definitions}
    #{ as_clause }
  EOSQL

  return unless track_table

  create_tracking_rules(table_name)
  track_creation table_name, 0
end
87
+
88
+ alias_method :create_data, :create_table
89
+
90
# Drop a table, the views that select from it, and its tracking rows.
#
# The drop is cascaded to views by hand: every view whose stored SQL contains
# " from <table> " or " join <table> " is dropped as well. The tracking table itself is dropped
# like any other table, but no tracking rows are touched for it.
#
# @param [String] table_name the table to drop
def drop_table(table_name)
  execute "drop table if exists #{table_name}"

  # The like patterns are single-quoted string literals. Double quotes denote identifiers in
  # SQL; SQLite only falls back to treating them as strings on builds where that legacy
  # behaviour is enabled.
  views_for_dropped_table = execute <<-EOSQL
    select name from sqlite_master
    where
      type = 'view' and (
        -- add trailing space for views without where statements
        sql || ' ' like '% from #{table_name} %' or
        sql like '% join #{table_name} %'
      )
  EOSQL
  views_for_dropped_table.flatten.each { |view_name| drop_view view_name }

  return if table_name.casecmp(TABLE_TRACKER_NAME) == 0
  track_drop table_name
end
110
+
111
+ alias_method :drop_data, :drop_table
112
+
113
# Create a view, replacing any existing view of the same name.
#
# @param [String] view_name the view to create
# @param [String] select_stmt the query the view is defined as
def create_view(view_name, select_stmt)
  drop_view(view_name)
  execute("create view #{view_name} as #{select_stmt}")
end

# Drop a view if it exists.
#
# @param [String] view_name the view to drop
def drop_view(view_name)
  execute("drop view if exists #{view_name}")
end
121
+
122
# Delete the tracking rows for a dropped table.
#
# @param [String] table_name the dropped table
def track_drop(table_name)
  table_type = relation_type_values[:table]

  execute <<-EOSQL
    delete from #{TABLE_TRACKER_NAME}
    where
      relation_name = '#{table_name}' and
      relation_type = '#{table_type}'
  EOSQL
end
130
+
131
# @param [String] table_name the table to look for
# @param [Hash] options passed through to #relation_exists?
# @return [Boolean] whether sqlite_master lists an object of type 'table' with this name
def table_exists?(table_name, options = {})
  relation_exists? table_name, 'table', options
end
134
+
135
+ alias_method :data_exists?, :table_exists?
136
+
137
# @param [String] table_name the view to look for
# @param [Hash] options passed through to #relation_exists?
# @return [Boolean] whether sqlite_master lists an object of type 'view' with this name
def view_exists?(table_name, options = {})
  relation_exists? table_name, 'view', options
end
140
+
141
# Delete every row of a table and record the truncation in the tracking table.
#
# Does nothing at all when asked to truncate the tracking table itself (compared
# case-insensitively).
#
# @param [String] table_name the table to empty
def truncate_table(table_name)
  is_tracker = table_name.casecmp(TABLE_TRACKER_NAME) == 0

  unless is_tracker
    execute "delete from #{table_name}"
    track_truncate table_name
  end
end
146
+
147
+ alias_method :truncate_data, :truncate_table
148
+
149
# Record the truncation of a table by updating its row in the tracking table.
#
# @param [String] table_name the truncated table
def track_truncate(table_name)
  truncate_label = operation_values[:truncate]

  execute <<-EOSQL
    update #{TABLE_TRACKER_NAME}
    set
      operation = '#{truncate_label}',
      -- Sqlite generates times at UTC and stores them without zone information
      time = datetime('now')
    where
      relation_name = '#{table_name}' and
      relation_type = '#{relation_type_values[:table]}'
  EOSQL
end
161
+
162
# Which operations are tracked, and by whom.
#
# @return [Hash] :by_db lists the operations from #operations_supported_by_db; :by_app lists
#   :truncate and :create
def operations_supported
  db_ops = operations_supported_by_db
  app_ops = [:truncate, :create]

  { :by_db => db_ops, :by_app => app_ops }
end
168
+
169
# Look up a named piece of data in this database.
#
# @param name the name handed to Data.new
# @return [Data] a Data object built from name and this adapter
def [](name)
  Data.new name, self
end
172
+
173
+
174
+
175
+ private
176
+
177
# @return [Array<Symbol>] the operations for which #create_tracking_rules installs a trigger
def operations_supported_by_db
  %w[update insert delete].map(&:to_sym)
end
180
+
181
# @param [String] table_name the table the trigger is attached to
# @param [Symbol, String] operation the tracked operation
# @return [String] the trigger name, "<table_name>_<operation>"
def rule_name(table_name, operation)
  [table_name, operation].join('_')
end
184
+
185
# Install one trigger per operation in #operations_supported_by_db on a table.
#
# Each trigger fires after its operation and updates the operation and time of the table's
# existing row in the tracking table.
#
# @param [String] table_name the table to track
def create_tracking_rules(table_name)
  operations_supported_by_db.each do |op|
    execute <<-EOSQL
      create trigger #{rule_name(table_name, op)}
        after #{op.to_s} on #{table_name} begin

          update #{TABLE_TRACKER_NAME}
          set
            operation = '#{operation_values[op]}',
            time = datetime()
          where
            relation_name = '#{table_name}' and
            relation_type = '#{relation_type_values[:table]}'

          ;
        end
    EOSQL
  end
end
204
+
205
# Record the creation of a table in the tracking table.
#
# Any existing "create" row for the table is deleted, then a fresh row stamped with
# datetime('now') is inserted. The two statements are executed separately.
#
# @param [String] table_name the created table
# @param n_tuples not used by this adapter
def track_creation(table_name, n_tuples)
  created = :create

  execute <<-EOSQL
    delete from #{TABLE_TRACKER_NAME} where
      relation_name = '#{table_name}' and
      relation_type = '#{relation_type_values[:table]}' and
      operation = '#{operation_values[created]}'
    ;
  EOSQL

  execute <<-EOSQL
    insert into #{TABLE_TRACKER_NAME} values (
      '#{table_name}',
      '#{relation_type_values[:table]}',
      '#{operation_values[created]}',
      datetime('now')
    );
  EOSQL
end
223
+
224
# Check sqlite_master for an object with the given name and type.
#
# @param [String] relation_name the object name to look for
# @param [String] relation_type the sqlite_master type, e.g. 'table' or 'view'
# @param [Hash] options not used by this adapter
# @return [Boolean] whether at least one matching object exists
def relation_exists?(relation_name, relation_type, options = {})
  counted = execute <<-EOSQL
    select count(*) from sqlite_master
    where
      name = '#{relation_name}' and
      type = '#{relation_type}'
  EOSQL

  Sql.get_single_int(counted) > 0
end
235
+
236
+ end
237
+
238
+ end
239
+ end
@@ -0,0 +1,19 @@
1
module Rake
  module DataTask

    class Db

      # Boolean values expressed as the strings 'true' and 'false'.
      module StandardBooleans
        # @return [String] the value used for true
        def truthy_value
          'true'
        end

        # @return [String] the value used for false
        def falsey_value
          'false'
        end
      end

      # Boolean values expressed as the integers 1 and 0.
      module NumericBooleans
        # @return [Integer] the value used for true
        def truthy_value
          1
        end

        # @return [Integer] the value used for false
        def falsey_value
          0
        end
      end

    end

  end
end
@@ -0,0 +1,18 @@
1
# Adapters for datastores with long-lived connections should re-use existing connections instead
# of creating redundant connections. This module provides methods for persisting a connection
# across instances of an adapter and retrieving a persisted connection from the adapter class.
#
# Connections are kept in the @connections class instance variable of the extending class. A
# class may declare @connections = {} itself; if it does not, the hash is created on first use.
module ConnectionPersistence

  # Retrieve a connection by the hash of options that uniquely identify it.
  #
  # @param [Hash] conn_options the options the connection was persisted under
  # @return the persisted connection, or nil if none was persisted for these options
  def persisted_connection(conn_options)
    connection_registry[conn_options]
  end

  # Save a connection and key it by a hash of options that uniquely identify it.
  #
  # @param conn the connection to persist
  # @param [Hash] conn_options the options that uniquely identify the connection
  # @return the connection that was persisted
  def persist_connection(conn, conn_options)
    connection_registry[conn_options] = conn
  end

  private

  # The per-class store of persisted connections, created lazily so that an extending class
  # that forgot to declare @connections does not fail with NoMethodError on nil.
  def connection_registry
    @connections ||= {}
  end

end