purview 1.0.0.alpha

Sign up to get free protection for your applications and to get access to all the features.
Files changed (83) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +33 -0
  3. data/.travis.yml +18 -0
  4. data/Gemfile +3 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +143 -0
  7. data/Rakefile +11 -0
  8. data/TODO +81 -0
  9. data/lib/purview/columns/base.rb +65 -0
  10. data/lib/purview/columns/boolean.rb +11 -0
  11. data/lib/purview/columns/created_timestamp.rb +11 -0
  12. data/lib/purview/columns/date.rb +11 -0
  13. data/lib/purview/columns/float.rb +11 -0
  14. data/lib/purview/columns/id.rb +11 -0
  15. data/lib/purview/columns/integer.rb +11 -0
  16. data/lib/purview/columns/money.rb +11 -0
  17. data/lib/purview/columns/string.rb +11 -0
  18. data/lib/purview/columns/text.rb +11 -0
  19. data/lib/purview/columns/time.rb +11 -0
  20. data/lib/purview/columns/timestamp.rb +11 -0
  21. data/lib/purview/columns/updated_timestamp.rb +11 -0
  22. data/lib/purview/columns/uuid.rb +11 -0
  23. data/lib/purview/columns.rb +14 -0
  24. data/lib/purview/connections/base.rb +55 -0
  25. data/lib/purview/connections/mysql.rb +39 -0
  26. data/lib/purview/connections/postgresql.rb +27 -0
  27. data/lib/purview/connections.rb +3 -0
  28. data/lib/purview/databases/base.rb +559 -0
  29. data/lib/purview/databases/mysql.rb +207 -0
  30. data/lib/purview/databases/postgresql.rb +210 -0
  31. data/lib/purview/databases.rb +3 -0
  32. data/lib/purview/exceptions/base.rb +5 -0
  33. data/lib/purview/exceptions/could_not_acquire_lock.rb +9 -0
  34. data/lib/purview/exceptions/lock_already_released.rb +9 -0
  35. data/lib/purview/exceptions/no_table.rb +9 -0
  36. data/lib/purview/exceptions/no_window.rb +9 -0
  37. data/lib/purview/exceptions/rows_outside_window.rb +18 -0
  38. data/lib/purview/exceptions/table.rb +13 -0
  39. data/lib/purview/exceptions.rb +7 -0
  40. data/lib/purview/loaders/base.rb +154 -0
  41. data/lib/purview/loaders/mysql.rb +81 -0
  42. data/lib/purview/loaders/postgresql.rb +81 -0
  43. data/lib/purview/loaders.rb +3 -0
  44. data/lib/purview/loggers/base.rb +99 -0
  45. data/lib/purview/loggers/console.rb +11 -0
  46. data/lib/purview/loggers.rb +2 -0
  47. data/lib/purview/mixins/helpers.rb +21 -0
  48. data/lib/purview/mixins/logger.rb +21 -0
  49. data/lib/purview/mixins.rb +2 -0
  50. data/lib/purview/parsers/base.rb +39 -0
  51. data/lib/purview/parsers/csv.rb +49 -0
  52. data/lib/purview/parsers/tsv.rb +11 -0
  53. data/lib/purview/parsers.rb +3 -0
  54. data/lib/purview/pullers/base.rb +19 -0
  55. data/lib/purview/pullers/uri.rb +66 -0
  56. data/lib/purview/pullers.rb +2 -0
  57. data/lib/purview/refinements/object.rb +5 -0
  58. data/lib/purview/refinements/time.rb +5 -0
  59. data/lib/purview/refinements.rb +2 -0
  60. data/lib/purview/structs/base.rb +10 -0
  61. data/lib/purview/structs/result.rb +7 -0
  62. data/lib/purview/structs/window.rb +7 -0
  63. data/lib/purview/structs.rb +3 -0
  64. data/lib/purview/tables/base.rb +140 -0
  65. data/lib/purview/tables/raw.rb +13 -0
  66. data/lib/purview/tables.rb +2 -0
  67. data/lib/purview/types/base.rb +9 -0
  68. data/lib/purview/types/boolean.rb +9 -0
  69. data/lib/purview/types/date.rb +9 -0
  70. data/lib/purview/types/float.rb +9 -0
  71. data/lib/purview/types/integer.rb +9 -0
  72. data/lib/purview/types/money.rb +9 -0
  73. data/lib/purview/types/string.rb +9 -0
  74. data/lib/purview/types/text.rb +9 -0
  75. data/lib/purview/types/time.rb +9 -0
  76. data/lib/purview/types/timestamp.rb +9 -0
  77. data/lib/purview/types/uuid.rb +9 -0
  78. data/lib/purview/types.rb +11 -0
  79. data/lib/purview/version.rb +3 -0
  80. data/lib/purview.rb +27 -0
  81. data/purview.gemspec +29 -0
  82. data/spec/spec_helper.rb +5 -0
  83. metadata +210 -0
@@ -0,0 +1,27 @@
1
module Purview
  module Connections
    # Connection adapter that talks to PostgreSQL through the `PG` library.
    #
    # NOTE(review): the superclass is not visible from this file; the private
    # methods below are presumably the hooks it invokes -- confirm against
    # Purview::Connections::Base.
    class PostgreSQL < Base
      # Runs the caller's block inside `connection.transaction`. Nothing is
      # passed through to the caller's block.
      def with_transaction
        connection.transaction { |_pg_connection| yield }
      end

      private

      # Sends +sql+ to the server via `connection.exec` and returns its result.
      def execute_sql(sql)
        connection.exec(sql)
      end

      # Returns `result.to_a`, or +result+ itself when it is nil/false.
      def extract_rows(result)
        return result unless result

        result.to_a
      end

      # Returns `result.cmd_tuples`, or +result+ itself when it is nil/false.
      def extract_rows_affected(result)
        return result unless result

        result.cmd_tuples
      end

      # Opens a new connection by handing `opts` to `PG.connect`.
      def new_connection
        PG.connect(opts)
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require 'purview/connections/base'
2
+ require 'purview/connections/mysql'
3
+ require 'purview/connections/postgresql'
@@ -0,0 +1,559 @@
1
# Standard-library pieces used directly below (`Set` in #initialize and
# `Time.parse` in the metadata getters); required here so this file does not
# depend on the load order of the rest of the library.
require 'set'
require 'time'

module Purview
  module Databases
    # Abstract base class for database adapters.
    #
    # It holds the set of registered tables and implements the generic
    # create/drop/lock/sync flows. Everything engine specific (SQL text, literal
    # values, the connection class) is delegated to methods that raise here and
    # must be overridden by concrete subclasses.
    class Base
      include Purview::Mixins::Helpers
      include Purview::Mixins::Logger

      attr_reader :name

      # @param name the database name
      # @param opts [Hash] adapter options, stored as given
      def initialize(name, opts={})
        @name = name
        @opts = opts
        @tables = Set.new
      end

      # Registers +table+ with this database.
      def add_table(table)
        @tables << table
      end

      # Builds a new connection object and returns the result of its `connect`.
      def connect
        connection.connect
      end

      # Creates an index on +columns+ of +table+.
      #
      # @param opts [Hash] may carry `:table` (e.g. `:name`) and `:index`
      #   (e.g. `:name`) option hashes
      # @return the name of the index that was created
      def create_index(connection, table, columns, opts={})
        table_opts = extract_table_options(opts)
        resolved_table_name = table_name(table, table_opts)
        index_opts = extract_index_options(opts)
        index_name(resolved_table_name, columns, index_opts).tap do |resolved_index_name|
          connection.execute(
            create_index_sql(
              resolved_table_name,
              resolved_index_name,
              table,
              columns,
              index_opts
            )
          )
        end
      end

      # Creates +table+ and, when the table options ask for it, its indices.
      #
      # @return the name of the table that was created
      def create_table(connection, table, opts={})
        table_opts = extract_table_options(opts)
        table_name(table, table_opts).tap do |resolved_table_name|
          connection.execute(
            create_table_sql(resolved_table_name, table, table_opts)
          )
          if table_opts[:create_indices]
            create_table_indices(connection, table, resolved_table_name)
          end
        end
      end

      # Same as #create_table, but issues the temporary-table SQL.
      #
      # @return the name of the temporary table that was created
      def create_temporary_table(connection, table, opts={})
        table_opts = extract_table_options(opts)
        table_name(table, table_opts).tap do |resolved_table_name|
          connection.execute(
            create_temporary_table_sql(resolved_table_name, table, table_opts)
          )
          if table_opts[:create_indices]
            create_table_indices(connection, table, resolved_table_name)
          end
        end
      end

      # Marks +table+ as disabled in the table metadata (uses a new connection).
      def disable_table(table)
        with_context_logging("`disable_table` for: #{table_name(table)}") do
          with_new_connection do |connection|
            set_enabled_for_table(connection, table, false_value)
          end
        end
      end

      # Drops +table+.
      #
      # @return the name of the table that was dropped
      def drop_table(connection, table, opts={})
        table_opts = extract_table_options(opts)
        table_name(table, table_opts).tap do |resolved_table_name|
          connection.execute(
            drop_table_sql(resolved_table_name, table, table_opts)
          )
        end
      end

      # Drops the index on +columns+ of +table+.
      #
      # @return the name of the index that was dropped
      def drop_index(connection, table, columns, opts={})
        table_opts = extract_table_options(opts)
        resolved_table_name = table_name(table, table_opts)
        index_opts = extract_index_options(opts)
        index_name(resolved_table_name, columns, index_opts).tap do |resolved_index_name|
          connection.execute(
            drop_index_sql(
              resolved_table_name,
              resolved_index_name,
              table,
              columns,
              index_opts
            )
          )
        end
      end

      # Marks +table+ as enabled in the table metadata (uses a new connection).
      def enable_table(table)
        with_context_logging("`enable_table` for: #{table_name(table)}") do
          with_new_connection do |connection|
            set_enabled_for_table(connection, table, true_value)
          end
        end
      end

      # Abstract: the engine's literal for boolean false.
      def false_value
        raise %{All "#{Base}(s)" must override the "false_value" method}
      end

      # Tries to take the lock on +table+ (uses a new connection).
      #
      # @raise [Purview::Exceptions::CouldNotAcquireLock] when the lock
      #   statement affected no rows
      def lock_table(table, timestamp)
        with_context_logging("`lock_table` for: #{table_name(table)}") do
          with_new_connection do |connection|
            rows_affected =
              connection.execute(lock_table_sql(table, timestamp)).rows_affected
            raise Purview::Exceptions::CouldNotAcquireLock.new(table) if zero?(rows_affected)
          end
        end
      end

      # Abstract: the engine's literal for NULL.
      def null_value
        raise %{All "#{Base}(s)" must override the "null_value" method}
      end

      # Returns `null_value` for nil, otherwise `value.quoted`.
      def quoted(value)
        value.nil? ? null_value : value.quoted
      end

      # Picks the next table and window and syncs it, all inside one
      # transaction on a fresh connection, holding the table lock while
      # `table.sync` runs.
      #
      # @raise [Purview::Exceptions::NoTable] when no table is returned
      # @raise [Purview::Exceptions::NoWindow] when there is no window to pull
      def sync
        with_new_connection do |connection|
          with_transaction(connection) do |timestamp|
            with_next_table(connection, timestamp) do |table|
              with_next_window(connection, table, timestamp) do |window|
                with_table_locked(table, timestamp) do
                  table.sync(connection, window)
                end
              end
            end
          end
        end
      end

      # Abstract: the engine's literal for boolean true.
      def true_value
        raise %{All "#{Base}(s)" must override the "true_value" method}
      end

      # Releases the lock on +table+ (uses a new connection).
      #
      # @raise [Purview::Exceptions::LockAlreadyReleased] when the unlock
      #   statement affected no rows
      def unlock_table(table)
        with_context_logging("`unlock_table` for: #{table_name(table)}") do
          with_new_connection do |connection|
            rows_affected =
              connection.execute(unlock_table_sql(table)).rows_affected
            raise Purview::Exceptions::LockAlreadyReleased.new(table) if zero?(rows_affected)
          end
        end
      end

      private

      attr_reader :opts, :tables

      # Names of the given columns, in order.
      def column_names(columns)
        columns.map(&:name)
      end

      # Builds the column's DDL fragment:
      # name, type, optional "(limit)", optional PRIMARY KEY, NULL / NOT NULL,
      # optional DEFAULT.
      def column_definition(column)
        # Work on a copy: `to_s` returns the receiver itself for a String, so
        # appending to it directly would rewrite the column's own name (or
        # raise on a frozen name).
        definition = column.name.to_s.dup
        definition << " #{type(column)}"
        column_limit = limit(column)
        definition << "(#{column_limit})" if column_limit
        definition << ' PRIMARY KEY' if primary_key?(column)
        definition << " #{nullable?(column) ? 'NULL' : 'NOT NULL'}"
        column_default = default(column)
        definition << " DEFAULT #{column_default}" if column_default
        definition
      end

      # Definitions for the id, created-timestamp and updated-timestamp columns
      # followed by every data column of +table+.
      def column_definitions(table)
        definitions = [
          column_definition(table.id_column),
          column_definition(table.created_timestamp_column),
          column_definition(table.updated_timestamp_column),
        ]
        table.data_columns.each do |column|
          definitions << column_definition(column)
        end
        definitions
      end

      # A new (not yet connected) connection object; a fresh one on every call.
      def connection
        connection_type.new(connection_opts)
      end

      # Options handed to the connection class; empty unless overridden.
      def connection_opts
        {}
      end

      # Abstract: the connection class to instantiate.
      def connection_type
        raise %{All "#{Base}(s)" must override the "connection_type" method}
      end

      # Abstract: SQL that creates an index.
      def create_index_sql(table_name, index_name, table, columns, index_opts={})
        raise %{All "#{Base}(s)" must override the "create_index_sql" method}
      end

      # Creates one index per entry of `table.indexed_columns` on the table
      # named +resolved_table_name+.
      def create_table_indices(connection, table, resolved_table_name)
        table.indexed_columns.each do |columns|
          create_index(
            connection,
            table,
            columns,
            :table => { :name => resolved_table_name }
          )
        end
      end

      # Abstract: SQL that creates a table.
      def create_table_sql(table_name, table, table_opts={})
        raise %{All "#{Base}(s)" must override the "create_table_sql" method}
      end

      # Abstract: SQL that creates a temporary table.
      def create_temporary_table_sql(table_name, table, table_opts={})
        raise %{All "#{Base}(s)" must override the "create_temporary_table_sql" method}
      end

      # The column's own default, falling back to the per-type default.
      def default(column)
        column.default || default_map[column.type]
      end

      # Per-type defaults; empty unless overridden.
      def default_map
        {}
      end

      # Abstract: SQL that drops an index.
      def drop_index_sql(table_name, index_name, table, columns, index_opts={})
        raise %{All "#{Base}(s)" must override the "drop_index_sql" method}
      end

      # Abstract: SQL that drops a table.
      def drop_table_sql(table_name, table, table_opts={})
        raise %{All "#{Base}(s)" must override the "drop_table_sql" method}
      end

      # Runs the "ensure the metadata table exists" SQL on a new connection.
      def ensure_table_metadata_table_exists
        with_new_connection do |connection|
          connection.execute(ensure_table_metadata_table_exists_sql)
        end
      end

      # Abstract: SQL that ensures a metadata row exists for +table+.
      def ensure_table_metadata_exists_for_table_sql(table)
        raise %{All "#{Base}(s)" must override the "ensure_table_metadata_exists_for_table_sql" method}
      end

      # Abstract: SQL that ensures the metadata table exists.
      def ensure_table_metadata_table_exists_sql
        raise %{All "#{Base}(s)" must override the "ensure_table_metadata_table_exists_sql" method}
      end

      # Runs the per-table "ensure metadata exists" SQL for every registered
      # table, sharing one new connection.
      def ensure_table_metadata_exists_for_tables
        with_new_connection do |connection|
          tables.each do |table|
            connection.execute(ensure_table_metadata_exists_for_table_sql(table))
          end
        end
      end

      # The `:index` options from +opts+, or an empty hash.
      def extract_index_options(opts)
        opts[:index] || {}
      end

      # The `:table` options from +opts+; when absent, defaults to creating
      # indices.
      def extract_table_options(opts)
        opts[:table] || { :create_indices => true }
      end

      # Executes +sql+ and returns the value stored under +column_name+ in the
      # first row, or nil when the statement produced no rows.
      def first_row_value(connection, sql, column_name)
        row = connection.execute(sql).rows[0]
        row && row[column_name]
      end

      # Whether +table+ is flagged as enabled in the table metadata.
      #
      # @return [Boolean] true when the stored value reads as
      #   true/t/yes/y/1 (case-insensitive), false otherwise
      def get_enabled_for_table(connection, table)
        enabled = first_row_value(
          connection,
          get_enabled_for_table_sql(table),
          table_metadata_enabled_column_name
        )
        # `to_s` lets non-String results (nil, true, 1, ...) go through the
        # same textual check instead of relying on `=~` being defined on them.
        !!(enabled.to_s =~ /\A(true|t|yes|y|1)\z/i)
      end

      # Abstract: SQL that reads the enabled flag for +table+.
      def get_enabled_for_table_sql(table)
        raise %{All "#{Base}(s)" must override the "get_enabled_for_table_sql" method}
      end

      # @return [Time, nil] the stored last-pulled-at value, parsed
      def get_last_pulled_at_for_table(connection, table)
        timestamp = first_row_value(
          connection,
          get_last_pulled_at_for_table_sql(table),
          table_metadata_last_pulled_at_column_name
        )
        timestamp ? Time.parse(timestamp) : nil
      end

      # Abstract: SQL that reads last-pulled-at for +table+.
      def get_last_pulled_at_for_table_sql(table)
        raise %{All "#{Base}(s)" must override the "get_last_pulled_at_for_table_sql" method}
      end

      # @return [Time, nil] the stored locked-at value, parsed
      def get_locked_at_for_table(connection, table)
        timestamp = first_row_value(
          connection,
          get_locked_at_for_table_sql(table),
          table_metadata_locked_at_column_name
        )
        timestamp ? Time.parse(timestamp) : nil
      end

      # Abstract: SQL that reads locked-at for +table+.
      def get_locked_at_for_table_sql(table)
        raise %{All "#{Base}(s)" must override the "get_locked_at_for_table_sql" method}
      end

      # @return the stored max-timestamp-pulled value parsed as a Time, or
      #   `table.starting_timestamp` when nothing is stored
      def get_max_timestamp_pulled_for_table(connection, table)
        timestamp = first_row_value(
          connection,
          get_max_timestamp_pulled_for_table_sql(table),
          table_metadata_max_timestamp_pulled_column_name
        )
        timestamp ? Time.parse(timestamp) : table.starting_timestamp
      end

      # Abstract: SQL that reads max-timestamp-pulled for +table+.
      def get_max_timestamp_pulled_for_table_sql(table)
        raise %{All "#{Base}(s)" must override the "get_max_timestamp_pulled_for_table_sql" method}
      end

      # An explicit `:name` from +index_opts+, otherwise
      # "index_<table>_on_<col>_and_<col>...".
      def index_name(table_name, columns, index_opts={})
        index_opts[:name] || 'index_%s_on_%s' % [
          table_name,
          column_names(columns).join('_and_'),
        ]
      end

      # The size limit to render for +column+: nil for limitless types,
      # otherwise the column's own limit or the per-type fallback.
      def limit(column)
        return nil if limitless_types.include?(column.type)

        column.limit || limit_map[column.type]
      end

      # Per-type limits; empty unless overridden.
      def limit_map
        {}
      end

      # Types that never take a limit; empty unless overridden.
      def limitless_types
        []
      end

      # Abstract: SQL that takes the lock on +table+.
      def lock_table_sql(table, timestamp)
        raise %{All "#{Base}(s)" must override the "lock_table_sql" method}
      end

      # Ensures the metadata table and rows exist, then asks the database which
      # table to process next.
      #
      # @return the registered table with the returned name, or nil
      def next_table(connection, timestamp)
        ensure_table_metadata_table_exists
        ensure_table_metadata_exists_for_tables
        next_table_name = first_row_value(
          connection,
          next_table_sql(timestamp),
          table_metadata_table_name_column_name
        )
        next_table_name ? tables_by_name[next_table_name] : nil
      end

      # Abstract: SQL that selects the next table to process.
      def next_table_sql(timestamp)
        raise %{All "#{Base}(s)" must override the "next_table_sql" method}
      end

      # The next window to pull for +table+: starts at the max timestamp pulled
      # so far, spans `table.window_size`, and is capped at +timestamp+.
      #
      # @return [Purview::Structs::Window, nil] nil when the start lies after
      #   +timestamp+
      def next_window(connection, table, timestamp)
        min = get_max_timestamp_pulled_for_table(connection, table)
        return nil if min > timestamp

        max = min + table.window_size
        max = timestamp if max > timestamp
        Purview::Structs::Window.new(:min => min, :max => max)
      end

      def nullable?(column)
        column.nullable?
      end

      def primary_key?(column)
        column.primary_key?
      end

      # Writes the enabled flag for +table+.
      def set_enabled_for_table(connection, table, enabled)
        connection.execute(set_enabled_for_table_sql(table, enabled))
      end

      # Abstract: SQL that writes the enabled flag for +table+.
      def set_enabled_for_table_sql(table, enabled)
        raise %{All "#{Base}(s)" must override the "set_enabled_for_table_sql" method}
      end

      # Writes last-pulled-at for +table+.
      def set_last_pulled_at_for_table(connection, table, timestamp)
        connection.execute(set_last_pulled_at_for_table_sql(table, timestamp))
      end

      # Abstract: SQL that writes last-pulled-at for +table+.
      def set_last_pulled_at_for_table_sql(table, timestamp)
        raise %{All "#{Base}(s)" must override the "set_last_pulled_at_for_table_sql" method}
      end

      # Writes locked-at for +table+.
      def set_locked_at_for_table(connection, table, timestamp)
        connection.execute(set_locked_at_for_table_sql(table, timestamp))
      end

      # Abstract: SQL that writes locked-at for +table+.
      def set_locked_at_for_table_sql(table, timestamp)
        raise %{All "#{Base}(s)" must override the "set_locked_at_for_table_sql" method}
      end

      # Writes max-timestamp-pulled for +table+.
      def set_max_timestamp_pulled_for_table(connection, table, timestamp)
        connection.execute(set_max_timestamp_pulled_for_table_sql(table, timestamp))
      end

      # Abstract: SQL that writes max-timestamp-pulled for +table+.
      def set_max_timestamp_pulled_for_table_sql(table, timestamp)
        raise %{All "#{Base}(s)" must override the "set_max_timestamp_pulled_for_table_sql" method}
      end

      # --- Names and column definitions of the metadata table ---------------

      def table_metadata_enabled_column_definition
        column_definition(
          Purview::Columns::Boolean.new(table_metadata_enabled_column_name)
        )
      end

      def table_metadata_enabled_column_name
        'enabled'
      end

      def table_metadata_last_pulled_at_column_definition
        column_definition(
          Purview::Columns::Timestamp.new(table_metadata_last_pulled_at_column_name)
        )
      end

      def table_metadata_last_pulled_at_column_name
        'last_pulled_at'
      end

      def table_metadata_locked_at_column_definition
        column_definition(
          Purview::Columns::Timestamp.new(table_metadata_locked_at_column_name)
        )
      end

      def table_metadata_locked_at_column_name
        'locked_at'
      end

      def table_metadata_max_timestamp_pulled_column_definition
        column_definition(
          Purview::Columns::Timestamp.new(table_metadata_max_timestamp_pulled_column_name)
        )
      end

      def table_metadata_max_timestamp_pulled_column_name
        'max_timestamp_pulled'
      end

      def table_metadata_table_name
        'table_metadata'
      end

      def table_metadata_table_name_column_definition
        column_definition(
          Purview::Columns::String.new(table_metadata_table_name_column_name)
        )
      end

      def table_metadata_table_name_column_name
        'table_name'
      end

      # Registered tables keyed by `table.name`; rebuilt on every call.
      def tables_by_name
        tables.each_with_object({}) do |table, lookup|
          lookup[table.name] = table
        end
      end

      # An explicit `:name` from +table_opts+, otherwise the table's own name.
      def table_name(table, table_opts={})
        table_opts[:name] || table.name
      end

      # The SQL type name for +column+, per #type_map.
      def type(column)
        type_map[column.type]
      end

      # Mapping from Purview type classes to SQL type names.
      def type_map
        {
          Purview::Types::Boolean => 'boolean',
          Purview::Types::Date => 'date',
          Purview::Types::Float => 'float',
          Purview::Types::Integer => 'integer',
          Purview::Types::String => 'varchar',
          Purview::Types::Text => 'text',
          Purview::Types::Time => 'time',
          Purview::Types::Timestamp => 'timestamp',
        }
      end

      # Abstract: SQL that releases the lock on +table+.
      def unlock_table_sql(table)
        raise %{All "#{Base}(s)" must override the "unlock_table_sql" method}
      end

      # Opens a connection, yields it, and disconnects it afterwards even when
      # the block raises. Nothing is disconnected when connecting itself fails.
      def with_new_connection
        opened_connection = nil
        opened_connection = connect
        yield opened_connection
      ensure
        opened_connection.disconnect if opened_connection
      end

      # Yields the next table to process, then records +timestamp+ as its
      # last-pulled-at value (skipped when the block raises).
      #
      # @raise [Purview::Exceptions::NoTable] when there is no next table
      def with_next_table(connection, timestamp)
        table = next_table(connection, timestamp)
        raise Purview::Exceptions::NoTable.new unless table

        yield table
        set_last_pulled_at_for_table(connection, table, timestamp)
      end

      # Yields the next window for +table+, then records the window's max as
      # the table's max-timestamp-pulled (skipped when the block raises).
      #
      # @raise [Purview::Exceptions::NoWindow] when there is no next window
      def with_next_window(connection, table, timestamp)
        window = next_window(connection, table, timestamp)
        raise Purview::Exceptions::NoWindow.new(table) unless window

        yield window
        set_max_timestamp_pulled_for_table(connection, table, window.max)
      end

      # Holds the lock on +table+ for the duration of the block.
      def with_table_locked(table, timestamp)
        # Acquire outside the begin/ensure: if the lock cannot be taken we must
        # not run unlock_table, which would act on a lock this call never held
        # and could replace CouldNotAcquireLock with LockAlreadyReleased.
        lock_table(table, timestamp)
        begin
          yield
        ensure
          unlock_table(table)
        end
      end

      # Runs the block inside `connection.with_transaction`, yielding the
      # current UTC time taken when the block starts.
      def with_transaction(connection)
        connection.with_transaction { yield Time.now.utc }
      end
    end
  end
end