purview 1.0.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +33 -0
  3. data/.travis.yml +18 -0
  4. data/Gemfile +3 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +143 -0
  7. data/Rakefile +11 -0
  8. data/TODO +81 -0
  9. data/lib/purview/columns/base.rb +65 -0
  10. data/lib/purview/columns/boolean.rb +11 -0
  11. data/lib/purview/columns/created_timestamp.rb +11 -0
  12. data/lib/purview/columns/date.rb +11 -0
  13. data/lib/purview/columns/float.rb +11 -0
  14. data/lib/purview/columns/id.rb +11 -0
  15. data/lib/purview/columns/integer.rb +11 -0
  16. data/lib/purview/columns/money.rb +11 -0
  17. data/lib/purview/columns/string.rb +11 -0
  18. data/lib/purview/columns/text.rb +11 -0
  19. data/lib/purview/columns/time.rb +11 -0
  20. data/lib/purview/columns/timestamp.rb +11 -0
  21. data/lib/purview/columns/updated_timestamp.rb +11 -0
  22. data/lib/purview/columns/uuid.rb +11 -0
  23. data/lib/purview/columns.rb +14 -0
  24. data/lib/purview/connections/base.rb +55 -0
  25. data/lib/purview/connections/mysql.rb +39 -0
  26. data/lib/purview/connections/postgresql.rb +27 -0
  27. data/lib/purview/connections.rb +3 -0
  28. data/lib/purview/databases/base.rb +559 -0
  29. data/lib/purview/databases/mysql.rb +207 -0
  30. data/lib/purview/databases/postgresql.rb +210 -0
  31. data/lib/purview/databases.rb +3 -0
  32. data/lib/purview/exceptions/base.rb +5 -0
  33. data/lib/purview/exceptions/could_not_acquire_lock.rb +9 -0
  34. data/lib/purview/exceptions/lock_already_released.rb +9 -0
  35. data/lib/purview/exceptions/no_table.rb +9 -0
  36. data/lib/purview/exceptions/no_window.rb +9 -0
  37. data/lib/purview/exceptions/rows_outside_window.rb +18 -0
  38. data/lib/purview/exceptions/table.rb +13 -0
  39. data/lib/purview/exceptions.rb +7 -0
  40. data/lib/purview/loaders/base.rb +154 -0
  41. data/lib/purview/loaders/mysql.rb +81 -0
  42. data/lib/purview/loaders/postgresql.rb +81 -0
  43. data/lib/purview/loaders.rb +3 -0
  44. data/lib/purview/loggers/base.rb +99 -0
  45. data/lib/purview/loggers/console.rb +11 -0
  46. data/lib/purview/loggers.rb +2 -0
  47. data/lib/purview/mixins/helpers.rb +21 -0
  48. data/lib/purview/mixins/logger.rb +21 -0
  49. data/lib/purview/mixins.rb +2 -0
  50. data/lib/purview/parsers/base.rb +39 -0
  51. data/lib/purview/parsers/csv.rb +49 -0
  52. data/lib/purview/parsers/tsv.rb +11 -0
  53. data/lib/purview/parsers.rb +3 -0
  54. data/lib/purview/pullers/base.rb +19 -0
  55. data/lib/purview/pullers/uri.rb +66 -0
  56. data/lib/purview/pullers.rb +2 -0
  57. data/lib/purview/refinements/object.rb +5 -0
  58. data/lib/purview/refinements/time.rb +5 -0
  59. data/lib/purview/refinements.rb +2 -0
  60. data/lib/purview/structs/base.rb +10 -0
  61. data/lib/purview/structs/result.rb +7 -0
  62. data/lib/purview/structs/window.rb +7 -0
  63. data/lib/purview/structs.rb +3 -0
  64. data/lib/purview/tables/base.rb +140 -0
  65. data/lib/purview/tables/raw.rb +13 -0
  66. data/lib/purview/tables.rb +2 -0
  67. data/lib/purview/types/base.rb +9 -0
  68. data/lib/purview/types/boolean.rb +9 -0
  69. data/lib/purview/types/date.rb +9 -0
  70. data/lib/purview/types/float.rb +9 -0
  71. data/lib/purview/types/integer.rb +9 -0
  72. data/lib/purview/types/money.rb +9 -0
  73. data/lib/purview/types/string.rb +9 -0
  74. data/lib/purview/types/text.rb +9 -0
  75. data/lib/purview/types/time.rb +9 -0
  76. data/lib/purview/types/timestamp.rb +9 -0
  77. data/lib/purview/types/uuid.rb +9 -0
  78. data/lib/purview/types.rb +11 -0
  79. data/lib/purview/version.rb +3 -0
  80. data/lib/purview.rb +27 -0
  81. data/purview.gemspec +29 -0
  82. data/spec/spec_helper.rb +5 -0
  83. metadata +210 -0
@@ -0,0 +1,27 @@
1
+ module Purview
2
+ module Connections
3
# PostgreSQL flavour of a Purview connection, talking to the server through
# the `PG` driver.
#
# NOTE(review): `connection` and `opts` are not defined in this class; they
# are presumably supplied by `Base` - confirm there.
class PostgreSQL < Base
  # Runs the caller's block inside a transaction opened on the underlying
  # driver connection.
  def with_transaction
    connection.transaction do
      yield
    end
  end

  private

  # Sends a raw SQL string to the driver and returns the driver's result.
  def execute_sql(sql)
    connection.exec(sql)
  end

  # Converts a driver result to an array of rows. A falsy result (nil/false)
  # is handed back unchanged.
  def extract_rows(result)
    return result unless result
    result.to_a
  end

  # Reads the affected-row count (`cmd_tuples`) from a driver result. A falsy
  # result (nil/false) is handed back unchanged.
  def extract_rows_affected(result)
    return result unless result
    result.cmd_tuples
  end

  # Opens a fresh driver connection using this object's options.
  def new_connection
    PG.connect(opts)
  end
end
26
+ end
27
+ end
@@ -0,0 +1,3 @@
1
+ require 'purview/connections/base'
2
+ require 'purview/connections/mysql'
3
+ require 'purview/connections/postgresql'
@@ -0,0 +1,559 @@
1
+ module Purview
2
+ module Databases
3
+ class Base
4
+ attr_reader :name
5
+
6
+ def initialize(name, opts={})
7
+ @name = name
8
+ @opts = opts
9
+ @tables = Set.new
10
+ end
11
+
12
+ def add_table(table)
13
+ @tables << table
14
+ end
15
+
16
+ def connect
17
+ connection.connect
18
+ end
19
+
20
# Creates an index on `columns` of `table` over the given connection.
#
# `opts[:table]` and `opts[:index]` are split out by `extract_table_options`
# and `extract_index_options`; the resolved table name and index name are
# passed to `create_index_sql`, whose output is executed.
#
# Returns the name of the index that was created.
def create_index(connection, table, columns, opts={})
  resolved_table_name = table_name(table, extract_table_options(opts))
  index_options = extract_index_options(opts)
  resolved_index_name = index_name(resolved_table_name, columns, index_options)

  statement = create_index_sql(
    resolved_table_name,
    resolved_index_name,
    table,
    columns,
    index_options
  )
  connection.execute(statement)

  resolved_index_name
end
40
+
41
+ def create_table(connection, table, opts={})
42
+ table_opts = extract_table_options(opts)
43
+ table_name(table, table_opts).tap do |table_name|
44
+ connection.execute(
45
+ create_table_sql(
46
+ table_name,
47
+ table,
48
+ table_opts
49
+ )
50
+ )
51
+ if table_opts[:create_indices]
52
+ table.indexed_columns.each do |columns|
53
+ create_index(
54
+ connection,
55
+ table,
56
+ columns,
57
+ :table => { :name => table_name }
58
+ )
59
+ end
60
+ end
61
+ end
62
+ end
63
+
64
+ def create_temporary_table(connection, table, opts={})
65
+ table_opts = extract_table_options(opts)
66
+ table_name(table, table_opts).tap do |table_name|
67
+ connection.execute(
68
+ create_temporary_table_sql(
69
+ table_name,
70
+ table,
71
+ table_opts
72
+ )
73
+ )
74
+ if table_opts[:create_indices]
75
+ table.indexed_columns.each do |columns|
76
+ create_index(
77
+ connection,
78
+ table,
79
+ columns,
80
+ :table => { :name => table_name }
81
+ )
82
+ end
83
+ end
84
+ end
85
+ end
86
+
87
+ def disable_table(table)
88
+ with_context_logging("`disable_table` for: #{table_name(table)}") do
89
+ with_new_connection do |connection|
90
+ set_enabled_for_table(
91
+ connection,
92
+ table,
93
+ false_value
94
+ )
95
+ end
96
+ end
97
+ end
98
+
99
+ def drop_table(connection, table, opts={})
100
+ table_opts = extract_table_options(opts)
101
+ table_name(table, table_opts).tap do |table_name|
102
+ connection.execute(
103
+ drop_table_sql(
104
+ table_name,
105
+ table,
106
+ table_opts
107
+ )
108
+ )
109
+ end
110
+ end
111
+
112
+ def drop_index(connection, table, columns, opts={})
113
+ table_opts = extract_table_options(opts)
114
+ table_name = table_name(table, table_opts)
115
+ index_opts = extract_index_options(opts)
116
+ index_name(
117
+ table_name,
118
+ columns,
119
+ index_opts
120
+ ).tap do |index_name|
121
+ connection.execute(
122
+ drop_index_sql(
123
+ table_name,
124
+ index_name,
125
+ table,
126
+ columns,
127
+ index_opts
128
+ )
129
+ )
130
+ end
131
+ end
132
+
133
+ def enable_table(table)
134
+ with_context_logging("`enable_table` for: #{table_name(table)}") do
135
+ with_new_connection do |connection|
136
+ set_enabled_for_table(
137
+ connection,
138
+ table,
139
+ true_value
140
+ )
141
+ end
142
+ end
143
+ end
144
+
145
+ def false_value
146
+ raise %{All "#{Base}(s)" must override the "false_value" method}
147
+ end
148
+
149
+ def lock_table(table, timestamp)
150
+ with_context_logging("`lock_table` for: #{table_name(table)}") do
151
+ with_new_connection do |connection|
152
+ rows_affected = \
153
+ connection.execute(lock_table_sql(table, timestamp)).rows_affected
154
+ raise Purview::Exceptions::CouldNotAcquireLock.new(table) \
155
+ if zero?(rows_affected)
156
+ end
157
+ end
158
+ end
159
+
160
+ def null_value
161
+ raise %{All "#{Base}(s)" must override the "null_value" method}
162
+ end
163
+
164
+ def quoted(value)
165
+ value.nil? ? null_value : value.quoted
166
+ end
167
+
168
+ def sync
169
+ with_new_connection do |connection|
170
+ with_transaction(connection) do |timestamp|
171
+ with_next_table(connection, timestamp) do |table|
172
+ with_next_window(
173
+ connection,
174
+ table,
175
+ timestamp
176
+ ) do |window|
177
+ with_table_locked(table, timestamp) do
178
+ table.sync(connection, window)
179
+ end
180
+ end
181
+ end
182
+ end
183
+ end
184
+ end
185
+
186
+ def true_value
187
+ raise %{All "#{Base}(s)" must override the "true_value" method}
188
+ end
189
+
190
+ def unlock_table(table)
191
+ with_context_logging("`unlock_table` for: #{table_name(table)}") do
192
+ with_new_connection do |connection|
193
+ rows_affected = \
194
+ connection.execute(unlock_table_sql(table)).rows_affected
195
+ raise Purview::Exceptions::LockAlreadyReleased.new(table) \
196
+ if zero?(rows_affected)
197
+ end
198
+ end
199
+ end
200
+
201
+ private
202
+
203
+ include Purview::Mixins::Helpers
204
+ include Purview::Mixins::Logger
205
+
206
+ attr_reader :opts, :tables
207
+
208
+ def column_names(columns)
209
+ columns.map(&:name)
210
+ end
211
+
212
# Builds the SQL fragment that defines one column, in the form
#   <name> <type>[(<limit>)][ PRIMARY KEY] NULL|NOT NULL[ DEFAULT <default>]
#
# The type, limit, primary-key flag, nullability and default come from the
# `type`, `limit`, `primary_key?`, `nullable?` and `default` helpers.
#
# Returns a new String; `column.name` is never modified.
def column_definition(column)
  # `String#to_s` returns the receiver itself, so appending to
  # `column.name.to_s` would grow the column's own name on every call (and
  # raise on a frozen name). Copy into a fresh, unfrozen String first.
  definition = String.new(column.name.to_s)

  definition << " #{type(column)}"

  column_limit = limit(column)
  definition << "(#{column_limit})" if column_limit

  definition << ' PRIMARY KEY' if primary_key?(column)

  definition << " #{nullable?(column) ? 'NULL' : 'NOT NULL'}"

  column_default = default(column)
  definition << " DEFAULT #{column_default}" if column_default

  definition
end
226
+
227
# Returns the column definitions for `table` as an Array of Strings: the id
# column first, then the created and updated timestamp columns, then every
# data column in the order `table.data_columns` yields them.
def column_definitions(table)
  definitions = [
    column_definition(table.id_column),
    column_definition(table.created_timestamp_column),
    column_definition(table.updated_timestamp_column),
  ]
  table.data_columns.each { |data_column| definitions.push(column_definition(data_column)) }
  definitions
end
237
+
238
+ def connection
239
+ connection_type.new(connection_opts)
240
+ end
241
+
242
+ def connection_opts
243
+ {}
244
+ end
245
+
246
+ def connection_type
247
+ raise %{All "#{Base}(s)" must override the "connection_type" method}
248
+ end
249
+
250
+ def create_index_sql(table_name, index_name, table, columns, index_opts={})
251
+ raise %{All "#{Base}(s)" must override the "create_index_sql" method}
252
+ end
253
+
254
+ def create_table_sql(table_name, table, table_opts={})
255
+ raise %{All "#{Base}(s)" must override the "create_table_sql" method}
256
+ end
257
+
258
+ def create_temporary_table_sql(table_name, table, table_opts={})
259
+ raise %{All "#{Base}(s)" must override the "create_temporary_table_sql" method}
260
+ end
261
+
262
+ def default(column)
263
+ column.default || default_map[column.type]
264
+ end
265
+
266
+ def default_map
267
+ {}
268
+ end
269
+
270
+ def drop_index_sql(table_name, index_name, table, columns, index_opts={})
271
+ raise %{All "#{Base}(s)" must override the "drop_index_sql" method}
272
+ end
273
+
274
+ def drop_table_sql(table_name, table, table_opts={})
275
+ raise %{All "#{Base}(s)" must override the "drop_table_sql" method}
276
+ end
277
+
278
+ def ensure_table_metadata_table_exists
279
+ with_new_connection do |connection|
280
+ connection.execute(ensure_table_metadata_table_exists_sql)
281
+ end
282
+ end
283
+
284
+ def ensure_table_metadata_exists_for_table_sql(table)
285
+ raise %{All "#{Base}(s)" must override the "ensure_table_metadata_exists_for_table_sql" method}
286
+ end
287
+
288
+ def ensure_table_metadata_table_exists_sql
289
+ raise %{All "#{Base}(s)" must override the "ensure_table_metadata_table_exists_sql" method}
290
+ end
291
+
292
+ def ensure_table_metadata_exists_for_tables
293
+ with_new_connection do |connection|
294
+ tables.each do |table|
295
+ connection.execute(ensure_table_metadata_exists_for_table_sql(table))
296
+ end
297
+ end
298
+ end
299
+
300
+ def extract_index_options(opts)
301
+ opts[:index] || {}
302
+ end
303
+
304
+ def extract_table_options(opts)
305
+ opts[:table] || { :create_indices => true }
306
+ end
307
+
308
# Reads the "enabled" flag for `table` from the table-metadata store.
#
# Executes the statement produced by `get_enabled_for_table_sql` and looks
# at the `table_metadata_enabled_column_name` column of the first row.
#
# Returns true when that value reads as true/t/yes/y/1 (case-insensitive),
# false otherwise - including when no row or no value is present.
def get_enabled_for_table(connection, table)
  # Use the "enabled" query here; the last-pulled-at query is a different
  # statement and is not guaranteed to select the enabled column.
  row = connection.execute(get_enabled_for_table_sql(table)).rows[0]
  enabled = row && row[table_metadata_enabled_column_name]
  # `to_s` lets already-typecast values (true, 1) go through the same match;
  # nil becomes "" and therefore false.
  !!(enabled.to_s =~ /\A(true|t|yes|y|1)\z/i)
end
313
+
314
+ def get_enabled_for_table_sql(table)
315
+ raise %{All "#{Base}(s)" must override the "get_enabled_for_table_sql" method}
316
+ end
317
+
318
# Reads the "last pulled at" timestamp for `table` from the table-metadata
# store.
#
# Executes the statement produced by `get_last_pulled_at_for_table_sql` and
# parses the `table_metadata_last_pulled_at_column_name` column of the first
# row with `Time.parse`.
#
# Returns a Time, or nil when there is no row or the column is empty.
def get_last_pulled_at_for_table(connection, table)
  row = connection.execute(get_last_pulled_at_for_table_sql(table)).rows[0]
  # Guard against an empty result set the same way `next_table` does, rather
  # than failing with NoMethodError on nil.
  timestamp = row && row[table_metadata_last_pulled_at_column_name]
  timestamp ? Time.parse(timestamp) : nil
end
323
+
324
+ def get_last_pulled_at_for_table_sql(table)
325
+ raise %{All "#{Base}(s)" must override the "get_last_pulled_at_for_table_sql" method}
326
+ end
327
+
328
# Reads the "locked at" timestamp for `table` from the table-metadata store.
#
# Executes the statement produced by `get_locked_at_for_table_sql` and parses
# the `table_metadata_locked_at_column_name` column of the first row with
# `Time.parse`.
#
# Returns a Time, or nil when there is no row or the column is empty.
def get_locked_at_for_table(connection, table)
  row = connection.execute(get_locked_at_for_table_sql(table)).rows[0]
  # Guard against an empty result set the same way `next_table` does, rather
  # than failing with NoMethodError on nil.
  timestamp = row && row[table_metadata_locked_at_column_name]
  timestamp ? Time.parse(timestamp) : nil
end
333
+
334
+ def get_locked_at_for_table_sql(table)
335
+ raise %{All "#{Base}(s)" must override the "get_locked_at_for_table_sql" method}
336
+ end
337
+
338
# Reads the "max timestamp pulled" value for `table` from the table-metadata
# store.
#
# Executes the statement produced by `get_max_timestamp_pulled_for_table_sql`
# and parses the `table_metadata_max_timestamp_pulled_column_name` column of
# the first row with `Time.parse`.
#
# Returns a Time; when there is no row or the column is empty, returns
# `table.starting_timestamp` instead.
def get_max_timestamp_pulled_for_table(connection, table)
  row = connection.execute(get_max_timestamp_pulled_for_table_sql(table)).rows[0]
  # Guard against an empty result set the same way `next_table` does, so a
  # table without metadata falls back to its starting timestamp instead of
  # failing with NoMethodError on nil.
  timestamp = row && row[table_metadata_max_timestamp_pulled_column_name]
  timestamp ? Time.parse(timestamp) : table.starting_timestamp
end
343
+
344
+ def get_max_timestamp_pulled_for_table_sql(table)
345
+ raise %{All "#{Base}(s)" must override the "get_max_timestamp_pulled_for_table_sql" method}
346
+ end
347
+
348
# Picks the name for an index on `columns` of `table_name`.
#
# An explicit `index_opts[:name]` wins; otherwise the name is generated as
# `index_<table>_on_<col>_and_<col>...` from `column_names(columns)`.
def index_name(table_name, columns, index_opts={})
  explicit_name = index_opts[:name]
  return explicit_name if explicit_name

  joined_columns = column_names(columns).join('_and_')
  format('index_%s_on_%s', table_name, joined_columns)
end
354
+
355
# Resolves the size limit to emit for `column`.
#
# Returns nil when the column's type is listed in `limitless_types`;
# otherwise the column's own limit, falling back to the `limit_map` entry
# for its type (which may itself be nil).
def limit(column)
  if limitless_types.include?(column.type)
    nil
  else
    column.limit || limit_map[column.type]
  end
end
359
+
360
+ def limit_map
361
+ {}
362
+ end
363
+
364
+ def limitless_types
365
+ []
366
+ end
367
+
368
+ def lock_table_sql(table, timestamp)
369
+ raise %{All "#{Base}(s)" must override the "lock_table_sql" method}
370
+ end
371
+
372
# Finds the next table to work on.
#
# First makes sure the metadata table and the per-table metadata rows exist,
# then executes `next_table_sql(timestamp)` on `connection` and maps the
# table name in the first row to a registered table via `tables_by_name`.
#
# Returns the table, or nil when there is no row, no name in the row, or no
# registered table with that name.
def next_table(connection, timestamp)
  ensure_table_metadata_table_exists
  ensure_table_metadata_exists_for_tables

  first_row = connection.execute(next_table_sql(timestamp)).rows[0]
  return nil unless first_row

  candidate_name = first_row[table_metadata_table_name_column_name]
  return nil unless candidate_name

  tables_by_name[candidate_name]
end
379
+
380
+ def next_table_sql(timestamp)
381
+ raise %{All "#{Base}(s)" must override the "next_table_sql" method}
382
+ end
383
+
384
# Computes the next window to pull for `table`.
#
# The window starts at the value returned by
# `get_max_timestamp_pulled_for_table` and spans `table.window_size`, capped
# so that it never extends past `timestamp`.
#
# Returns a Purview::Structs::Window, or nil when the start lies after
# `timestamp`.
def next_window(connection, table, timestamp)
  lower_bound = get_max_timestamp_pulled_for_table(connection, table)
  upper_bound = lower_bound + table.window_size

  return nil if lower_bound > timestamp

  upper_bound = upper_bound > timestamp ? timestamp : upper_bound
  Purview::Structs::Window.new(:min => lower_bound, :max => upper_bound)
end
392
+
393
+ def nullable?(column)
394
+ column.nullable?
395
+ end
396
+
397
+ def primary_key?(column)
398
+ column.primary_key?
399
+ end
400
+
401
+ def set_enabled_for_table(connection, table, enabled)
402
+ connection.execute(set_enabled_for_table_sql(table, enabled))
403
+ end
404
+
405
+ def set_enabled_for_table_sql(table, enabled)
406
+ raise %{All "#{Base}(s)" must override the "set_enabled_for_table_sql" method}
407
+ end
408
+
409
+ def set_last_pulled_at_for_table(connection, table, timestamp)
410
+ connection.execute(set_last_pulled_at_for_table_sql(table, timestamp))
411
+ end
412
+
413
+ def set_last_pulled_at_for_table_sql(table, timestamp)
414
+ raise %{All "#{Base}(s)" must override the "set_last_pulled_at_for_table_sql" method}
415
+ end
416
+
417
+ def set_locked_at_for_table(connection, table, timestamp)
418
+ connection.execute(set_locked_at_for_table_sql(table, timestamp))
419
+ end
420
+
421
+ def set_locked_at_for_table_sql(table, timestamp)
422
+ raise %{All "#{Base}(s)" must override the "set_locked_at_for_table_sql" method}
423
+ end
424
+
425
+ def set_max_timestamp_pulled_for_table(connection, table, timestamp)
426
+ connection.execute(set_max_timestamp_pulled_for_table_sql(table, timestamp))
427
+ end
428
+
429
+ def set_max_timestamp_pulled_for_table_sql(table, timestamp)
430
+ raise %{All "#{Base}(s)" must override the "set_max_timestamp_pulled_for_table_sql" method}
431
+ end
432
+
433
+ def table_metadata_enabled_column_definition
434
+ column = Purview::Columns::Boolean.new(table_metadata_enabled_column_name)
435
+ column_definition(column)
436
+ end
437
+
438
+ def table_metadata_enabled_column_name
439
+ 'enabled'
440
+ end
441
+
442
+ def table_metadata_last_pulled_at_column_definition
443
+ column = Purview::Columns::Timestamp.new(table_metadata_last_pulled_at_column_name)
444
+ column_definition(column)
445
+ end
446
+
447
+ def table_metadata_last_pulled_at_column_name
448
+ 'last_pulled_at'
449
+ end
450
+
451
+ def table_metadata_locked_at_column_definition
452
+ column = Purview::Columns::Timestamp.new(table_metadata_locked_at_column_name)
453
+ column_definition(column)
454
+ end
455
+
456
+ def table_metadata_locked_at_column_name
457
+ 'locked_at'
458
+ end
459
+
460
+ def table_metadata_max_timestamp_pulled_column_definition
461
+ column = Purview::Columns::Timestamp.new(table_metadata_max_timestamp_pulled_column_name)
462
+ column_definition(column)
463
+ end
464
+
465
+ def table_metadata_max_timestamp_pulled_column_name
466
+ 'max_timestamp_pulled'
467
+ end
468
+
469
+ def table_metadata_table_name
470
+ 'table_metadata'
471
+ end
472
+
473
+ def table_metadata_table_name_column_definition
474
+ column = Purview::Columns::String.new(table_metadata_table_name_column_name)
475
+ column_definition(column)
476
+ end
477
+
478
+ def table_metadata_table_name_column_name
479
+ 'table_name'
480
+ end
481
+
482
# Builds a fresh Hash mapping each registered table's `name` to the table.
# When two tables share a name, the one iterated later wins.
def tables_by_name
  tables.each_with_object({}) do |table, lookup|
    lookup[table.name] = table
  end
end
489
+
490
+ def table_name(table, table_opts={})
491
+ table_opts[:name] || table.name
492
+ end
493
+
494
+ def type(column)
495
+ type_map[column.type]
496
+ end
497
+
498
+ def type_map
499
+ {
500
+ Purview::Types::Boolean => 'boolean',
501
+ Purview::Types::Date => 'date',
502
+ Purview::Types::Float => 'float',
503
+ Purview::Types::Integer => 'integer',
504
+ Purview::Types::String => 'varchar',
505
+ Purview::Types::Text => 'text',
506
+ Purview::Types::Time => 'time',
507
+ Purview::Types::Timestamp => 'timestamp',
508
+ }
509
+ end
510
+
511
+ def unlock_table_sql(table)
512
+ raise %{All "#{Base}(s)" must override the "unlock_table_sql" method}
513
+ end
514
+
515
# Opens a connection via `connect`, yields it to the block, and disconnects
# it afterwards - also when the block raises. If `connect` itself raises
# there is nothing to disconnect.
#
# Returns the block's value.
def with_new_connection
  fresh_connection = nil
  begin
    fresh_connection = connect
    yield fresh_connection
  ensure
    fresh_connection.disconnect if fresh_connection
  end
end
520
+
521
# Looks up the next table via `next_table`, yields it, and - once the block
# has returned normally - records `timestamp` as its last-pulled-at value.
#
# Raises Purview::Exceptions::NoTable when there is no next table.
# Returns whatever `set_last_pulled_at_for_table` returns.
def with_next_table(connection, timestamp)
  selected_table = next_table(connection, timestamp)
  if selected_table
    yield selected_table
    set_last_pulled_at_for_table(connection, selected_table, timestamp)
  else
    raise Purview::Exceptions::NoTable.new
  end
end
531
+
532
# Computes the next window for `table` via `next_window`, yields it, and -
# once the block has returned normally - records the window's `max` as the
# table's max-timestamp-pulled value.
#
# Raises Purview::Exceptions::NoWindow (built with `table`) when there is no
# window. Returns whatever `set_max_timestamp_pulled_for_table` returns.
def with_next_window(connection, table, timestamp)
  current_window = next_window(connection, table, timestamp)
  if current_window
    yield current_window
    set_max_timestamp_pulled_for_table(connection, table, current_window.max)
  else
    raise Purview::Exceptions::NoWindow.new(table)
  end
end
546
+
547
# Runs the block while holding the lock on `table`.
#
# The lock is taken with `lock_table(table, timestamp)` and released with
# `unlock_table(table)` once the block finishes, whether it returns or
# raises. Any error from `lock_table` propagates unchanged and the block is
# not run.
#
# Returns the block's value.
def with_table_locked(table, timestamp)
  # Acquire outside the protected region: if the lock could not be taken we
  # must not "release" it, which could drop a lock held by someone else and
  # replace the acquisition error with an unlock error.
  lock_table(table, timestamp)
  begin
    yield
  ensure
    unlock_table(table)
  end
end
553
+
554
+ def with_transaction(connection)
555
+ connection.with_transaction { yield Time.now.utc }
556
+ end
557
+ end
558
+ end
559
+ end