purview 1.0.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +33 -0
  3. data/.travis.yml +18 -0
  4. data/Gemfile +3 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +143 -0
  7. data/Rakefile +11 -0
  8. data/TODO +81 -0
  9. data/lib/purview/columns/base.rb +65 -0
  10. data/lib/purview/columns/boolean.rb +11 -0
  11. data/lib/purview/columns/created_timestamp.rb +11 -0
  12. data/lib/purview/columns/date.rb +11 -0
  13. data/lib/purview/columns/float.rb +11 -0
  14. data/lib/purview/columns/id.rb +11 -0
  15. data/lib/purview/columns/integer.rb +11 -0
  16. data/lib/purview/columns/money.rb +11 -0
  17. data/lib/purview/columns/string.rb +11 -0
  18. data/lib/purview/columns/text.rb +11 -0
  19. data/lib/purview/columns/time.rb +11 -0
  20. data/lib/purview/columns/timestamp.rb +11 -0
  21. data/lib/purview/columns/updated_timestamp.rb +11 -0
  22. data/lib/purview/columns/uuid.rb +11 -0
  23. data/lib/purview/columns.rb +14 -0
  24. data/lib/purview/connections/base.rb +55 -0
  25. data/lib/purview/connections/mysql.rb +39 -0
  26. data/lib/purview/connections/postgresql.rb +27 -0
  27. data/lib/purview/connections.rb +3 -0
  28. data/lib/purview/databases/base.rb +559 -0
  29. data/lib/purview/databases/mysql.rb +207 -0
  30. data/lib/purview/databases/postgresql.rb +210 -0
  31. data/lib/purview/databases.rb +3 -0
  32. data/lib/purview/exceptions/base.rb +5 -0
  33. data/lib/purview/exceptions/could_not_acquire_lock.rb +9 -0
  34. data/lib/purview/exceptions/lock_already_released.rb +9 -0
  35. data/lib/purview/exceptions/no_table.rb +9 -0
  36. data/lib/purview/exceptions/no_window.rb +9 -0
  37. data/lib/purview/exceptions/rows_outside_window.rb +18 -0
  38. data/lib/purview/exceptions/table.rb +13 -0
  39. data/lib/purview/exceptions.rb +7 -0
  40. data/lib/purview/loaders/base.rb +154 -0
  41. data/lib/purview/loaders/mysql.rb +81 -0
  42. data/lib/purview/loaders/postgresql.rb +81 -0
  43. data/lib/purview/loaders.rb +3 -0
  44. data/lib/purview/loggers/base.rb +99 -0
  45. data/lib/purview/loggers/console.rb +11 -0
  46. data/lib/purview/loggers.rb +2 -0
  47. data/lib/purview/mixins/helpers.rb +21 -0
  48. data/lib/purview/mixins/logger.rb +21 -0
  49. data/lib/purview/mixins.rb +2 -0
  50. data/lib/purview/parsers/base.rb +39 -0
  51. data/lib/purview/parsers/csv.rb +49 -0
  52. data/lib/purview/parsers/tsv.rb +11 -0
  53. data/lib/purview/parsers.rb +3 -0
  54. data/lib/purview/pullers/base.rb +19 -0
  55. data/lib/purview/pullers/uri.rb +66 -0
  56. data/lib/purview/pullers.rb +2 -0
  57. data/lib/purview/refinements/object.rb +5 -0
  58. data/lib/purview/refinements/time.rb +5 -0
  59. data/lib/purview/refinements.rb +2 -0
  60. data/lib/purview/structs/base.rb +10 -0
  61. data/lib/purview/structs/result.rb +7 -0
  62. data/lib/purview/structs/window.rb +7 -0
  63. data/lib/purview/structs.rb +3 -0
  64. data/lib/purview/tables/base.rb +140 -0
  65. data/lib/purview/tables/raw.rb +13 -0
  66. data/lib/purview/tables.rb +2 -0
  67. data/lib/purview/types/base.rb +9 -0
  68. data/lib/purview/types/boolean.rb +9 -0
  69. data/lib/purview/types/date.rb +9 -0
  70. data/lib/purview/types/float.rb +9 -0
  71. data/lib/purview/types/integer.rb +9 -0
  72. data/lib/purview/types/money.rb +9 -0
  73. data/lib/purview/types/string.rb +9 -0
  74. data/lib/purview/types/text.rb +9 -0
  75. data/lib/purview/types/time.rb +9 -0
  76. data/lib/purview/types/timestamp.rb +9 -0
  77. data/lib/purview/types/uuid.rb +9 -0
  78. data/lib/purview/types.rb +11 -0
  79. data/lib/purview/version.rb +3 -0
  80. data/lib/purview.rb +27 -0
  81. data/purview.gemspec +29 -0
  82. data/spec/spec_helper.rb +5 -0
  83. metadata +210 -0
@@ -0,0 +1,81 @@
1
module Purview
  module Loaders
    # Loader specialisation that assembles MySQL-flavoured SQL strings.
    #
    # Every method below is private and merely returns a String (or, for
    # #temporary_table_opts, a Hash); nothing in this class executes SQL.
    # `table`, `quoted`, `row_values` and `count_column_name` are not defined
    # here -- presumably they are inherited from Base.
    class MySQL < Base
      private

      # Sub-select listing the id column of the temporary table.
      def id_in_sql(temporary_table_name)
        id_column_name = table.id_column.name
        'SELECT %s FROM %s' % [id_column_name, temporary_table_name]
      end

      # Predicate that is true when the updated-timestamp column lies between
      # window.min and window.max.
      def in_window_sql(window)
        column_name = table.updated_timestamp_column.name
        lower_bound = quoted(window.min)
        upper_bound = quoted(window.max)
        '%s BETWEEN %s AND %s' % [column_name, lower_bound, upper_bound]
      end

      # Negation of #in_window_sql.
      def not_in_window_sql(window)
        column_name = table.updated_timestamp_column.name
        lower_bound = quoted(window.min)
        upper_bound = quoted(window.max)
        '%s NOT BETWEEN %s AND %s' % [column_name, lower_bound, upper_bound]
      end

      # DELETE for rows of the target table that fall inside the window but
      # whose id does not appear in the temporary table.
      def table_delete_sql(window, temporary_table_name)
        target_name = table.name
        window_predicate = in_window_sql(window)
        id_column_name = table.id_column.name
        ids_subquery = id_in_sql(temporary_table_name)
        'DELETE FROM %s WHERE %s AND %s NOT IN (%s)' % [
          target_name,
          window_predicate,
          id_column_name,
          ids_subquery,
        ]
      end

      # INSERT ... SELECT copying temporary-table rows whose id is not yet
      # present in the target table. `window` is accepted but not used.
      def table_insert_sql(window, temporary_table_name)
        column_list = table.column_names.join(', ')
        id_column_name = table.id_column.name
        'INSERT INTO %s (%s) SELECT %s FROM %s t1 WHERE NOT EXISTS (SELECT 1 FROM %s t2 WHERE t1.%s = t2.%s)' % [
          table.name,
          column_list,
          column_list,
          temporary_table_name,
          table.name,
          id_column_name,
          id_column_name,
        ]
      end

      # Multi-table UPDATE (MySQL `UPDATE ... JOIN ... SET` form) that copies
      # every column from the temporary table (t2) onto the matching row of
      # the target table (t1). `window` is accepted but not used.
      def table_update_sql(window, temporary_table_name)
        id_column_name = table.id_column.name
        assignments = table.column_names.map do |column_name|
          "t1.#{column_name} = t2.#{column_name}"
        end
        'UPDATE %s t1 JOIN %s t2 ON t1.%s = t2.%s SET %s' % [
          table.name,
          temporary_table_name,
          id_column_name,
          id_column_name,
          assignments.join(', '),
        ]
      end

      # Single multi-row INSERT placing `rows` into the temporary table.
      def temporary_table_insert_sql(temporary_table_name, rows)
        value_tuples = rows.map { |row| "(#{row_values(row)})" }
        'INSERT INTO %s (%s) VALUES %s' % [
          temporary_table_name,
          table.column_names.join(', '),
          value_tuples.join(', '),
        ]
      end

      # Inherited temporary-table options with :create_indices forced to false.
      def temporary_table_opts
        inherited_opts = super
        inherited_opts.merge(:create_indices => false)
      end

      # COUNT of temporary-table rows lying outside the window, aliased as
      # `count_column_name`. `rows` is accepted but not used.
      def temporary_table_verify_sql(temporary_table_name, rows, window)
        outside_window = not_in_window_sql(window)
        'SELECT COUNT(1) %s FROM %s WHERE %s' % [
          count_column_name,
          temporary_table_name,
          outside_window,
        ].rotate(0)
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
module Purview
  module Loaders
    # Loader specialisation that assembles PostgreSQL-flavoured SQL strings.
    #
    # All methods are private and only build and return values; no SQL is
    # executed here. `table`, `quoted`, `row_values` and `count_column_name`
    # come from outside this class -- presumably from Base.
    class PostgreSQL < Base
      private

      # Sub-select of the id column of the temporary table.
      def id_in_sql(temporary_table_name)
        id_name = table.id_column.name
        'SELECT %s FROM %s' % [id_name, temporary_table_name]
      end

      # "<updated timestamp column> BETWEEN <min> AND <max>" for the window.
      def in_window_sql(window)
        timestamp_name = table.updated_timestamp_column.name
        from = quoted(window.min)
        to = quoted(window.max)
        '%s BETWEEN %s AND %s' % [timestamp_name, from, to]
      end

      # "<updated timestamp column> NOT BETWEEN <min> AND <max>" for the window.
      def not_in_window_sql(window)
        timestamp_name = table.updated_timestamp_column.name
        from = quoted(window.min)
        to = quoted(window.max)
        '%s NOT BETWEEN %s AND %s' % [timestamp_name, from, to]
      end

      # DELETE removing in-window rows of the target table whose id is missing
      # from the temporary table.
      def table_delete_sql(window, temporary_table_name)
        destination = table.name
        in_window = in_window_sql(window)
        id_name = table.id_column.name
        known_ids = id_in_sql(temporary_table_name)
        'DELETE FROM %s WHERE %s AND %s NOT IN (%s)' % [
          destination,
          in_window,
          id_name,
          known_ids,
        ]
      end

      # INSERT ... SELECT for temporary-table rows with no matching id in the
      # target table. `window` is accepted but not used.
      def table_insert_sql(window, temporary_table_name)
        columns_sql = table.column_names.join(', ')
        id_name = table.id_column.name
        'INSERT INTO %s (%s) SELECT %s FROM %s t1 WHERE NOT EXISTS (SELECT 1 FROM %s t2 WHERE t1.%s = t2.%s)' % [
          table.name,
          columns_sql,
          columns_sql,
          temporary_table_name,
          table.name,
          id_name,
          id_name,
        ]
      end

      # UPDATE using the PostgreSQL `UPDATE ... SET ... FROM ...` form: every
      # column of the target row (t1) is overwritten from the temporary-table
      # row (t2) that shares its id. `window` is accepted but not used.
      def table_update_sql(window, temporary_table_name)
        set_clauses = table.column_names.map do |column_name|
          "#{column_name} = t2.#{column_name}"
        end
        id_name = table.id_column.name
        'UPDATE %s t1 SET %s FROM %s t2 WHERE t1.%s = t2.%s' % [
          table.name,
          set_clauses.join(', '),
          temporary_table_name,
          id_name,
          id_name,
        ]
      end

      # Single multi-row INSERT placing `rows` into the temporary table.
      def temporary_table_insert_sql(temporary_table_name, rows)
        tuples = rows.map { |row| "(#{row_values(row)})" }
        'INSERT INTO %s (%s) VALUES %s' % [
          temporary_table_name,
          table.column_names.join(', '),
          tuples.join(', '),
        ]
      end

      # Inherited temporary-table options with :create_indices forced to false.
      def temporary_table_opts
        base_opts = super
        base_opts.merge(:create_indices => false)
      end

      # COUNT of temporary-table rows outside the window, aliased as
      # `count_column_name`. `rows` is accepted but not used.
      def temporary_table_verify_sql(temporary_table_name, rows, window)
        out_of_window = not_in_window_sql(window)
        'SELECT COUNT(1) %s FROM %s WHERE %s' % [
          count_column_name,
          temporary_table_name,
          out_of_window,
        ]
      end
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require 'purview/loaders/base'
2
+ require 'purview/loaders/mysql'
3
+ require 'purview/loaders/postgresql'
@@ -0,0 +1,99 @@
1
module Purview
  module Loggers
    # Minimal level-aware logger that writes one formatted entry per call to
    # the IO-like object supplied as opts[:stream] (anything responding to
    # #puts). This class supplies no default stream itself.
    #
    # Each level is switched on or off through the :debug, :error and :info
    # options; see #default_opts for the defaults.
    class Base
      DEBUG_LEVEL = 'DEBUG'
      ERROR_LEVEL = 'ERROR'
      INFO_LEVEL = 'INFO'

      # @param opts [Hash] overrides merged on top of #default_opts
      def initialize(opts={})
        @opts = default_opts.merge(opts)
      end

      # Logs at DEBUG level when the :debug option is truthy.
      # args: message, then an optional exception.
      def debug(*args)
        log(DEBUG_LEVEL, *args) if debug?
      end

      # Logs at ERROR level when the :error option is truthy.
      # args: message, then an optional exception.
      def error(*args)
        log(ERROR_LEVEL, *args) if error?
      end

      # Logs at INFO level when the :info option is truthy.
      # args: message, then an optional exception.
      def info(*args)
        log(INFO_LEVEL, *args) if info?
      end

      # Brackets the given block with a "starting" and a "finished" DEBUG
      # entry and returns the block's value.
      #
      # With one argument the entries read "Starting <arg>" / "Finished <arg>";
      # with two arguments the first is the starting message and the second
      # the finished message.
      #
      # @raise [RuntimeError] when called with any other number of arguments
      def with_context_logging(*args)
        debug(build_starting_message(*args))
        result = yield
        debug(build_finished_message(*args))
        result
      end

      private

      attr_reader :opts

      def build_finished_message(*args)
        case args.length
        when 1 then "Finished #{args[0]}"
        when 2 then args[-1]
        else raise_bad_context_arity(args)
        end
      end

      # Renders a complete log entry; args[0] is the message and args[1] an
      # optional exception whose backtrace is appended below the message.
      def build_message(level, *args)
        message, exception = args[0..1]
        message_template(!!exception) % {
          :exception => format_exception(exception),
          :level => level,
          :message => message,
          :process_id => Process.pid,
          :timestamp => Time.now.strftime('%Y-%m-%d %H:%M:%S.%L %z'),
        }
      end

      def build_starting_message(*args)
        case args.length
        when 1 then "Starting #{args[0]}"
        when 2 then args[0]
        else raise_bad_context_arity(args)
        end
      end

      def debug?
        !!opts[:debug]
      end

      def default_opts
        {
          :debug => true,
          :error => true,
          :info => false,
        }
      end

      def error?
        !!opts[:error]
      end

      # Backtrace of `exception`, one "\tfrom <line>" entry per frame, or nil
      # when no exception was given.
      def format_exception(exception)
        return nil unless exception

        # An exception that was built but never raised has a nil backtrace;
        # Array() turns that into an empty list instead of crashing the logger.
        Array(exception.backtrace).map { |line| "\tfrom #{line}" }.join("\n")
      end

      def info?
        !!opts[:info]
      end

      def log(level, *args)
        stream.puts build_message(level, *args)
      end

      # Format string for one entry; the exception section is appended only
      # when `exception` is truthy.
      def message_template(exception)
        template = "%{timestamp} %{level} (%{process_id}) %{message}"
        exception ? "#{template}:\n%{exception}" : template
      end

      # Raises with an explicit message. A bare `raise` here would produce a
      # message-less RuntimeError, or re-raise the exception currently being
      # handled when invoked from inside a rescue clause.
      def raise_bad_context_arity(args)
        raise %{Expected 1 or 2 context logging arguments, got #{args.length}}
      end

      def stream
        opts[:stream]
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module Purview
  module Loggers
    # Logger whose default :stream option is STDOUT.
    class Console < Base
      private

      # Inherited defaults plus :stream => STDOUT (the stream entry wins over
      # any :stream already present in the inherited defaults).
      def default_opts
        inherited_defaults = super
        inherited_defaults.merge(:stream => STDOUT)
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require 'purview/loggers/base'
2
+ require 'purview/loggers/console'
@@ -0,0 +1,21 @@
1
module Purview
  module Mixins
    # Small value-inspection helpers meant to be mixed into other classes.
    module Helpers
      # True when the value's string form is empty once surrounding
      # whitespace is stripped (so nil, "" and "  " are all blank).
      def blank?(value)
        stripped = value.to_s.strip
        stripped.empty?
      end

      # Returns `default` only when `value` is nil; false is kept as-is.
      def coalesce(value, default)
        return default if value.nil?

        value
      end

      # Inverse of #blank?.
      def present?(value)
        blank?(value) ? false : true
      end

      # True when the value converts to the integer 0. The conversion uses
      # Kernel#Integer, so values it rejects raise instead of returning false.
      def zero?(value)
        Integer(value) == 0
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
module Purview
  module Mixins
    # Gives the including object a lazily built logger. The includer must
    # provide an `opts` method returning something indexable by Symbol.
    module Logger
      # The logger instance, built on first use from #logger_type and
      # #logger_opts and reused afterwards.
      def logger
        @logger ||= begin
          type = logger_type
          type.new(logger_opts)
        end
      end

      # Options handed to the logger constructor: opts[:logger], or an empty
      # Hash when that entry is nil/false.
      def logger_opts
        configured = opts[:logger]
        configured ? configured : {}
      end

      # Logger class to instantiate: opts[:logger_type], falling back to
      # Purview::Loggers::Console.
      def logger_type
        configured = opts[:logger_type]
        configured ? configured : Purview::Loggers::Console
      end

      # Delegates to the logger's #with_context_logging, passing the caller's
      # block through.
      def with_context_logging(*args, &block)
        logger.with_context_logging(*args, &block)
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require 'purview/mixins/helpers'
2
+ require 'purview/mixins/logger'
@@ -0,0 +1,39 @@
1
module Purview
  module Parsers
    # Abstract parser. Subclasses must override #parse; #validate accepts
    # everything unless overridden.
    class Base
      include Purview::Mixins::Logger

      # @param opts [Hash] parser options; opts[:table] is read by #table
      def initialize(opts={})
        @opts = opts
      end

      # Must be overridden; this implementation always raises.
      def parse(data)
        raise RuntimeError, %{All "#{Base}(s)" must override the "parse" method}
      end

      # Default validation: always true.
      def validate(data)
        true
      end

      private

      attr_reader :opts

      # Converts an enumerable of key/value pairs into a Hash of parsed
      # values. A key with an entry in table.columns_by_name is parsed by
      # that column; any other key is reported at debug level and left out.
      def build_result(row)
        parsed = {}
        row.each do |key, value|
          column = table.columns_by_name[key]
          if column
            parsed[key] = column.parse(value)
          else
            logger.debug(%{Unexpected column: "#{key}" in data-set})
          end
        end
        parsed
      end

      # The table this parser works for, taken from opts[:table].
      def table
        opts[:table]
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
module Purview
  module Parsers
    # Parser for delimiter-separated text whose first line is a header row.
    #
    # The data is split into lines on #row_separator and each line is parsed
    # on its own with the standard-library CSV parser, so a quoted field that
    # contains the row separator is not supported.
    class CSV < Base
      # Parses every data line into a Hash (see Base#build_result) keyed by
      # the header symbols.
      #
      # @param data [String, nil] raw text; nil or empty yields []
      # @return [Array<Hash>] one entry per non-empty data line
      def parse(data)
        with_context_logging("`parse` for: #{table.name}") do
          headers = extract_headers(data)
          [].tap do |results|
            extract_rows(data) do |row|
              results << build_result(headers.zip(row))
            end
          end
        end
      end

      # Checks that the header row names every column of the table.
      #
      # @param data [String, nil] raw text
      # @return [true]
      # @raise [RuntimeError] listing the missing columns; empty or nil data
      #   is reported the same way, since it has no header row at all
      def validate(data)
        with_context_logging("`validate` for: #{table.name}") do
          missing_columns = table.column_names - extract_headers(data)
          unless missing_columns.empty?
            raise 'Missing one or more columns: "%s"' % missing_columns.join('", "')
          end
          true
        end
      end

      private

      def column_separator
        ','
      end

      # Header names as Symbols; [] when there is no header line. An empty
      # header cell is parsed as nil and becomes the empty Symbol rather than
      # raising NoMethodError.
      def extract_headers(data)
        header_row = split_rows(data).first
        fields = header_row ? parse_row(header_row) : nil
        Array(fields).map { |header| header.to_s.to_sym }
      end

      # Yields the parsed fields of every line after the header. Lines that
      # parse to nothing (blank lines) are skipped; previously they yielded
      # nil and broke the caller's `headers.zip(row)`.
      def extract_rows(data)
        split_rows(data).drop(1).each do |line|
          fields = parse_row(line)
          yield fields unless fields.nil?
        end
      end

      # Parses a single line; returns nil for an empty line.
      def parse_row(row)
        ::CSV.parse(row, :col_sep => column_separator).first
      end

      def row_separator
        $/
      end

      # Lines of `data`; nil is treated as empty input.
      def split_rows(data)
        data.to_s.split(row_separator)
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module Purview
  module Parsers
    # CSV parser variant whose column separator is a tab character.
    class TSV < CSV
      # Overrides the inherited separator with a single tab.
      def column_separator
        "\t"
      end
      private :column_separator
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require 'purview/parsers/base'
2
+ require 'purview/parsers/csv'
3
+ require 'purview/parsers/tsv'
@@ -0,0 +1,19 @@
1
module Purview
  module Pullers
    # Abstract puller. Subclasses must override #pull.
    class Base
      include Purview::Mixins::Logger

      # @param opts [Hash] puller options, exposed privately through #opts
      def initialize(opts={})
        @opts = opts
      end

      # Must be overridden; this implementation always raises.
      def pull(window)
        raise RuntimeError, %{All "#{Base}(s)" must override the "pull" method}
      end

      private

      attr_reader :opts
    end
  end
end
@@ -0,0 +1,66 @@
1
module Purview
  module Pullers
    # Puller that issues an HTTP(S) GET against opts[:uri] and returns the
    # response body.
    #
    # Options read from opts:
    #   :uri             - target URI (String)
    #   :username        - basic-auth user; used only together with :password
    #   :password        - basic-auth password
    #   :ssl_verify_mode - OpenSSL verify mode for https URIs. Defaults to
    #                      OpenSSL::SSL::VERIFY_PEER; pass
    #                      OpenSSL::SSL::VERIFY_NONE to explicitly disable
    #                      certificate verification (e.g. for a self-signed
    #                      development endpoint).
    class URI < Base
      # Fetches the data for `window`.
      #
      # @param window [#min, #max] both bounds must respond to #to_i; they are
      #   sent as the `ts1` and `ts2` query parameters
      # @return [String, nil] the response body, whatever the response status
      def pull(window)
        request = windowed_request(window)
        with_context_logging("`pull` from: #{request.path}") do
          http.request(request).body
        end
      end

      private

      def basic_auth?
        username && password
      end

      def host
        uri.host
      end

      # A fresh Net::HTTP connection object for the target host/port, with
      # TLS enabled when the URI scheme is https.
      def http
        Net::HTTP.new(host, port).tap do |http|
          if https?
            http.use_ssl = true
            # Verify the peer certificate unless the caller opted out: the
            # request may carry basic-auth credentials, which must not be
            # sent over an unauthenticated TLS channel.
            http.verify_mode = ssl_verify_mode
          end
        end
      end

      def https?
        uri.scheme == 'https'
      end

      def password
        opts[:password]
      end

      def port
        uri.port
      end

      # Verify mode applied to https connections (see the class comment).
      # Note that VERIFY_NONE is the Integer 0, which is truthy in Ruby, so
      # an explicit opt-out is honoured by the `||` below.
      def ssl_verify_mode
        opts[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
      end

      def uri
        ::URI.parse(opts[:uri])
      end

      def username
        opts[:username]
      end

      # GET request for the windowed URI, with basic auth attached when both
      # username and password are configured.
      def windowed_request(window)
        Net::HTTP::Get.new(windowed_request_uri(window)).tap do |request|
          if basic_auth?
            request.basic_auth(username, password)
          end
        end
      end

      # The configured URI with `ts1`/`ts2` (integer forms of window.min and
      # window.max) appended, using `&` when a query string already exists.
      def windowed_request_uri(window)
        uri.to_s.tap do |request_uri|
          request_uri << (request_uri.include?('?') ? '&' : '?')
          request_uri << 'ts1=%s&ts2=%s' % [window.min.to_i, window.max.to_i]
        end
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require 'purview/pullers/base'
2
+ require 'purview/pullers/uri'
@@ -0,0 +1,5 @@
1
class Object
  # Renders the receiver as a single-quoted SQL string literal.
  #
  # The receiver is converted with #to_s and every embedded single quote is
  # doubled (the standard SQL escape), so a value such as "O'Brien" can no
  # longer terminate the literal early and corrupt -- or inject into -- the
  # statement it is placed in.
  #
  # NOTE(review): backslashes are passed through untouched; confirm whether
  # the target database treats backslash as an escape character inside
  # string literals.
  #
  # @return [String] e.g. "'abc'" for "abc", "'O''Brien'" for "O'Brien"
  def quoted
    escaped = to_s.gsub("'", "''")
    "'#{escaped}'"
  end
end
@@ -0,0 +1,5 @@
1
class Time
  # The receiver formatted with strftime('%F %T') and wrapped in single
  # quotes, e.g. '2015-01-02 03:04:05'. No zone information is included.
  def quoted
    timestamp = strftime('%F %T')
    "'" + timestamp + "'"
  end
end
@@ -0,0 +1,2 @@
1
+ require 'purview/refinements/object'
2
+ require 'purview/refinements/time'
@@ -0,0 +1,10 @@
1
module Purview
  module Structs
    # OpenStruct variant that refuses to read attributes it does not hold:
    # where a plain OpenStruct answers nil for an unknown reader, this class
    # raises NoMethodError. Setters (`struct.name = value`) keep working.
    class Base < OpenStruct
      # Intercepts calls OpenStruct has no method for.
      #
      # * Calls with arguments (e.g. setters) go to OpenStruct unchanged.
      # * Argument-less calls naming an attribute the struct holds also go to
      #   OpenStruct: some OpenStruct implementations define readers lazily
      #   through method_missing, and those reads must not be rejected.
      # * Any other argument-less call raises.
      #
      # @raise [NoMethodError] carrying the offending method name and a
      #   descriptive message
      def method_missing(method_name, *args, &block)
        return super unless args.empty?
        return super if to_h.key?(method_name)

        raise NoMethodError.new(
          %{undefined method "#{method_name}" for #{self.class}},
          method_name
        )
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
module Purview
  module Structs
    # Named struct type that currently adds nothing to Structs::Base; it
    # exists as the place for Result-specific helper methods and overrides.
    class Result < Base
    end
  end
end
@@ -0,0 +1,7 @@
1
module Purview
  module Structs
    # Named struct type that currently adds nothing to Structs::Base; it
    # exists as the place for Window-specific helper methods and overrides.
    class Window < Base
    end
  end
end
@@ -0,0 +1,3 @@
1
+ require 'purview/structs/base'
2
+ require 'purview/structs/result'
3
+ require 'purview/structs/window'