purview 1.0.0.alpha
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +33 -0
- data/.travis.yml +18 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +143 -0
- data/Rakefile +11 -0
- data/TODO +81 -0
- data/lib/purview/columns/base.rb +65 -0
- data/lib/purview/columns/boolean.rb +11 -0
- data/lib/purview/columns/created_timestamp.rb +11 -0
- data/lib/purview/columns/date.rb +11 -0
- data/lib/purview/columns/float.rb +11 -0
- data/lib/purview/columns/id.rb +11 -0
- data/lib/purview/columns/integer.rb +11 -0
- data/lib/purview/columns/money.rb +11 -0
- data/lib/purview/columns/string.rb +11 -0
- data/lib/purview/columns/text.rb +11 -0
- data/lib/purview/columns/time.rb +11 -0
- data/lib/purview/columns/timestamp.rb +11 -0
- data/lib/purview/columns/updated_timestamp.rb +11 -0
- data/lib/purview/columns/uuid.rb +11 -0
- data/lib/purview/columns.rb +14 -0
- data/lib/purview/connections/base.rb +55 -0
- data/lib/purview/connections/mysql.rb +39 -0
- data/lib/purview/connections/postgresql.rb +27 -0
- data/lib/purview/connections.rb +3 -0
- data/lib/purview/databases/base.rb +559 -0
- data/lib/purview/databases/mysql.rb +207 -0
- data/lib/purview/databases/postgresql.rb +210 -0
- data/lib/purview/databases.rb +3 -0
- data/lib/purview/exceptions/base.rb +5 -0
- data/lib/purview/exceptions/could_not_acquire_lock.rb +9 -0
- data/lib/purview/exceptions/lock_already_released.rb +9 -0
- data/lib/purview/exceptions/no_table.rb +9 -0
- data/lib/purview/exceptions/no_window.rb +9 -0
- data/lib/purview/exceptions/rows_outside_window.rb +18 -0
- data/lib/purview/exceptions/table.rb +13 -0
- data/lib/purview/exceptions.rb +7 -0
- data/lib/purview/loaders/base.rb +154 -0
- data/lib/purview/loaders/mysql.rb +81 -0
- data/lib/purview/loaders/postgresql.rb +81 -0
- data/lib/purview/loaders.rb +3 -0
- data/lib/purview/loggers/base.rb +99 -0
- data/lib/purview/loggers/console.rb +11 -0
- data/lib/purview/loggers.rb +2 -0
- data/lib/purview/mixins/helpers.rb +21 -0
- data/lib/purview/mixins/logger.rb +21 -0
- data/lib/purview/mixins.rb +2 -0
- data/lib/purview/parsers/base.rb +39 -0
- data/lib/purview/parsers/csv.rb +49 -0
- data/lib/purview/parsers/tsv.rb +11 -0
- data/lib/purview/parsers.rb +3 -0
- data/lib/purview/pullers/base.rb +19 -0
- data/lib/purview/pullers/uri.rb +66 -0
- data/lib/purview/pullers.rb +2 -0
- data/lib/purview/refinements/object.rb +5 -0
- data/lib/purview/refinements/time.rb +5 -0
- data/lib/purview/refinements.rb +2 -0
- data/lib/purview/structs/base.rb +10 -0
- data/lib/purview/structs/result.rb +7 -0
- data/lib/purview/structs/window.rb +7 -0
- data/lib/purview/structs.rb +3 -0
- data/lib/purview/tables/base.rb +140 -0
- data/lib/purview/tables/raw.rb +13 -0
- data/lib/purview/tables.rb +2 -0
- data/lib/purview/types/base.rb +9 -0
- data/lib/purview/types/boolean.rb +9 -0
- data/lib/purview/types/date.rb +9 -0
- data/lib/purview/types/float.rb +9 -0
- data/lib/purview/types/integer.rb +9 -0
- data/lib/purview/types/money.rb +9 -0
- data/lib/purview/types/string.rb +9 -0
- data/lib/purview/types/text.rb +9 -0
- data/lib/purview/types/time.rb +9 -0
- data/lib/purview/types/timestamp.rb +9 -0
- data/lib/purview/types/uuid.rb +9 -0
- data/lib/purview/types.rb +11 -0
- data/lib/purview/version.rb +3 -0
- data/lib/purview.rb +27 -0
- data/purview.gemspec +29 -0
- data/spec/spec_helper.rb +5 -0
- metadata +210 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
module Purview
  module Loaders
    # MySQL loader: builds the SQL strings used to stage rows in a temporary
    # table and then reconcile them with the target table.
    #
    # `table`, `quoted`, `row_values` and `count_column_name` are not defined
    # in this class; presumably they are inherited from Base -- confirm there.
    class MySQL < Base
      private

      # Sub-select listing the id column of every row in the temporary table.
      def id_in_sql(temporary_table_name)
        "SELECT #{table.id_column.name} FROM #{temporary_table_name}"
      end

      # Predicate matching rows whose updated-timestamp column lies between
      # window.min and window.max (SQL BETWEEN, so both bounds are included).
      def in_window_sql(window)
        timestamp_column = table.updated_timestamp_column.name
        lower_bound = quoted(window.min)
        upper_bound = quoted(window.max)
        "#{timestamp_column} BETWEEN #{lower_bound} AND #{upper_bound}"
      end

      # Negation of #in_window_sql: rows whose updated-timestamp column falls
      # outside the window.
      def not_in_window_sql(window)
        timestamp_column = table.updated_timestamp_column.name
        lower_bound = quoted(window.min)
        upper_bound = quoted(window.max)
        "#{timestamp_column} NOT BETWEEN #{lower_bound} AND #{upper_bound}"
      end

      # DELETE for target rows that are inside the window but whose id is not
      # present in the temporary table.
      def table_delete_sql(window, temporary_table_name)
        target = table.name
        window_condition = in_window_sql(window)
        id_column = table.id_column.name
        staged_ids = id_in_sql(temporary_table_name)
        "DELETE FROM #{target} WHERE #{window_condition} AND #{id_column} NOT IN (#{staged_ids})"
      end

      # INSERT that copies rows from the temporary table (t1) whose id has no
      # match in the target table (t2). `window` is accepted but not used.
      def table_insert_sql(window, temporary_table_name)
        target = table.name
        column_list = table.column_names.join(', ')
        id_column = table.id_column.name
        "INSERT INTO #{target} (#{column_list}) " \
          "SELECT #{column_list} FROM #{temporary_table_name} t1 " \
          "WHERE NOT EXISTS (SELECT 1 FROM #{target} t2 WHERE t1.#{id_column} = t2.#{id_column})"
      end

      # Multi-table UPDATE (MySQL `UPDATE ... JOIN ... SET` form) that copies
      # every column from the temporary table (t2) onto the target row (t1)
      # with the same id. `window` is accepted but not used.
      def table_update_sql(window, temporary_table_name)
        id_column = table.id_column.name
        assignments = table.column_names.map { |name| "t1.#{name} = t2.#{name}" }.join(', ')
        "UPDATE #{table.name} t1 JOIN #{temporary_table_name} t2 " \
          "ON t1.#{id_column} = t2.#{id_column} SET #{assignments}"
      end

      # Multi-row INSERT that loads `rows` into the temporary table; each row
      # becomes one parenthesised tuple produced by row_values.
      def temporary_table_insert_sql(temporary_table_name, rows)
        column_list = table.column_names.join(', ')
        value_tuples = rows.map { |row| "(#{row_values(row)})" }.join(', ')
        "INSERT INTO #{temporary_table_name} (#{column_list}) VALUES #{value_tuples}"
      end

      # Inherited temporary-table options with :create_indices forced to false.
      def temporary_table_opts
        overrides = { :create_indices => false }
        super.merge(overrides)
      end

      # Counts the rows in the temporary table that fall outside the window;
      # the count is aliased as count_column_name. `rows` is accepted but not
      # used.
      def temporary_table_verify_sql(temporary_table_name, rows, window)
        outside_window = not_in_window_sql(window)
        "SELECT COUNT(1) #{count_column_name} FROM #{temporary_table_name} WHERE #{outside_window}"
      end
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module Purview
  module Loaders
    # PostgreSQL loader: builds the SQL strings used to stage rows in a
    # temporary table and then reconcile them with the target table.
    #
    # `table`, `quoted`, `row_values` and `count_column_name` are not defined
    # in this class; presumably they are inherited from Base -- confirm there.
    class PostgreSQL < Base
      private

      # Sub-select listing the id column of every row in the temporary table.
      def id_in_sql(temporary_table_name)
        "SELECT #{table.id_column.name} FROM #{temporary_table_name}"
      end

      # Predicate matching rows whose updated-timestamp column lies between
      # window.min and window.max (SQL BETWEEN, so both bounds are included).
      def in_window_sql(window)
        timestamp_column = table.updated_timestamp_column.name
        lower_bound = quoted(window.min)
        upper_bound = quoted(window.max)
        "#{timestamp_column} BETWEEN #{lower_bound} AND #{upper_bound}"
      end

      # Negation of #in_window_sql: rows whose updated-timestamp column falls
      # outside the window.
      def not_in_window_sql(window)
        timestamp_column = table.updated_timestamp_column.name
        lower_bound = quoted(window.min)
        upper_bound = quoted(window.max)
        "#{timestamp_column} NOT BETWEEN #{lower_bound} AND #{upper_bound}"
      end

      # DELETE for target rows that are inside the window but whose id is not
      # present in the temporary table.
      def table_delete_sql(window, temporary_table_name)
        target = table.name
        window_condition = in_window_sql(window)
        id_column = table.id_column.name
        staged_ids = id_in_sql(temporary_table_name)
        "DELETE FROM #{target} WHERE #{window_condition} AND #{id_column} NOT IN (#{staged_ids})"
      end

      # INSERT that copies rows from the temporary table (t1) whose id has no
      # match in the target table (t2). `window` is accepted but not used.
      def table_insert_sql(window, temporary_table_name)
        target = table.name
        column_list = table.column_names.join(', ')
        id_column = table.id_column.name
        "INSERT INTO #{target} (#{column_list}) " \
          "SELECT #{column_list} FROM #{temporary_table_name} t1 " \
          "WHERE NOT EXISTS (SELECT 1 FROM #{target} t2 WHERE t1.#{id_column} = t2.#{id_column})"
      end

      # UPDATE (PostgreSQL `UPDATE ... SET ... FROM` form) that copies every
      # column from the temporary table (t2) onto the target row (t1) with the
      # same id. The SET targets are left unqualified. `window` is accepted
      # but not used.
      def table_update_sql(window, temporary_table_name)
        id_column = table.id_column.name
        assignments = table.column_names.map { |name| "#{name} = t2.#{name}" }.join(', ')
        "UPDATE #{table.name} t1 SET #{assignments} " \
          "FROM #{temporary_table_name} t2 WHERE t1.#{id_column} = t2.#{id_column}"
      end

      # Multi-row INSERT that loads `rows` into the temporary table; each row
      # becomes one parenthesised tuple produced by row_values.
      def temporary_table_insert_sql(temporary_table_name, rows)
        column_list = table.column_names.join(', ')
        value_tuples = rows.map { |row| "(#{row_values(row)})" }.join(', ')
        "INSERT INTO #{temporary_table_name} (#{column_list}) VALUES #{value_tuples}"
      end

      # Inherited temporary-table options with :create_indices forced to false.
      def temporary_table_opts
        overrides = { :create_indices => false }
        super.merge(overrides)
      end

      # Counts the rows in the temporary table that fall outside the window;
      # the count is aliased as count_column_name. `rows` is accepted but not
      # used.
      def temporary_table_verify_sql(temporary_table_name, rows, window)
        outside_window = not_in_window_sql(window)
        "SELECT COUNT(1) #{count_column_name} FROM #{temporary_table_name} WHERE #{outside_window}"
      end
    end
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
module Purview
  module Loggers
    # Base logger. Formats levelled messages as
    #   "<timestamp> <LEVEL> (<pid>) <message>"
    # optionally followed by ":\n" and an exception's backtrace, and writes
    # them with `puts` to opts[:stream].
    #
    # Options (merged over #default_opts):
    #   :debug  - emit DEBUG messages (default true)
    #   :error  - emit ERROR messages (default true)
    #   :info   - emit INFO messages  (default false)
    #   :stream - object responding to `puts`; this class supplies no default,
    #             so a subclass or the caller must provide one.
    class Base
      DEBUG_LEVEL = 'DEBUG'.freeze
      ERROR_LEVEL = 'ERROR'.freeze
      INFO_LEVEL = 'INFO'.freeze

      def initialize(opts={})
        @opts = default_opts.merge(opts)
      end

      # Each level method takes (message) or (message, exception) and writes
      # only when its level is enabled.
      def debug(*args)
        log(DEBUG_LEVEL, *args) if debug?
      end

      def error(*args)
        log(ERROR_LEVEL, *args) if error?
      end

      def info(*args)
        log(INFO_LEVEL, *args) if info?
      end

      # Logs a DEBUG "starting" message, yields, logs a DEBUG "finished"
      # message and returns the block's value.
      #
      # With one argument the messages are "Starting <arg>" / "Finished <arg>";
      # with two arguments the first is used verbatim as the starting message
      # and the second as the finished message. Any other argument count
      # raises RuntimeError before the block runs. If the block raises, the
      # finished message is not logged.
      def with_context_logging(*args)
        debug(build_starting_message(*args))
        yield.tap { debug(build_finished_message(*args)) }
      end

      private

      attr_reader :opts

      def build_finished_message(*args)
        case args.length
        when 1 then "Finished #{args[0]}"
        when 2 then args[-1]
        else raise context_arguments_error(args)
        end
      end

      # Fills the message template; `args` is (message) or (message, exception).
      def build_message(level, *args)
        message, exception = args[0..1]
        message_template(!!exception) % {
          :exception => format_exception(exception),
          :level => level,
          :message => message,
          :process_id => Process.pid,
          :timestamp => Time.now.strftime('%Y-%m-%d %H:%M:%S.%L %z'),
        }
      end

      def build_starting_message(*args)
        case args.length
        when 1 then "Starting #{args[0]}"
        when 2 then args[0]
        else raise context_arguments_error(args)
        end
      end

      # Message for the RuntimeError raised on a bad with_context_logging call.
      # An explicit message is used instead of a bare `raise`, which would
      # either report "unhandled exception" or re-raise an unrelated `$!`.
      def context_arguments_error(args)
        "with_context_logging expects 1 or 2 arguments (got #{args.length})"
      end

      def debug?
        !!opts[:debug]
      end

      def default_opts
        {
          :debug => true,
          :error => true,
          :info => false,
        }
      end

      def error?
        !!opts[:error]
      end

      # Renders the exception's backtrace, one "\tfrom <line>" per frame.
      # Exception#backtrace is nil for an exception that was never raised, so
      # it is coerced to an empty array instead of crashing the logger.
      def format_exception(exception)
        exception && Array(exception.backtrace).map { |line| "\tfrom #{line}" }.join("\n")
      end

      def info?
        !!opts[:info]
      end

      def log(level, *args)
        stream.puts build_message(level, *args)
      end

      # Format template; the exception section is appended only when an
      # exception is being logged. Built without mutating a string literal.
      def message_template(exception)
        template = '%{timestamp} %{level} (%{process_id}) %{message}'
        exception ? "#{template}:\n%{exception}" : template
      end

      def stream
        opts[:stream]
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Purview
  module Mixins
    # Small value-inspection helpers intended to be mixed into other classes.
    module Helpers
      # True when the value's string form is empty or whitespace-only
      # (so nil, "" and "  " are all blank).
      def blank?(value)
        value.to_s.strip.empty?
      end

      # Returns `default` only when `value` is nil; false is kept as-is.
      def coalesce(value, default)
        return default if value.nil?

        value
      end

      # Logical opposite of #blank?.
      def present?(value)
        blank?(value) ? false : true
      end

      # True when the value converts to the integer 0. Conversion goes through
      # Kernel#Integer, which raises for values it cannot convert.
      def zero?(value)
        Integer(value) == 0
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Purview
  module Mixins
    # Gives the including object a lazily-built logger. The includer must
    # provide an `opts` method returning something indexable by symbol.
    module Logger
      # The logger instance, created on first use from logger_type and
      # logger_opts and then reused.
      def logger
        return @logger if @logger

        @logger = logger_type.new(logger_opts)
      end

      # Options handed to the logger's constructor: opts[:logger], or an empty
      # hash when that is not set.
      def logger_opts
        configured = opts[:logger]
        configured || {}
      end

      # Logger class to instantiate: opts[:logger_type], falling back to
      # Purview::Loggers::Console.
      def logger_type
        configured = opts[:logger_type]
        configured || Purview::Loggers::Console
      end

      # Delegates to the logger's with_context_logging, passing the caller's
      # block through.
      def with_context_logging(*args, &block)
        logger.with_context_logging(*args, &block)
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Purview
  module Parsers
    # Common behaviour for parsers. Subclasses must override #parse and may
    # override #validate. The table being parsed for is read from opts[:table].
    class Base
      # Mixed-in logger helpers. Methods that come from an included module are
      # not affected by the `private` keyword below.
      include Purview::Mixins::Logger

      def initialize(opts = {})
        @opts = opts
      end

      # Abstract: always raises (RuntimeError) in the base class.
      def parse(data)
        raise "All \"#{Base}(s)\" must override the \"parse\" method"
      end

      # Default validation accepts any data.
      def validate(data)
        true
      end

      private

      attr_reader :opts

      # Builds a hash from `row`, an enumerable of [key, value] pairs. Keys
      # that match a table column have their value run through that column's
      # `parse`; unknown keys are dropped with a debug log entry.
      def build_result(row)
        result = {}
        row.each do |key, value|
          column = table.columns_by_name[key]
          if column
            result[key] = column.parse(value)
          else
            logger.debug("Unexpected column: \"#{key}\" in data-set")
          end
        end
        result
      end

      def table
        opts[:table]
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Purview
  module Parsers
    # Parses delimiter-separated text whose first line is a header row.
    #
    # The data is split into lines on #row_separator and each line is parsed
    # on its own with the standard-library CSV parser, so a quoted field that
    # itself contains the row separator is not supported.
    class CSV < Base
      # Parses `data` into an array of hashes (one per data row), keyed by
      # header name as built by build_result. Empty or nil data yields an
      # empty array; blank lines are skipped.
      def parse(data)
        with_context_logging("`parse` for: #{table.name}") do
          headers = extract_headers(data)
          results = []
          extract_rows(data) do |row|
            results << build_result(headers.zip(row))
          end
          results
        end
      end

      # Returns true when every table column appears in the header row;
      # otherwise raises RuntimeError naming the missing columns. Empty or nil
      # data has no headers, so it fails here whenever the table has columns.
      def validate(data)
        with_context_logging("`validate` for: #{table.name}") do
          missing_columns = table.column_names - extract_headers(data)
          unless missing_columns.empty?
            raise 'Missing one or more columns: "%s"' % missing_columns.join('", "')
          end
          true
        end
      end

      private

      def column_separator
        ','
      end

      # Header names as symbols. Returns [] when there is no header line
      # (empty/nil data, or a blank first line) instead of raising.
      def extract_headers(data)
        header_row = split_rows(data).first
        Array(header_row && parse_row(header_row)).map(&:to_sym)
      end

      # Yields the parsed fields of every line after the header. Lines that
      # parse to nothing (blank lines) are skipped, because zipping headers
      # with nil would raise.
      def extract_rows(data)
        rows = split_rows(data).drop(1)
        rows.each do |row|
          fields = parse_row(row)
          yield fields if fields
        end
      end

      # Parses a single line; returns nil for an empty line.
      def parse_row(row)
        ::CSV.parse(row, :col_sep => column_separator).first
      end

      def row_separator
        $/
      end

      # Splits the raw data into lines; nil data is treated as empty.
      def split_rows(data)
        data.to_s.split(row_separator)
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Purview
  module Pullers
    # Common behaviour for pullers. Subclasses must override #pull.
    class Base
      # Mixed-in logger helpers. Methods that come from an included module are
      # not affected by the `private` keyword below.
      include Purview::Mixins::Logger

      def initialize(opts = {})
        @opts = opts
      end

      # Abstract: always raises (RuntimeError) in the base class.
      def pull(window)
        raise "All \"#{Base}(s)\" must override the \"pull\" method"
      end

      private

      attr_reader :opts
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module Purview
  module Pullers
    # Pulls data for a window with an HTTP(S) GET against opts[:uri].
    #
    # Options read by this class:
    #   :uri             - base URL; the window is appended as the query
    #                      parameters ts1/ts2 (integer forms of window.min
    #                      and window.max)
    #   :username,
    #   :password        - when both are set, sent as HTTP Basic credentials
    #   :ssl_verify_mode - OpenSSL verify mode for https URLs. Defaults to
    #                      OpenSSL::SSL::VERIFY_PEER so the server certificate
    #                      is checked; pass OpenSSL::SSL::VERIFY_NONE
    #                      explicitly to turn verification off (e.g. for a
    #                      self-signed test host).
    class URI < Base
      # Performs the request and returns the response body.
      # NOTE(review): the response status is not inspected, so an error page
      # is returned to the caller like any other body.
      def pull(window)
        request = windowed_request(window)
        with_context_logging("`pull` from: #{request.path}") do
          http.request(request).body
        end
      end

      private

      def basic_auth?
        username && password
      end

      def host
        uri.host
      end

      # A fresh Net::HTTP client for the target host, with TLS enabled and
      # certificate verification configured for https URLs.
      def http
        Net::HTTP.new(host, port).tap do |client|
          if https?
            client.use_ssl = true
            client.verify_mode = ssl_verify_mode
          end
        end
      end

      def https?
        uri.scheme == 'https'
      end

      def password
        opts[:password]
      end

      def port
        uri.port
      end

      # Verification is on unless the caller opts out; it was previously
      # hard-coded to VERIFY_NONE, which accepts any certificate.
      def ssl_verify_mode
        opts[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
      end

      # Parsed once and reused; `::URI` is the standard library module, since
      # a bare `URI` inside this class would refer to the class itself.
      def uri
        @uri ||= ::URI.parse(opts[:uri])
      end

      def username
        opts[:username]
      end

      # GET request for the windowed URL, with Basic auth when configured.
      def windowed_request(window)
        Net::HTTP::Get.new(windowed_request_uri(window)).tap do |request|
          request.basic_auth(username, password) if basic_auth?
        end
      end

      # The configured URL with ts1/ts2 appended, using '&' when the URL
      # already has a query string and '?' otherwise.
      def windowed_request_uri(window)
        base = uri.to_s
        separator = base.include?('?') ? '&' : '?'
        "#{base}#{separator}ts1=#{window.min.to_i}&ts2=#{window.max.to_i}"
      end
    end
  end
end
|