purview 1.0.0.alpha
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +33 -0
- data/.travis.yml +18 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +143 -0
- data/Rakefile +11 -0
- data/TODO +81 -0
- data/lib/purview/columns/base.rb +65 -0
- data/lib/purview/columns/boolean.rb +11 -0
- data/lib/purview/columns/created_timestamp.rb +11 -0
- data/lib/purview/columns/date.rb +11 -0
- data/lib/purview/columns/float.rb +11 -0
- data/lib/purview/columns/id.rb +11 -0
- data/lib/purview/columns/integer.rb +11 -0
- data/lib/purview/columns/money.rb +11 -0
- data/lib/purview/columns/string.rb +11 -0
- data/lib/purview/columns/text.rb +11 -0
- data/lib/purview/columns/time.rb +11 -0
- data/lib/purview/columns/timestamp.rb +11 -0
- data/lib/purview/columns/updated_timestamp.rb +11 -0
- data/lib/purview/columns/uuid.rb +11 -0
- data/lib/purview/columns.rb +14 -0
- data/lib/purview/connections/base.rb +55 -0
- data/lib/purview/connections/mysql.rb +39 -0
- data/lib/purview/connections/postgresql.rb +27 -0
- data/lib/purview/connections.rb +3 -0
- data/lib/purview/databases/base.rb +559 -0
- data/lib/purview/databases/mysql.rb +207 -0
- data/lib/purview/databases/postgresql.rb +210 -0
- data/lib/purview/databases.rb +3 -0
- data/lib/purview/exceptions/base.rb +5 -0
- data/lib/purview/exceptions/could_not_acquire_lock.rb +9 -0
- data/lib/purview/exceptions/lock_already_released.rb +9 -0
- data/lib/purview/exceptions/no_table.rb +9 -0
- data/lib/purview/exceptions/no_window.rb +9 -0
- data/lib/purview/exceptions/rows_outside_window.rb +18 -0
- data/lib/purview/exceptions/table.rb +13 -0
- data/lib/purview/exceptions.rb +7 -0
- data/lib/purview/loaders/base.rb +154 -0
- data/lib/purview/loaders/mysql.rb +81 -0
- data/lib/purview/loaders/postgresql.rb +81 -0
- data/lib/purview/loaders.rb +3 -0
- data/lib/purview/loggers/base.rb +99 -0
- data/lib/purview/loggers/console.rb +11 -0
- data/lib/purview/loggers.rb +2 -0
- data/lib/purview/mixins/helpers.rb +21 -0
- data/lib/purview/mixins/logger.rb +21 -0
- data/lib/purview/mixins.rb +2 -0
- data/lib/purview/parsers/base.rb +39 -0
- data/lib/purview/parsers/csv.rb +49 -0
- data/lib/purview/parsers/tsv.rb +11 -0
- data/lib/purview/parsers.rb +3 -0
- data/lib/purview/pullers/base.rb +19 -0
- data/lib/purview/pullers/uri.rb +66 -0
- data/lib/purview/pullers.rb +2 -0
- data/lib/purview/refinements/object.rb +5 -0
- data/lib/purview/refinements/time.rb +5 -0
- data/lib/purview/refinements.rb +2 -0
- data/lib/purview/structs/base.rb +10 -0
- data/lib/purview/structs/result.rb +7 -0
- data/lib/purview/structs/window.rb +7 -0
- data/lib/purview/structs.rb +3 -0
- data/lib/purview/tables/base.rb +140 -0
- data/lib/purview/tables/raw.rb +13 -0
- data/lib/purview/tables.rb +2 -0
- data/lib/purview/types/base.rb +9 -0
- data/lib/purview/types/boolean.rb +9 -0
- data/lib/purview/types/date.rb +9 -0
- data/lib/purview/types/float.rb +9 -0
- data/lib/purview/types/integer.rb +9 -0
- data/lib/purview/types/money.rb +9 -0
- data/lib/purview/types/string.rb +9 -0
- data/lib/purview/types/text.rb +9 -0
- data/lib/purview/types/time.rb +9 -0
- data/lib/purview/types/timestamp.rb +9 -0
- data/lib/purview/types/uuid.rb +9 -0
- data/lib/purview/types.rb +11 -0
- data/lib/purview/version.rb +3 -0
- data/lib/purview.rb +27 -0
- data/purview.gemspec +29 -0
- data/spec/spec_helper.rb +5 -0
- metadata +210 -0
@@ -0,0 +1,81 @@
|
|
1
|
+
module Purview
  module Loaders
    # SQL statement builders for loading into a MySQL database.
    # Every method below returns a SQL string; none of them executes it.
    class MySQL < Base
      private

      # SELECT of the id column from the given temporary table
      # (embedded as a sub-select by #table_delete_sql).
      def id_in_sql(temporary_table_name)
        id_column_name = table.id_column.name
        'SELECT %s FROM %s' % [id_column_name, temporary_table_name]
      end

      # Predicate matching rows whose updated-timestamp column lies between
      # window.min and window.max (SQL BETWEEN, so both bounds are included).
      def in_window_sql(window)
        timestamp_column_name = table.updated_timestamp_column.name
        lower_bound = quoted(window.min)
        upper_bound = quoted(window.max)
        '%s BETWEEN %s AND %s' % [timestamp_column_name, lower_bound, upper_bound]
      end

      # Negation of #in_window_sql: matches rows outside the window.
      def not_in_window_sql(window)
        timestamp_column_name = table.updated_timestamp_column.name
        lower_bound = quoted(window.min)
        upper_bound = quoted(window.max)
        '%s NOT BETWEEN %s AND %s' % [timestamp_column_name, lower_bound, upper_bound]
      end

      # DELETE of target-table rows that fall inside the window but whose id
      # is not present in the temporary table.
      def table_delete_sql(window, temporary_table_name)
        target_table_name = table.name
        window_predicate = in_window_sql(window)
        id_column_name = table.id_column.name
        temporary_ids = id_in_sql(temporary_table_name)
        'DELETE FROM %s WHERE %s AND %s NOT IN (%s)' % [
          target_table_name,
          window_predicate,
          id_column_name,
          temporary_ids,
        ]
      end

      # INSERT of temporary-table rows whose id does not yet exist in the
      # target table. The window argument is not used by this statement.
      def table_insert_sql(window, temporary_table_name)
        target_table_name = table.name
        column_list = table.column_names.join(', ')
        id_column_name = table.id_column.name
        'INSERT INTO %s (%s) SELECT %s FROM %s t1 WHERE NOT EXISTS (SELECT 1 FROM %s t2 WHERE t1.%s = t2.%s)' % [
          target_table_name,
          column_list,
          column_list,
          temporary_table_name,
          target_table_name,
          id_column_name,
          id_column_name,
        ]
      end

      # UPDATE (MySQL join syntax) copying every column from the temporary
      # table onto the target row with the same id. The window is not used.
      def table_update_sql(window, temporary_table_name)
        target_table_name = table.name
        id_column_name = table.id_column.name
        assignments = table.column_names.map do |column_name|
          "t1.#{column_name} = t2.#{column_name}"
        end
        'UPDATE %s t1 JOIN %s t2 ON t1.%s = t2.%s SET %s' % [
          target_table_name,
          temporary_table_name,
          id_column_name,
          id_column_name,
          assignments.join(', '),
        ]
      end

      # Multi-row INSERT into the temporary table; each row is rendered by
      # row_values (defined outside this class) and wrapped in parentheses.
      def temporary_table_insert_sql(temporary_table_name, rows)
        column_list = table.column_names.join(', ')
        value_tuples = rows.map { |row| "(#{row_values(row)})" }
        'INSERT INTO %s (%s) VALUES %s' % [
          temporary_table_name,
          column_list,
          value_tuples.join(', '),
        ]
      end

      # Inherited options with :create_indices forced to false.
      def temporary_table_opts
        inherited_opts = super
        inherited_opts.merge(:create_indices => false)
      end

      # COUNT of temporary-table rows that fall outside the window, aliased
      # as count_column_name. The rows argument is not used.
      def temporary_table_verify_sql(temporary_table_name, rows, window)
        count_alias = count_column_name
        outside_window_predicate = not_in_window_sql(window)
        'SELECT COUNT(1) %s FROM %s WHERE %s' % [
          count_alias,
          temporary_table_name,
          outside_window_predicate,
        ]
      end
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
module Purview
  module Loaders
    # SQL statement builders for loading into a PostgreSQL database.
    # Every method below returns a SQL string; none of them executes it.
    class PostgreSQL < Base
      private

      # SELECT of the id column from the given temporary table
      # (embedded as a sub-select by #table_delete_sql).
      def id_in_sql(temporary_table_name)
        id_column_name = table.id_column.name
        'SELECT %s FROM %s' % [id_column_name, temporary_table_name]
      end

      # Predicate matching rows whose updated-timestamp column lies between
      # window.min and window.max (SQL BETWEEN, so both bounds are included).
      def in_window_sql(window)
        timestamp_column_name = table.updated_timestamp_column.name
        lower_bound = quoted(window.min)
        upper_bound = quoted(window.max)
        '%s BETWEEN %s AND %s' % [timestamp_column_name, lower_bound, upper_bound]
      end

      # Negation of #in_window_sql: matches rows outside the window.
      def not_in_window_sql(window)
        timestamp_column_name = table.updated_timestamp_column.name
        lower_bound = quoted(window.min)
        upper_bound = quoted(window.max)
        '%s NOT BETWEEN %s AND %s' % [timestamp_column_name, lower_bound, upper_bound]
      end

      # DELETE of target-table rows that fall inside the window but whose id
      # is not present in the temporary table.
      def table_delete_sql(window, temporary_table_name)
        target_table_name = table.name
        window_predicate = in_window_sql(window)
        id_column_name = table.id_column.name
        temporary_ids = id_in_sql(temporary_table_name)
        'DELETE FROM %s WHERE %s AND %s NOT IN (%s)' % [
          target_table_name,
          window_predicate,
          id_column_name,
          temporary_ids,
        ]
      end

      # INSERT of temporary-table rows whose id does not yet exist in the
      # target table. The window argument is not used by this statement.
      def table_insert_sql(window, temporary_table_name)
        target_table_name = table.name
        column_list = table.column_names.join(', ')
        id_column_name = table.id_column.name
        'INSERT INTO %s (%s) SELECT %s FROM %s t1 WHERE NOT EXISTS (SELECT 1 FROM %s t2 WHERE t1.%s = t2.%s)' % [
          target_table_name,
          column_list,
          column_list,
          temporary_table_name,
          target_table_name,
          id_column_name,
          id_column_name,
        ]
      end

      # UPDATE ... FROM (PostgreSQL syntax) copying every column from the
      # temporary table onto the target row with the same id. The window is
      # not used.
      def table_update_sql(window, temporary_table_name)
        target_table_name = table.name
        assignments = table.column_names.map do |column_name|
          "#{column_name} = t2.#{column_name}"
        end
        id_column_name = table.id_column.name
        'UPDATE %s t1 SET %s FROM %s t2 WHERE t1.%s = t2.%s' % [
          target_table_name,
          assignments.join(', '),
          temporary_table_name,
          id_column_name,
          id_column_name,
        ]
      end

      # Multi-row INSERT into the temporary table; each row is rendered by
      # row_values (defined outside this class) and wrapped in parentheses.
      def temporary_table_insert_sql(temporary_table_name, rows)
        column_list = table.column_names.join(', ')
        value_tuples = rows.map { |row| "(#{row_values(row)})" }
        'INSERT INTO %s (%s) VALUES %s' % [
          temporary_table_name,
          column_list,
          value_tuples.join(', '),
        ]
      end

      # Inherited options with :create_indices forced to false.
      def temporary_table_opts
        inherited_opts = super
        inherited_opts.merge(:create_indices => false)
      end

      # COUNT of temporary-table rows that fall outside the window, aliased
      # as count_column_name. The rows argument is not used.
      def temporary_table_verify_sql(temporary_table_name, rows, window)
        count_alias = count_column_name
        outside_window_predicate = not_in_window_sql(window)
        'SELECT COUNT(1) %s FROM %s WHERE %s' % [
          count_alias,
          temporary_table_name,
          outside_window_predicate,
        ]
      end
    end
  end
end
|
@@ -0,0 +1,99 @@
|
|
1
|
+
module Purview
  module Loggers
    # Level-gated logger that formats a message and writes it to a stream.
    #
    # Options (merged over #default_opts):
    #   :debug, :error, :info - truthy enables the corresponding level
    #   :stream               - object responding to #puts; when absent the
    #                           logger falls back to $stdout
    class Base
      DEBUG_LEVEL = 'DEBUG'
      ERROR_LEVEL = 'ERROR'
      INFO_LEVEL = 'INFO'

      def initialize(opts={})
        @opts = default_opts.merge(opts)
      end

      # Each level method takes (message) or (message, exception) and writes
      # only when its level is enabled.
      def debug(*args)
        log(DEBUG_LEVEL, *args) if debug?
      end

      def error(*args)
        log(ERROR_LEVEL, *args) if error?
      end

      def info(*args)
        log(INFO_LEVEL, *args) if info?
      end

      # Logs a starting message at debug level, yields, logs a finished
      # message, and returns the block's value.
      #
      # With one argument the messages are "Starting <arg>" / "Finished <arg>";
      # with two, the first is the starting message and the second the
      # finished message. Any other count raises RuntimeError. The finished
      # message is not logged when the block raises.
      def with_context_logging(*args)
        debug(build_starting_message(*args))
        result = yield
        debug(build_finished_message(*args))
        result
      end

      private

      attr_reader :opts

      def build_finished_message(*args)
        case args.length
        when 1 then "Finished #{args[0]}"
        when 2 then args[-1]
        else
          # Still a RuntimeError (as the original bare `raise` produced), but
          # with a message that says what went wrong.
          raise "with_context_logging expects 1 or 2 arguments (got #{args.length})"
        end
      end

      # Renders the log line; an exception placeholder is appended to the
      # template only when a second (truthy) argument was supplied.
      def build_message(level, *args)
        message, exception = args[0..1]
        message_template(!!exception) % {
          :exception => format_exception(exception),
          :level => level,
          :message => message,
          :process_id => Process.pid,
          :timestamp => Time.now.strftime('%Y-%m-%d %H:%M:%S.%L %z'),
        }
      end

      def build_starting_message(*args)
        case args.length
        when 1 then "Starting #{args[0]}"
        when 2 then args[0]
        else
          raise "with_context_logging expects 1 or 2 arguments (got #{args.length})"
        end
      end

      def debug?
        !!opts[:debug]
      end

      def default_opts
        {
          :debug => true,
          :error => true,
          :info => false,
        }
      end

      def error?
        !!opts[:error]
      end

      # Renders the backtrace as one "\tfrom <line>" entry per frame.
      def format_exception(exception)
        return unless exception
        # Exception#backtrace is nil for an exception that was never raised;
        # Array() turns that into an empty list instead of crashing on nil.map.
        Array(exception.backtrace).map { |line| "\tfrom #{line}" }.join("\n")
      end

      def info?
        !!opts[:info]
      end

      def log(level, *args)
        stream.puts build_message(level, *args)
      end

      def message_template(exception)
        template = '%{timestamp} %{level} (%{process_id}) %{message}'
        exception ? "#{template}:\n%{exception}" : template
      end

      # Falls back to $stdout so a logger built without :stream can still
      # log instead of failing on nil.puts.
      def stream
        opts[:stream] || $stdout
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Purview
  module Mixins
    # Small value-inspection helpers meant to be mixed into other classes.
    module Helpers
      # True when the value's string form is empty once surrounding
      # whitespace is stripped (so nil and "  " both count as blank).
      def blank?(value)
        value.to_s.strip.empty?
      end

      # Returns default only when value is nil; false is passed through.
      def coalesce(value, default)
        return default if value.nil?
        value
      end

      # Inverse of #blank?.
      def present?(value)
        !blank?(value)
      end

      # True when the value converts to the integer 0. Conversion uses
      # Kernel#Integer, so input it cannot convert raises.
      def zero?(value)
        Integer(value) == 0
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Purview
  module Mixins
    # Gives the including object a lazily built logger. The host must
    # provide an `opts` method returning something indexable by symbol.
    module Logger
      # The logger instance, built on first use and then reused.
      def logger
        @logger ||= begin
          logger_class = logger_type
          logger_class.new(logger_opts)
        end
      end

      # Options handed to the logger's constructor: opts[:logger], or an
      # empty hash when that is not set.
      def logger_opts
        configured = opts[:logger]
        configured || {}
      end

      # Class to instantiate: opts[:logger_type], or the console logger
      # when that is not set.
      def logger_type
        configured = opts[:logger_type]
        configured || Purview::Loggers::Console
      end

      # Delegates to the logger's with_context_logging, running the
      # caller's block inside it.
      def with_context_logging(*args)
        logger.with_context_logging(*args) do
          yield
        end
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Purview
  module Parsers
    # Base class for parsers. Holds the options hash, exposes the table from
    # opts[:table], and converts a parsed row into a hash of column values.
    class Base
      include Purview::Mixins::Logger

      def initialize(opts={})
        @opts = opts
      end

      # Always raises here; concrete parsers are expected to override it.
      def parse(data)
        raise %{All "#{Base}(s)" must override the "parse" method}
      end

      # Accepts any data by default.
      def validate(data)
        true
      end

      private

      attr_reader :opts

      # Builds { key => parsed value } from an enumerable of key/value
      # pairs. Keys without a matching table column are left out of the
      # result and reported at debug level.
      def build_result(row)
        result = {}
        row.each do |key, value|
          column = table.columns_by_name[key]
          if column
            result[key] = column.parse(value)
          else
            logger.debug(%{Unexpected column: "#{key}" in data-set})
          end
        end
        result
      end

      def table
        opts[:table]
      end
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
module Purview
  module Parsers
    # Parser for delimiter-separated text whose first line is a header row.
    # The payload is split on #row_separator and each line is parsed on its
    # own with the standard CSV library using #column_separator.
    class CSV < Base
      # Returns an array with one result hash (see build_result) per data
      # line. An empty payload yields an empty array.
      def parse(data)
        with_context_logging("`parse` for: #{table.name}") do
          headers = extract_headers(data)
          results = []
          extract_rows(data) do |row|
            results << build_result(headers.zip(row))
          end
          results
        end
      end

      # Returns true when every table column appears in the header row;
      # raises RuntimeError naming the missing columns otherwise. An empty
      # payload has no headers, so it reports every column as missing.
      def validate(data)
        with_context_logging("`validate` for: #{table.name}") do
          missing_columns = table.column_names - extract_headers(data)
          unless missing_columns.empty?
            raise 'Missing one or more columns: "%s"' % missing_columns.join('", "')
          end
          true
        end
      end

      private

      def column_separator
        ','
      end

      # Header names as symbols; an empty array when the payload is empty
      # or its first line is blank.
      def extract_headers(data)
        header_row = data.split(row_separator).first
        # An empty payload has no first line at all.
        return [] if header_row.nil?
        # parse_row returns nil for a blank line.
        (parse_row(header_row) || []).map(&:to_sym)
      end

      # Yields each parsed data line (everything after the header line).
      def extract_rows(data)
        # drop(1) returns [] for an empty payload, where [1..-1] returned nil.
        rows = data.split(row_separator).drop(1)
        rows.each { |row| yield parse_row(row) }
      end

      def parse_row(row)
        ::CSV.parse(row, :col_sep => column_separator).first
      end

      def row_separator
        $/
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Purview
  module Pullers
    # Base class for pullers. Stores the options hash and mixes in the
    # logger helpers; #pull is left for concrete pullers to supply.
    class Base
      include Purview::Mixins::Logger

      def initialize(opts={})
        @opts = opts
      end

      # Always raises here; concrete pullers are expected to override it.
      def pull(window)
        raise %{All "#{Base}(s)" must override the "pull" method}
      end

      private

      attr_reader :opts
    end
  end
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
module Purview
  module Pullers
    # Pulls data with an HTTP(S) GET against opts[:uri], appending the
    # window bounds as `ts1` / `ts2` query parameters (integer form of
    # window.min / window.max).
    #
    # Options read here:
    #   :uri              - endpoint to request
    #   :username         - with :password, enables HTTP basic auth
    #   :password
    #   :ssl_verify_mode  - OpenSSL verify mode for https endpoints; defaults
    #                       to OpenSSL::SSL::VERIFY_PEER. Pass
    #                       OpenSSL::SSL::VERIFY_NONE to skip certificate
    #                       verification (e.g. for self-signed certificates).
    class URI < Base
      # Performs the request for the given window and returns the response
      # body.
      # NOTE(review): the response status is not checked, so an error page
      # body is returned to the caller like any other payload.
      def pull(window)
        request = windowed_request(window)
        with_context_logging("`pull` from: #{request.path}") do
          http.request(request).body
        end
      end

      private

      def basic_auth?
        username && password
      end

      def host
        uri.host
      end

      # A new Net::HTTP connection object for the endpoint, with TLS
      # enabled for https URIs.
      def http
        Net::HTTP.new(host, port).tap do |connection|
          if https?
            connection.use_ssl = true
            # Certificates are verified by default: this puller may send
            # basic-auth credentials, and an unverified connection would
            # expose them to anyone able to intercept traffic.
            connection.verify_mode = ssl_verify_mode
          end
        end
      end

      def https?
        uri.scheme == 'https'
      end

      def password
        opts[:password]
      end

      def port
        uri.port
      end

      def ssl_verify_mode
        opts[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
      end

      def uri
        ::URI.parse(opts[:uri])
      end

      def username
        opts[:username]
      end

      # GET request for the windowed URI, with basic auth applied when
      # both username and password are configured.
      def windowed_request(window)
        Net::HTTP::Get.new(windowed_request_uri(window)).tap do |request|
          if basic_auth?
            request.basic_auth(username, password)
          end
        end
      end

      # The configured URI as a string with ts1/ts2 appended, joined with
      # '&' when the URI already contains a '?' and with '?' otherwise.
      def windowed_request_uri(window)
        uri.to_s.tap do |request_uri|
          request_uri << (request_uri.include?('?') ? '&' : '?')
          request_uri << 'ts1=%s&ts2=%s' % [window.min.to_i, window.max.to_i]
        end
      end
    end
  end
end
|