purview 1.0.0.alpha → 1.0.0.beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +0 -4
  3. data/Gemfile +10 -0
  4. data/README.md +2 -2
  5. data/TODO +10 -5
  6. data/lib/purview/columns/base.rb +2 -2
  7. data/lib/purview/columns.rb +1 -0
  8. data/lib/purview/connections/base.rb +17 -31
  9. data/lib/purview/connections/mysql.rb +2 -30
  10. data/lib/purview/connections/postgresql.rb +2 -18
  11. data/lib/purview/databases/base.rb +32 -31
  12. data/lib/purview/databases/mysql.rb +4 -16
  13. data/lib/purview/databases/postgresql.rb +4 -16
  14. data/lib/purview/databases.rb +1 -0
  15. data/lib/purview/dialects/base.rb +25 -0
  16. data/lib/purview/dialects/mysql.rb +25 -0
  17. data/lib/purview/dialects/postgresql.rb +25 -0
  18. data/lib/purview/dialects.rb +4 -0
  19. data/lib/purview/exceptions/{table.rb → base_table.rb} +1 -1
  20. data/lib/purview/exceptions/could_not_acquire_lock.rb +1 -1
  21. data/lib/purview/exceptions/lock_already_released.rb +1 -1
  22. data/lib/purview/exceptions/no_window.rb +1 -1
  23. data/lib/purview/exceptions.rb +2 -1
  24. data/lib/purview/loaders/base.rb +26 -2
  25. data/lib/purview/loaders/mysql.rb +4 -0
  26. data/lib/purview/loaders/postgresql.rb +4 -0
  27. data/lib/purview/loaders.rb +1 -0
  28. data/lib/purview/loggers.rb +1 -0
  29. data/lib/purview/mixins/connection.rb +13 -0
  30. data/lib/purview/mixins/helpers.rb +5 -1
  31. data/lib/purview/mixins/logger.rb +2 -2
  32. data/lib/purview/mixins.rb +1 -0
  33. data/lib/purview/parsers/base.rb +7 -11
  34. data/lib/purview/parsers/csv.rb +19 -3
  35. data/lib/purview/parsers/sql.rb +13 -0
  36. data/lib/purview/parsers.rb +2 -0
  37. data/lib/purview/pullers/base.rb +4 -0
  38. data/lib/purview/pullers/base_sql.rb +97 -0
  39. data/lib/purview/pullers/mysql.rb +15 -0
  40. data/lib/purview/pullers/postgresql.rb +15 -0
  41. data/lib/purview/pullers.rb +4 -0
  42. data/lib/purview/raw_connections/base.rb +118 -0
  43. data/lib/purview/raw_connections/jdbc/base.rb +65 -0
  44. data/lib/purview/raw_connections/jdbc/mysql.rb +19 -0
  45. data/lib/purview/raw_connections/jdbc/postgres.rb +19 -0
  46. data/lib/purview/raw_connections/jdbc.rb +4 -0
  47. data/lib/purview/raw_connections/mysql2.rb +35 -0
  48. data/lib/purview/raw_connections/pg.rb +35 -0
  49. data/lib/purview/raw_connections.rb +8 -0
  50. data/lib/purview/refinements/object.rb +4 -0
  51. data/lib/purview/refinements/string.rb +5 -0
  52. data/lib/purview/refinements.rb +1 -0
  53. data/lib/purview/structs/base.rb +12 -2
  54. data/lib/purview/structs/row.rb +7 -0
  55. data/lib/purview/structs.rb +2 -0
  56. data/lib/purview/tables/base.rb +1 -1
  57. data/lib/purview/types.rb +1 -0
  58. data/lib/purview/version.rb +1 -1
  59. data/lib/purview.rb +3 -3
  60. data/purview.gemspec +2 -2
  61. metadata +25 -32
@@ -3,6 +3,10 @@ module Purview
3
3
  class PostgreSQL < Base
4
4
  private
5
5
 
6
+ def dialect_type
7
+ Purview::Dialects::PostgreSQL
8
+ end
9
+
6
10
  def id_in_sql(temporary_table_name)
7
11
  'SELECT %s FROM %s' % [
8
12
  table.id_column.name,
@@ -1,3 +1,4 @@
1
1
  require 'purview/loaders/base'
2
+
2
3
  require 'purview/loaders/mysql'
3
4
  require 'purview/loaders/postgresql'
@@ -1,2 +1,3 @@
1
1
  require 'purview/loggers/base'
2
+
2
3
  require 'purview/loggers/console'
@@ -0,0 +1,13 @@
1
+ module Purview
2
+ module Mixins
3
+ module Connection
4
+ def connect
5
+ connection_type.new(connection_opts)
6
+ end
7
+
8
+ def with_new_connection
9
+ connection_type.with_new_connection(connection_opts) { |connection| yield connection }
10
+ end
11
+ end
12
+ end
13
+ end
@@ -5,10 +5,14 @@ module Purview
5
5
  value.to_s.strip.length.zero?
6
6
  end
7
7
 
8
- def coalesce(value, default)
8
+ def coalesced(value, default)
9
9
  value.nil? ? default : value
10
10
  end
11
11
 
12
+ def filter_nil_values(hash)
13
+ hash.reject { |_, value| value.nil? }
14
+ end
15
+
12
16
  def present?(value)
13
17
  !blank?(value)
14
18
  end
@@ -6,11 +6,11 @@ module Purview
6
6
  end
7
7
 
8
8
  def logger_opts
9
- opts[:logger] || {}
9
+ (defined?(opts) && opts[:logger]) || {}
10
10
  end
11
11
 
12
12
  def logger_type
13
- opts[:logger_type] || Purview::Loggers::Console
13
+ (defined?(opts) && opts[:logger_type]) || Purview::Loggers::Console
14
14
  end
15
15
 
16
16
  def with_context_logging(*args)
@@ -1,2 +1,3 @@
1
+ require 'purview/mixins/connection'
1
2
  require 'purview/mixins/helpers'
2
3
  require 'purview/mixins/logger'
@@ -10,7 +10,7 @@ module Purview
10
10
  end
11
11
 
12
12
  def validate(data)
13
- true
13
+ raise %{All "#{Base}(s)" must override the "validate" method}
14
14
  end
15
15
 
16
16
  private
@@ -19,16 +19,12 @@ module Purview
19
19
 
20
20
  attr_reader :opts
21
21
 
22
- def build_result(row)
23
- {}.tap do |result|
24
- row.each do |key, value|
25
- if column = table.columns_by_name[key]
26
- result[key] = column.parse(value)
27
- else
28
- logger.debug(%{Unexpected column: "#{key}" in data-set})
29
- end
30
- end
31
- end
22
+ def extract_headers(data)
23
+ raise %{All "#{Base}(s)" must override the "extract_headers" method}
24
+ end
25
+
26
+ def extract_rows(data)
27
+ raise %{All "#{Base}(s)" must override the "extract_rows" method}
32
28
  end
33
29
 
34
30
  def table
@@ -5,7 +5,7 @@ module Purview
5
5
  with_context_logging("`parse` for: #{table.name}") do
6
6
  [].tap do |results|
7
7
  headers = extract_headers(data)
8
- extract_rows(data) do |row|
8
+ extract_rows(data).each do |row|
9
9
  results << build_result(headers.zip(row))
10
10
  end
11
11
  end
@@ -14,7 +14,7 @@ module Purview
14
14
 
15
15
  def validate(data)
16
16
  with_context_logging("`validate` for: #{table.name}") do
17
- missing_columns = table.column_names - extract_headers(data)
17
+ missing_columns = missing_columns(data)
18
18
  raise 'Missing one or more columns: "%s"' % missing_columns.join('", "') \
19
19
  unless missing_columns.empty?
20
20
  true
@@ -23,6 +23,18 @@ module Purview
23
23
 
24
24
  private
25
25
 
26
+ def build_result(row)
27
+ {}.tap do |result|
28
+ row.each do |key, value|
29
+ if column = table.columns_by_name[key]
30
+ result[key] = column.parse(value)
31
+ else
32
+ logger.debug(%{Unexpected column: "#{key}" in data-set})
33
+ end
34
+ end
35
+ end
36
+ end
37
+
26
38
  def column_separator
27
39
  ','
28
40
  end
@@ -34,7 +46,11 @@ module Purview
34
46
 
35
47
  def extract_rows(data)
36
48
  rows = data.split(row_separator)[1..-1]
37
- rows.each { |row| yield parse_row(row) }
49
+ rows.map { |row| parse_row(row) }
50
+ end
51
+
52
+ def missing_columns(data)
53
+ table.column_names - extract_headers(data)
38
54
  end
39
55
 
40
56
  def parse_row(row)
@@ -0,0 +1,13 @@
1
+ module Purview
2
+ module Parsers
3
+ class SQL < Base
4
+ def parse(data)
5
+ data.rows
6
+ end
7
+
8
+ def validate(data)
9
+ true
10
+ end
11
+ end
12
+ end
13
+ end
@@ -1,3 +1,5 @@
1
1
  require 'purview/parsers/base'
2
+
2
3
  require 'purview/parsers/csv'
4
+ require 'purview/parsers/sql'
3
5
  require 'purview/parsers/tsv'
@@ -14,6 +14,10 @@ module Purview
14
14
  include Purview::Mixins::Logger
15
15
 
16
16
  attr_reader :opts
17
+
18
+ def table
19
+ opts[:table]
20
+ end
17
21
  end
18
22
  end
19
23
  end
@@ -0,0 +1,97 @@
1
+ module Purview
2
+ module Pullers
3
+ class BaseSQL < Base
4
+ def pull(window)
5
+ with_new_connection do |connection|
6
+ connection.execute(pull_sql(window))
7
+ end
8
+ end
9
+
10
+ private
11
+
12
+ include Purview::Mixins::Connection
13
+ include Purview::Mixins::Helpers
14
+ include Purview::Mixins::Logger
15
+
16
+ def column_names
17
+ table.column_names
18
+ end
19
+
20
+ def connection_opts
21
+ filter_nil_values(
22
+ :database => database_name,
23
+ :host => database_host,
24
+ :password => database_password,
25
+ :port => database_port,
26
+ :username => database_username
27
+ )
28
+ end
29
+
30
+ def connection_type
31
+ raise %{All "#{BaseSQL}(s)" must override the "connection_type" method}
32
+ end
33
+
34
+ def database_host
35
+ opts[:database_host]
36
+ end
37
+
38
+ def database_name
39
+ opts[:database_name]
40
+ end
41
+
42
+ def database_password
43
+ opts[:database_password]
44
+ end
45
+
46
+ def database_port
47
+ opts[:database_port]
48
+ end
49
+
50
+ def database_username
51
+ opts[:database_username]
52
+ end
53
+
54
+ def dialect
55
+ dialect_type.new
56
+ end
57
+
58
+ def dialect_type
59
+ raise %{All "#{BaseSQL}(s)" must override the "dialect_type" method}
60
+ end
61
+
62
+ def false_value
63
+ dialect.false_value
64
+ end
65
+
66
+ def null_value
67
+ dialect.null_value
68
+ end
69
+
70
+ def pull_sql(window)
71
+ 'SELECT %s FROM %s WHERE %s BETWEEN %s AND %s' % [
72
+ column_names.join(', '),
73
+ table_name,
74
+ table.updated_timestamp_column.name,
75
+ quoted(window.min),
76
+ quoted(window.max),
77
+ ]
78
+ end
79
+
80
+ def quoted(value)
81
+ dialect.quoted(value)
82
+ end
83
+
84
+ def sanitized(value)
85
+ dialect.sanitized(value)
86
+ end
87
+
88
+ def table_name
89
+ opts[:table_name]
90
+ end
91
+
92
+ def true_value
93
+ dialect.true_value
94
+ end
95
+ end
96
+ end
97
+ end
@@ -0,0 +1,15 @@
1
+ module Purview
2
+ module Pullers
3
+ class MySQL < BaseSQL
4
+ private
5
+
6
+ def connection_type
7
+ Purview::Connections::MySQL
8
+ end
9
+
10
+ def dialect_type
11
+ Purview::Dialects::MySQL
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ module Purview
2
+ module Pullers
3
+ class PostgreSQL < BaseSQL
4
+ private
5
+
6
+ def connection_type
7
+ Purview::Connections::PostgreSQL
8
+ end
9
+
10
+ def dialect_type
11
+ Purview::Dialects::PostgreSQL
12
+ end
13
+ end
14
+ end
15
+ end
@@ -1,2 +1,6 @@
1
1
  require 'purview/pullers/base'
2
+ require 'purview/pullers/base_sql'
3
+
4
+ require 'purview/pullers/mysql'
5
+ require 'purview/pullers/postgresql'
2
6
  require 'purview/pullers/uri'
@@ -0,0 +1,118 @@
1
+ module Purview
2
+ module RawConnections
3
+ class Base
4
+ def self.connect(opts)
5
+ new(opts)
6
+ end
7
+
8
+ def self.with_new_connection(opts)
9
+ yield connection = connect(opts)
10
+ ensure
11
+ connection.disconnect if connection
12
+ end
13
+
14
+ def initialize(opts)
15
+ @opts = opts
16
+ @raw_connection = new_connection
17
+ end
18
+
19
+ def disconnect
20
+ raw_connection.close
21
+ @raw_connection = nil
22
+ self
23
+ end
24
+
25
+ def execute(sql, opts={})
26
+ logger.debug("Executing: #{sql}")
27
+ result = execute_sql(sql, opts)
28
+ structify_result(result)
29
+ end
30
+
31
+ def with_transaction
32
+ execute_sql(BEGIN_TRANSACTION)
33
+ yield.tap { |result| execute_sql(COMMIT_TRANSACTION) }
34
+ rescue
35
+ execute_sql(ROLLBACK_TRANSACTION)
36
+ raise
37
+ end
38
+
39
+ private
40
+
41
+ include Purview::Mixins::Helpers
42
+ include Purview::Mixins::Logger
43
+
44
+ BEGIN_TRANSACTION = 'BEGIN'
45
+ COMMIT_TRANSACTION = 'COMMIT'
46
+ ROLLBACK_TRANSACTION = 'ROLLBACK'
47
+
48
+ attr_reader :opts, :raw_connection
49
+
50
+ def database
51
+ opts[:database]
52
+ end
53
+
54
+ def delete?(sql)
55
+ !!(sql.to_s =~ /\ADELETE/i)
56
+ end
57
+
58
+ def execute_sql(sql, opts={})
59
+ raise %{All "#{Base}(s)" must override the "execute_sql" method}
60
+ end
61
+
62
+ def extract_rows(result)
63
+ raise %{All "#{Base}(s)" must override the "extract_rows" method}
64
+ end
65
+
66
+ def extract_rows_affected(result)
67
+ raise %{All "#{Base}(s)" must override the "extract_rows_affected" method}
68
+ end
69
+
70
+ def host
71
+ opts[:host]
72
+ end
73
+
74
+ def insert?(sql)
75
+ !!(sql.to_s =~ /\AINSERT/i)
76
+ end
77
+
78
+ def new_connection
79
+ raise %{All "#{Base}(s)" must override the "new_connection" method}
80
+ end
81
+
82
+ def password
83
+ opts[:password]
84
+ end
85
+
86
+ def port
87
+ opts[:port]
88
+ end
89
+
90
+ def select?(sql)
91
+ !!(sql.to_s =~ /\ASELECT/i)
92
+ end
93
+
94
+ def structify_result(result)
95
+ Purview::Structs::Result.new(
96
+ :rows => structify_rows(extract_rows(result) || []),
97
+ :rows_affected => extract_rows_affected(result)
98
+ )
99
+ end
100
+
101
+ def structify_row(row)
102
+ Purview::Structs::Row.new(row)
103
+ end
104
+
105
+ def structify_rows(rows)
106
+ rows.map { |row| structify_row(row) }
107
+ end
108
+
109
+ def update?(sql)
110
+ !!(sql.to_s =~ /\AUPDATE/i)
111
+ end
112
+
113
+ def username
114
+ opts[:username] || Etc.getlogin
115
+ end
116
+ end
117
+ end
118
+ end
@@ -0,0 +1,65 @@
1
+ module Purview
2
+ module RawConnections
3
+ module JDBC
4
+ class Base < Purview::RawConnections::Base
5
+ private
6
+
7
+ attr_reader :last_sql, :last_statement
8
+
9
+ def delete_or_insert_or_update?(sql)
10
+ delete?(sql) || insert?(sql) || update?(sql)
11
+ end
12
+
13
+ def engine
14
+ raise %{All "#{Base}(s)" must override the "engine" method}
15
+ end
16
+
17
+ def execute_sql(sql, opts={})
18
+ @last_sql = sql
19
+ @last_statement = statement = raw_connection.createStatement
20
+ if select?(sql)
21
+ statement.executeQuery(sql)
22
+ elsif delete_or_insert_or_update?(sql)
23
+ statement.executeUpdate(sql)
24
+ nil
25
+ else
26
+ statement.execute(sql)
27
+ nil
28
+ end
29
+ end
30
+
31
+ def extract_rows(result)
32
+ return unless result
33
+ metadata = result.getMetaData
34
+ column_count = metadata.getColumnCount
35
+ [].tap do |rows|
36
+ while result.next
37
+ rows << {}.tap do |row|
38
+ (1..column_count).each do |index|
39
+ column_name = metadata.getColumnName(index)
40
+ row[column_name] = result.getString(column_name)
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+
47
+ def extract_rows_affected(result)
48
+ delete_or_insert_or_update?(last_sql) ? last_statement.getUpdateCount : 0
49
+ end
50
+
51
+ def new_connection
52
+ java.sql.DriverManager.getConnection(
53
+ url,
54
+ username,
55
+ password
56
+ )
57
+ end
58
+
59
+ def url
60
+ "jdbc:#{engine}://#{host}#{port && ":#{port}"}/#{database}"
61
+ end
62
+ end
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,19 @@
1
+ if defined?(JRUBY_VERSION) && (require 'jdbc/mysql' rescue nil)
2
+ Jdbc::MySQL.load_driver
3
+
4
+ module Purview
5
+ module RawConnections
6
+ module JDBC
7
+ class MySQL < Base
8
+ private
9
+
10
+ def engine
11
+ 'mysql'
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
17
+
18
+ Purview::RawConnections::MySQL = Purview::RawConnections::JDBC::MySQL
19
+ end
@@ -0,0 +1,19 @@
1
+ if defined?(JRUBY_VERSION) && (require 'jdbc/postgres' rescue nil)
2
+ Jdbc::Postgres.load_driver
3
+
4
+ module Purview
5
+ module RawConnections
6
+ module JDBC
7
+ class Postgres < Base
8
+ private
9
+
10
+ def engine
11
+ 'postgresql'
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
17
+
18
+ Purview::RawConnections::PostgreSQL = Purview::RawConnections::JDBC::Postgres
19
+ end
@@ -0,0 +1,4 @@
1
+ require 'purview/raw_connections/jdbc/base'
2
+
3
+ require 'purview/raw_connections/jdbc/mysql'
4
+ require 'purview/raw_connections/jdbc/postgres'
@@ -0,0 +1,35 @@
1
+ if !defined?(JRUBY_VERSION) && (require 'mysql2' rescue nil)
2
+ module Purview
3
+ module RawConnections
4
+ class Mysql2 < Base
5
+ private
6
+
7
+ def execute_sql(sql, opts={})
8
+ raw_connection.query(sql, opts.merge(:cast => false))
9
+ end
10
+
11
+ def extract_rows(result)
12
+ result && result.to_a
13
+ end
14
+
15
+ def extract_rows_affected(result)
16
+ raw_connection.affected_rows
17
+ end
18
+
19
+ def new_connection
20
+ ::Mysql2::Client.new(
21
+ filter_nil_values(
22
+ :database => database,
23
+ :host => host,
24
+ :password => password,
25
+ :port => port,
26
+ :username => username
27
+ )
28
+ )
29
+ end
30
+ end
31
+ end
32
+ end
33
+
34
+ Purview::RawConnections::MySQL = Purview::RawConnections::Mysql2
35
+ end
@@ -0,0 +1,35 @@
1
+ if !defined?(JRUBY_VERSION) && (require 'pg' rescue nil)
2
+ module Purview
3
+ module RawConnections
4
+ class PG < Base
5
+ private
6
+
7
+ def execute_sql(sql, opts={})
8
+ raw_connection.exec(sql)
9
+ end
10
+
11
+ def extract_rows(result)
12
+ result && result.to_a
13
+ end
14
+
15
+ def extract_rows_affected(result)
16
+ result && result.cmd_tuples
17
+ end
18
+
19
+ def new_connection
20
+ ::PG.connect(
21
+ filter_nil_values(
22
+ :dbname => database,
23
+ :host => host,
24
+ :password => password,
25
+ :port => port,
26
+ :user => username
27
+ )
28
+ )
29
+ end
30
+ end
31
+ end
32
+ end
33
+
34
+ Purview::RawConnections::PostgreSQL = Purview::RawConnections::PG
35
+ end
@@ -0,0 +1,8 @@
1
+ require 'purview/raw_connections/base'
2
+ require 'purview/raw_connections/jdbc/base'
3
+
4
+ require 'purview/raw_connections/jdbc/mysql'
5
+ require 'purview/raw_connections/jdbc/postgres'
6
+
7
+ require 'purview/raw_connections/mysql2'
8
+ require 'purview/raw_connections/pg'
@@ -2,4 +2,8 @@ class Object
2
2
  def quoted
3
3
  "'#{self}'"
4
4
  end
5
+
6
+ def sanitized
7
+ self.to_s.sanitized
8
+ end
5
9
  end
@@ -0,0 +1,5 @@
1
+ class String
2
+ def sanitized
3
+ self.gsub("'", "''")
4
+ end
5
+ end
@@ -1,2 +1,3 @@
1
1
  require 'purview/refinements/object'
2
+ require 'purview/refinements/string'
2
3
  require 'purview/refinements/time'
@@ -1,8 +1,18 @@
1
1
  module Purview
2
2
  module Structs
3
3
  class Base < OpenStruct
4
- def method_missing(method_name, *args, &block)
5
- raise NoMethodError if args.size.zero?
4
+ def [](key)
5
+ key = key.to_sym unless key.is_a?(Symbol)
6
+ raise NoMethodError unless respond_to?(key)
7
+ send(key)
8
+ end
9
+
10
+ def []=(key, value)
11
+ send("#{key}=", value)
12
+ end
13
+
14
+ def method_missing(method, *args, &block)
15
+ raise NoMethodError if args.empty?
6
16
  super
7
17
  end
8
18
  end