purview 1.0.0.alpha → 1.0.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +0 -4
  3. data/Gemfile +10 -0
  4. data/README.md +2 -2
  5. data/TODO +10 -5
  6. data/lib/purview/columns/base.rb +2 -2
  7. data/lib/purview/columns.rb +1 -0
  8. data/lib/purview/connections/base.rb +17 -31
  9. data/lib/purview/connections/mysql.rb +2 -30
  10. data/lib/purview/connections/postgresql.rb +2 -18
  11. data/lib/purview/databases/base.rb +32 -31
  12. data/lib/purview/databases/mysql.rb +4 -16
  13. data/lib/purview/databases/postgresql.rb +4 -16
  14. data/lib/purview/databases.rb +1 -0
  15. data/lib/purview/dialects/base.rb +25 -0
  16. data/lib/purview/dialects/mysql.rb +25 -0
  17. data/lib/purview/dialects/postgresql.rb +25 -0
  18. data/lib/purview/dialects.rb +4 -0
  19. data/lib/purview/exceptions/{table.rb → base_table.rb} +1 -1
  20. data/lib/purview/exceptions/could_not_acquire_lock.rb +1 -1
  21. data/lib/purview/exceptions/lock_already_released.rb +1 -1
  22. data/lib/purview/exceptions/no_window.rb +1 -1
  23. data/lib/purview/exceptions.rb +2 -1
  24. data/lib/purview/loaders/base.rb +26 -2
  25. data/lib/purview/loaders/mysql.rb +4 -0
  26. data/lib/purview/loaders/postgresql.rb +4 -0
  27. data/lib/purview/loaders.rb +1 -0
  28. data/lib/purview/loggers.rb +1 -0
  29. data/lib/purview/mixins/connection.rb +13 -0
  30. data/lib/purview/mixins/helpers.rb +5 -1
  31. data/lib/purview/mixins/logger.rb +2 -2
  32. data/lib/purview/mixins.rb +1 -0
  33. data/lib/purview/parsers/base.rb +7 -11
  34. data/lib/purview/parsers/csv.rb +19 -3
  35. data/lib/purview/parsers/sql.rb +13 -0
  36. data/lib/purview/parsers.rb +2 -0
  37. data/lib/purview/pullers/base.rb +4 -0
  38. data/lib/purview/pullers/base_sql.rb +97 -0
  39. data/lib/purview/pullers/mysql.rb +15 -0
  40. data/lib/purview/pullers/postgresql.rb +15 -0
  41. data/lib/purview/pullers.rb +4 -0
  42. data/lib/purview/raw_connections/base.rb +118 -0
  43. data/lib/purview/raw_connections/jdbc/base.rb +65 -0
  44. data/lib/purview/raw_connections/jdbc/mysql.rb +19 -0
  45. data/lib/purview/raw_connections/jdbc/postgres.rb +19 -0
  46. data/lib/purview/raw_connections/jdbc.rb +4 -0
  47. data/lib/purview/raw_connections/mysql2.rb +35 -0
  48. data/lib/purview/raw_connections/pg.rb +35 -0
  49. data/lib/purview/raw_connections.rb +8 -0
  50. data/lib/purview/refinements/object.rb +4 -0
  51. data/lib/purview/refinements/string.rb +5 -0
  52. data/lib/purview/refinements.rb +1 -0
  53. data/lib/purview/structs/base.rb +12 -2
  54. data/lib/purview/structs/row.rb +7 -0
  55. data/lib/purview/structs.rb +2 -0
  56. data/lib/purview/tables/base.rb +1 -1
  57. data/lib/purview/types.rb +1 -0
  58. data/lib/purview/version.rb +1 -1
  59. data/lib/purview.rb +3 -3
  60. data/purview.gemspec +2 -2
  61. metadata +25 -32
@@ -3,6 +3,10 @@ module Purview
3
3
  class PostgreSQL < Base
4
4
  private
5
5
 
6
+ def dialect_type
7
+ Purview::Dialects::PostgreSQL
8
+ end
9
+
6
10
  def id_in_sql(temporary_table_name)
7
11
  'SELECT %s FROM %s' % [
8
12
  table.id_column.name,
@@ -1,3 +1,4 @@
1
1
  require 'purview/loaders/base'
2
+
2
3
  require 'purview/loaders/mysql'
3
4
  require 'purview/loaders/postgresql'
@@ -1,2 +1,3 @@
1
1
  require 'purview/loggers/base'
2
+
2
3
  require 'purview/loggers/console'
@@ -0,0 +1,13 @@
1
module Purview
  module Mixins
    # Connection helpers for classes that know how to reach a database.
    #
    # The including class must provide:
    # * +connection_type+ - a class responding to +new(opts)+ and
    #   +with_new_connection(opts) { |connection| ... }+
    # * +connection_opts+ - the options handed to +connection_type+
    module Connection
      # Builds a new connection from +connection_opts+.
      #
      # Returns whatever +connection_type.new+ returns.
      def connect
        connection_type.new(connection_opts)
      end

      # Delegates to +connection_type.with_new_connection+, yielding the
      # connection it provides to the caller's block.
      #
      # Returns whatever +connection_type.with_new_connection+ returns.
      def with_new_connection
        connection_type.with_new_connection(connection_opts) do |connection|
          yield connection
        end
      end
    end
  end
end
@@ -5,10 +5,14 @@ module Purview
5
5
  value.to_s.strip.length.zero?
6
6
  end
7
7
 
8
- def coalesce(value, default)
8
+ def coalesced(value, default)
9
9
  value.nil? ? default : value
10
10
  end
11
11
 
12
+ def filter_nil_values(hash)
13
+ hash.reject { |_, value| value.nil? }
14
+ end
15
+
12
16
  def present?(value)
13
17
  !blank?(value)
14
18
  end
@@ -6,11 +6,11 @@ module Purview
6
6
  end
7
7
 
8
8
  def logger_opts
9
- opts[:logger] || {}
9
+ (defined?(opts) && opts[:logger]) || {}
10
10
  end
11
11
 
12
12
  def logger_type
13
- opts[:logger_type] || Purview::Loggers::Console
13
+ (defined?(opts) && opts[:logger_type]) || Purview::Loggers::Console
14
14
  end
15
15
 
16
16
  def with_context_logging(*args)
@@ -1,2 +1,3 @@
1
+ require 'purview/mixins/connection'
1
2
  require 'purview/mixins/helpers'
2
3
  require 'purview/mixins/logger'
@@ -10,7 +10,7 @@ module Purview
10
10
  end
11
11
 
12
12
  def validate(data)
13
- true
13
+ raise %{All "#{Base}(s)" must override the "validate" method}
14
14
  end
15
15
 
16
16
  private
@@ -19,16 +19,12 @@ module Purview
19
19
 
20
20
  attr_reader :opts
21
21
 
22
- def build_result(row)
23
- {}.tap do |result|
24
- row.each do |key, value|
25
- if column = table.columns_by_name[key]
26
- result[key] = column.parse(value)
27
- else
28
- logger.debug(%{Unexpected column: "#{key}" in data-set})
29
- end
30
- end
31
- end
22
+ def extract_headers(data)
23
+ raise %{All "#{Base}(s)" must override the "extract_headers" method}
24
+ end
25
+
26
+ def extract_rows(data)
27
+ raise %{All "#{Base}(s)" must override the "extract_rows" method}
32
28
  end
33
29
 
34
30
  def table
@@ -5,7 +5,7 @@ module Purview
5
5
  with_context_logging("`parse` for: #{table.name}") do
6
6
  [].tap do |results|
7
7
  headers = extract_headers(data)
8
- extract_rows(data) do |row|
8
+ extract_rows(data).each do |row|
9
9
  results << build_result(headers.zip(row))
10
10
  end
11
11
  end
@@ -14,7 +14,7 @@ module Purview
14
14
 
15
15
  def validate(data)
16
16
  with_context_logging("`validate` for: #{table.name}") do
17
- missing_columns = table.column_names - extract_headers(data)
17
+ missing_columns = missing_columns(data)
18
18
  raise 'Missing one or more columns: "%s"' % missing_columns.join('", "') \
19
19
  unless missing_columns.empty?
20
20
  true
@@ -23,6 +23,18 @@ module Purview
23
23
 
24
24
  private
25
25
 
26
+ def build_result(row)
27
+ {}.tap do |result|
28
+ row.each do |key, value|
29
+ if column = table.columns_by_name[key]
30
+ result[key] = column.parse(value)
31
+ else
32
+ logger.debug(%{Unexpected column: "#{key}" in data-set})
33
+ end
34
+ end
35
+ end
36
+ end
37
+
26
38
  def column_separator
27
39
  ','
28
40
  end
@@ -34,7 +46,11 @@ module Purview
34
46
 
35
47
  def extract_rows(data)
36
48
  rows = data.split(row_separator)[1..-1]
37
- rows.each { |row| yield parse_row(row) }
49
+ rows.map { |row| parse_row(row) }
50
+ end
51
+
52
+ def missing_columns(data)
53
+ table.column_names - extract_headers(data)
38
54
  end
39
55
 
40
56
  def parse_row(row)
@@ -0,0 +1,13 @@
1
module Purview
  module Parsers
    # Parser for data that is already structured: it hands back the rows
    # carried by the given object instead of parsing text.
    class SQL < Base
      # Returns +data.rows+ unchanged.
      #
      # data - any object responding to +rows+
      #        (presumably a Purview::Structs::Result; verify against caller).
      def parse(data)
        data.rows
      end

      # Always reports the data as valid; no checks are performed.
      def validate(data)
        true
      end
    end
  end
end
@@ -1,3 +1,5 @@
1
1
  require 'purview/parsers/base'
2
+
2
3
  require 'purview/parsers/csv'
4
+ require 'purview/parsers/sql'
3
5
  require 'purview/parsers/tsv'
@@ -14,6 +14,10 @@ module Purview
14
14
  include Purview::Mixins::Logger
15
15
 
16
16
  attr_reader :opts
17
+
18
+ def table
19
+ opts[:table]
20
+ end
17
21
  end
18
22
  end
19
23
  end
@@ -0,0 +1,97 @@
1
module Purview
  module Pullers
    # Abstract puller that reads a window of rows from a SQL database.
    #
    # Subclasses must override +connection_type+ and +dialect_type+.
    # Connection settings and the source table name are read from +opts+
    # (+:database_host+, +:database_name+, +:database_password+,
    # +:database_port+, +:database_username+, +:table_name+).
    class BaseSQL < Base
      # `include` is not affected by `private`, so the mixins are declared
      # up front where they are easy to find.
      include Purview::Mixins::Connection
      include Purview::Mixins::Helpers
      include Purview::Mixins::Logger

      # Opens a fresh connection, runs the pull query for +window+ and
      # returns the result of +connection.execute+.
      #
      # window - object responding to +min+ and +max+ (the bounds of the
      #          BETWEEN clause).
      def pull(window)
        with_new_connection do |connection|
          connection.execute(pull_sql(window))
        end
      end

      private

      # Column names selected by the pull query (taken from +table+).
      def column_names
        table.column_names
      end

      # Options passed to the connection; entries whose value is nil are
      # dropped via +filter_nil_values+.
      def connection_opts
        filter_nil_values(
          :database => database_name,
          :host => database_host,
          :password => database_password,
          :port => database_port,
          :username => database_username
        )
      end

      # Abstract: the connection class to use. Raises RuntimeError here.
      def connection_type
        raise %{All "#{BaseSQL}(s)" must override the "connection_type" method}
      end

      def database_host
        opts[:database_host]
      end

      def database_name
        opts[:database_name]
      end

      def database_password
        opts[:database_password]
      end

      def database_port
        opts[:database_port]
      end

      def database_username
        opts[:database_username]
      end

      # Builds a new dialect instance on every call.
      def dialect
        dialect_type.new
      end

      # Abstract: the dialect class to use. Raises RuntimeError here.
      def dialect_type
        raise %{All "#{BaseSQL}(s)" must override the "dialect_type" method}
      end

      def false_value
        dialect.false_value
      end

      def null_value
        dialect.null_value
      end

      # SELECT statement fetching every table column for rows whose
      # updated-timestamp column lies between +window.min+ and +window.max+.
      # The bounds are passed through the dialect's +quoted+.
      def pull_sql(window)
        'SELECT %s FROM %s WHERE %s BETWEEN %s AND %s' % [
          column_names.join(', '),
          table_name,
          table.updated_timestamp_column.name,
          quoted(window.min),
          quoted(window.max),
        ]
      end

      def quoted(value)
        dialect.quoted(value)
      end

      def sanitized(value)
        dialect.sanitized(value)
      end

      # Name of the source table as configured in +opts[:table_name]+.
      def table_name
        opts[:table_name]
      end

      def true_value
        dialect.true_value
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Purview
  module Pullers
    # SQL puller bound to the MySQL connection and dialect classes.
    class MySQL < BaseSQL
      private

      # Connection class used when pulling.
      def connection_type
        Purview::Connections::MySQL
      end

      # Dialect class used for quoting and literal values.
      def dialect_type
        Purview::Dialects::MySQL
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Purview
  module Pullers
    # SQL puller bound to the PostgreSQL connection and dialect classes.
    class PostgreSQL < BaseSQL
      private

      # Connection class used when pulling.
      def connection_type
        Purview::Connections::PostgreSQL
      end

      # Dialect class used for quoting and literal values.
      def dialect_type
        Purview::Dialects::PostgreSQL
      end
    end
  end
end
@@ -1,2 +1,6 @@
1
1
  require 'purview/pullers/base'
2
+ require 'purview/pullers/base_sql'
3
+
4
+ require 'purview/pullers/mysql'
5
+ require 'purview/pullers/postgresql'
2
6
  require 'purview/pullers/uri'
@@ -0,0 +1,118 @@
1
# `username` falls back to Etc.getlogin, so make sure Etc is loaded.
require 'etc'

module Purview
  module RawConnections
    # Abstract wrapper around a driver-level database connection.
    #
    # Subclasses must override +new_connection+, +execute_sql+,
    # +extract_rows+ and +extract_rows_affected+. Connection settings are
    # read from the +opts+ hash (+:database+, +:host+, +:password+,
    # +:port+, +:username+).
    class Base
      # `include` and constants are not affected by `private`; they are
      # declared first so the class layout is easy to scan.
      include Purview::Mixins::Helpers
      include Purview::Mixins::Logger

      BEGIN_TRANSACTION = 'BEGIN'.freeze
      COMMIT_TRANSACTION = 'COMMIT'.freeze
      ROLLBACK_TRANSACTION = 'ROLLBACK'.freeze

      # Builds a connection; equivalent to +new(opts)+.
      def self.connect(opts)
        new(opts)
      end

      # Yields a fresh connection and disconnects it afterwards, even when
      # the block raises. Returns the block's value.
      def self.with_new_connection(opts)
        connection = connect(opts)
        yield connection
      ensure
        # `connection` is nil when `connect` itself raised.
        connection.disconnect if connection
      end

      # Stores +opts+ and immediately opens the underlying connection via
      # +new_connection+.
      def initialize(opts)
        @opts = opts
        @raw_connection = new_connection
      end

      # Closes the underlying connection. Safe to call more than once:
      # a connection that was already released is left alone.
      #
      # Returns self.
      def disconnect
        raw_connection.close if raw_connection
        @raw_connection = nil
        self
      end

      # Logs and runs +sql+, returning a Purview::Structs::Result holding
      # the extracted rows and the affected-row count.
      def execute(sql, opts={})
        logger.debug("Executing: #{sql}")
        result = execute_sql(sql, opts)
        structify_result(result)
      end

      # Runs the block between BEGIN and COMMIT. On any StandardError
      # (including a failing COMMIT) a ROLLBACK is issued and the error is
      # re-raised. Returns the block's value.
      def with_transaction
        execute_sql(BEGIN_TRANSACTION)
        result = yield
        execute_sql(COMMIT_TRANSACTION)
        result
      rescue
        execute_sql(ROLLBACK_TRANSACTION)
        raise
      end

      private

      attr_reader :opts, :raw_connection

      def database
        opts[:database]
      end

      def delete?(sql)
        statement_type?(sql, 'DELETE')
      end

      # Abstract: run +sql+ on the raw connection and return the driver result.
      def execute_sql(sql, opts={})
        raise %{All "#{Base}(s)" must override the "execute_sql" method}
      end

      # Abstract: turn a driver result into an array of row hashes (or nil).
      def extract_rows(result)
        raise %{All "#{Base}(s)" must override the "extract_rows" method}
      end

      # Abstract: number of rows affected by the last statement.
      def extract_rows_affected(result)
        raise %{All "#{Base}(s)" must override the "extract_rows_affected" method}
      end

      def host
        opts[:host]
      end

      def insert?(sql)
        statement_type?(sql, 'INSERT')
      end

      # Abstract: open and return the driver-level connection.
      def new_connection
        raise %{All "#{Base}(s)" must override the "new_connection" method}
      end

      def password
        opts[:password]
      end

      def port
        opts[:port]
      end

      def select?(sql)
        statement_type?(sql, 'SELECT')
      end

      # True when +sql+ starts with +keyword+ (case-insensitive), ignoring
      # leading whitespace and requiring a word boundary after the keyword
      # so that e.g. "SELECTED" is not mistaken for a SELECT.
      def statement_type?(sql, keyword)
        !!(sql.to_s =~ /\A\s*#{Regexp.escape(keyword)}\b/i)
      end

      # Wraps a driver result; a nil row set is treated as no rows.
      def structify_result(result)
        Purview::Structs::Result.new(
          :rows => structify_rows(extract_rows(result) || []),
          :rows_affected => extract_rows_affected(result)
        )
      end

      def structify_row(row)
        Purview::Structs::Row.new(row)
      end

      def structify_rows(rows)
        rows.map { |row| structify_row(row) }
      end

      def update?(sql)
        statement_type?(sql, 'UPDATE')
      end

      # Configured username, falling back to the login name of the
      # current process owner.
      def username
        opts[:username] || Etc.getlogin
      end
    end
  end
end
@@ -0,0 +1,65 @@
1
module Purview
  module RawConnections
    module JDBC
      # Abstract JDBC-backed raw connection. Subclasses must override
      # +engine+ (the sub-protocol used in the JDBC URL).
      class Base < Purview::RawConnections::Base
        private

        # SQL text and statement object of the most recent +execute_sql+;
        # needed later to report the affected-row count.
        attr_reader :last_sql, :last_statement

        def delete_or_insert_or_update?(sql)
          delete?(sql) || insert?(sql) || update?(sql)
        end

        # Abstract: JDBC sub-protocol name, e.g. the part after "jdbc:".
        def engine
          raise %{All "#{Base}(s)" must override the "engine" method}
        end

        # Runs +sql+ on a new statement. Returns the ResultSet for SELECTs
        # and nil for everything else.
        def execute_sql(sql, opts={})
          # Release the previous statement before creating a new one so
          # statements do not pile up for the lifetime of the connection.
          # NOTE(review): assumes no caller still reads a ResultSet from the
          # previous statement; `execute` consumes results immediately.
          last_statement.close if last_statement
          @last_sql = sql
          @last_statement = statement = raw_connection.createStatement
          return statement.executeQuery(sql) if select?(sql)

          if delete_or_insert_or_update?(sql)
            statement.executeUpdate(sql)
          else
            statement.execute(sql)
          end
          nil
        end

        # Converts a ResultSet into an array of hashes with string values.
        # Returns nil when there is no result set.
        #
        # Columns are keyed by their label (the alias when "AS" is used)
        # and read by position, so aliased and duplicate column names are
        # handled correctly.
        def extract_rows(result)
          return unless result
          metadata = result.getMetaData
          labels = (1..metadata.getColumnCount).map do |index|
            metadata.getColumnLabel(index)
          end
          rows = []
          while result.next
            row = {}
            labels.each_with_index do |label, offset|
              # JDBC column indexes are 1-based.
              row[label] = result.getString(offset + 1)
            end
            rows << row
          end
          rows
        end

        # Update count of the last statement for DELETE/INSERT/UPDATE,
        # otherwise 0.
        def extract_rows_affected(result)
          delete_or_insert_or_update?(last_sql) ? last_statement.getUpdateCount : 0
        end

        def new_connection
          java.sql.DriverManager.getConnection(
            url,
            username,
            password
          )
        end

        # JDBC URL; the ":port" part is omitted when no port is configured.
        def url
          "jdbc:#{engine}://#{host}#{port && ":#{port}"}/#{database}"
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
# JDBC MySQL adapter: only defined on JRuby when 'jdbc/mysql' is loadable.
#
# A missing driver raises LoadError, which a bare `rescue` modifier does not
# catch, and `require` returns false when the driver was already loaded, so
# availability is decided by checking for the driver constant instead of
# the return value of `require`.
begin
  require 'jdbc/mysql' if defined?(JRUBY_VERSION)
rescue LoadError
  # Driver not installed: the adapter is simply not defined.
end

if defined?(JRUBY_VERSION) && defined?(::Jdbc::MySQL)
  ::Jdbc::MySQL.load_driver

  module Purview
    module RawConnections
      module JDBC
        class MySQL < Base
          private

          # Sub-protocol used in the JDBC URL.
          def engine
            'mysql'
          end
        end
      end
    end
  end

  Purview::RawConnections::MySQL = Purview::RawConnections::JDBC::MySQL
end
@@ -0,0 +1,19 @@
1
# JDBC PostgreSQL adapter: only defined on JRuby when 'jdbc/postgres' is
# loadable.
#
# A missing driver raises LoadError, which a bare `rescue` modifier does not
# catch, and `require` returns false when the driver was already loaded, so
# availability is decided by checking for the driver constant instead of
# the return value of `require`.
begin
  require 'jdbc/postgres' if defined?(JRUBY_VERSION)
rescue LoadError
  # Driver not installed: the adapter is simply not defined.
end

if defined?(JRUBY_VERSION) && defined?(::Jdbc::Postgres)
  ::Jdbc::Postgres.load_driver

  module Purview
    module RawConnections
      module JDBC
        class Postgres < Base
          private

          # Sub-protocol used in the JDBC URL.
          def engine
            'postgresql'
          end
        end
      end
    end
  end

  Purview::RawConnections::PostgreSQL = Purview::RawConnections::JDBC::Postgres
end
@@ -0,0 +1,4 @@
1
+ require 'purview/raw_connections/jdbc/base'
2
+
3
+ require 'purview/raw_connections/jdbc/mysql'
4
+ require 'purview/raw_connections/jdbc/postgres'
@@ -0,0 +1,35 @@
1
# mysql2-backed adapter: only defined on non-JRuby runtimes when the
# 'mysql2' library is loadable.
#
# A missing library raises LoadError, which a bare `rescue` modifier does
# not catch, and `require` returns false when the library was already
# loaded, so availability is decided by checking for the client constant
# instead of the return value of `require`.
begin
  require 'mysql2' unless defined?(JRUBY_VERSION)
rescue LoadError
  # Library not installed: the adapter is simply not defined.
end

if !defined?(JRUBY_VERSION) && defined?(::Mysql2::Client)
  module Purview
    module RawConnections
      class Mysql2 < Base
        private

        # Runs the query with value casting disabled (`:cast => false`
        # always wins over a caller-supplied :cast option).
        def execute_sql(sql, opts={})
          raw_connection.query(sql, opts.merge(:cast => false))
        end

        # Array of rows, or nil when the statement produced no result.
        def extract_rows(result)
          result && result.to_a
        end

        # Affected-row count as reported by the client connection.
        def extract_rows_affected(result)
          raw_connection.affected_rows
        end

        # Opens the client; options whose value is nil are omitted.
        def new_connection
          ::Mysql2::Client.new(
            filter_nil_values(
              :database => database,
              :host => host,
              :password => password,
              :port => port,
              :username => username
            )
          )
        end
      end
    end
  end

  Purview::RawConnections::MySQL = Purview::RawConnections::Mysql2
end
@@ -0,0 +1,35 @@
1
# pg-backed adapter: only defined on non-JRuby runtimes when the 'pg'
# library is loadable.
#
# A missing library raises LoadError, which a bare `rescue` modifier does
# not catch, and `require` returns false when the library was already
# loaded, so availability is decided by checking for the library constant
# instead of the return value of `require`.
begin
  require 'pg' unless defined?(JRUBY_VERSION)
rescue LoadError
  # Library not installed: the adapter is simply not defined.
end

if !defined?(JRUBY_VERSION) && defined?(::PG)
  module Purview
    module RawConnections
      class PG < Base
        private

        # Runs the statement; +opts+ is accepted for interface
        # compatibility and not used.
        def execute_sql(sql, opts={})
          raw_connection.exec(sql)
        end

        # Array of rows, or nil when there is no result object.
        def extract_rows(result)
          result && result.to_a
        end

        # Affected-row count from the result, or nil when there is no
        # result object.
        def extract_rows_affected(result)
          result && result.cmd_tuples
        end

        # Opens the connection; options whose value is nil are omitted.
        def new_connection
          ::PG.connect(
            filter_nil_values(
              :dbname => database,
              :host => host,
              :password => password,
              :port => port,
              :user => username
            )
          )
        end
      end
    end
  end

  Purview::RawConnections::PostgreSQL = Purview::RawConnections::PG
end
@@ -0,0 +1,8 @@
1
+ require 'purview/raw_connections/base'
2
+ require 'purview/raw_connections/jdbc/base'
3
+
4
+ require 'purview/raw_connections/jdbc/mysql'
5
+ require 'purview/raw_connections/jdbc/postgres'
6
+
7
+ require 'purview/raw_connections/mysql2'
8
+ require 'purview/raw_connections/pg'
@@ -2,4 +2,8 @@ class Object
2
2
  def quoted
3
3
  "'#{self}'"
4
4
  end
5
+
6
+ def sanitized
7
+ self.to_s.sanitized
8
+ end
5
9
  end
@@ -0,0 +1,5 @@
1
class String
  # Returns a copy of the string with every single quote doubled
  # (' becomes ''). No other characters are changed; the receiver is
  # not modified.
  def sanitized
    gsub("'", "''")
  end
end
@@ -1,2 +1,3 @@
1
1
  require 'purview/refinements/object'
2
+ require 'purview/refinements/string'
2
3
  require 'purview/refinements/time'
@@ -1,8 +1,18 @@
1
1
  module Purview
2
2
  module Structs
3
3
  class Base < OpenStruct
4
- def method_missing(method_name, *args, &block)
5
- raise NoMethodError if args.size.zero?
4
+ def [](key)
5
+ key = key.to_sym unless key.is_a?(Symbol)
6
+ raise NoMethodError unless respond_to?(key)
7
+ send(key)
8
+ end
9
+
10
+ def []=(key, value)
11
+ send("#{key}=", value)
12
+ end
13
+
14
+ def method_missing(method, *args, &block)
15
+ raise NoMethodError if args.empty?
6
16
  super
7
17
  end
8
18
  end