purview 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +9 -0
  3. data/TODO +7 -6
  4. data/lib/purview/columns/base.rb +35 -3
  5. data/lib/purview/databases/base.rb +84 -231
  6. data/lib/purview/databases/mysql.rb +42 -108
  7. data/lib/purview/databases/postgresql.rb +45 -111
  8. data/lib/purview/exceptions/{could_not_baseline.rb → could_not_baseline_table.rb} +1 -1
  9. data/lib/purview/exceptions/{could_not_disable.rb → could_not_disable_table.rb} +1 -1
  10. data/lib/purview/exceptions/{could_not_enable.rb → could_not_enable_table.rb} +1 -1
  11. data/lib/purview/exceptions/{could_not_find_table_metadata.rb → could_not_find_metadata_for_table.rb} +1 -1
  12. data/lib/purview/exceptions/{could_not_initialize.rb → could_not_initialize_table.rb} +1 -1
  13. data/lib/purview/exceptions/{could_not_lock.rb → could_not_lock_table.rb} +1 -1
  14. data/lib/purview/exceptions/{could_not_sync.rb → could_not_sync_table.rb} +1 -1
  15. data/lib/purview/exceptions/{could_not_unlock.rb → could_not_unlock_table.rb} +1 -1
  16. data/lib/purview/exceptions/could_not_update_metadata_for_table.rb +9 -0
  17. data/lib/purview/exceptions/{database_already_assigned.rb → database_already_assigned_for_table.rb} +1 -1
  18. data/lib/purview/exceptions/{no_window.rb → no_window_for_table.rb} +1 -1
  19. data/lib/purview/exceptions/rows_outside_window_for_table.rb +18 -0
  20. data/lib/purview/exceptions/table_already_assigned_for_column.rb +17 -0
  21. data/lib/purview/exceptions/{table_already_assigned.rb → table_already_assigned_for_index.rb} +1 -1
  22. data/lib/purview/exceptions/{wrong_database.rb → wrong_database_for_table.rb} +1 -1
  23. data/lib/purview/exceptions.rb +15 -13
  24. data/lib/purview/indices/base.rb +6 -1
  25. data/lib/purview/loaders/base.rb +2 -25
  26. data/lib/purview/mixins/dialect.rb +29 -0
  27. data/lib/purview/mixins.rb +1 -0
  28. data/lib/purview/parsers/base.rb +3 -2
  29. data/lib/purview/parsers/csv.rb +13 -3
  30. data/lib/purview/pullers/base_sql.rb +6 -25
  31. data/lib/purview/structs/table_metadata.rb +0 -14
  32. data/lib/purview/tables/base.rb +28 -106
  33. data/lib/purview/tables/base_syncable.rb +113 -0
  34. data/lib/purview/tables/raw.rb +1 -1
  35. data/lib/purview/tables/table_metadata.rb +43 -0
  36. data/lib/purview/tables.rb +3 -0
  37. data/lib/purview/version.rb +1 -1
  38. metadata +20 -15
  39. data/lib/purview/exceptions/rows_outside_window.rb +0 -18
@@ -6,6 +6,7 @@ module Purview
6
6
  def initialize(columns, opts={})
7
7
  @columns = columns
8
8
  @opts = opts
9
+ @table = table_opt
9
10
  end
10
11
 
11
12
  def eql?(other)
@@ -19,7 +20,7 @@ module Purview
19
20
  end
20
21
 
21
22
  def table=(value)
22
- raise Purview::Exceptions::TableAlreadyAssigned.new(self) if table
23
+ raise Purview::Exceptions::TableAlreadyAssignedForIndex.new(self) if table
23
24
  @table = value
24
25
  end
25
26
 
@@ -34,6 +35,10 @@ module Purview
34
35
  private
35
36
 
36
37
  attr_reader :opts
38
+
39
+ def table_opt
40
+ opts[:table]
41
+ end
37
42
  end
38
43
  end
39
44
  end
@@ -28,6 +28,7 @@ module Purview
28
28
 
29
29
  private
30
30
 
31
+ include Purview::Mixins::Dialect
31
32
  include Purview::Mixins::Helpers
32
33
  include Purview::Mixins::Logger
33
34
 
@@ -49,18 +50,10 @@ module Purview
49
50
  table.database
50
51
  end
51
52
 
52
- def dialect
53
- dialect_type.new
54
- end
55
-
56
53
  def dialect_type
57
54
  raise %{All "#{Base}(s)" must override the "dialect_type" method}
58
55
  end
59
56
 
60
- def false_value
61
- dialect.false_value
62
- end
63
-
64
57
  def id_in_sql(temporary_table_name)
65
58
  raise %{All "#{Base}(s)" must override the "id_in_sql" method}
66
59
  end
@@ -86,14 +79,6 @@ module Purview
86
79
  raise %{All "#{Base}(s)" must override the "not_in_window_sql" method}
87
80
  end
88
81
 
89
- def null_value
90
- dialect.null_value
91
- end
92
-
93
- def quoted(value)
94
- dialect.quoted(value)
95
- end
96
-
97
82
  def row_values(row)
98
83
  table.column_names.map { |column_name| quoted(sanitized(row[column_name])) }.join(', ')
99
84
  end
@@ -102,10 +87,6 @@ module Purview
102
87
  opts[:rows_per_slice] || 1000
103
88
  end
104
89
 
105
- def sanitized(value)
106
- dialect.sanitized(value)
107
- end
108
-
109
90
  def table
110
91
  opts[:table]
111
92
  end
@@ -138,10 +119,6 @@ module Purview
138
119
  raise %{All "#{Base}(s)" must override the "temporary_table_verify_sql" method}
139
120
  end
140
121
 
141
- def true_value
142
- dialect.true_value
143
- end
144
-
145
122
  def verify_temporary_table(connection, temporary_table_name, rows, window)
146
123
  with_context_logging("`verify_temporary_table` for: #{temporary_table_name}") do
147
124
  rows_outside_window = connection.execute(
@@ -151,7 +128,7 @@ module Purview
151
128
  window
152
129
  )
153
130
  ).rows[0][count_column_name]
154
- raise Purview::Exceptions::RowsOutsideWindow.new(temporary_table_name, rows_outside_window) \
131
+ raise Purview::Exceptions::RowsOutsideWindowForTable.new(table, rows_outside_window) \
155
132
  unless zero?(rows_outside_window)
156
133
  end
157
134
  end
@@ -0,0 +1,29 @@
1
+ module Purview
2
+ module Mixins
3
+ module Dialect
4
+ def dialect
5
+ dialect_type.new
6
+ end
7
+
8
+ def false_value
9
+ dialect.false_value
10
+ end
11
+
12
+ def null_value
13
+ dialect.null_value
14
+ end
15
+
16
+ def quoted(value)
17
+ dialect.quoted(value)
18
+ end
19
+
20
+ def sanitized(value)
21
+ dialect.sanitized(value)
22
+ end
23
+
24
+ def true_value
25
+ dialect.true_value
26
+ end
27
+ end
28
+ end
29
+ end
@@ -1,3 +1,4 @@
1
1
  require 'purview/mixins/connection'
2
+ require 'purview/mixins/dialect'
2
3
  require 'purview/mixins/helpers'
3
4
  require 'purview/mixins/logger'
@@ -3,6 +3,7 @@ module Purview
3
3
  class Base
4
4
  def initialize(opts={})
5
5
  @opts = opts
6
+ @table = table_opt
6
7
  end
7
8
 
8
9
  def parse(data)
@@ -17,7 +18,7 @@ module Purview
17
18
 
18
19
  include Purview::Mixins::Logger
19
20
 
20
- attr_reader :opts
21
+ attr_reader :opts, :table
21
22
 
22
23
  def extract_headers(data)
23
24
  raise %{All "#{Base}(s)" must override the "extract_headers" method}
@@ -27,7 +28,7 @@ module Purview
27
28
  raise %{All "#{Base}(s)" must override the "extract_rows" method}
28
29
  end
29
30
 
30
- def table
31
+ def table_opt
31
32
  opts[:table]
32
33
  end
33
34
  end
@@ -26,8 +26,8 @@ module Purview
26
26
  def build_result(row)
27
27
  {}.tap do |result|
28
28
  row.each do |key, value|
29
- if column = table.columns_by_name[key]
30
- result[key] = column.parse(value)
29
+ if column = table.columns_by_source_name[key]
30
+ result[column.name] = column.parse(value)
31
31
  else
32
32
  logger.debug(%{Unexpected column: "#{key}" in data-set})
33
33
  end
@@ -49,8 +49,18 @@ module Purview
49
49
  rows.map { |row| parse_row(row) }
50
50
  end
51
51
 
52
+ def map_headers(headers)
53
+ headers.map do |header|
54
+ if column = table.columns_by_source_name[header]
55
+ column.name
56
+ else
57
+ logger.debug(%{Could not find column with source_name: "#{header}"})
58
+ end
59
+ end
60
+ end
61
+
52
62
  def missing_columns(data)
53
- table.column_names - extract_headers(data)
63
+ table.column_names - map_headers(extract_headers(data))
54
64
  end
55
65
 
56
66
  def parse_row(row)
@@ -10,11 +10,16 @@ module Purview
10
10
  private
11
11
 
12
12
  include Purview::Mixins::Connection
13
+ include Purview::Mixins::Dialect
13
14
  include Purview::Mixins::Helpers
14
15
  include Purview::Mixins::Logger
15
16
 
16
17
  def column_names
17
- table.column_names
18
+ table.columns.map do |column|
19
+ name = column.name
20
+ source_name = column.source_name
21
+ source_name == name ? name : "#{source_name} AS #{name}"
22
+ end
18
23
  end
19
24
 
20
25
  def connection_type
@@ -41,22 +46,10 @@ module Purview
41
46
  opts[:database_username]
42
47
  end
43
48
 
44
- def dialect
45
- dialect_type.new
46
- end
47
-
48
49
  def dialect_type
49
50
  raise %{All "#{BaseSQL}(s)" must override the "dialect_type" method}
50
51
  end
51
52
 
52
- def false_value
53
- dialect.false_value
54
- end
55
-
56
- def null_value
57
- dialect.null_value
58
- end
59
-
60
53
  def pull_sql(window)
61
54
  'SELECT %s FROM %s WHERE %s BETWEEN %s AND %s' % [
62
55
  column_names.join(', '),
@@ -67,21 +60,9 @@ module Purview
67
60
  ]
68
61
  end
69
62
 
70
- def quoted(value)
71
- dialect.quoted(value)
72
- end
73
-
74
- def sanitized(value)
75
- dialect.sanitized(value)
76
- end
77
-
78
63
  def table_name
79
64
  opts[:table_name]
80
65
  end
81
-
82
- def true_value
83
- dialect.true_value
84
- end
85
66
  end
86
67
  end
87
68
  end
@@ -1,20 +1,6 @@
1
1
  module Purview
2
2
  module Structs
3
3
  class TableMetadata < Base
4
- def initialize(row)
5
- enabled_at = row.enabled_at && Time.parse(row.enabled_at)
6
- last_pulled_at = row.last_pulled_at && Time.parse(row.last_pulled_at)
7
- locked_at = row.locked_at && Time.parse(row.locked_at)
8
- max_timestamp_pulled = row.max_timestamp_pulled && Time.parse(row.max_timestamp_pulled)
9
- super(
10
- :table_name => row.table_name,
11
- :enabled_at => enabled_at,
12
- :last_pulled_at => last_pulled_at,
13
- :locked_at => locked_at,
14
- :max_timestamp_pulled => max_timestamp_pulled,
15
- )
16
- end
17
-
18
4
  def diabled?
19
5
  !enabled?
20
6
  end
@@ -1,90 +1,47 @@
1
1
  module Purview
2
2
  module Tables
3
3
  class Base
4
- attr_reader :database, :indices, :name
4
+ attr_reader :columns, :database, :indices, :name
5
5
 
6
6
  def initialize(name, opts={})
7
- @name = name
7
+ @name = name.to_sym
8
8
  @opts = opts
9
+ @database = database_opt
10
+ @columns = Set.new.tap do |result|
11
+ (default_columns + columns_opt).each do |column|
12
+ column.table = self if result.add?(column)
13
+ end
14
+ end
9
15
  @indices = Set.new.tap do |result|
10
- ((opts[:indices] || []) + default_indices).each do |index|
16
+ (default_indices + indices_opt).each do |index|
11
17
  index.table = self if result.add?(index)
12
18
  end
13
19
  end
14
20
  end
15
21
 
16
- def columns
17
- opts[:columns]
18
- end
19
-
20
22
  def column_names
21
23
  columns.map(&:name)
22
24
  end
23
25
 
24
26
  def columns_by_name
25
- {}.tap do |result|
26
- columns.each do |column|
27
- result[column.name] = column
28
- end
27
+ columns.reduce({}) do |memo, column|
28
+ memo[column.name] = column
29
+ memo
29
30
  end
30
31
  end
31
32
 
32
- def columns_of_type(type)
33
- columns.select { |column| column.is_a?(type) }
34
- end
35
-
36
- def created_timestamp_column
37
- columns_of_type(Purview::Columns::CreatedTimestamp).first
38
- end
39
-
40
- def created_timestamp_index
41
- Purview::Indices::Simple.new(created_timestamp_column)
42
- end
43
-
44
- def data_columns
45
- columns - [
46
- created_timestamp_column,
47
- id_column,
48
- updated_timestamp_column,
49
- ]
33
+ def columns_by_source_name
34
+ columns.reduce({}) do |memo, column|
35
+ memo[column.source_name] = column
36
+ memo
37
+ end
50
38
  end
51
39
 
52
40
  def database=(value)
53
- raise Purview::Exceptions::DatabaseAlreadyAssigned.new(self) if database
41
+ raise Purview::Exceptions::DatabaseAlreadyAssignedForTable.new(self) if database
54
42
  @database = value
55
43
  end
56
44
 
57
- def id_column
58
- columns_of_type(Purview::Columns::Id).first
59
- end
60
-
61
- def sync(connection, window)
62
- raw_data = puller.pull(window)
63
- parser.validate(raw_data)
64
- parsed_data = parser.parse(raw_data)
65
- loader.load(
66
- connection,
67
- parsed_data,
68
- window
69
- )
70
- end
71
-
72
- def temporary_name
73
- "#{name}_#{timestamp.to_i}"
74
- end
75
-
76
- def updated_timestamp_column
77
- columns_of_type(Purview::Columns::UpdatedTimestamp).first
78
- end
79
-
80
- def updated_timestamp_index
81
- Purview::Indices::Simple.new(updated_timestamp_column)
82
- end
83
-
84
- def window_size
85
- opts[:window_size] || (60 * 60)
86
- end
87
-
88
45
  private
89
46
 
90
47
  include Purview::Mixins::Helpers
@@ -92,59 +49,24 @@ module Purview
92
49
 
93
50
  attr_reader :opts
94
51
 
95
- def default_indices
96
- [
97
- created_timestamp_index,
98
- updated_timestamp_index,
99
- ]
100
- end
101
-
102
- def extract_type_option(opts)
103
- opts[:type]
52
+ def columns_opt
53
+ opts[:columns] || []
104
54
  end
105
55
 
106
- def filter_type_option(opts)
107
- opts.select { |key| key != :type }
56
+ def database_opt
57
+ opts[:database]
108
58
  end
109
59
 
110
- def loader
111
- loader_type.new(loader_opts)
60
+ def default_columns
61
+ []
112
62
  end
113
63
 
114
- def loader_opts
115
- merge_table_option(filter_type_option(opts[:loader]))
116
- end
117
-
118
- def loader_type
119
- extract_type_option(opts[:loader])
120
- end
121
-
122
- def merge_table_option(opts)
123
- { :table => self }.merge(opts)
124
- end
125
-
126
- def parser
127
- parser_type.new(parser_opts)
128
- end
129
-
130
- def parser_opts
131
- merge_table_option(filter_type_option(opts[:parser]))
132
- end
133
-
134
- def parser_type
135
- extract_type_option(opts[:parser])
136
- end
137
-
138
- def puller
139
- puller_type.new(puller_opts)
140
- end
141
-
142
- def puller_opts
143
- merge_table_option(filter_type_option(opts[:puller]))
64
+ def default_indices
65
+ []
144
66
  end
145
67
 
146
- def puller_type
147
- extract_type_option(opts[:puller])
68
+ def indices_opt
69
+ opts[:indices] || []
148
70
  end
149
71
  end
150
72
  end
@@ -0,0 +1,113 @@
1
+ module Purview
2
+ module Tables
3
+ class BaseSyncable < Base
4
+ def created_timestamp_column
5
+ column_from_opts_of_type(Purview::Columns::CreatedTimestamp) or raise %{Must specify a column of type: "#{Purview::Columns::CreatedTimestamp}"}
6
+ end
7
+
8
+ def created_timestamp_index
9
+ Purview::Indices::Simple.new(created_timestamp_column)
10
+ end
11
+
12
+ def id_column
13
+ column_from_opts_of_type(Purview::Columns::Id) or raise %{Must specify a column of type: "#{Purview::Columns::Id}"}
14
+ end
15
+
16
+ def sync(connection, window)
17
+ raw_data = puller.pull(window)
18
+ parser.validate(raw_data)
19
+ parsed_data = parser.parse(raw_data)
20
+ loader.load(
21
+ connection,
22
+ parsed_data,
23
+ window
24
+ )
25
+ end
26
+
27
+ def temporary_name
28
+ "#{name}_#{timestamp.to_i}"
29
+ end
30
+
31
+ def updated_timestamp_column
32
+ column_from_opts_of_type(Purview::Columns::UpdatedTimestamp) or raise %{Must specify a column of type: "#{Purview::Columns::UpdatedTimestamp}"}
33
+ end
34
+
35
+ def updated_timestamp_index
36
+ Purview::Indices::Simple.new(updated_timestamp_column)
37
+ end
38
+
39
+ def window_size
40
+ opts[:window_size] || (60 * 60)
41
+ end
42
+
43
+ private
44
+
45
+ def column_from_opts_of_type(type)
46
+ columns_opt.select { |column| column.is_a?(type) }.first
47
+ end
48
+
49
+ def default_columns
50
+ super + [
51
+ id_column,
52
+ created_timestamp_column,
53
+ updated_timestamp_column,
54
+ ]
55
+ end
56
+
57
+ def default_indices
58
+ super + [
59
+ created_timestamp_index,
60
+ updated_timestamp_index,
61
+ ]
62
+ end
63
+
64
+ def extract_type_opt(opts)
65
+ opts[:type]
66
+ end
67
+
68
+ def filter_type_opt(opts)
69
+ opts.select { |key| key != :type }
70
+ end
71
+
72
+ def loader
73
+ loader_type.new(loader_opts)
74
+ end
75
+
76
+ def loader_opts
77
+ merge_table_opt(filter_type_opt(opts[:loader]))
78
+ end
79
+
80
+ def loader_type
81
+ extract_type_opt(opts[:loader])
82
+ end
83
+
84
+ def merge_table_opt(opts)
85
+ { :table => self }.merge(opts)
86
+ end
87
+
88
+ def parser
89
+ parser_type.new(parser_opts)
90
+ end
91
+
92
+ def parser_opts
93
+ merge_table_opt(filter_type_opt(opts[:parser]))
94
+ end
95
+
96
+ def parser_type
97
+ extract_type_opt(opts[:parser])
98
+ end
99
+
100
+ def puller
101
+ puller_type.new(puller_opts)
102
+ end
103
+
104
+ def puller_opts
105
+ merge_table_opt(filter_type_opt(opts[:puller]))
106
+ end
107
+
108
+ def puller_type
109
+ extract_type_opt(opts[:puller])
110
+ end
111
+ end
112
+ end
113
+ end
@@ -1,6 +1,6 @@
1
1
  module Purview
2
2
  module Tables
3
- class Raw < Base
3
+ class Raw < BaseSyncable
4
4
  def name
5
5
  "#{super}_raw"
6
6
  end
@@ -0,0 +1,43 @@
1
+ module Purview
2
+ module Tables
3
+ class TableMetadata < Base
4
+ def initialize(database)
5
+ super(
6
+ :table_metadata,
7
+ :columns => [
8
+ table_name_column,
9
+ enabled_at_column,
10
+ last_pulled_at_column,
11
+ locked_at_column,
12
+ max_timestamp_pulled_column,
13
+ ],
14
+ :database => database
15
+ )
16
+ end
17
+
18
+ def enabled_at_column
19
+ Purview::Columns::Timestamp.new(:enabled_at)
20
+ end
21
+
22
+ def last_pulled_at_column
23
+ Purview::Columns::Timestamp.new(:last_pulled_at)
24
+ end
25
+
26
+ def locked_at_column
27
+ Purview::Columns::Timestamp.new(:locked_at)
28
+ end
29
+
30
+ def max_timestamp_pulled_column
31
+ Purview::Columns::Timestamp.new(:max_timestamp_pulled)
32
+ end
33
+
34
+ def table_name_column
35
+ Purview::Columns::Id.new(
36
+ :table_name,
37
+ :type => Purview::Types::String,
38
+ :limit => 255,
39
+ )
40
+ end
41
+ end
42
+ end
43
+ end
@@ -1,2 +1,5 @@
1
1
  require 'purview/tables/base'
2
+ require 'purview/tables/base_syncable'
3
+
2
4
  require 'purview/tables/raw'
5
+ require 'purview/tables/table_metadata'
@@ -1,3 +1,3 @@
1
1
  module Purview
2
- VERSION = '1.3.1'
2
+ VERSION = '1.4.0'
3
3
  end