purview 1.3.1 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (39) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG +9 -0
  3. data/TODO +7 -6
  4. data/lib/purview/columns/base.rb +35 -3
  5. data/lib/purview/databases/base.rb +84 -231
  6. data/lib/purview/databases/mysql.rb +42 -108
  7. data/lib/purview/databases/postgresql.rb +45 -111
  8. data/lib/purview/exceptions/{could_not_baseline.rb → could_not_baseline_table.rb} +1 -1
  9. data/lib/purview/exceptions/{could_not_disable.rb → could_not_disable_table.rb} +1 -1
  10. data/lib/purview/exceptions/{could_not_enable.rb → could_not_enable_table.rb} +1 -1
  11. data/lib/purview/exceptions/{could_not_find_table_metadata.rb → could_not_find_metadata_for_table.rb} +1 -1
  12. data/lib/purview/exceptions/{could_not_initialize.rb → could_not_initialize_table.rb} +1 -1
  13. data/lib/purview/exceptions/{could_not_lock.rb → could_not_lock_table.rb} +1 -1
  14. data/lib/purview/exceptions/{could_not_sync.rb → could_not_sync_table.rb} +1 -1
  15. data/lib/purview/exceptions/{could_not_unlock.rb → could_not_unlock_table.rb} +1 -1
  16. data/lib/purview/exceptions/could_not_update_metadata_for_table.rb +9 -0
  17. data/lib/purview/exceptions/{database_already_assigned.rb → database_already_assigned_for_table.rb} +1 -1
  18. data/lib/purview/exceptions/{no_window.rb → no_window_for_table.rb} +1 -1
  19. data/lib/purview/exceptions/rows_outside_window_for_table.rb +18 -0
  20. data/lib/purview/exceptions/table_already_assigned_for_column.rb +17 -0
  21. data/lib/purview/exceptions/{table_already_assigned.rb → table_already_assigned_for_index.rb} +1 -1
  22. data/lib/purview/exceptions/{wrong_database.rb → wrong_database_for_table.rb} +1 -1
  23. data/lib/purview/exceptions.rb +15 -13
  24. data/lib/purview/indices/base.rb +6 -1
  25. data/lib/purview/loaders/base.rb +2 -25
  26. data/lib/purview/mixins/dialect.rb +29 -0
  27. data/lib/purview/mixins.rb +1 -0
  28. data/lib/purview/parsers/base.rb +3 -2
  29. data/lib/purview/parsers/csv.rb +13 -3
  30. data/lib/purview/pullers/base_sql.rb +6 -25
  31. data/lib/purview/structs/table_metadata.rb +0 -14
  32. data/lib/purview/tables/base.rb +28 -106
  33. data/lib/purview/tables/base_syncable.rb +113 -0
  34. data/lib/purview/tables/raw.rb +1 -1
  35. data/lib/purview/tables/table_metadata.rb +43 -0
  36. data/lib/purview/tables.rb +3 -0
  37. data/lib/purview/version.rb +1 -1
  38. metadata +20 -15
  39. data/lib/purview/exceptions/rows_outside_window.rb +0 -18
@@ -6,6 +6,7 @@ module Purview
6
6
  def initialize(columns, opts={})
7
7
  @columns = columns
8
8
  @opts = opts
9
+ @table = table_opt
9
10
  end
10
11
 
11
12
  def eql?(other)
@@ -19,7 +20,7 @@ module Purview
19
20
  end
20
21
 
21
22
  def table=(value)
22
- raise Purview::Exceptions::TableAlreadyAssigned.new(self) if table
23
+ raise Purview::Exceptions::TableAlreadyAssignedForIndex.new(self) if table
23
24
  @table = value
24
25
  end
25
26
 
@@ -34,6 +35,10 @@ module Purview
34
35
  private
35
36
 
36
37
  attr_reader :opts
38
+
39
+ def table_opt
40
+ opts[:table]
41
+ end
37
42
  end
38
43
  end
39
44
  end
@@ -28,6 +28,7 @@ module Purview
28
28
 
29
29
  private
30
30
 
31
+ include Purview::Mixins::Dialect
31
32
  include Purview::Mixins::Helpers
32
33
  include Purview::Mixins::Logger
33
34
 
@@ -49,18 +50,10 @@ module Purview
49
50
  table.database
50
51
  end
51
52
 
52
- def dialect
53
- dialect_type.new
54
- end
55
-
56
53
  def dialect_type
57
54
  raise %{All "#{Base}(s)" must override the "dialect_type" method}
58
55
  end
59
56
 
60
- def false_value
61
- dialect.false_value
62
- end
63
-
64
57
  def id_in_sql(temporary_table_name)
65
58
  raise %{All "#{Base}(s)" must override the "id_in_sql" method}
66
59
  end
@@ -86,14 +79,6 @@ module Purview
86
79
  raise %{All "#{Base}(s)" must override the "not_in_window_sql" method}
87
80
  end
88
81
 
89
- def null_value
90
- dialect.null_value
91
- end
92
-
93
- def quoted(value)
94
- dialect.quoted(value)
95
- end
96
-
97
82
  def row_values(row)
98
83
  table.column_names.map { |column_name| quoted(sanitized(row[column_name])) }.join(', ')
99
84
  end
@@ -102,10 +87,6 @@ module Purview
102
87
  opts[:rows_per_slice] || 1000
103
88
  end
104
89
 
105
- def sanitized(value)
106
- dialect.sanitized(value)
107
- end
108
-
109
90
  def table
110
91
  opts[:table]
111
92
  end
@@ -138,10 +119,6 @@ module Purview
138
119
  raise %{All "#{Base}(s)" must override the "temporary_table_verify_sql" method}
139
120
  end
140
121
 
141
- def true_value
142
- dialect.true_value
143
- end
144
-
145
122
  def verify_temporary_table(connection, temporary_table_name, rows, window)
146
123
  with_context_logging("`verify_temporary_table` for: #{temporary_table_name}") do
147
124
  rows_outside_window = connection.execute(
@@ -151,7 +128,7 @@ module Purview
151
128
  window
152
129
  )
153
130
  ).rows[0][count_column_name]
154
- raise Purview::Exceptions::RowsOutsideWindow.new(temporary_table_name, rows_outside_window) \
131
+ raise Purview::Exceptions::RowsOutsideWindowForTable.new(table, rows_outside_window) \
155
132
  unless zero?(rows_outside_window)
156
133
  end
157
134
  end
@@ -0,0 +1,29 @@
1
+ module Purview
2
+ module Mixins
3
+ module Dialect
4
+ def dialect
5
+ dialect_type.new
6
+ end
7
+
8
+ def false_value
9
+ dialect.false_value
10
+ end
11
+
12
+ def null_value
13
+ dialect.null_value
14
+ end
15
+
16
+ def quoted(value)
17
+ dialect.quoted(value)
18
+ end
19
+
20
+ def sanitized(value)
21
+ dialect.sanitized(value)
22
+ end
23
+
24
+ def true_value
25
+ dialect.true_value
26
+ end
27
+ end
28
+ end
29
+ end
@@ -1,3 +1,4 @@
1
1
  require 'purview/mixins/connection'
2
+ require 'purview/mixins/dialect'
2
3
  require 'purview/mixins/helpers'
3
4
  require 'purview/mixins/logger'
@@ -3,6 +3,7 @@ module Purview
3
3
  class Base
4
4
  def initialize(opts={})
5
5
  @opts = opts
6
+ @table = table_opt
6
7
  end
7
8
 
8
9
  def parse(data)
@@ -17,7 +18,7 @@ module Purview
17
18
 
18
19
  include Purview::Mixins::Logger
19
20
 
20
- attr_reader :opts
21
+ attr_reader :opts, :table
21
22
 
22
23
  def extract_headers(data)
23
24
  raise %{All "#{Base}(s)" must override the "extract_headers" method}
@@ -27,7 +28,7 @@ module Purview
27
28
  raise %{All "#{Base}(s)" must override the "extract_rows" method}
28
29
  end
29
30
 
30
- def table
31
+ def table_opt
31
32
  opts[:table]
32
33
  end
33
34
  end
@@ -26,8 +26,8 @@ module Purview
26
26
  def build_result(row)
27
27
  {}.tap do |result|
28
28
  row.each do |key, value|
29
- if column = table.columns_by_name[key]
30
- result[key] = column.parse(value)
29
+ if column = table.columns_by_source_name[key]
30
+ result[column.name] = column.parse(value)
31
31
  else
32
32
  logger.debug(%{Unexpected column: "#{key}" in data-set})
33
33
  end
@@ -49,8 +49,18 @@ module Purview
49
49
  rows.map { |row| parse_row(row) }
50
50
  end
51
51
 
52
+ def map_headers(headers)
53
+ headers.map do |header|
54
+ if column = table.columns_by_source_name[header]
55
+ column.name
56
+ else
57
+ logger.debug(%{Could not find column with source_name: "#{header}"})
58
+ end
59
+ end
60
+ end
61
+
52
62
  def missing_columns(data)
53
- table.column_names - extract_headers(data)
63
+ table.column_names - map_headers(extract_headers(data))
54
64
  end
55
65
 
56
66
  def parse_row(row)
@@ -10,11 +10,16 @@ module Purview
10
10
  private
11
11
 
12
12
  include Purview::Mixins::Connection
13
+ include Purview::Mixins::Dialect
13
14
  include Purview::Mixins::Helpers
14
15
  include Purview::Mixins::Logger
15
16
 
16
17
  def column_names
17
- table.column_names
18
+ table.columns.map do |column|
19
+ name = column.name
20
+ source_name = column.source_name
21
+ source_name == name ? name : "#{source_name} AS #{name}"
22
+ end
18
23
  end
19
24
 
20
25
  def connection_type
@@ -41,22 +46,10 @@ module Purview
41
46
  opts[:database_username]
42
47
  end
43
48
 
44
- def dialect
45
- dialect_type.new
46
- end
47
-
48
49
  def dialect_type
49
50
  raise %{All "#{BaseSQL}(s)" must override the "dialect_type" method}
50
51
  end
51
52
 
52
- def false_value
53
- dialect.false_value
54
- end
55
-
56
- def null_value
57
- dialect.null_value
58
- end
59
-
60
53
  def pull_sql(window)
61
54
  'SELECT %s FROM %s WHERE %s BETWEEN %s AND %s' % [
62
55
  column_names.join(', '),
@@ -67,21 +60,9 @@ module Purview
67
60
  ]
68
61
  end
69
62
 
70
- def quoted(value)
71
- dialect.quoted(value)
72
- end
73
-
74
- def sanitized(value)
75
- dialect.sanitized(value)
76
- end
77
-
78
63
  def table_name
79
64
  opts[:table_name]
80
65
  end
81
-
82
- def true_value
83
- dialect.true_value
84
- end
85
66
  end
86
67
  end
87
68
  end
@@ -1,20 +1,6 @@
1
1
  module Purview
2
2
  module Structs
3
3
  class TableMetadata < Base
4
- def initialize(row)
5
- enabled_at = row.enabled_at && Time.parse(row.enabled_at)
6
- last_pulled_at = row.last_pulled_at && Time.parse(row.last_pulled_at)
7
- locked_at = row.locked_at && Time.parse(row.locked_at)
8
- max_timestamp_pulled = row.max_timestamp_pulled && Time.parse(row.max_timestamp_pulled)
9
- super(
10
- :table_name => row.table_name,
11
- :enabled_at => enabled_at,
12
- :last_pulled_at => last_pulled_at,
13
- :locked_at => locked_at,
14
- :max_timestamp_pulled => max_timestamp_pulled,
15
- )
16
- end
17
-
18
4
  def diabled?
19
5
  !enabled?
20
6
  end
@@ -1,90 +1,47 @@
1
1
  module Purview
2
2
  module Tables
3
3
  class Base
4
- attr_reader :database, :indices, :name
4
+ attr_reader :columns, :database, :indices, :name
5
5
 
6
6
  def initialize(name, opts={})
7
- @name = name
7
+ @name = name.to_sym
8
8
  @opts = opts
9
+ @database = database_opt
10
+ @columns = Set.new.tap do |result|
11
+ (default_columns + columns_opt).each do |column|
12
+ column.table = self if result.add?(column)
13
+ end
14
+ end
9
15
  @indices = Set.new.tap do |result|
10
- ((opts[:indices] || []) + default_indices).each do |index|
16
+ (default_indices + indices_opt).each do |index|
11
17
  index.table = self if result.add?(index)
12
18
  end
13
19
  end
14
20
  end
15
21
 
16
- def columns
17
- opts[:columns]
18
- end
19
-
20
22
  def column_names
21
23
  columns.map(&:name)
22
24
  end
23
25
 
24
26
  def columns_by_name
25
- {}.tap do |result|
26
- columns.each do |column|
27
- result[column.name] = column
28
- end
27
+ columns.reduce({}) do |memo, column|
28
+ memo[column.name] = column
29
+ memo
29
30
  end
30
31
  end
31
32
 
32
- def columns_of_type(type)
33
- columns.select { |column| column.is_a?(type) }
34
- end
35
-
36
- def created_timestamp_column
37
- columns_of_type(Purview::Columns::CreatedTimestamp).first
38
- end
39
-
40
- def created_timestamp_index
41
- Purview::Indices::Simple.new(created_timestamp_column)
42
- end
43
-
44
- def data_columns
45
- columns - [
46
- created_timestamp_column,
47
- id_column,
48
- updated_timestamp_column,
49
- ]
33
+ def columns_by_source_name
34
+ columns.reduce({}) do |memo, column|
35
+ memo[column.source_name] = column
36
+ memo
37
+ end
50
38
  end
51
39
 
52
40
  def database=(value)
53
- raise Purview::Exceptions::DatabaseAlreadyAssigned.new(self) if database
41
+ raise Purview::Exceptions::DatabaseAlreadyAssignedForTable.new(self) if database
54
42
  @database = value
55
43
  end
56
44
 
57
- def id_column
58
- columns_of_type(Purview::Columns::Id).first
59
- end
60
-
61
- def sync(connection, window)
62
- raw_data = puller.pull(window)
63
- parser.validate(raw_data)
64
- parsed_data = parser.parse(raw_data)
65
- loader.load(
66
- connection,
67
- parsed_data,
68
- window
69
- )
70
- end
71
-
72
- def temporary_name
73
- "#{name}_#{timestamp.to_i}"
74
- end
75
-
76
- def updated_timestamp_column
77
- columns_of_type(Purview::Columns::UpdatedTimestamp).first
78
- end
79
-
80
- def updated_timestamp_index
81
- Purview::Indices::Simple.new(updated_timestamp_column)
82
- end
83
-
84
- def window_size
85
- opts[:window_size] || (60 * 60)
86
- end
87
-
88
45
  private
89
46
 
90
47
  include Purview::Mixins::Helpers
@@ -92,59 +49,24 @@ module Purview
92
49
 
93
50
  attr_reader :opts
94
51
 
95
- def default_indices
96
- [
97
- created_timestamp_index,
98
- updated_timestamp_index,
99
- ]
100
- end
101
-
102
- def extract_type_option(opts)
103
- opts[:type]
52
+ def columns_opt
53
+ opts[:columns] || []
104
54
  end
105
55
 
106
- def filter_type_option(opts)
107
- opts.select { |key| key != :type }
56
+ def database_opt
57
+ opts[:database]
108
58
  end
109
59
 
110
- def loader
111
- loader_type.new(loader_opts)
60
+ def default_columns
61
+ []
112
62
  end
113
63
 
114
- def loader_opts
115
- merge_table_option(filter_type_option(opts[:loader]))
116
- end
117
-
118
- def loader_type
119
- extract_type_option(opts[:loader])
120
- end
121
-
122
- def merge_table_option(opts)
123
- { :table => self }.merge(opts)
124
- end
125
-
126
- def parser
127
- parser_type.new(parser_opts)
128
- end
129
-
130
- def parser_opts
131
- merge_table_option(filter_type_option(opts[:parser]))
132
- end
133
-
134
- def parser_type
135
- extract_type_option(opts[:parser])
136
- end
137
-
138
- def puller
139
- puller_type.new(puller_opts)
140
- end
141
-
142
- def puller_opts
143
- merge_table_option(filter_type_option(opts[:puller]))
64
+ def default_indices
65
+ []
144
66
  end
145
67
 
146
- def puller_type
147
- extract_type_option(opts[:puller])
68
+ def indices_opt
69
+ opts[:indices] || []
148
70
  end
149
71
  end
150
72
  end
@@ -0,0 +1,113 @@
1
+ module Purview
2
+ module Tables
3
+ class BaseSyncable < Base
4
+ def created_timestamp_column
5
+ column_from_opts_of_type(Purview::Columns::CreatedTimestamp) or raise %{Must specify a column of type: "#{Purview::Columns::CreatedTimestamp}"}
6
+ end
7
+
8
+ def created_timestamp_index
9
+ Purview::Indices::Simple.new(created_timestamp_column)
10
+ end
11
+
12
+ def id_column
13
+ column_from_opts_of_type(Purview::Columns::Id) or raise %{Must specify a column of type: "#{Purview::Columns::Id}"}
14
+ end
15
+
16
+ def sync(connection, window)
17
+ raw_data = puller.pull(window)
18
+ parser.validate(raw_data)
19
+ parsed_data = parser.parse(raw_data)
20
+ loader.load(
21
+ connection,
22
+ parsed_data,
23
+ window
24
+ )
25
+ end
26
+
27
+ def temporary_name
28
+ "#{name}_#{timestamp.to_i}"
29
+ end
30
+
31
+ def updated_timestamp_column
32
+ column_from_opts_of_type(Purview::Columns::UpdatedTimestamp) or raise %{Must specify a column of type: "#{Purview::Columns::UpdatedTimestamp}"}
33
+ end
34
+
35
+ def updated_timestamp_index
36
+ Purview::Indices::Simple.new(updated_timestamp_column)
37
+ end
38
+
39
+ def window_size
40
+ opts[:window_size] || (60 * 60)
41
+ end
42
+
43
+ private
44
+
45
+ def column_from_opts_of_type(type)
46
+ columns_opt.select { |column| column.is_a?(type) }.first
47
+ end
48
+
49
+ def default_columns
50
+ super + [
51
+ id_column,
52
+ created_timestamp_column,
53
+ updated_timestamp_column,
54
+ ]
55
+ end
56
+
57
+ def default_indices
58
+ super + [
59
+ created_timestamp_index,
60
+ updated_timestamp_index,
61
+ ]
62
+ end
63
+
64
+ def extract_type_opt(opts)
65
+ opts[:type]
66
+ end
67
+
68
+ def filter_type_opt(opts)
69
+ opts.select { |key| key != :type }
70
+ end
71
+
72
+ def loader
73
+ loader_type.new(loader_opts)
74
+ end
75
+
76
+ def loader_opts
77
+ merge_table_opt(filter_type_opt(opts[:loader]))
78
+ end
79
+
80
+ def loader_type
81
+ extract_type_opt(opts[:loader])
82
+ end
83
+
84
+ def merge_table_opt(opts)
85
+ { :table => self }.merge(opts)
86
+ end
87
+
88
+ def parser
89
+ parser_type.new(parser_opts)
90
+ end
91
+
92
+ def parser_opts
93
+ merge_table_opt(filter_type_opt(opts[:parser]))
94
+ end
95
+
96
+ def parser_type
97
+ extract_type_opt(opts[:parser])
98
+ end
99
+
100
+ def puller
101
+ puller_type.new(puller_opts)
102
+ end
103
+
104
+ def puller_opts
105
+ merge_table_opt(filter_type_opt(opts[:puller]))
106
+ end
107
+
108
+ def puller_type
109
+ extract_type_opt(opts[:puller])
110
+ end
111
+ end
112
+ end
113
+ end
@@ -1,6 +1,6 @@
1
1
  module Purview
2
2
  module Tables
3
- class Raw < Base
3
+ class Raw < BaseSyncable
4
4
  def name
5
5
  "#{super}_raw"
6
6
  end
@@ -0,0 +1,43 @@
1
+ module Purview
2
+ module Tables
3
+ class TableMetadata < Base
4
+ def initialize(database)
5
+ super(
6
+ :table_metadata,
7
+ :columns => [
8
+ table_name_column,
9
+ enabled_at_column,
10
+ last_pulled_at_column,
11
+ locked_at_column,
12
+ max_timestamp_pulled_column,
13
+ ],
14
+ :database => database
15
+ )
16
+ end
17
+
18
+ def enabled_at_column
19
+ Purview::Columns::Timestamp.new(:enabled_at)
20
+ end
21
+
22
+ def last_pulled_at_column
23
+ Purview::Columns::Timestamp.new(:last_pulled_at)
24
+ end
25
+
26
+ def locked_at_column
27
+ Purview::Columns::Timestamp.new(:locked_at)
28
+ end
29
+
30
+ def max_timestamp_pulled_column
31
+ Purview::Columns::Timestamp.new(:max_timestamp_pulled)
32
+ end
33
+
34
+ def table_name_column
35
+ Purview::Columns::Id.new(
36
+ :table_name,
37
+ :type => Purview::Types::String,
38
+ :limit => 255,
39
+ )
40
+ end
41
+ end
42
+ end
43
+ end
@@ -1,2 +1,5 @@
1
1
  require 'purview/tables/base'
2
+ require 'purview/tables/base_syncable'
3
+
2
4
  require 'purview/tables/raw'
5
+ require 'purview/tables/table_metadata'
@@ -1,3 +1,3 @@
1
1
  module Purview
2
- VERSION = '1.3.1'
2
+ VERSION = '1.4.0'
3
3
  end