purview 1.0.0.alpha

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +33 -0
  3. data/.travis.yml +18 -0
  4. data/Gemfile +3 -0
  5. data/LICENSE.txt +22 -0
  6. data/README.md +143 -0
  7. data/Rakefile +11 -0
  8. data/TODO +81 -0
  9. data/lib/purview/columns/base.rb +65 -0
  10. data/lib/purview/columns/boolean.rb +11 -0
  11. data/lib/purview/columns/created_timestamp.rb +11 -0
  12. data/lib/purview/columns/date.rb +11 -0
  13. data/lib/purview/columns/float.rb +11 -0
  14. data/lib/purview/columns/id.rb +11 -0
  15. data/lib/purview/columns/integer.rb +11 -0
  16. data/lib/purview/columns/money.rb +11 -0
  17. data/lib/purview/columns/string.rb +11 -0
  18. data/lib/purview/columns/text.rb +11 -0
  19. data/lib/purview/columns/time.rb +11 -0
  20. data/lib/purview/columns/timestamp.rb +11 -0
  21. data/lib/purview/columns/updated_timestamp.rb +11 -0
  22. data/lib/purview/columns/uuid.rb +11 -0
  23. data/lib/purview/columns.rb +14 -0
  24. data/lib/purview/connections/base.rb +55 -0
  25. data/lib/purview/connections/mysql.rb +39 -0
  26. data/lib/purview/connections/postgresql.rb +27 -0
  27. data/lib/purview/connections.rb +3 -0
  28. data/lib/purview/databases/base.rb +559 -0
  29. data/lib/purview/databases/mysql.rb +207 -0
  30. data/lib/purview/databases/postgresql.rb +210 -0
  31. data/lib/purview/databases.rb +3 -0
  32. data/lib/purview/exceptions/base.rb +5 -0
  33. data/lib/purview/exceptions/could_not_acquire_lock.rb +9 -0
  34. data/lib/purview/exceptions/lock_already_released.rb +9 -0
  35. data/lib/purview/exceptions/no_table.rb +9 -0
  36. data/lib/purview/exceptions/no_window.rb +9 -0
  37. data/lib/purview/exceptions/rows_outside_window.rb +18 -0
  38. data/lib/purview/exceptions/table.rb +13 -0
  39. data/lib/purview/exceptions.rb +7 -0
  40. data/lib/purview/loaders/base.rb +154 -0
  41. data/lib/purview/loaders/mysql.rb +81 -0
  42. data/lib/purview/loaders/postgresql.rb +81 -0
  43. data/lib/purview/loaders.rb +3 -0
  44. data/lib/purview/loggers/base.rb +99 -0
  45. data/lib/purview/loggers/console.rb +11 -0
  46. data/lib/purview/loggers.rb +2 -0
  47. data/lib/purview/mixins/helpers.rb +21 -0
  48. data/lib/purview/mixins/logger.rb +21 -0
  49. data/lib/purview/mixins.rb +2 -0
  50. data/lib/purview/parsers/base.rb +39 -0
  51. data/lib/purview/parsers/csv.rb +49 -0
  52. data/lib/purview/parsers/tsv.rb +11 -0
  53. data/lib/purview/parsers.rb +3 -0
  54. data/lib/purview/pullers/base.rb +19 -0
  55. data/lib/purview/pullers/uri.rb +66 -0
  56. data/lib/purview/pullers.rb +2 -0
  57. data/lib/purview/refinements/object.rb +5 -0
  58. data/lib/purview/refinements/time.rb +5 -0
  59. data/lib/purview/refinements.rb +2 -0
  60. data/lib/purview/structs/base.rb +10 -0
  61. data/lib/purview/structs/result.rb +7 -0
  62. data/lib/purview/structs/window.rb +7 -0
  63. data/lib/purview/structs.rb +3 -0
  64. data/lib/purview/tables/base.rb +140 -0
  65. data/lib/purview/tables/raw.rb +13 -0
  66. data/lib/purview/tables.rb +2 -0
  67. data/lib/purview/types/base.rb +9 -0
  68. data/lib/purview/types/boolean.rb +9 -0
  69. data/lib/purview/types/date.rb +9 -0
  70. data/lib/purview/types/float.rb +9 -0
  71. data/lib/purview/types/integer.rb +9 -0
  72. data/lib/purview/types/money.rb +9 -0
  73. data/lib/purview/types/string.rb +9 -0
  74. data/lib/purview/types/text.rb +9 -0
  75. data/lib/purview/types/time.rb +9 -0
  76. data/lib/purview/types/timestamp.rb +9 -0
  77. data/lib/purview/types/uuid.rb +9 -0
  78. data/lib/purview/types.rb +11 -0
  79. data/lib/purview/version.rb +3 -0
  80. data/lib/purview.rb +27 -0
  81. data/purview.gemspec +29 -0
  82. data/spec/spec_helper.rb +5 -0
  83. metadata +210 -0
@@ -0,0 +1,140 @@
1
module Purview
  module Tables
    # Describes a single table: its name, its columns and the
    # puller -> parser -> loader pipeline used to synchronize it.
    #
    # All configuration comes from the `opts` hash handed to #initialize. The
    # keys read by this class are :columns, :database, :indexed_columns,
    # :starting_timestamp, :window_size, :loader, :parser and :puller.
    class Base
      attr_reader :name

      # @param name [Object] the table's name (interpolated into strings by
      #   #temporary_name)
      # @param opts [Hash] table configuration; see the class comment for the
      #   keys that are consulted
      def initialize(name, opts={})
        @name = name
        @opts = opts
      end

      # @return [Array, nil] the value configured under `opts[:columns]`
      def columns
        opts[:columns]
      end

      # @return [Array] the `name` of every configured column, in order
      def column_names
        columns.map(&:name)
      end

      # @return [Hash] the configured columns keyed by their `name`; when two
      #   columns share a name the later one wins
      def columns_by_name
        columns.each_with_object({}) do |column, result|
          result[column.name] = column
        end
      end

      # @param type [Class, Module] class to match with `is_a?`
      # @return [Array] the configured columns that are instances of `type`
      def columns_of_type(type)
        columns.select { |column| column.is_a?(type) }
      end

      # @return [Object, nil] the first Purview::Columns::CreatedTimestamp
      #   column, or nil when none is configured
      def created_timestamp_column
        columns_of_type(Purview::Columns::CreatedTimestamp).first
      end

      # @return [Array] every column except the created-timestamp, id and
      #   updated-timestamp columns
      def data_columns
        columns - [
          created_timestamp_column,
          id_column,
          updated_timestamp_column,
        ]
      end

      # @return [Object, nil] the value configured under `opts[:database]`
      def database
        opts[:database]
      end

      # @return [Object, nil] the first Purview::Columns::Id column, or nil
      #   when none is configured
      def id_column
        columns_of_type(Purview::Columns::Id).first
      end

      # The configured `opts[:indexed_columns]` entries followed by one
      # single-column entry each for the created and updated timestamp columns
      # (the entry is `[nil]` when the table has no such column).
      #
      # A fresh array is built on every call. Appending to
      # `opts[:indexed_columns]` in place would mutate the caller's
      # configuration and add the two timestamp entries again on each call.
      #
      # @return [Array<Array>]
      def indexed_columns
        (opts[:indexed_columns] || []) + [
          [created_timestamp_column],
          [updated_timestamp_column],
        ]
      end

      # @return [Object, nil] the value configured under
      #   `opts[:starting_timestamp]`
      def starting_timestamp
        opts[:starting_timestamp]
      end

      # Runs the pipeline for one window: pull the raw data, validate it,
      # parse it and hand the parsed result to the loader.
      #
      # @param connection [Object] passed through to the loader untouched
      # @param window [Object] passed to the puller and to the loader
      # @return [Object] whatever the loader's `load` returns
      def sync(connection, window)
        raw_data = puller.pull(window)
        parser.validate(raw_data)
        parsed_data = parser.parse(raw_data)
        loader.load(
          connection,
          parsed_data,
          window
        )
      end

      # @return [String] the table name suffixed with the current UTC epoch
      #   time in whole seconds, e.g. "users_1430092800"
      def temporary_name
        "#{name}_#{Time.now.utc.to_i}"
      end

      # @return [Object, nil] the first Purview::Columns::UpdatedTimestamp
      #   column, or nil when none is configured
      def updated_timestamp_column
        columns_of_type(Purview::Columns::UpdatedTimestamp).first
      end

      # @return [Object] `opts[:window_size]` when set, otherwise 3600
      #   (presumably seconds, i.e. one hour -- confirm against the callers)
      def window_size
        opts[:window_size] || (60 * 60)
      end

      private

      include Purview::Mixins::Logger

      attr_reader :opts

      # @param opts [Hash] a component configuration hash
      # @return [Object, nil] the value stored under :type
      def extract_type_option(opts)
        opts[:type]
      end

      # @param opts [Hash] a component configuration hash
      # @return [Hash] a copy of `opts` without the :type entry
      def filter_type_option(opts)
        opts.select { |key, _value| key != :type }
      end

      # @return [Object] a new instance of the configured loader type; a new
      #   object is constructed on every call
      def loader
        loader_type.new(loader_opts)
      end

      # @return [Hash] the :loader options without :type, plus :table
      def loader_opts
        merge_table_option(filter_type_option(opts[:loader]))
      end

      # @return [Object, nil] the :type entry of the :loader options
      def loader_type
        extract_type_option(opts[:loader])
      end

      # @param opts [Hash] a component configuration hash
      # @return [Hash] `opts` with `:table => self` added; an explicit :table
      #   entry in `opts` takes precedence
      def merge_table_option(opts)
        { :table => self }.merge(opts)
      end

      # @return [Object] a new instance of the configured parser type; a new
      #   object is constructed on every call
      def parser
        parser_type.new(parser_opts)
      end

      # @return [Hash] the :parser options without :type, plus :table
      def parser_opts
        merge_table_option(filter_type_option(opts[:parser]))
      end

      # @return [Object, nil] the :type entry of the :parser options
      def parser_type
        extract_type_option(opts[:parser])
      end

      # @return [Object] a new instance of the configured puller type; a new
      #   object is constructed on every call
      def puller
        puller_type.new(puller_opts)
      end

      # @return [Hash] the :puller options without :type (no :table is added)
      def puller_opts
        filter_type_option(opts[:puller])
      end

      # @return [Object, nil] the :type entry of the :puller options
      def puller_type
        extract_type_option(opts[:puller])
      end
    end
  end
end
@@ -0,0 +1,13 @@
1
module Purview
  module Tables
    # A table variant whose name carries a "_raw" suffix and whose default
    # window is 24 * 60 * 60 instead of the base class default.
    class Raw < Base
      # @return [String] the inherited name followed by "_raw"
      def name
        base_name = super
        "#{base_name}_raw"
      end

      # @return [Object] `opts[:window_size]` when set, otherwise 86400
      #   (presumably seconds, i.e. one day -- confirm against the callers)
      def window_size
        configured = opts[:window_size]
        configured || (24 * 60 * 60)
      end
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require 'purview/tables/base'
2
+ require 'purview/tables/raw'
@@ -0,0 +1,9 @@
1
module Purview
  module Types
    # Root of the type hierarchy. Subclasses are expected to replace the
    # class-level `parse` method; calling it here always raises.
    class Base
      class << self
        # @param value [Object] ignored by this implementation
        # @raise [RuntimeError] always, naming this class in the message
        def parse(value)
          message = %{All "#{Base}(s)" must override the "parse" method}
          raise RuntimeError, message
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Purview
  module Types
    # Converts textual flags into true/false.
    class Boolean < Base
      class << self
        # @param value [Object] matched with `=~` against the accepted
        #   spellings: true, t, yes, y or 1 (case-insensitive, whole value)
        # @return [Boolean] true on a match, false otherwise
        def parse(value)
          truthy_pattern = /\A(true|t|yes|y|1)\z/i
          (value =~ truthy_pattern) ? true : false
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Purview
  module Types
    # Converts a value into a date.
    class Date < Base
      class << self
        # Delegates to the top-level ::Date (not this class, which shares
        # the name).
        #
        # @param value [Object] handed to ::Date.parse unchanged
        # @return [Object] the result of ::Date.parse
        def parse(value)
          parsed = ::Date.parse(value)
          parsed
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Purview
  module Types
    # Converts a value into a floating point number.
    class Float < Base
      class << self
        # Uses the Kernel conversion function; spelled with an explicit
        # receiver because this class is itself named Float.
        #
        # @param value [Object] handed to Kernel.Float unchanged
        # @return [Object] the result of Kernel.Float
        def parse(value)
          Kernel.Float(value)
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Purview
  module Types
    # Converts a value into an integer.
    class Integer < Base
      class << self
        # Uses the Kernel conversion function; spelled with an explicit
        # receiver because this class is itself named Integer.
        #
        # @param value [Object] handed to Kernel.Integer unchanged
        # @return [Object] the result of Kernel.Integer
        def parse(value)
          Kernel.Integer(value)
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Purview
  module Types
    # Converts a monetary value using the same conversion as
    # floating point numbers.
    class Money < Base
      class << self
        # NOTE(review): amounts go through the Kernel Float conversion, so
        # they carry binary floating point rounding -- confirm this is
        # acceptable for the callers.
        #
        # @param value [Object] handed to Kernel.Float unchanged
        # @return [Object] the result of Kernel.Float
        def parse(value)
          Kernel.Float(value)
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Purview
  module Types
    # Converts a value into a string.
    class String < Base
      class << self
        # Uses the Kernel conversion function; spelled with an explicit
        # receiver because this class is itself named String.
        #
        # @param value [Object] handed to Kernel.String unchanged
        # @return [Object] the result of Kernel.String
        def parse(value)
          Kernel.String(value)
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Purview
  module Types
    # Converts a value into a string, using the same conversion as
    # the String type.
    class Text < Base
      class << self
        # @param value [Object] handed to Kernel.String unchanged
        # @return [Object] the result of Kernel.String
        def parse(value)
          Kernel.String(value)
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Purview
  module Types
    # Converts a value into a time.
    class Time < Base
      class << self
        # Delegates to the top-level ::Time (not this class, which shares
        # the name).
        #
        # @param value [Object] handed to ::Time.parse unchanged
        # @return [Object] the result of ::Time.parse
        def parse(value)
          parsed = ::Time.parse(value)
          parsed
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Purview
  module Types
    # Converts a timestamp value using the same conversion as the
    # Time type.
    class Timestamp < Base
      class << self
        # @param value [Object] handed to ::Time.parse unchanged
        # @return [Object] the result of ::Time.parse
        def parse(value)
          parsed = ::Time.parse(value)
          parsed
        end
      end
    end
  end
end
@@ -0,0 +1,9 @@
1
module Purview
  module Types
    # Converts a UUID value by turning it into a string; no format
    # validation is performed here.
    class UUID < Base
      class << self
        # @param value [Object] handed to Kernel.String unchanged
        # @return [Object] the result of Kernel.String
        def parse(value)
          Kernel.String(value)
        end
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
+ require 'purview/types/base'
2
+ require 'purview/types/boolean'
3
+ require 'purview/types/date'
4
+ require 'purview/types/float'
5
+ require 'purview/types/integer'
6
+ require 'purview/types/money'
7
+ require 'purview/types/string'
8
+ require 'purview/types/text'
9
+ require 'purview/types/time'
10
+ require 'purview/types/timestamp'
11
+ require 'purview/types/uuid'
@@ -0,0 +1,3 @@
1
module Purview
  # Release version of the gem. Frozen so that code holding a reference to
  # the constant cannot modify the version string in place.
  VERSION = '1.0.0.alpha'.freeze
end
data/lib/purview.rb ADDED
@@ -0,0 +1,27 @@
1
+ require 'csv'
2
+ require 'date'
3
+ require 'net/http'
4
+ require 'openssl'
5
+ require 'ostruct'
6
+ require 'set'
7
+ require 'time'
8
+ require 'uri'
9
+
10
# Try each database driver in turn. A driver that is not installed is
# skipped silently at this point.
%w[mysql2 pg].each do |driver|
  begin
    require driver
  rescue LoadError
    # Deliberately ignored; the check below insists on at least one driver.
  end
end

# Terminate the process when neither driver's top-level constant exists.
abort 'Could not load the `mysql2` or `pg` gem' if !defined?(Mysql2) && !defined?(PG)
12
+
13
+ require 'purview/mixins'
14
+ require 'purview/refinements'
15
+
16
+ require 'purview/columns'
17
+ require 'purview/connections'
18
+ require 'purview/databases'
19
+ require 'purview/exceptions'
20
+ require 'purview/loaders'
21
+ require 'purview/loggers'
22
+ require 'purview/parsers'
23
+ require 'purview/pullers'
24
+ require 'purview/structs'
25
+ require 'purview/tables'
26
+ require 'purview/types'
27
+ require 'purview/version'
data/purview.gemspec ADDED
@@ -0,0 +1,29 @@
1
+ # coding: utf-8
2
+
3
+ lib = File.expand_path('../lib', __FILE__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require 'purview/version'
6
+
7
# Package definition for the purview gem.
Gem::Specification.new do |spec|
  # Identity and descriptive metadata.
  spec.name = 'purview'
  spec.version = Purview::VERSION
  spec.authors = ['Jonathan W. Zaleski']
  spec.email = ['JonathanZaleski@gmail.com']
  spec.summary = 'A framework designed to simplify data warehousing'
  spec.description = 'Coming soon!'
  spec.homepage = 'https://github.com/jzaleski/purview'
  spec.license = 'MIT'

  # Packaged files are whatever git tracks; executables and test files are
  # picked out of that list by path prefix.
  spec.files = `git ls-files`.split($/)
  spec.executables = spec.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Everything is a development dependency, including both database drivers,
  # so installing the gem does not pull in either driver.
  [
    ['mysql2', '~> 0.3'],
    ['pg', '~> 0.18'],
    ['bundler', '~> 1.0'],
    ['pry', '~> 0.10'],
    ['rake', '~> 10.4'],
    ['rspec', '~> 3.2'],
  ].each do |dependency, requirement|
    spec.add_development_dependency dependency, requirement
  end
end
@@ -0,0 +1,5 @@
1
+ require 'rspec'
2
+
3
# Turn on coloured output. The gemspec asks for rspec ~> 3.2, whose
# configuration object is set through `color=`; the older `color_enabled=`
# setter is kept as a fallback for configurations that only offer that one.
# Checking only for `color_enabled` meant colour was never switched on when
# that method was absent.
RSpec.configure do |config|
  if config.respond_to?(:color=)
    config.color = true
  elsif config.respond_to?(:color_enabled=)
    config.color_enabled = true
  end
end
metadata ADDED
@@ -0,0 +1,210 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: purview
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0.alpha
5
+ platform: ruby
6
+ authors:
7
+ - Jonathan W. Zaleski
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2015-04-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mysql2
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '0.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '0.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: pg
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '0.18'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '0.18'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: pry
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '0.10'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '0.10'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '10.4'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '10.4'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rspec
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '3.2'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '3.2'
97
+ description: Coming soon!
98
+ email:
99
+ - JonathanZaleski@gmail.com
100
+ executables: []
101
+ extensions: []
102
+ extra_rdoc_files: []
103
+ files:
104
+ - ".gitignore"
105
+ - ".travis.yml"
106
+ - Gemfile
107
+ - LICENSE.txt
108
+ - README.md
109
+ - Rakefile
110
+ - TODO
111
+ - lib/purview.rb
112
+ - lib/purview/columns.rb
113
+ - lib/purview/columns/base.rb
114
+ - lib/purview/columns/boolean.rb
115
+ - lib/purview/columns/created_timestamp.rb
116
+ - lib/purview/columns/date.rb
117
+ - lib/purview/columns/float.rb
118
+ - lib/purview/columns/id.rb
119
+ - lib/purview/columns/integer.rb
120
+ - lib/purview/columns/money.rb
121
+ - lib/purview/columns/string.rb
122
+ - lib/purview/columns/text.rb
123
+ - lib/purview/columns/time.rb
124
+ - lib/purview/columns/timestamp.rb
125
+ - lib/purview/columns/updated_timestamp.rb
126
+ - lib/purview/columns/uuid.rb
127
+ - lib/purview/connections.rb
128
+ - lib/purview/connections/base.rb
129
+ - lib/purview/connections/mysql.rb
130
+ - lib/purview/connections/postgresql.rb
131
+ - lib/purview/databases.rb
132
+ - lib/purview/databases/base.rb
133
+ - lib/purview/databases/mysql.rb
134
+ - lib/purview/databases/postgresql.rb
135
+ - lib/purview/exceptions.rb
136
+ - lib/purview/exceptions/base.rb
137
+ - lib/purview/exceptions/could_not_acquire_lock.rb
138
+ - lib/purview/exceptions/lock_already_released.rb
139
+ - lib/purview/exceptions/no_table.rb
140
+ - lib/purview/exceptions/no_window.rb
141
+ - lib/purview/exceptions/rows_outside_window.rb
142
+ - lib/purview/exceptions/table.rb
143
+ - lib/purview/loaders.rb
144
+ - lib/purview/loaders/base.rb
145
+ - lib/purview/loaders/mysql.rb
146
+ - lib/purview/loaders/postgresql.rb
147
+ - lib/purview/loggers.rb
148
+ - lib/purview/loggers/base.rb
149
+ - lib/purview/loggers/console.rb
150
+ - lib/purview/mixins.rb
151
+ - lib/purview/mixins/helpers.rb
152
+ - lib/purview/mixins/logger.rb
153
+ - lib/purview/parsers.rb
154
+ - lib/purview/parsers/base.rb
155
+ - lib/purview/parsers/csv.rb
156
+ - lib/purview/parsers/tsv.rb
157
+ - lib/purview/pullers.rb
158
+ - lib/purview/pullers/base.rb
159
+ - lib/purview/pullers/uri.rb
160
+ - lib/purview/refinements.rb
161
+ - lib/purview/refinements/object.rb
162
+ - lib/purview/refinements/time.rb
163
+ - lib/purview/structs.rb
164
+ - lib/purview/structs/base.rb
165
+ - lib/purview/structs/result.rb
166
+ - lib/purview/structs/window.rb
167
+ - lib/purview/tables.rb
168
+ - lib/purview/tables/base.rb
169
+ - lib/purview/tables/raw.rb
170
+ - lib/purview/types.rb
171
+ - lib/purview/types/base.rb
172
+ - lib/purview/types/boolean.rb
173
+ - lib/purview/types/date.rb
174
+ - lib/purview/types/float.rb
175
+ - lib/purview/types/integer.rb
176
+ - lib/purview/types/money.rb
177
+ - lib/purview/types/string.rb
178
+ - lib/purview/types/text.rb
179
+ - lib/purview/types/time.rb
180
+ - lib/purview/types/timestamp.rb
181
+ - lib/purview/types/uuid.rb
182
+ - lib/purview/version.rb
183
+ - purview.gemspec
184
+ - spec/spec_helper.rb
185
+ homepage: https://github.com/jzaleski/purview
186
+ licenses:
187
+ - MIT
188
+ metadata: {}
189
+ post_install_message:
190
+ rdoc_options: []
191
+ require_paths:
192
+ - lib
193
+ required_ruby_version: !ruby/object:Gem::Requirement
194
+ requirements:
195
+ - - ">="
196
+ - !ruby/object:Gem::Version
197
+ version: '0'
198
+ required_rubygems_version: !ruby/object:Gem::Requirement
199
+ requirements:
200
+ - - ">"
201
+ - !ruby/object:Gem::Version
202
+ version: 1.3.1
203
+ requirements: []
204
+ rubyforge_project:
205
+ rubygems_version: 2.4.3
206
+ signing_key:
207
+ specification_version: 4
208
+ summary: A framework designed to simplify data warehousing
209
+ test_files:
210
+ - spec/spec_helper.rb