redshifter 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +34 -1
- data/lib/redshifter/table.rb +2 -1
- data/lib/redshifter/util/extract_and_transform_updates.rb +1 -1
- data/lib/redshifter/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 904cec0aad5170e27712addd72009852c1b3a6d5
|
4
|
+
data.tar.gz: 6f5d606f0862a1522288987636928c1be4d0e6e1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8cc099b110abcd0d0696219604388879215ebcac444d3f36dd098840a8b8b49cbd91d727383493a857c216633ce6f86970122d298f42b2ab02b2094b091a0c6d
|
7
|
+
data.tar.gz: 33a85ccf7d1b28bacb02d36354e0e43406f73edd0ef6d9f49600a9791e05c0c0a7caea1e5a798bffabf64f12bd02d71237f4de9e0936a7309f0181c6e8fd2707
|
data/README.md
CHANGED
@@ -16,6 +16,7 @@ Feature Roadmap:
|
|
16
16
|
- 0.2.4 - New config format; update and replace rake tasks available
|
17
17
|
- 0.3.0 - Public version
|
18
18
|
- 0.4.0 - Make S3 region configurable
|
19
|
+
- 0.5.0 - Added functionality for source_table_filter to selectively export rows
|
19
20
|
|
20
21
|
## Installation
|
21
22
|
|
@@ -79,6 +80,8 @@ Redshifter.config.tables = {
|
|
79
80
|
source_table_name: 'books',
|
80
81
|
# [required] Prefixing your redshift table with its source is recommended
|
81
82
|
redshift_table_name: 'app_name_books',
|
83
|
+
# [optional] Provide a conditional to specify which rows get exported to Redshift
|
84
|
+
source_table_filter: 'title IS NOT NULL',
|
82
85
|
# [required] Columns with Redshift datatypes to create; may differ from source DB
|
83
86
|
redshift_columns: {
|
84
87
|
'id' => 'INTEGER',
|
@@ -120,7 +123,7 @@ Redshifter.config.tables = {
|
|
120
123
|
```
|
121
124
|
$ rake redshifter:replace[books_with_export_at]
|
122
125
|
```
|
123
|
-
|
126
|
+
|
124
127
|
### Schedule a Redshifter::Job::UpdateRedshiftTableJob resque job for each table you want to export updates for
|
125
128
|
|
126
129
|
Then schedule this meta job in `resque_schedule.yml` to run once at 10:00pm
|
@@ -134,6 +137,36 @@ etl_books_to_redshift:
|
|
134
137
|
description: 'Export the books table to Redshift'
|
135
138
|
```
|
136
139
|
|
140
|
+
### Monitoring Rake tasks with New Relic (optional)
|
141
|
+
New Relic offers Rake task instrumentation as of version `3.13.0` of their `newrelic_rpm` agent. Redshifter does not directly use or depend on `newrelic_rpm`. You must use New Relic's rake instrumentation and explicitly identify the tasks in your app that you want to monitor. See [New Relic docs](https://docs.newrelic.com/docs/agents/ruby-agent/background-jobs/rake-instrumentation).
|
142
|
+
|
143
|
+
In addition to setting `attributes.include` and `rake.tasks` as defined in their docs, it also seems to be necessary to manually start the agent synchronously in your Rakefile to ensure that fast-running tasks are reported.
|
144
|
+
|
145
|
+
In summary, your `newrelic.yml` should include keys and values like this to monitor redshifter rake tasks:
|
146
|
+
```yaml
|
147
|
+
common: &default_settings
|
148
|
+
#...
|
149
|
+
attributes:
|
150
|
+
include: job.rake.* # allows rake args reporting
|
151
|
+
rake:
|
152
|
+
# rake task monitoring must be white listed here AND not blacklisted via
|
153
|
+
# autostart.blacklisted_* config values
|
154
|
+
tasks: ['redshifter:update', 'redshifter:replace']
|
155
|
+
#...
|
156
|
+
```
|
157
|
+
|
158
|
+
and your `Rakefile` should end up looking something like this:
|
159
|
+
```ruby
|
160
|
+
# Rakefile
|
161
|
+
# ...
|
162
|
+
require 'redshifter/tasks'
|
163
|
+
|
164
|
+
# Force agent start assuring rake.tasks listed in newrelic.yml are instrumented
|
165
|
+
NewRelic::Agent.manual_start(sync_startup: true) if Rails.env.production?
|
166
|
+
# ...
|
167
|
+
```
|
168
|
+
|
169
|
+
|
137
170
|
## Development
|
138
171
|
|
139
172
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/redshifter/table.rb
CHANGED
@@ -14,9 +14,10 @@ module Redshifter
|
|
14
14
|
@redshift_sort_keys = config[:redshift_sort_keys]
|
15
15
|
@redshift_sort_style = config[:redshift_sort_style]
|
16
16
|
@redshift_primary_key = config[:redshift_primary_key]
|
17
|
+
@source_table_filter = config[:source_table_filter] || 1
|
17
18
|
end
|
18
19
|
|
19
|
-
attr_reader :source_table_name, :redshift_table_name
|
20
|
+
attr_reader :source_table_name, :redshift_table_name, :source_table_filter
|
20
21
|
|
21
22
|
def redshift_column_names
|
22
23
|
redshift_columns.keys
|
@@ -74,7 +74,7 @@ module Redshifter
|
|
74
74
|
end
|
75
75
|
|
76
76
|
def select_batch_sql(columns:, batch_size:, start_id:)
|
77
|
-
"
|
77
|
+
"SELECT #{columns.join(', ')} FROM #{table.source_table_name} WHERE (#{table.source_table_filter}) AND updated_at >= '#{since}' AND id >= #{start_id} ORDER BY id ASC limit #{batch_size}"
|
78
78
|
end
|
79
79
|
end
|
80
80
|
end
|
data/lib/redshifter/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: redshifter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Justin Richard
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: dynosaur
|
@@ -173,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
173
173
|
version: '0'
|
174
174
|
requirements: []
|
175
175
|
rubyforge_project:
|
176
|
-
rubygems_version: 2.
|
176
|
+
rubygems_version: 2.4.5.1
|
177
177
|
signing_key:
|
178
178
|
specification_version: 4
|
179
179
|
summary: ETL processing jobs to exporting Rails model tables to Redshift
|