redshifter 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/README.md +34 -1
 - data/lib/redshifter/table.rb +2 -1
 - data/lib/redshifter/util/extract_and_transform_updates.rb +1 -1
 - data/lib/redshifter/version.rb +1 -1
 - metadata +3 -3
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 904cec0aad5170e27712addd72009852c1b3a6d5
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 6f5d606f0862a1522288987636928c1be4d0e6e1
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 8cc099b110abcd0d0696219604388879215ebcac444d3f36dd098840a8b8b49cbd91d727383493a857c216633ce6f86970122d298f42b2ab02b2094b091a0c6d
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 33a85ccf7d1b28bacb02d36354e0e43406f73edd0ef6d9f49600a9791e05c0c0a7caea1e5a798bffabf64f12bd02d71237f4de9e0936a7309f0181c6e8fd2707
         
     | 
    
        data/README.md
    CHANGED
    
    | 
         @@ -16,6 +16,7 @@ Feature Roadmap: 
     | 
|
| 
       16 
16 
     | 
    
         
             
              - 0.2.4 - New config format; update and replace rake tasks available
         
     | 
| 
       17 
17 
     | 
    
         
             
              - 0.3.0 - Public version
         
     | 
| 
       18 
18 
     | 
    
         
             
              - 0.4.0 - Make S3 region configurable
         
     | 
| 
      
 19 
     | 
    
         
            +
              - 0.5.0 - Added functionality for source_table_filter to selectively export rows
         
     | 
| 
       19 
20 
     | 
    
         | 
| 
       20 
21 
     | 
    
         
             
            ## Installation
         
     | 
| 
       21 
22 
     | 
    
         | 
| 
         @@ -79,6 +80,8 @@ Redshifter.config.tables = { 
     | 
|
| 
       79 
80 
     | 
    
         
             
                source_table_name: 'books',
         
     | 
| 
       80 
81 
     | 
    
         
             
                # [required] Prefixing your redshift table with its source is recommended
         
     | 
| 
       81 
82 
     | 
    
         
             
                redshift_table_name: 'app_name_books',
         
     | 
| 
      
 83 
     | 
    
         
            +
                # [optional] Provide a conditional to specify which rows get exported to Redshift
         
     | 
| 
      
 84 
     | 
    
         
            +
                source_table_filter: 'title IS NOT NULL',
         
     | 
| 
       82 
85 
     | 
    
         
             
                # [required] Columns with Redshift datatypes to create; may differ from source DB
         
     | 
| 
       83 
86 
     | 
    
         
             
                redshift_columns: {
         
     | 
| 
       84 
87 
     | 
    
         
             
                  'id' => 'INTEGER',
         
     | 
| 
         @@ -120,7 +123,7 @@ Redshifter.config.tables = { 
     | 
|
| 
       120 
123 
     | 
    
         
             
            ```
         
     | 
| 
       121 
124 
     | 
    
         
             
            $ rake redshifter:replace[books_with_export_at]
         
     | 
| 
       122 
125 
     | 
    
         
             
            ```
         
     | 
| 
       123 
     | 
    
         
            -
             
     | 
| 
      
 126 
     | 
    
         
            +
             
     | 
| 
       124 
127 
     | 
    
         
             
            ### Schedule a Redshifter::Job::UpdateRedshiftTableJob resque job per each table you want to export updates for
         
     | 
| 
       125 
128 
     | 
    
         | 
| 
       126 
129 
     | 
    
         
             
            Then schedule this meta job in `resque_schedule.yml` to run once at 10:00pm
         
     | 
| 
         @@ -134,6 +137,36 @@ etl_books_to_redshift: 
     | 
|
| 
       134 
137 
     | 
    
         
             
              description: 'Export the books table to Redshift'
         
     | 
| 
       135 
138 
     | 
    
         
             
            ```  
         
     | 
| 
       136 
139 
     | 
    
         | 
| 
      
 140 
     | 
    
         
            +
            ### Monitoring Rake tasks with New Relic (optional)
         
     | 
| 
      
 141 
     | 
    
         
            +
            New Relic offers Rake task instrumentation as of version `3.13.0` of their `newrelic_rpm` agent.  Redshifter does not directly use or depend on `newrelic_rpm`.  You must use New Relic's rake instrumentation and explicitly identify the tasks in your app that you want to monitor.  See [New Relic docs](https://docs.newrelic.com/docs/agents/ruby-agent/background-jobs/rake-instrumentation).
         
     | 
| 
      
 142 
     | 
    
         
            +
             
     | 
| 
      
 143 
     | 
    
         
            +
            In addition to setting `attributes.include` and `rake.tasks` as defined in their docs, it also seems to be necessary to manually start the agent synchronously in your Rakefile to ensure that fast-running tasks are reported.
         
     | 
| 
      
 144 
     | 
    
         
            +
             
     | 
| 
      
 145 
     | 
    
         
            +
            In summary, your `newrelic.yml` should include keys and values like this to monitor redshifter rake tasks:
         
     | 
| 
      
 146 
     | 
    
         
            +
            ```yaml
         
     | 
| 
      
 147 
     | 
    
         
            +
            common: &default_settings
         
     | 
| 
      
 148 
     | 
    
         
            +
              #...
         
     | 
| 
      
 149 
     | 
    
         
            +
              attributes:
         
     | 
| 
      
 150 
     | 
    
         
            +
                include: job.rake.*  # allows rake args reporting
         
     | 
| 
      
 151 
     | 
    
         
            +
              rake:
         
     | 
| 
      
 152 
     | 
    
         
            +
                # rake task monitoring must be white listed here AND not blacklisted via
         
     | 
| 
      
 153 
     | 
    
         
            +
                #   autostart.blacklisted_* config values
         
     | 
| 
      
 154 
     | 
    
         
            +
                tasks: ['redshifter:update', 'redshifter:replace']
         
     | 
| 
      
 155 
     | 
    
         
            +
            #...
         
     | 
| 
      
 156 
     | 
    
         
            +
            ```
         
     | 
| 
      
 157 
     | 
    
         
            +
             
     | 
| 
      
 158 
     | 
    
         
            +
            and your `Rakefile` should end up looking something like this:
         
     | 
| 
      
 159 
     | 
    
         
            +
            ```ruby
         
     | 
| 
      
 160 
     | 
    
         
            +
            # Rakefile
         
     | 
| 
      
 161 
     | 
    
         
            +
            # ...
         
     | 
| 
      
 162 
     | 
    
         
            +
            require 'redshifter/tasks'
         
     | 
| 
      
 163 
     | 
    
         
            +
             
     | 
| 
      
 164 
     | 
    
         
            +
            # Force agent start assuring rake.tasks listed in newrelic.yml are instrumented
         
     | 
| 
      
 165 
     | 
    
         
            +
            NewRelic::Agent.manual_start(sync_startup: true) if Rails.env.production?
         
     | 
| 
      
 166 
     | 
    
         
            +
            # ...
         
     | 
| 
      
 167 
     | 
    
         
            +
            ```
         
     | 
| 
      
 168 
     | 
    
         
            +
             
     | 
| 
      
 169 
     | 
    
         
            +
             
     | 
| 
       137 
170 
     | 
    
         
             
            ## Development
         
     | 
| 
       138 
171 
     | 
    
         | 
| 
       139 
172 
     | 
    
         
             
            After checking out the repo, run `bin/setup` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
         
     | 
    
        data/lib/redshifter/table.rb
    CHANGED
    
    | 
         @@ -14,9 +14,10 @@ module Redshifter 
     | 
|
| 
       14 
14 
     | 
    
         
             
                  @redshift_sort_keys = config[:redshift_sort_keys]
         
     | 
| 
       15 
15 
     | 
    
         
             
                  @redshift_sort_style = config[:redshift_sort_style]
         
     | 
| 
       16 
16 
     | 
    
         
             
                  @redshift_primary_key = config[:redshift_primary_key]
         
     | 
| 
      
 17 
     | 
    
         
            +
                  @source_table_filter = config[:source_table_filter] || 1
         
     | 
| 
       17 
18 
     | 
    
         
             
                end
         
     | 
| 
       18 
19 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
                attr_reader :source_table_name, :redshift_table_name
         
     | 
| 
      
 20 
     | 
    
         
            +
                attr_reader :source_table_name, :redshift_table_name, :source_table_filter
         
     | 
| 
       20 
21 
     | 
    
         | 
| 
       21 
22 
     | 
    
         
             
                def redshift_column_names
         
     | 
| 
       22 
23 
     | 
    
         
             
                  redshift_columns.keys
         
     | 
    
        data/lib/redshifter/util/extract_and_transform_updates.rb
    CHANGED
    
    | 
         @@ -74,7 +74,7 @@ module Redshifter 
     | 
|
| 
       74 
74 
     | 
    
         
             
                  end
         
     | 
| 
       75 
75 
     | 
    
         | 
| 
       76 
76 
     | 
    
         
             
                  def select_batch_sql(columns:, batch_size:, start_id:)
         
     | 
| 
       77 
     | 
    
         
            -
                    " 
     | 
| 
      
 77 
     | 
    
         
            +
                    "SELECT #{columns.join(', ')} FROM #{table.source_table_name} WHERE (#{table.source_table_filter}) AND updated_at >= '#{since}' AND id >= #{start_id} ORDER BY id ASC limit #{batch_size}"
         
     | 
| 
       78 
78 
     | 
    
         
             
                  end
         
     | 
| 
       79 
79 
     | 
    
         
             
                end
         
     | 
| 
       80 
80 
     | 
    
         
             
              end
         
     | 
    
        data/lib/redshifter/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: redshifter
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.4.0
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.5.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Justin Richard
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2016- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2016-03-30 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: dynosaur
         
     | 
| 
         @@ -173,7 +173,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       173 
173 
     | 
    
         
             
                  version: '0'
         
     | 
| 
       174 
174 
     | 
    
         
             
            requirements: []
         
     | 
| 
       175 
175 
     | 
    
         
             
            rubyforge_project: 
         
     | 
| 
       176 
     | 
    
         
            -
            rubygems_version: 2. 
     | 
| 
      
 176 
     | 
    
         
            +
            rubygems_version: 2.4.5.1
         
     | 
| 
       177 
177 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       178 
178 
     | 
    
         
             
            specification_version: 4
         
     | 
| 
       179 
179 
     | 
    
         
             
            summary: ETL processing jobs to exporting Rails model tables to Redshift
         
     |