bigshift 0.3.1 → 0.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -1
- data/lib/bigshift/cli.rb +15 -4
- data/lib/bigshift/redshift_table_schema.rb +4 -3
- data/lib/bigshift/redshift_unloader.rb +3 -3
- data/lib/bigshift/version.rb +1 -1
- metadata +3 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: b0a1088f4c4d8c66c8af35a4e67a8377d8b6f805
         | 
| 4 | 
            +
              data.tar.gz: bff611a9528b2b08a3587177cb3a448b7dbea4de
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 045ba2e30068a4259ac34763f3597d7087053ad8889443c077643576e5cb8df55ad02d0a01e50577b4865afa130a68311188bd4db05bc6c34a632d2ab9bfe39d
         | 
| 7 | 
            +
              data.tar.gz: 31cf2ec5852d2a1c200398a089b1be9c3093abb5e17a90cab3f130f43ef50df5d0c5749f117651f4ddc123d6504397fa1602a02dcf0e99d4fed511d320b803cf
         | 
    
        data/README.md
    CHANGED
    
    | @@ -32,7 +32,7 @@ BigShift tells Redshift to compress the dumps by default, even if that means tha | |
| 32 32 |  | 
| 33 33 | 
             
            ## Arguments
         | 
| 34 34 |  | 
| 35 | 
            -
            Running `bigshift` without any arguments, or with `--help` will show the options. All except `--s3-prefix`, `--bq-table`, `--max-bad-records`, `--steps` and `--[no-]compress` are required.
         | 
| 35 | 
            +
            Running `bigshift` without any arguments, or with `--help` will show the options. All except `--s3-prefix`, `--rs-schema`, `--bq-table`, `--max-bad-records`, `--steps` and `--[no-]compress` are required.
         | 
| 36 36 |  | 
| 37 37 | 
             
            ### GCP credentials
         | 
| 38 38 |  | 
| @@ -106,6 +106,10 @@ If you don't want to put the data dumped from Redshift directly into the root of | |
| 106 106 |  | 
| 107 107 | 
             
            Because of how GCS' Transfer Service works the transferred files will have exactly the same keys in the destination bucket, this cannot be configured.
         | 
| 108 108 |  | 
| 109 | 
            +
            ### Redshift schema
         | 
| 110 | 
            +
             | 
| 111 | 
            +
            By default the schema in Redshift is called `public`, but in case you're not using that one, you can use the argument `--rs-schema` to specify the schema your table is in.
         | 
| 112 | 
            +
             | 
| 109 113 | 
             
            ### BigQuery table ID
         | 
| 110 114 |  | 
| 111 115 | 
             
            By default the BigQuery table ID will be the same as the Redshift table name, but the optional argument `--bq-table` can be used to tell BigShift to use another table ID.
         | 
    
        data/lib/bigshift/cli.rb
    CHANGED
    
    | @@ -4,6 +4,7 @@ require 'json' | |
| 4 4 | 
             
            require 'stringio'
         | 
| 5 5 | 
             
            require 'logger'
         | 
| 6 6 | 
             
            require 'optparse'
         | 
| 7 | 
            +
            require 'socket'
         | 
| 7 8 | 
             
            require 'bigshift'
         | 
| 8 9 |  | 
| 9 10 | 
             
            module BigShift
         | 
| @@ -47,7 +48,7 @@ module BigShift | |
| 47 48 | 
             
                def unload
         | 
| 48 49 | 
             
                  if run?(:unload)
         | 
| 49 50 | 
             
                    s3_uri = "s3://#{@config[:s3_bucket_name]}/#{s3_table_prefix}"
         | 
| 50 | 
            -
                    @factory.redshift_unloader.unload_to(@config[:rs_table_name], s3_uri, allow_overwrite: false, compression: @config[:compression])
         | 
| 51 | 
            +
                    @factory.redshift_unloader.unload_to(@config[:rs_schema_name], @config[:rs_table_name], s3_uri, allow_overwrite: false, compression: @config[:compression])
         | 
| 51 52 | 
             
                  else
         | 
| 52 53 | 
             
                    @logger.debug('Skipping unload')
         | 
| 53 54 | 
             
                  end
         | 
| @@ -56,7 +57,7 @@ module BigShift | |
| 56 57 |  | 
| 57 58 | 
             
                def transfer
         | 
| 58 59 | 
             
                  if run?(:transfer)
         | 
| 59 | 
            -
                    description = "bigshift-#{@config[:rs_database_name]}-#{@config[:rs_table_name]}-#{Time.now.utc.strftime('%Y%m%dT%H%M')}"
         | 
| 60 | 
            +
                    description = "bigshift-#{@config[:rs_database_name]}-#{@config[:rs_schema_name]}-#{@config[:rs_table_name]}-#{Time.now.utc.strftime('%Y%m%dT%H%M')}"
         | 
| 60 61 | 
             
                    @factory.cloud_storage_transfer.copy_to_cloud_storage(@unload_manifest, @config[:cs_bucket_name], description: description, allow_overwrite: false)
         | 
| 61 62 | 
             
                  else
         | 
| 62 63 | 
             
                    @logger.debug('Skipping transfer')
         | 
| @@ -99,6 +100,7 @@ module BigShift | |
| 99 100 | 
             
                  ['--aws-credentials', 'PATH', String, :aws_credentials_path, nil],
         | 
| 100 101 | 
             
                  ['--rs-credentials', 'PATH', String, :rs_credentials_path, :required],
         | 
| 101 102 | 
             
                  ['--rs-database', 'DB_NAME', String, :rs_database_name, :required],
         | 
| 103 | 
            +
                  ['--rs-schema', 'SCHEMA_NAME', String, :rs_schema_name, nil],
         | 
| 102 104 | 
             
                  ['--rs-table', 'TABLE_NAME', String, :rs_table_name, :required],
         | 
| 103 105 | 
             
                  ['--bq-dataset', 'DATASET_ID', String, :bq_dataset_id, :required],
         | 
| 104 106 | 
             
                  ['--bq-table', 'TABLE_ID', String, :bq_table_id, nil],
         | 
| @@ -136,6 +138,7 @@ module BigShift | |
| 136 138 | 
             
                    end
         | 
| 137 139 | 
             
                  end
         | 
| 138 140 | 
             
                  config[:bq_table_id] ||= config[:rs_table_name]
         | 
| 141 | 
            +
                  config[:rs_schema_name] ||= 'public'
         | 
| 139 142 | 
             
                  if config[:steps] && !config[:steps].empty?
         | 
| 140 143 | 
             
                    config[:steps] = STEPS.select { |s| config[:steps].include?(s.to_s) }
         | 
| 141 144 | 
             
                  else
         | 
| @@ -150,8 +153,9 @@ module BigShift | |
| 150 153 | 
             
                def s3_table_prefix
         | 
| 151 154 | 
             
                  @s3_table_prefix ||= begin
         | 
| 152 155 | 
             
                    db_name = @config[:rs_database_name]
         | 
| 156 | 
            +
                    schema_name = @config[:rs_schema_name]
         | 
| 153 157 | 
             
                    table_name = @config[:rs_table_name]
         | 
| 154 | 
            -
                    prefix = "#{db_name}/#{table_name}/#{db_name}-#{table_name}-"
         | 
| 158 | 
            +
                    prefix = "#{db_name}/#{schema_name}/#{table_name}/#{db_name}-#{schema_name}-#{table_name}-"
         | 
| 155 159 | 
             
                    if (s3_prefix = @config[:s3_prefix])
         | 
| 156 160 | 
             
                      s3_prefix = s3_prefix.gsub(%r{\A/|/\Z}, '')
         | 
| 157 161 | 
             
                      prefix = "#{s3_prefix}/#{prefix}"
         | 
| @@ -175,7 +179,7 @@ module BigShift | |
| 175 179 | 
             
                end
         | 
| 176 180 |  | 
| 177 181 | 
             
                def redshift_table_schema
         | 
| 178 | 
            -
                  @redshift_table_schema ||= RedshiftTableSchema.new(@config[:rs_table_name], rs_connection)
         | 
| 182 | 
            +
                  @redshift_table_schema ||= RedshiftTableSchema.new(@config[:rs_schema_name], @config[:rs_table_name], rs_connection)
         | 
| 179 183 | 
             
                end
         | 
| 180 184 |  | 
| 181 185 | 
             
                def big_query_dataset
         | 
| @@ -212,6 +216,13 @@ module BigShift | |
| 212 216 | 
             
                    password: @config[:rs_credentials]['password'],
         | 
| 213 217 | 
             
                    sslmode: 'require'
         | 
| 214 218 | 
             
                  )
         | 
| 219 | 
            +
                  socket = Socket.for_fd(@rs_connection.socket)
         | 
| 220 | 
            +
                  socket.setsockopt(Socket::SOL_SOCKET, Socket::SO_KEEPALIVE, 1)
         | 
| 221 | 
            +
                  socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPCNT, 5)
         | 
| 222 | 
            +
                  socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPINTVL, 2)
         | 
| 223 | 
            +
                  socket.setsockopt(Socket::IPPROTO_TCP, Socket::TCP_KEEPIDLE, 2) if defined?(Socket::TCP_KEEPIDLE)
         | 
| 224 | 
            +
                  @rs_connection.exec("SET search_path = \"#{@config[:rs_schema_name]}\"")
         | 
| 225 | 
            +
                  @rs_connection
         | 
| 215 226 | 
             
                end
         | 
| 216 227 |  | 
| 217 228 | 
             
                def cs_transfer_service
         | 
| @@ -1,15 +1,16 @@ | |
| 1 1 | 
             
            module BigShift
         | 
| 2 2 | 
             
              class RedshiftTableSchema
         | 
| 3 | 
            -
                def initialize(table_name, redshift_connection)
         | 
| 3 | 
            +
                def initialize(schema_name, table_name, redshift_connection)
         | 
| 4 | 
            +
                  @schema_name = schema_name
         | 
| 4 5 | 
             
                  @table_name = table_name
         | 
| 5 6 | 
             
                  @redshift_connection = redshift_connection
         | 
| 6 7 | 
             
                end
         | 
| 7 8 |  | 
| 8 9 | 
             
                def columns
         | 
| 9 10 | 
             
                  @columns ||= begin
         | 
| 10 | 
            -
                    rows = @redshift_connection.exec_params(%|SELECT "column", "type", "notnull" FROM "pg_table_def" WHERE "schemaname" =  | 
| 11 | 
            +
                    rows = @redshift_connection.exec_params(%|SELECT "column", "type", "notnull" FROM "pg_table_def" WHERE "schemaname" = $1 AND "tablename" = $2|, [@schema_name, @table_name])
         | 
| 11 12 | 
             
                    if rows.count == 0
         | 
| 12 | 
            -
                      raise sprintf('Table  | 
| 13 | 
            +
                      raise sprintf('Table %s for schema %s not found', @table_name.inspect, @schema_name.inspect)
         | 
| 13 14 | 
             
                    else
         | 
| 14 15 | 
             
                      columns = rows.map do |row|
         | 
| 15 16 | 
             
                        name = row['column']
         | 
| @@ -6,12 +6,12 @@ module BigShift | |
| 6 6 | 
             
                  @logger = options[:logger] || NullLogger::INSTANCE
         | 
| 7 7 | 
             
                end
         | 
| 8 8 |  | 
| 9 | 
            -
                def unload_to(table_name, s3_uri, options={})
         | 
| 10 | 
            -
                  table_schema = RedshiftTableSchema.new(table_name, @redshift_connection)
         | 
| 9 | 
            +
                def unload_to(schema_name, table_name, s3_uri, options={})
         | 
| 10 | 
            +
                  table_schema = RedshiftTableSchema.new(schema_name, table_name, @redshift_connection)
         | 
| 11 11 | 
             
                  credentials_string = "aws_access_key_id=#{@aws_credentials.access_key_id};aws_secret_access_key=#{@aws_credentials.secret_access_key}"
         | 
| 12 12 | 
             
                  select_sql = 'SELECT '
         | 
| 13 13 | 
             
                  select_sql << table_schema.columns.map(&:to_sql).join(', ')
         | 
| 14 | 
            -
                  select_sql << %Q< FROM "#{table_name}">
         | 
| 14 | 
            +
                  select_sql << %Q< FROM "#{schema_name}"."#{table_name}">
         | 
| 15 15 | 
             
                  select_sql.gsub!('\'') { |s| '\\\'' }
         | 
| 16 16 | 
             
                  unload_sql = %Q<UNLOAD ('#{select_sql}')>
         | 
| 17 17 | 
             
                  unload_sql << %Q< TO '#{s3_uri}'>
         | 
    
        data/lib/bigshift/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: bigshift
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.3.1
         | 
| 4 | 
            +
              version: 0.3.2
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Theo Hultberg
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2016- | 
| 11 | 
            +
            date: 2016-08-19 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: pg
         | 
| @@ -112,7 +112,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 112 112 | 
             
                  version: '0'
         | 
| 113 113 | 
             
            requirements: []
         | 
| 114 114 | 
             
            rubyforge_project: 
         | 
| 115 | 
            -
            rubygems_version: 2.4. | 
| 115 | 
            +
            rubygems_version: 2.4.5
         | 
| 116 116 | 
             
            signing_key: 
         | 
| 117 117 | 
             
            specification_version: 4
         | 
| 118 118 | 
             
            summary: A tool for moving tables from Redshift to BigQuery
         |