bigquery_migration 0.1.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +10 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/CODE_OF_CONDUCT.md +49 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +19 -0
- data/README.md +107 -0
- data/Rakefile +10 -0
- data/bigquery_migration.gemspec +31 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/example/example.yml +22 -0
- data/example/schema.json +22 -0
- data/exe/bq_migrate +4 -0
- data/lib/bigquery_migration.rb +29 -0
- data/lib/bigquery_migration/action.rb +85 -0
- data/lib/bigquery_migration/action_runner.rb +60 -0
- data/lib/bigquery_migration/bigquery_wrapper.rb +675 -0
- data/lib/bigquery_migration/cli.rb +105 -0
- data/lib/bigquery_migration/config_loader.rb +51 -0
- data/lib/bigquery_migration/error.rb +6 -0
- data/lib/bigquery_migration/hash_util.rb +35 -0
- data/lib/bigquery_migration/logger.rb +45 -0
- data/lib/bigquery_migration/schema.rb +388 -0
- data/lib/bigquery_migration/time_with_zone.rb +38 -0
- data/lib/bigquery_migration/version.rb +3 -0
- metadata +183 -0
data/lib/bigquery_migration/action_runner.rb
@@ -0,0 +1,60 @@
require_relative 'config_loader'
require_relative 'error'
require_relative 'action'
require_relative 'hash_util'

class BigqueryMigration
  class ActionRunner
    attr_reader :config, :config_path, :opts

    def initialize(config_path = nil, opts = {})
      @config_path = config_path
      @opts = opts
      config = ConfigLoader.new(@config_path, opts[:vars]).load
      @config = HashUtil.deep_symbolize_keys(config)
      validate_config!
    end

    def run
      success, responses = run_actions
      { success: success, dry_run: @opts[:dry_run], actions: responses }
    end

    def run_actions
      success = true
      responses = []

      @config[:actions].each do |action_config|
        _success, result = Action.new(action_config, @opts).run
        response = action_config.merge({'result' => result})
        responses << response
        unless _success
          success = false
          break
        end
      end

      [success, responses]
    end

    def validate_config!
      unless config.is_a?(Hash)
        raise ConfigError, "config file format has to be YAML Hash"
      end

      unless config[:actions]
        raise ConfigError, "config must have `actions` key"
      end

      unless config[:actions].is_a?(Array)
        raise ConfigError, "config[:actions] must be an Array"
      end

      config[:actions].each do |action_config|
        unless action_config[:action]
          raise ConfigError, "Elements of `config[:actions]` must have `action` key"
        end
      end
    end
  end
end
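
For orientation, a minimal usage sketch of ActionRunner (the config path and option values here are hypothetical; the required config shape is exactly what validate_config! above checks):

require 'bigquery_migration/action_runner'

runner = BigqueryMigration::ActionRunner.new(
  'config.yml',                          # hypothetical path to a YAML config
  vars: { 'dataset' => 'my_dataset' },   # passed through to ConfigLoader
  dry_run: true                          # threaded into each Action
)
result = runner.run
result[:success]  # => false if any action failed (later actions are skipped)
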
data/lib/bigquery_migration/bigquery_wrapper.rb
@@ -0,0 +1,675 @@
require 'csv'
require 'json'
require_relative 'schema'
require_relative 'error'
require_relative 'time_with_zone'
require_relative 'hash_util'
require 'google/apis/bigquery_v2'
require 'google/api_client/auth/key_utils'

class BigqueryMigration
  class BigqueryWrapper
    attr_reader :config

    def logger
      BigqueryMigration.logger
    end

    def initialize(config, opts = {})
      @config = HashUtil.deep_symbolize_keys(config)
      @opts = HashUtil.deep_symbolize_keys(opts)
      configure
    end

    def configure
      if json_keyfile = config[:json_keyfile]
        config[:json_key] =
          case json_keyfile
          when String
            File.read(json_keyfile)
          when Hash
            json_keyfile[:content]
          else
            raise ConfigError.new "Unsupported json_keyfile type"
          end
      else
        config[:json_key] = {
          project_id: config[:project_id],
          service_email: config[:service_email],
          private_key: config[:private_key],
        }.to_json
      end

      if config[:json_key]
        begin
          jsonkey_params = JSON.parse(config[:json_key])
        rescue => e
          raise ConfigError.new "json_keyfile is not a JSON file"
        end
      end

      if jsonkey_params
        config[:project] ||= jsonkey_params['project_id']
      end

      config[:retries] ||= 5
      config[:timeout_sec] ||= 300
      config[:open_timeout_sec] ||= 300
    end

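    # A sketch of the credential configs `configure` above accepts (values
    # hypothetical): a keyfile path, inline key content, or discrete fields.
    #
    #   { json_keyfile: '/path/to/service_account.json' }
    #   { json_keyfile: { content: '{"project_id": "..."}' } }
    #   { project_id: '...', service_email: '...', private_key: '...' }
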
    def project
      @project ||= config[:project] || raise(ConfigError, '`project` is required.')
    end

    def dataset
      @dataset ||= config[:dataset] || raise(ConfigError, '`dataset` is required.')
    end

    def table
      @table ||= config[:table] || raise(ConfigError, '`table` is required.')
    end

    def job_status_polling_interval
      @job_status_polling_interval ||= config[:job_status_polling_interval] || 5
    end

    def job_status_max_polling_time
      @job_status_max_polling_time ||= config[:job_status_polling_time] || 3600
    end

    def dry_run?
      @opts[:dry_run]
    end

    def head
      dry_run? ? '(DRY-RUN) ' : '(EXECUTE) '
    end

    def client
      return @cached_client if @cached_client && @cached_client_expiration > Time.now

      client = Google::Apis::BigqueryV2::BigqueryService.new
      client.request_options.retries = config[:retries]
      client.request_options.timeout_sec = config[:timeout_sec]
      client.request_options.open_timeout_sec = config[:open_timeout_sec]
      logger.debug { "client_options: #{client.client_options.to_h}" }
      logger.debug { "request_options: #{client.request_options.to_h}" }

      scope = "https://www.googleapis.com/auth/bigquery"

      key = StringIO.new(config[:json_key])
      auth = Google::Auth::ServiceAccountCredentials.make_creds(json_key_io: key, scope: scope)
      client.authorization = auth

      @cached_client_expiration = Time.now + 1800
      @cached_client = client
    end

    def existing_columns
      begin
        result = get_table
        response = result[:responses][:get_table]
        response.schema.fields.map {|column| column.to_h }
      rescue NotFoundError
        return []
      end
    end

    def get_dataset(dataset: nil)
      dataset ||= self.dataset
      begin
        logger.info { "Get dataset... #{project}:#{dataset}" }
        response = client.get_dataset(project, dataset)
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        if e.status_code == 404
          raise NotFoundError, "Dataset #{project}:#{dataset} is not found"
        end

        response = {status_code: e.status_code, message: e.message, error_class: e.class}
        raise Error, "Failed to get_dataset(#{project}, #{dataset}), response:#{response}"
      end

      { responses: { get_dataset: response } }
    end

    def insert_dataset(dataset: nil, reference: nil)
      dataset ||= self.dataset
      begin
        logger.info { "#{head}Insert (create) dataset... #{project}:#{dataset}" }
        hint = {}
        if reference
          result = get_dataset(dataset: reference)
          hint = { access: result[:responses][:get_dataset].access }
        end
        body = {
          dataset_reference: {
            project_id: project,
            dataset_id: dataset,
          },
        }.merge(hint)
        opts = {}
        logger.debug { "#{head}insert_dataset(#{project}, #{body}, #{opts})" }
        unless dry_run?
          response = client.insert_dataset(project, body, opts)
        end
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        if e.status_code == 409 && /Already Exists:/ =~ e.message
          # ignore 'Already Exists' error
          return {}
        end

        response = {status_code: e.status_code, message: e.message, error_class: e.class}
        raise Error, "Failed to insert_dataset(#{project}, #{body}, #{opts}), response:#{response}"
      end

      { responses: { insert_dataset: response } }
    end
    alias :create_dataset :insert_dataset

    def get_table(dataset: nil, table: nil)
      dataset ||= self.dataset
      table ||= self.table
      begin
        logger.debug { "Get table... #{project}:#{dataset}.#{table}" }
        response = client.get_table(project, dataset, table)
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        if e.status_code == 404 # not found
          raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
        end

        response = {status_code: e.status_code, message: e.message, error_class: e.class}
        raise Error, "Failed to get_table(#{project}, #{dataset}, #{table}), response:#{response}"
      end

      { responses: { get_table: response } }
    end

    def insert_table(dataset: nil, table: nil, columns:)
      dataset ||= self.dataset
      table ||= self.table
      schema = Schema.new(columns)

      begin
        logger.info { "#{head}Insert (create) table... #{project}:#{dataset}.#{table}" }
        body = {
          table_reference: {
            table_id: table,
          },
          schema: {
            fields: schema,
          }
        }
        opts = {}
        logger.debug { "#{head}insert_table(#{project}, #{dataset}, #{body}, #{opts})" }
        unless dry_run?
          response = client.insert_table(project, dataset, body, opts)
        end
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        if e.status_code == 409 && /Already Exists:/ =~ e.message
          # ignore 'Already Exists' error
          return {}
        end

        response = {status_code: e.status_code, message: e.message, error_class: e.class}
        raise Error, "Failed to insert_table(#{project}, #{dataset}, #{body}, #{opts}), response:#{response}"
      end

      { responses: { insert_table: response } }
    end
    alias :create_table :insert_table

    def delete_table(dataset: nil, table: nil)
      dataset ||= self.dataset
      table ||= self.table

      begin
        logger.info { "#{head}Delete (drop) table... #{project}:#{dataset}.#{table}" }
        unless dry_run?
          client.delete_table(project, dataset, table) # no response
          success = true
        end
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        if e.status_code == 404 && /Not found:/ =~ e.message
          # ignore 'Not Found' error
          return {}
        end

        response = {status_code: e.status_code, message: e.message, error_class: e.class}
        raise Error, "Failed to delete_table(#{project}, #{dataset}, #{table}), response:#{response}"
      end

      { success: success }
    end
    alias :drop_table :delete_table

    def list_tables(dataset: nil, max_results: 999999)
      dataset ||= self.dataset

      tables = []
      begin
        logger.info { "List tables... #{project}:#{dataset}" }
        response = client.list_tables(project, dataset, max_results: max_results)
        while true
          _tables = (response.tables || []).map { |t| t.table_reference.table_id.to_s }
          tables.concat(_tables)
          if next_page_token = response.next_page_token
            response = client.list_tables(project, dataset, page_token: next_page_token, max_results: max_results)
          else
            break
          end
        end
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        if e.status_code == 404 && /Not found:/ =~ e.message
          raise NotFoundError, "Dataset #{project}:#{dataset} is not found"
        end

        response = {status_code: e.status_code, message: e.message, error_class: e.class}
        logger.error { "list_tables(#{project}, #{dataset}), response:#{response}" }
        raise Error, "failed to list tables #{project}:#{dataset}, response:#{response}"
      end

      { tables: tables }
    end

    def purge_tables(dataset: nil, table_prefix:, suffix_format:, purge_before:, timezone: nil)
      dataset ||= self.dataset
      timezone ||= Time.now.strftime('%z')

      before_tables = list_tables[:tables]

      purge_before_t = TimeWithZone.strptime_with_zone(purge_before, suffix_format, timezone)
      tables = before_tables.select do |tbl|
        suffix = tbl.gsub(table_prefix, '')
        begin
          suffix_t = TimeWithZone.strptime_with_zone(suffix, suffix_format, timezone)
        rescue
          next
        end
        # skip if different from the suffix_format
        next if suffix_t.strftime(suffix_format) != suffix
        suffix_t <= purge_before_t
      end

      tables.each do |_table|
        delete_table(table: _table)
        # If you make more than 100 requests per second, throttling might occur.
        # See https://cloud.google.com/bigquery/quota-policy#apirequests
        sleep 1
      end

      { delete_tables: tables }
    end

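    # A worked sketch of the purge_tables selection above (table names
    # hypothetical): with table_prefix: 'events_', suffix_format: '%Y%m%d',
    # and purge_before: '20160301', events_20160229 and events_20160301 are
    # dropped (the comparison is inclusive) while events_20160302 is kept;
    # suffixes that do not round-trip through '%Y%m%d' are skipped.
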
    # rows:
    #   - id: 1
    #     type: one
    #     record:
    #       child1: 'child1'
    #       child2: 'child2'
    #   - id: 2
    #     type: two
    #     record:
    #       child1: 'child3'
    #       child2: 'child4'
    def insert_all_table_data(dataset: nil, table: nil, rows:)
      dataset ||= self.dataset
      table ||= self.table

      begin
        logger.info { "#{head}insertAll tableData... #{project}:#{dataset}.#{table}" }
        body = {
          rows: rows.map {|row| { json: row } },
        }
        opts = {}
        unless dry_run?
          response = client.insert_all_table_data(project, dataset, table, body, opts)
        end
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        if e.status_code == 404 # not found
          raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
        end

        response = {status_code: e.status_code, message: e.message, error_class: e.class}
        logger.error {
          "insert_all_table_data(#{project}, #{dataset}, #{table}, #{opts}), response:#{response}"
        }
        raise Error, "failed to insert_all table_data #{project}:#{dataset}.#{table}, response:#{response}"
      end

      { responses: { insert_all_table_data: response } }
    end

    # @return Hash result of list table_data
    #
    # Example:
    # {
    #   columns:
    #     [
    #       {
    #         name: id,
    #         type: INTEGER
    #       },
    #       {
    #         name: type,
    #         type: STRING
    #       },
    #       {
    #         name: record.child1,
    #         type: STRING
    #       },
    #       {
    #         name: record.child2,
    #         type: STRING
    #       },
    #     ],
    #   values:
    #     [
    #       [2,"two","child3","child4"],
    #       [1,"one","child1","child2"]
    #     ],
    #   total_rows: 2
    # }
    def list_table_data(dataset: nil, table: nil, max_results: 100)
      dataset ||= self.dataset
      table ||= self.table

      begin
        logger.info { "list_table_data(#{project}, #{dataset}, #{table}, max_results: #{max_results})" }
        response = client.list_table_data(project, dataset, table, max_results: max_results)
      rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
        if e.status_code == 404 # not found
          raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
        end

        response = {status_code: e.status_code, message: e.message, error_class: e.class}
        logger.error { "list_table_data(#{project}, #{dataset}, #{table}, max_results: #{max_results}), response:#{response}" }
        raise Error, "Failed to list table_data #{project}:#{dataset}.#{table}, response:#{response}"
      end

      flattened_columns = Schema.new(existing_columns).flattened_columns.map do |name, column|
        {name: name}.merge!(column)
      end
      if rows = response.to_h[:rows]
        flattened_values = flatten_values(rows)
      end

      {
        total_rows: response.total_rows,
        columns: flattened_columns,
        values: flattened_values,
        response: {
          list_table_data: response,
        }
      }
    end

    private def flatten_values(rows)
      rows.map do |r|
        if r.key?(:f)
          r[:f].map do |f|
            if f[:v].respond_to?(:key?) && f[:v].key?(:f)
              flatten_values(f[:v][:f])
            else
              f[:v]
            end
          end.flatten
        else
          r[:v]
        end
      end
    end

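    # A worked sketch of flatten_values on tabledata-style rows: a nested
    # record value ({v: {f: [...]}}) is flattened recursively, so
    #   flatten_values([{f: [{v: "1"}, {v: {f: [{v: "a"}, {v: "b"}]}}]}])
    #   # => [["1", "a", "b"]]
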
| 420 | 
            +
             | 
| 421 | 
            +
                def patch_table(dataset: nil, table: nil, columns: nil, add_columns: nil)
         | 
| 422 | 
            +
                  dataset ||= self.dataset
         | 
| 423 | 
            +
                  table ||= self.table
         | 
| 424 | 
            +
                  
         | 
| 425 | 
            +
                  if columns.nil? and add_columns.nil?
         | 
| 426 | 
            +
                    raise ArgumentError, 'patch_table: `columns` or `add_columns` is required'
         | 
| 427 | 
            +
                  end
         | 
| 428 | 
            +
             | 
| 429 | 
            +
                  before_columns = existing_columns
         | 
| 430 | 
            +
                  if columns # if already given
         | 
| 431 | 
            +
                    schema = Schema.new(columns)
         | 
| 432 | 
            +
                  else
         | 
| 433 | 
            +
                    schema = Schema.new(add_columns)
         | 
| 434 | 
            +
                    schema.reverse_merge!(before_columns)
         | 
| 435 | 
            +
                  end
         | 
| 436 | 
            +
                  schema.validate_permitted_operations!(before_columns)
         | 
| 437 | 
            +
             | 
| 438 | 
            +
                  begin
         | 
| 439 | 
            +
                    logger.info { "#{head}Patch table... #{project}:#{dataset}.#{table}" }
         | 
| 440 | 
            +
                    fields = schema.map {|column| HashUtil.deep_symbolize_keys(column) }
         | 
| 441 | 
            +
                    body = {
         | 
| 442 | 
            +
                      schema: {
         | 
| 443 | 
            +
                        fields: fields,
         | 
| 444 | 
            +
                      }
         | 
| 445 | 
            +
                    }
         | 
| 446 | 
            +
                    opts = {}
         | 
| 447 | 
            +
                    logger.debug { "#{head}patch_table(#{project}, #{dataset}, #{table}, #{body}, options: #{opts})" }
         | 
| 448 | 
            +
                    unless dry_run?
         | 
| 449 | 
            +
                      response = client.patch_table(project, dataset, table, body, options: opts)
         | 
| 450 | 
            +
                    end
         | 
| 451 | 
            +
                  rescue Google::Apis::ServerError, Google::Apis::ClientError, Google::Apis::AuthorizationError => e
         | 
| 452 | 
            +
                    if e.status_code == 404 # not found
         | 
| 453 | 
            +
                      raise NotFoundError, "Table #{project}:#{dataset}.#{table} is not found"
         | 
| 454 | 
            +
                    end
         | 
| 455 | 
            +
             | 
| 456 | 
            +
                    response = {status_code: e.status_code, message: e.message, error_class: e.class}
         | 
| 457 | 
            +
                    logger.error {
         | 
| 458 | 
            +
                      "patch_table(#{project}, #{dataset}, #{table}, #{body}, options: #{opts}), response:#{response}"
         | 
| 459 | 
            +
                    }
         | 
| 460 | 
            +
                    raise Error, "Failed to patch table #{project}:#{dataset}.#{table}, response:#{response}"
         | 
| 461 | 
            +
                  end
         | 
| 462 | 
            +
             | 
| 463 | 
            +
                  after_columns = existing_columns
         | 
| 464 | 
            +
             | 
| 465 | 
            +
                  {
         | 
| 466 | 
            +
                    before_columns: before_columns,
         | 
| 467 | 
            +
                    after_columns:  after_columns,
         | 
| 468 | 
            +
                    responses: { patch_table: response },
         | 
| 469 | 
            +
                  }
         | 
| 470 | 
            +
                end
         | 
| 471 | 
            +
                alias :add_column :patch_table
         | 
| 472 | 
            +
             | 
| 473 | 
            +
                def copy_table(destination_table:, destination_dataset: nil, source_table: nil, source_dataset: nil, write_disposition: 'WRITE_TRUNCATE')
         | 
| 474 | 
            +
                  source_table ||= self.table
         | 
| 475 | 
            +
                  source_dataset ||= self.dataset
         | 
| 476 | 
            +
                  destination_dataset ||= source_dataset
         | 
| 477 | 
            +
             | 
| 478 | 
            +
                  body = {
         | 
| 479 | 
            +
                    configuration: {
         | 
| 480 | 
            +
                      copy: {
         | 
| 481 | 
            +
                        create_deposition: 'CREATE_IF_NEEDED',
         | 
| 482 | 
            +
                        write_disposition: write_disposition,
         | 
| 483 | 
            +
                        source_table: {
         | 
| 484 | 
            +
                          project_id: project,
         | 
| 485 | 
            +
                          dataset_id: source_dataset,
         | 
| 486 | 
            +
                          table_id: source_table,
         | 
| 487 | 
            +
                        },
         | 
| 488 | 
            +
                        destination_table: {
         | 
| 489 | 
            +
                          project_id: project,
         | 
| 490 | 
            +
                          dataset_id: destination_dataset,
         | 
| 491 | 
            +
                          table_id: destination_table,
         | 
| 492 | 
            +
                        },
         | 
| 493 | 
            +
                      }
         | 
| 494 | 
            +
                    }
         | 
| 495 | 
            +
                  }
         | 
| 496 | 
            +
                  opts = {}
         | 
| 497 | 
            +
             | 
| 498 | 
            +
                  logger.info  { "#{head}insert_job(#{project}, #{body}, #{opts})" }
         | 
| 499 | 
            +
                  unless dry_run?
         | 
| 500 | 
            +
                    response = client.insert_job(project, body, opts)
         | 
| 501 | 
            +
                    get_response = wait_load('copy', response)
         | 
| 502 | 
            +
                  end
         | 
| 503 | 
            +
             | 
| 504 | 
            +
                  {
         | 
| 505 | 
            +
                    responses: {
         | 
| 506 | 
            +
                      insert_job: response,
         | 
| 507 | 
            +
                      last_get_job: get_response,
         | 
| 508 | 
            +
                    }
         | 
| 509 | 
            +
                  }
         | 
| 510 | 
            +
                end
         | 
| 511 | 
            +
             | 
| 512 | 
            +
                def insert_select(query:, destination_table: nil, destination_dataset: nil, write_disposition: 'WRITE_TRUNCATE')
         | 
| 513 | 
            +
                  destination_table   ||= self.table
         | 
| 514 | 
            +
                  destination_dataset ||= self.dataset
         | 
| 515 | 
            +
             | 
| 516 | 
            +
                  body  = {
         | 
| 517 | 
            +
                    configuration: {
         | 
| 518 | 
            +
                      query: {
         | 
| 519 | 
            +
                        allow_large_results: true,
         | 
| 520 | 
            +
                        flatten_results: false,
         | 
| 521 | 
            +
                        write_disposition: write_disposition,
         | 
| 522 | 
            +
                        query: query,
         | 
| 523 | 
            +
                        destination_table: {
         | 
| 524 | 
            +
                          project_id: self.project,
         | 
| 525 | 
            +
                          dataset_id: destination_dataset,
         | 
| 526 | 
            +
                          table_id: destination_table,
         | 
| 527 | 
            +
                        },
         | 
| 528 | 
            +
                      }
         | 
| 529 | 
            +
                    }
         | 
| 530 | 
            +
                  }
         | 
| 531 | 
            +
                  opts = {}
         | 
| 532 | 
            +
             | 
| 533 | 
            +
                  logger.info { "#{head}insert_job(#{project}, #{body}, #{opts})" }
         | 
| 534 | 
            +
                  unless dry_run?
         | 
| 535 | 
            +
                    response = client.insert_job(project, body, opts)
         | 
| 536 | 
            +
                    get_response = wait_load('query', response)
         | 
| 537 | 
            +
                  end
         | 
| 538 | 
            +
             | 
| 539 | 
            +
                  {
         | 
| 540 | 
            +
                    responses: {
         | 
| 541 | 
            +
                      insert_job: response,
         | 
| 542 | 
            +
                      last_get_job: get_response,
         | 
| 543 | 
            +
                    }
         | 
| 544 | 
            +
                  }
         | 
| 545 | 
            +
                end
         | 
| 546 | 
            +
             | 
| 547 | 
            +
                private def wait_load(kind, response)
         | 
| 548 | 
            +
                  started = Time.now
         | 
| 549 | 
            +
             | 
| 550 | 
            +
                  wait_interval = self.job_status_polling_interval
         | 
| 551 | 
            +
                  max_polling_time = self.job_status_max_polling_time
         | 
| 552 | 
            +
                  _response = response
         | 
| 553 | 
            +
             | 
| 554 | 
            +
                  while true
         | 
| 555 | 
            +
                    job_id = _response.job_reference.job_id
         | 
| 556 | 
            +
                    elapsed = Time.now - started
         | 
| 557 | 
            +
                    status = _response.status.state
         | 
| 558 | 
            +
                    if status == "DONE"
         | 
| 559 | 
            +
                      logger.info {
         | 
| 560 | 
            +
                        "#{kind} job completed... " \
         | 
| 561 | 
            +
                        "job id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
         | 
| 562 | 
            +
                      }
         | 
| 563 | 
            +
                      break
         | 
| 564 | 
            +
                    elsif elapsed.to_i > max_polling_time
         | 
| 565 | 
            +
                      message = "Checking #{kind} job status... " \
         | 
| 566 | 
            +
                        "job id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[TIMEOUT]"
         | 
| 567 | 
            +
                      logger.info { message }
         | 
| 568 | 
            +
                      raise JobTimeoutError.new(message)
         | 
| 569 | 
            +
                    else
         | 
| 570 | 
            +
                      logger.info {
         | 
| 571 | 
            +
                        "Checking #{kind} job status... " \
         | 
| 572 | 
            +
                        "job id:[#{job_id}] elapsed_time:#{elapsed.to_f}sec status:[#{status}]"
         | 
| 573 | 
            +
                      }
         | 
| 574 | 
            +
                      sleep wait_interval
         | 
| 575 | 
            +
                      _response = client.get_job(project, job_id)
         | 
| 576 | 
            +
                    end
         | 
| 577 | 
            +
                  end
         | 
| 578 | 
            +
             | 
| 579 | 
            +
                  # cf. http://www.rubydoc.info/github/google/google-api-ruby-client/Google/Apis/BigqueryV2/JobStatus#errors-instance_method
         | 
| 580 | 
            +
                  # `errors` returns Array<Google::Apis::BigqueryV2::ErrorProto> if any error exists.
         | 
| 581 | 
            +
                  # Otherwise, this returns nil.
         | 
| 582 | 
            +
                  if _errors = _response.status.errors
         | 
| 583 | 
            +
                    raise Error, "Failed during waiting a job, get_job(#{project}, #{job_id}), errors:#{_errors.map(&:to_h)}"
         | 
| 584 | 
            +
                  end
         | 
| 585 | 
            +
             | 
| 586 | 
            +
                  _response
         | 
| 587 | 
            +
                end
         | 
| 588 | 
            +
             | 
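    # drop_column rewrites the table in place: optionally copy_table to a
    # backup, patch_table first if the target schema also adds columns, then
    # insert_select the remaining fields back into the same table with
    # WRITE_TRUNCATE.
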
    def drop_column(table: nil, columns: nil, drop_columns: nil, backup_dataset: nil, backup_table: nil)
      table ||= self.table
      backup_dataset ||= self.dataset
      if columns.nil? and drop_columns.nil?
        raise ArgumentError, '`drop_columns` or `columns` is required'
      end

      result = { responses: {} }

      before_columns = existing_columns

      if columns # if already given
        schema = Schema.new(columns)
      else
        schema = Schema.new(existing_columns)
        schema.reject_columns!(drop_columns)
      end
      if schema.empty? && !dry_run?
        raise Error, 'No columns would remain'
      end

      schema.validate_permitted_operations!(before_columns)

      unless backup_dataset == self.dataset
        create_dataset(dataset: backup_dataset)
      end

      if backup_table
        _result = copy_table(source_table: table, destination_table: backup_table, destination_dataset: backup_dataset)
        result[:responses].merge!(_result[:responses])
      end

      unless (add_columns = schema.diff_columns_by_name(before_columns)).empty?
        _result = patch_table(add_columns: add_columns)
        result[:responses].merge!(_result[:responses])
      end

      query_fields = schema.build_query_fields(before_columns)
      query = "SELECT #{query_fields.join(',')} FROM [#{dataset}.#{table}]"
      _result = insert_select(query: query, destination_table: table)
      result[:responses].merge!(_result[:responses])

      after_columns = existing_columns

      result.merge!({before_columns: before_columns, after_columns: after_columns})
    end

    def migrate_table(table: nil, schema_file: nil, columns: nil, backup_dataset: nil, backup_table: nil)
      table ||= self.table
      backup_dataset ||= self.dataset

      if schema_file.nil? and columns.nil?
        raise ArgumentError, '`schema_file` or `columns` is required'
      end
      if schema_file
        columns = HashUtil.deep_symbolize_keys(JSON.parse(File.read(schema_file)))
      end
      Schema.validate_columns!(columns)

      before_columns = existing_columns

      result = {}
      if before_columns.empty?
        result = create_table(table: table, columns: columns)
      else
        add_columns  = Schema.diff_columns(before_columns, columns)
        drop_columns = Schema.diff_columns(columns, before_columns)

        if !drop_columns.empty?
          drop_column(table: table, columns: columns,
                      backup_dataset: backup_dataset, backup_table: backup_table)
        elsif !add_columns.empty?
          add_column(table: table, columns: columns)
        end
      end

      after_columns = existing_columns

      if after_columns.empty? and !dry_run?
        raise Error, "after_columns is empty. " \
          "before_columns: #{before_columns}, after_columns: #{after_columns}, columns: #{columns}"
      end

      result.merge!(before_columns: before_columns, after_columns: after_columns)
    end
  end
end
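
Putting the pieces together, an end-to-end sketch of BigqueryWrapper (credential, dataset, table, and schema file names are hypothetical; see example/schema.json in the file list above for the columns format):

require 'bigquery_migration/bigquery_wrapper'

config = {
  json_keyfile: 'service_account.json',  # hypothetical credentials path
  dataset: 'my_dataset',
  table: 'my_table',
}
wrapper = BigqueryMigration::BigqueryWrapper.new(config, dry_run: true)

# Creates the table if it does not exist; otherwise adds and/or drops
# columns (dropping rewrites the table via insert_select) to match the file.
result = wrapper.migrate_table(schema_file: 'schema.json')
result[:before_columns]  # columns before the migration
result[:after_columns]   # columns after the migration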