bigquery_migration 0.1.4 → 0.1.5
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -1
- data/lib/bigquery_migration/bigquery_wrapper.rb +6 -22
- data/lib/bigquery_migration/table_data.rb +118 -0
- data/lib/bigquery_migration/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: b64d00c827e85ec40f7c37bc2153ee2c06ce80c1
+  data.tar.gz: 9ff2ebad940ec9f46739d5c4a934f7c773f9f5ea
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 075e151988a8566dbcff6c9a20debd29fb9642fecc8bfe952a3fe0bae33f2a63a6f8e49d2a186f39a4943abd2de29d8692ab4985fea397cb4ede5b25a797493b
+  data.tar.gz: 1eeb968a5161bb89089a38bf5aacee696c07672f5f1a977bf00bcf10f2a5d7eba87038f86048dedb00ec746e725255f57507c1dae49690584b71c39caeffa538
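These are the SHA1 and SHA512 digests of the two archives packed inside the `.gem` file (a plain tar containing `metadata.gz` and `data.tar.gz`). A minimal sketch for checking a locally fetched gem against the values above; the local filename is illustrative:

```ruby
require 'digest'
require 'rubygems/package'

# Read the two inner archives of the .gem and print their digests
# for comparison with checksums.yaml above.
File.open('bigquery_migration-0.1.5.gem', 'rb') do |io|
  Gem::Package::TarReader.new(io).each do |entry|
    next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
    body = entry.read
    puts "#{entry.full_name} SHA1:   #{Digest::SHA1.hexdigest(body)}"
    puts "#{entry.full_name} SHA512: #{Digest::SHA512.hexdigest(body)}"
  end
end
```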
data/lib/bigquery_migration/bigquery_wrapper.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'csv'
|
2
2
|
require 'json'
|
3
3
|
require_relative 'schema'
|
4
|
+
require_relative 'table_data'
|
4
5
|
require_relative 'error'
|
5
6
|
require_relative 'time_with_zone'
|
6
7
|
require_relative 'hash_util'
|
@@ -399,45 +400,28 @@ class BigqueryMigration
|
|
399
400
|
raise Error, "Failed to list table_data #{project}:#{dataset}.#{table}, response:#{response}"
|
400
401
|
end
|
401
402
|
|
402
|
-
|
403
|
+
columns = existing_columns
|
404
|
+
flattened_columns = Schema.new(columns).flattened_columns.map do |name, column|
|
403
405
|
{name: name}.merge!(column)
|
404
406
|
end
|
405
407
|
if rows = response.to_h[:rows]
|
406
|
-
|
408
|
+
values = TableData.new(columns, rows).generate_values
|
407
409
|
end
|
408
410
|
|
409
411
|
{
|
410
412
|
total_rows: response.total_rows,
|
411
413
|
columns: flattened_columns,
|
412
|
-
values:
|
414
|
+
values: values,
|
413
415
|
responses: {
|
414
416
|
list_table_data: response,
|
415
417
|
}
|
416
418
|
}
|
417
419
|
end
|
418
420
|
|
419
|
-
private def flatten_values(rows)
|
420
|
-
rows.map do |r|
|
421
|
-
if r.key?(:f)
|
422
|
-
r[:f].map do |f|
|
423
|
-
if f[:v].respond_to?(:key?) && f[:v].key?(:f)
|
424
|
-
flatten_values(f[:v][:f])
|
425
|
-
elsif f[:v].is_a?(Hash) && f[:v].empty? # nil is converted into {} by to_h
|
426
|
-
nil
|
427
|
-
else
|
428
|
-
f[:v]
|
429
|
-
end
|
430
|
-
end.flatten
|
431
|
-
else
|
432
|
-
r[:v]
|
433
|
-
end
|
434
|
-
end
|
435
|
-
end
|
436
|
-
|
437
421
|
def patch_table(dataset: nil, table: nil, columns: nil, add_columns: nil)
|
438
422
|
dataset ||= self.dataset
|
439
423
|
table ||= self.table
|
440
|
-
|
424
|
+
|
441
425
|
if columns.nil? and add_columns.nil?
|
442
426
|
raise ArgumentError, 'patch_table: `columns` or `add_columns` is required'
|
443
427
|
end
|
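With this change, `list_table_data` hands the raw `tabledata.list` rows to the new `TableData` class instead of the removed `flatten_values` helper. A minimal sketch of that hand-off, assuming the gem is installed; the column and row literals below are illustrative, written in the symbol-keyed form that `response.to_h` produces:

```ruby
require 'bigquery_migration'

# Columns in BigQuery table-schema form (symbol keys, as in Schema)
columns = [
  { name: 'id',   type: 'INTEGER' },
  { name: 'tags', type: 'STRING', mode: 'REPEATED' },
]

# Rows in the tabledata.list wire format: each row is {f: [{v: ...}, ...]}
rows = [
  { f: [ { v: '1' }, { v: [ { v: 'a' }, { v: 'b' } ] } ] },
]

p BigqueryMigration::TableData.new(columns, rows).generate_values
# => [[["1", "a"], [nil, "b"]]]
# The REPEATED column fans the single table row out into two value rows,
# padding the non-repeated column with nil after its first occurrence.
```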
data/lib/bigquery_migration/table_data.rb
ADDED
@@ -0,0 +1,118 @@
+# Converted from the BigQuery Web console's JavaScript
+require_relative 'error'
+
+class BigqueryMigration
+  class TableData
+    attr_reader :rows
+    attr_reader :columns
+
+    def logger
+      BigqueryMigration.logger
+    end
+
+    def initialize(columns, rows)
+      @columns = columns || raise(ConfigError, '`columns` is required.')
+      @rows = rows || raise(ConfigError, '`rows` is required.')
+    end
+
+    def generate_values
+      rows = @rows.map do |row|
+        values = []
+        max_repeated_count = calculate_repeated_count(columns: @columns, rows: row).max
+        max_repeated_count.times do |count|
+          values.push(generate_value(columns: @columns, rows: row, count: count))
+        end
+        values
+      end
+      # For backward compatibility
+      max_row_count = (rows.map(&:length) || []).max
+      max_row_count > 1 ? rows : rows.map(&:flatten)
+    end
+
+    # This method is called recursively,
+    # so `rows` must be a hash that has the key `:f`.
+    private def calculate_repeated_count(columns: nil, rows: nil)
+      logger.info { "calculate_repeated_count(columns: #{columns}, rows: #{rows})" }
+      return [1] if rows.nil?
+      validate_rows!(rows)
+      rows[:f].zip(columns).map do |row, column|
+        if column[:type] == 'RECORD'
+          if column[:mode] == 'REPEATED'
+            if row[:v].length == 0
+              1
+            else
+              recursive_repeated_counts = row[:v].map do |v|
+                _repeated_counts = calculate_repeated_count(columns: column[:fields], rows: v[:v])
+                repeated_count = _repeated_counts.inject(0) { |acc, n| [acc, n].max }
+                v[:repeated_count] = repeated_count
+              end
+              recursive_repeated_counts.inject(0) { |acc, n| acc + n }
+            end
+          else
+            _repeated_counts = calculate_repeated_count(columns: column[:fields], rows: row[:v])
+            _repeated_counts.inject(0) { |acc, n| [acc, n].max }
+          end
+        elsif column[:mode] == 'REPEATED'
+          [(row[:v] || []).length, 1].max
+        else
+          1
+        end
+      end
+    end
+
+    # This method is called recursively,
+    # so `rows` must be a hash that has the key `:f`.
+    private def generate_value(columns: nil, rows: nil, count: nil)
+      logger.info { "generate_value(columns: #{columns}, rows: #{rows}, count: #{count})" }
+      value = []
+      return value if rows.nil?
+      validate_rows!(rows)
+      rows[:f].zip(columns).each do |row, column|
+        if column[:type] == 'RECORD'
+          if column[:mode] == 'REPEATED'
+            recursive = false
+            # FIXME: would like to avoid using the index counter
+            current = 0
+            row[:v].each do |v|
+              repeated_count = v[:repeated_count]
+              if current <= count && count < (current + repeated_count)
+                generated_values = generate_value(columns: column[:fields], rows: v[:v], count: count - current)
+                value.concat(generated_values)
+                recursive = true
+              end
+              current = current + repeated_count
+            end
+            unless recursive
+              nil_count = generate_nil_count(column[:fields])
+              value.concat(Array.new(nil_count))
+            end
+          elsif row[:v].nil?
+            nil_count = generate_nil_count(column[:fields])
+            value.concat(Array.new(nil_count))
+          else
+            generated_values = generate_value(columns: column[:fields], rows: row[:v], count: count)
+            value.concat(generated_values)
+          end
+        elsif column[:mode] == 'REPEATED'
+          v = row[:v]
+          count < v.length ? value.push(v[count][:v]) : value.push(nil)
+        elsif count == 0
+          value.push(row[:v])
+        else
+          value.push(nil)
+        end
+      end
+      value
+    end
+
+    private def generate_nil_count(fields)
+      fields.inject(0) do |acc, f|
+        f[:type] == 'RECORD' ? acc + generate_nil_count(f[:fields]) : acc + 1
+      end
+    end
+
+    private def validate_rows!(rows)
+      raise ConfigError, '`rows` must be a hash that has the key `:f`.' if !rows.is_a?(Hash) || !rows.has_key?(:f)
+    end
+  end
+end
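The two recursive passes in this class cooperate: `calculate_repeated_count` returns the fan-out count per column and memoizes a `:repeated_count` onto each element of a repeated RECORD, which `generate_value` then uses as an offset to decide which element owns output row `count`. A sketch with one repeated RECORD column that itself contains a repeated leaf; the data is illustrative, again in `tabledata.list` form:

```ruby
require 'bigquery_migration'

columns = [
  { name: 'user', type: 'RECORD', mode: 'REPEATED',
    fields: [
      { name: 'name',  type: 'STRING' },
      { name: 'langs', type: 'STRING', mode: 'REPEATED' },
    ] },
]

# One table row: `user` repeats twice, and the second user's
# `langs` repeats twice, so the row fans out into 1 + 2 = 3 value rows.
rows = [
  { f: [ { v: [
    { v: { f: [ { v: 'alice' }, { v: [ { v: 'en' } ] } ] } },
    { v: { f: [ { v: 'bob' },   { v: [ { v: 'ja' }, { v: 'ruby' } ] } ] } },
  ] } ] },
]

p BigqueryMigration::TableData.new(columns, rows).generate_values
# => [[["alice", "en"], ["bob", "ja"], [nil, "ruby"]]]
```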
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bigquery_migration
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
 platform: ruby
 authors:
 - Naotoshi Seo
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-07-
+date: 2016-07-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: google-api-client
@@ -161,6 +161,7 @@ files:
 - lib/bigquery_migration/hash_util.rb
 - lib/bigquery_migration/logger.rb
 - lib/bigquery_migration/schema.rb
+- lib/bigquery_migration/table_data.rb
 - lib/bigquery_migration/time_with_zone.rb
 - lib/bigquery_migration/version.rb
 homepage: https://github.com/sonots/bigquery_migration