bigquery_migration 0.1.4 → 0.1.5

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 7d3ea2055c104bf0217847c2f8d1efedbfdc5ebd
-  data.tar.gz: 531d0a9fd96ed38a099ab4bdae85682d844c617c
+  metadata.gz: b64d00c827e85ec40f7c37bc2153ee2c06ce80c1
+  data.tar.gz: 9ff2ebad940ec9f46739d5c4a934f7c773f9f5ea
 SHA512:
-  metadata.gz: fa9676f4b90a977a81d1b3faeaa96fcabac9fd014b185d9e2108914650b4a902609d6b9b6680ac9b52fb5d5a9e50d54e5d47dccedcbd93aee4a1a1791c22f209
-  data.tar.gz: d01485a18936f412c3b59185d0c41df62b49f28da130853cec76f205169959988316550c252ba6aa593e0414ef674d2d025c26fee32a23d27ea1f2ceb3a89556
+  metadata.gz: 075e151988a8566dbcff6c9a20debd29fb9642fecc8bfe952a3fe0bae33f2a63a6f8e49d2a186f39a4943abd2de29d8692ab4985fea397cb4ede5b25a797493b
+  data.tar.gz: 1eeb968a5161bb89089a38bf5aacee696c07672f5f1a977bf00bcf10f2a5d7eba87038f86048dedb00ec746e725255f57507c1dae49690584b71c39caeffa538
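
These digests cover the `metadata.gz` and `data.tar.gz` archives packed inside the `.gem` file. A minimal Ruby sketch for checking them locally, assuming a downloaded `bigquery_migration-0.1.5.gem` in the current directory (the path is hypothetical):

require 'digest'
require 'rubygems/package'

# A .gem file is a tar archive whose entries include metadata.gz and
# data.tar.gz; hash each entry and compare against checksums.yaml.
File.open('bigquery_migration-0.1.5.gem', 'rb') do |file|
  reader = Gem::Package::TarReader.new(file)
  reader.each do |entry|
    next unless %w[metadata.gz data.tar.gz].include?(entry.full_name)
    puts "#{entry.full_name}: #{Digest::SHA1.hexdigest(entry.read)}"
  end
end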
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+# 0.1.5 (2016/07/25)
+
+Enhancements:
+
+* Support record type and repeated mode for list table data
+
 # 0.1.4 (2016/07/12)
 
 Fixes:
@@ -26,4 +32,3 @@ Changes:
 # 0.1.0 (2016/04/08)
 
 Initial release
-
@@ -1,6 +1,7 @@
 require 'csv'
 require 'json'
 require_relative 'schema'
+require_relative 'table_data'
 require_relative 'error'
 require_relative 'time_with_zone'
 require_relative 'hash_util'
@@ -399,45 +400,28 @@ class BigqueryMigration
       raise Error, "Failed to list table_data #{project}:#{dataset}.#{table}, response:#{response}"
     end
 
-    flattened_columns = Schema.new(existing_columns).flattened_columns.map do |name, column|
+    columns = existing_columns
+    flattened_columns = Schema.new(columns).flattened_columns.map do |name, column|
       {name: name}.merge!(column)
     end
     if rows = response.to_h[:rows]
-      flattened_values = flatten_values(rows)
+      values = TableData.new(columns, rows).generate_values
     end
 
     {
       total_rows: response.total_rows,
      columns: flattened_columns,
-      values: flattened_values,
+      values: values,
       responses: {
         list_table_data: response,
       }
     }
   end
 
-  private def flatten_values(rows)
-    rows.map do |r|
-      if r.key?(:f)
-        r[:f].map do |f|
-          if f[:v].respond_to?(:key?) && f[:v].key?(:f)
-            flatten_values(f[:v][:f])
-          elsif f[:v].is_a?(Hash) && f[:v].empty? # nil is converted into {} by to_h
-            nil
-          else
-            f[:v]
-          end
-        end.flatten
-      else
-        r[:v]
-      end
-    end
-  end
-
   def patch_table(dataset: nil, table: nil, columns: nil, add_columns: nil)
     dataset ||= self.dataset
     table ||= self.table
-
+
     if columns.nil? and add_columns.nil?
       raise ArgumentError, 'patch_table: `columns` or `add_columns` is required'
     end
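
With this change, `list_table_data` hands row flattening to the new `TableData` class instead of the removed `flatten_values` helper, so record-type and repeated-mode columns are expanded the way the BigQuery web console displays them. A sketch of the result shape, assuming a configured client object exposing this method (`client` and the argument values are hypothetical):

result = client.list_table_data(dataset: 'my_dataset', table: 'my_table')

result[:total_rows]  # total row count reported by the API
result[:columns]     # flattened column definitions from Schema#flattened_columns
result[:values]      # per-row values built by TableData#generate_values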
lib/bigquery_migration/table_data.rb ADDED
@@ -0,0 +1,118 @@
+# Converted from the BigQuery Web console's JavaScript
+require_relative 'error'
+
+class BigqueryMigration
+  class TableData
+    attr_reader :rows
+    attr_reader :columns
+
+    def logger
+      BigqueryMigration.logger
+    end
+
+    def initialize(columns, rows)
+      @columns = columns || raise(ConfigError, '`columns` is required.')
+      @rows = rows || raise(ConfigError, '`rows` is required.')
+    end
+
+    def generate_values
+      rows = @rows.map do |row|
+        values = []
+        max_repeated_count = calculate_repeated_count(columns: @columns, rows: row).max
+        max_repeated_count.times do |count|
+          values.push(generate_value(columns: @columns, rows: row, count: count))
+        end
+        values
+      end
+      # For backward compatibility
+      max_row_count = (rows.map(&:length) || []).max
+      max_row_count > 1 ? rows : rows.map(&:flatten)
+    end
+
+    # This method is called recursively.
+    # So, rows must be a hash that has the key :f.
+    private def calculate_repeated_count(columns: nil, rows: nil)
+      logger.info { "calculate_repeated_count(columns: #{columns}, rows: #{rows})" }
+      return [1] if rows.nil?
+      validate_rows!(rows)
+      rows[:f].zip(columns).map do |row, column|
+        if column[:type] == 'RECORD'
+          if column[:mode] == 'REPEATED'
+            if row[:v].length == 0
+              1
+            else
+              recursive_repeated_counts = row[:v].map do |v|
+                _repeated_counts = calculate_repeated_count(columns: column[:fields], rows: v[:v])
+                repeated_count = _repeated_counts.inject(0) { |acc, n| [acc, n].max }
+                v[:repeated_count] = repeated_count
+              end
+              recursive_repeated_counts.inject(0) { |acc, n| acc + n }
+            end
+          else
+            _repeated_counts = calculate_repeated_count(columns: column[:fields], rows: row[:v])
+            _repeated_counts.inject(0) { |acc, n| [acc, n].max }
+          end
+        elsif column[:mode] == 'REPEATED'
+          [(row[:v] || []).length, 1].max
+        else
+          1
+        end
+      end
+    end
+
+    # This method is called recursively.
+    # So, rows must be a hash that has the key :f.
+    private def generate_value(columns: nil, rows: nil, count: nil)
+      logger.info { "generate_value(columns: #{columns}, rows: #{rows}, count: #{count})" }
+      value = []
+      return value if rows.nil?
+      validate_rows!(rows)
+      rows[:f].zip(columns).each do |row, column|
+        if column[:type] == 'RECORD'
+          if column[:mode] == 'REPEATED'
+            recursive = false
+            # FIXME: would like to avoid using the index counter
+            current = 0
+            row[:v].each do |v|
+              repeated_count = v[:repeated_count]
+              if current <= count && count < (current + repeated_count)
+                generated_values = generate_value(columns: column[:fields], rows: v[:v], count: count - current)
+                value.concat(generated_values)
+                recursive = true
+              end
+              current = current + repeated_count
+            end
+            unless recursive
+              nil_count = generate_nil_count(column[:fields])
+              value.concat(Array.new(nil_count))
+            end
+          elsif row[:v].nil?
+            nil_count = generate_nil_count(column[:fields])
+            value.concat(Array.new(nil_count))
+          else
+            generated_values = generate_value(columns: column[:fields], rows: row[:v], count: count)
+            value.concat(generated_values)
+          end
+        elsif column[:mode] == 'REPEATED'
+          v = row[:v]
+          count < v.length ? value.push(v[count][:v]) : value.push(nil)
+        elsif count == 0
+          value.push(row[:v])
+        else
+          value.push(nil)
+        end
+      end
+      value
+    end
+
+    private def generate_nil_count(fields)
+      fields.inject(0) do |acc, f|
+        f[:type] == 'RECORD' ? acc + generate_nil_count(f[:fields]) : acc + 1
+      end
+    end
+
+    private def validate_rows!(rows)
+      raise ConfigError, '`rows` must be a hash that has the key `:f`.' if !rows.is_a?(Hash) || !rows.has_key?(:f)
+    end
+  end
+end
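
A minimal usage sketch of the new class, assuming the gem is installed; the two-column schema and the single row below are made-up illustration data in the `{f: [{v: ...}]}` shape that `tabledata.list` returns:

require 'bigquery_migration'

# Made-up schema: one plain INTEGER column, one REPEATED STRING column.
columns = [
  { name: 'id',   type: 'INTEGER' },
  { name: 'tags', type: 'STRING', mode: 'REPEATED' },
]

# One API-shaped row carrying two values in the repeated column.
rows = [
  { f: [{ v: '1' }, { v: [{ v: 'a' }, { v: 'b' }] }] },
]

BigqueryMigration::TableData.new(columns, rows).generate_values
# => [[["1", "a"], [nil, "b"]]]

The single source row expands into two display lines: the repeated values land on consecutive lines while the scalar column is padded with nil after the first, matching the web console's rendering.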
lib/bigquery_migration/version.rb CHANGED
@@ -1,3 +1,3 @@
 class BigqueryMigration
-  VERSION = "0.1.4"
+  VERSION = "0.1.5"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bigquery_migration
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
 platform: ruby
 authors:
 - Naotoshi Seo
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-07-12 00:00:00.000000000 Z
+date: 2016-07-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: google-api-client
@@ -161,6 +161,7 @@ files:
 - lib/bigquery_migration/hash_util.rb
 - lib/bigquery_migration/logger.rb
 - lib/bigquery_migration/schema.rb
+- lib/bigquery_migration/table_data.rb
 - lib/bigquery_migration/time_with_zone.rb
 - lib/bigquery_migration/version.rb
 homepage: https://github.com/sonots/bigquery_migration