bigquery_migration 0.1.4 → 0.1.5

This diff compares the contents of two publicly released versions of this package, as published to its public registry. It is provided for informational purposes only.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 7d3ea2055c104bf0217847c2f8d1efedbfdc5ebd
-  data.tar.gz: 531d0a9fd96ed38a099ab4bdae85682d844c617c
+  metadata.gz: b64d00c827e85ec40f7c37bc2153ee2c06ce80c1
+  data.tar.gz: 9ff2ebad940ec9f46739d5c4a934f7c773f9f5ea
 SHA512:
-  metadata.gz: fa9676f4b90a977a81d1b3faeaa96fcabac9fd014b185d9e2108914650b4a902609d6b9b6680ac9b52fb5d5a9e50d54e5d47dccedcbd93aee4a1a1791c22f209
-  data.tar.gz: d01485a18936f412c3b59185d0c41df62b49f28da130853cec76f205169959988316550c252ba6aa593e0414ef674d2d025c26fee32a23d27ea1f2ceb3a89556
+  metadata.gz: 075e151988a8566dbcff6c9a20debd29fb9642fecc8bfe952a3fe0bae33f2a63a6f8e49d2a186f39a4943abd2de29d8692ab4985fea397cb4ede5b25a797493b
+  data.tar.gz: 1eeb968a5161bb89089a38bf5aacee696c07672f5f1a977bf00bcf10f2a5d7eba87038f86048dedb00ec746e725255f57507c1dae49690584b71c39caeffa538
data/CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
+# 0.1.5 (2016/07/25)
+
+Enhancements:
+
+* Support record type and repeated mode for list table data
+
 # 0.1.4 (2016/07/12)
 
 Fixes:
@@ -26,4 +32,3 @@ Changes:
 # 0.1.0 (2016/04/08)
 
 Initial release
-
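For context on the enhancement above: BigQuery's tabledata.list API encodes each row as a hash {f: [cells]} where each cell is {v: value}; a RECORD cell nests another {f: [...]}, and a REPEATED cell wraps an array of such values. 0.1.4's flatten_values handled nested records but returned REPEATED values as-is; 0.1.5 adds a TableData class (diffed below) that expands record type and repeated mode properly. A hypothetical encoded row, with illustrative literals:

row = {
  f: [
    {v: '1'},                                 # plain INTEGER column `id`
    {v: [                                     # REPEATED RECORD column `tags`
      {v: {f: [{v: 'color'}, {v: 'red'}]}},   # tags[0] (fields: key, value)
      {v: {f: [{v: 'size'},  {v: 'M'}]}},     # tags[1]
    ]},
  ]
}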
data/lib/bigquery_migration/bigquery_wrapper.rb CHANGED
@@ -1,6 +1,7 @@
 require 'csv'
 require 'json'
 require_relative 'schema'
+require_relative 'table_data'
 require_relative 'error'
 require_relative 'time_with_zone'
 require_relative 'hash_util'
@@ -399,45 +400,28 @@ class BigqueryMigration
         raise Error, "Failed to list table_data #{project}:#{dataset}.#{table}, response:#{response}"
       end
 
-      flattened_columns = Schema.new(existing_columns).flattened_columns.map do |name, column|
+      columns = existing_columns
+      flattened_columns = Schema.new(columns).flattened_columns.map do |name, column|
         {name: name}.merge!(column)
       end
       if rows = response.to_h[:rows]
-        flattened_values = flatten_values(rows)
+        values = TableData.new(columns, rows).generate_values
       end
 
       {
         total_rows: response.total_rows,
         columns: flattened_columns,
-        values: flattened_values,
+        values: values,
         responses: {
           list_table_data: response,
         }
       }
     end
 
-    private def flatten_values(rows)
-      rows.map do |r|
-        if r.key?(:f)
-          r[:f].map do |f|
-            if f[:v].respond_to?(:key?) && f[:v].key?(:f)
-              flatten_values(f[:v][:f])
-            elsif f[:v].is_a?(Hash) && f[:v].empty? # nil is converted into {} by to_h
-              nil
-            else
-              f[:v]
-            end
-          end.flatten
-        else
-          r[:v]
-        end
-      end
-    end
-
     def patch_table(dataset: nil, table: nil, columns: nil, add_columns: nil)
       dataset ||= self.dataset
       table ||= self.table
-
+
       if columns.nil? and add_columns.nil?
         raise ArgumentError, 'patch_table: `columns` or `add_columns` is required'
       end
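The return shape of list_table_data is unchanged by this refactor; only the construction of values moves from the removed flatten_values into the new TableData class. A sketch of the result for the hypothetical schema shown earlier, with hand-written literals (the dotted flattened column names follow Schema#flattened_columns):

response = nil  # placeholder for the raw google-api-client tabledata.list response
result = {
  total_rows: 1,
  columns: [                                  # from Schema#flattened_columns
    {name: 'id',         type: 'INTEGER', mode: 'NULLABLE'},
    {name: 'tags.key',   type: 'STRING',  mode: 'NULLABLE'},
    {name: 'tags.value', type: 'STRING',  mode: 'NULLABLE'},
  ],
  values: [                                   # from TableData#generate_values:
    [                                         # one entry per source row...
      ['1', 'color', 'red'],                  # ...holding one line per repetition,
      [nil, 'size',  'M'],                    # nil-padded for non-repeated cells
    ],
  ],
  responses: {list_table_data: response},
}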
data/lib/bigquery_migration/table_data.rb ADDED
@@ -0,0 +1,118 @@
+# Converted from the BigQuery Web console's JavaScript
+require_relative 'error'
+
+class BigqueryMigration
+  class TableData
+    attr_reader :rows
+    attr_reader :columns
+
+    def logger
+      BigqueryMigration.logger
+    end
+
+    def initialize(columns, rows)
+      @columns = columns || raise(ConfigError, '`columns` is required.')
+      @rows = rows || raise(ConfigError, '`rows` is required.')
+    end
+
+    def generate_values
+      rows = @rows.map do |row|
+        values = []
+        max_repeated_count = calculate_repeated_count(columns: @columns, rows: row).max
+        max_repeated_count.times do |count|
+          values.push(generate_value(columns: @columns, rows: row, count: count))
+        end
+        values
+      end
+      # For backward compatibility
+      max_row_count = rows.map(&:length).max || 0 # 0 when @rows is empty
+      max_row_count > 1 ? rows : rows.map(&:flatten)
+    end
+
+    # This method is called recursively,
+    # so `rows` must be a hash that has the key `:f`.
+    private def calculate_repeated_count(columns: nil, rows: nil)
+      logger.info { "calculate_repeated_count(columns: #{columns}, rows: #{rows})" }
+      return [1] if rows.nil?
+      validate_rows!(rows)
+      rows[:f].zip(columns).map do |row, column|
+        if column[:type] == 'RECORD'
+          if column[:mode] == 'REPEATED'
+            if row[:v].length == 0
+              1
+            else
+              recursive_repeated_counts = row[:v].map do |v|
+                _repeated_counts = calculate_repeated_count(columns: column[:fields], rows: v[:v])
+                repeated_count = _repeated_counts.inject(0) { |acc, n| [acc, n].max }
+                v[:repeated_count] = repeated_count # assignment evaluates to repeated_count, so map collects the counts
+              end
+              recursive_repeated_counts.inject(0) { |acc, n| acc + n }
+            end
+          else
+            _repeated_counts = calculate_repeated_count(columns: column[:fields], rows: row[:v])
+            _repeated_counts.inject(0) { |acc, n| [acc, n].max }
+          end
+        elsif column[:mode] == 'REPEATED'
+          [(row[:v] || []).length, 1].max
+        else
+          1
+        end
+      end
+    end
+
+    # This method is called recursively,
+    # so `rows` must be a hash that has the key `:f`.
+    private def generate_value(columns: nil, rows: nil, count: nil)
+      logger.info { "generate_value(columns: #{columns}, rows: #{rows}, count: #{count})" }
+      value = []
+      return value if rows.nil?
+      validate_rows!(rows)
+      rows[:f].zip(columns).each do |row, column|
+        if column[:type] == 'RECORD'
+          if column[:mode] == 'REPEATED'
+            recursive = false
+            # FIXME: would like to avoid using the index counter
+            current = 0
+            row[:v].each do |v|
+              repeated_count = v[:repeated_count]
+              if current <= count && count < (current + repeated_count)
+                generated_values = generate_value(columns: column[:fields], rows: v[:v], count: count - current)
+                value.concat(generated_values)
+                recursive = true
+              end
+              current = current + repeated_count
+            end
+            unless recursive
+              nil_count = generate_nil_count(column[:fields])
+              value.concat(Array.new(nil_count))
+            end
+          elsif row[:v].nil?
+            nil_count = generate_nil_count(column[:fields])
+            value.concat(Array.new(nil_count))
+          else
+            generated_values = generate_value(columns: column[:fields], rows: row[:v], count: count)
+            value.concat(generated_values)
+          end
+        elsif column[:mode] == 'REPEATED'
+          v = row[:v]
+          count < v.length ? value.push(v[count][:v]) : value.push(nil)
+        elsif count == 0
+          value.push(row[:v])
+        else
+          value.push(nil)
+        end
+      end
+      value
+    end
+
+    private def generate_nil_count(fields)
+      fields.inject(0) do |acc, f|
+        f[:type] == 'RECORD' ? acc + generate_nil_count(f[:fields]) : acc + 1
+      end
+    end
+
+    private def validate_rows!(rows)
+      raise ConfigError, '`rows` must be a hash that has the key `:f`.' if !rows.is_a?(Hash) || !rows.has_key?(:f)
+    end
+  end
+end
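A minimal usage sketch of the new class under stated assumptions: the schema and row literals are illustrative, and a logger is supplied explicitly rather than relying on whatever default the gem configures (TableData logs every recursive call through BigqueryMigration.logger):

require 'logger'
require 'bigquery_migration/table_data'

# Route TableData's per-call logging somewhere harmless.
class BigqueryMigration
  def self.logger
    @logger ||= ::Logger.new(IO::NULL)
  end
end

# Hypothetical schema: a plain INTEGER column and a REPEATED RECORD column.
columns = [
  {name: 'id', type: 'INTEGER', mode: 'NULLABLE'},
  {name: 'tags', type: 'RECORD', mode: 'REPEATED', fields: [
    {name: 'key',   type: 'STRING', mode: 'NULLABLE'},
    {name: 'value', type: 'STRING', mode: 'NULLABLE'},
  ]},
]

# The same encoded row as in the earlier sketch: id is 1, tags holds two records.
rows = [
  {f: [
    {v: '1'},
    {v: [
      {v: {f: [{v: 'color'}, {v: 'red'}]}},
      {v: {f: [{v: 'size'},  {v: 'M'}]}},
    ]},
  ]},
]

values = BigqueryMigration::TableData.new(columns, rows).generate_values
# Each repetition becomes its own flattened line within the row's entry;
# non-repeated cells appear on the first line and are nil-padded after it:
# => [[["1", "color", "red"], [nil, "size", "M"]]]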
data/lib/bigquery_migration/version.rb CHANGED
@@ -1,3 +1,3 @@
 class BigqueryMigration
-  VERSION = "0.1.4"
+  VERSION = "0.1.5"
 end
metadata CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: bigquery_migration
 version: !ruby/object:Gem::Version
-  version: 0.1.4
+  version: 0.1.5
 platform: ruby
 authors:
 - Naotoshi Seo
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2016-07-12 00:00:00.000000000 Z
+date: 2016-07-25 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: google-api-client
@@ -161,6 +161,7 @@ files:
 - lib/bigquery_migration/hash_util.rb
 - lib/bigquery_migration/logger.rb
 - lib/bigquery_migration/schema.rb
+- lib/bigquery_migration/table_data.rb
 - lib/bigquery_migration/time_with_zone.rb
 - lib/bigquery_migration/version.rb
 homepage: https://github.com/sonots/bigquery_migration