exwiw 0.3.4 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3b68b4eed496cf67cbac408c7f1c233a881b9778b46f0f5a67d0ff50b9321e03
4
- data.tar.gz: 521a8cf7d5af0dee4538e407d06d9605d37cb059f5023ee922e11807ff6d1b17
3
+ metadata.gz: 7c7566e034ea908557d839b53908b3c847444c8097e3b77baa4725603bbf4e52
4
+ data.tar.gz: 3ca68cf8173d39e3c59f7d7551549ad2a7b6456c1bd16db4f489d11c7bbd9efa
5
5
  SHA512:
6
- metadata.gz: bdd88c3f13879d23a4ecc7c239f22db2c5c5ddbd3bb06b838ae570d48309ba4f2460a6260d9f848da71717dcf43cae0de3b593e3e6e99f43406d700b39e79a37
7
- data.tar.gz: ac2e73630034663ef1ef299fd09925843c086776d10e5998d30758e079a4066f388bdb76672867acacbb57024a5a990336c46e5d8f21d42548862125d1004748
6
+ metadata.gz: 0b9c68109f7fc4c9abbd814099d38def0aae44fd732de5ec792094bd848dd948b27c17e7ad0139b486eaf658fb5f18422e0ca4cb2b8d26e66098147cbd3a309c
7
+ data.tar.gz: b3a7ee638ec36b7e9d03bf074b5f35110d7c0fd77db89a079e887c8b2b55bff058cd31d19a4b9479e36cafe59d3b3e55dad175725bdc7c34ed2467c52f20ef08
data/CHANGELOG.md CHANGED
@@ -2,6 +2,14 @@
2
2
 
3
3
  ## [Unreleased]
4
4
 
5
+ ## [0.3.6] - 2026-06-01
6
+
7
+ ## [0.3.5] - 2026-06-01
8
+
9
+ ### Fixed
10
+
11
+ - MySQL export no longer crashes with `IO#write: "\xE4" from ASCII-8BIT to UTF-8 (Encoding::UndefinedConversionError)` when a value comes from a binary-collation / `VARBINARY` / `BLOB` column but holds UTF-8 text (e.g. Japanese). Both the `mysql2` and `trilogy` drivers tag such values as `ASCII-8BIT`; writing them to the UTF-8 INSERT file failed inside host processes whose `Encoding.default_internal` is UTF-8 (a Rails app, or `RUBYOPT=-EUTF-8`). The driver-returned strings are now re-tagged UTF-8 (bytes unchanged) so the write is a no-op conversion.
12
+
5
13
  ## [0.3.4] - 2026-05-31
6
14
 
7
15
  ### Changed
@@ -120,6 +120,12 @@ module Exwiw
120
120
  raise NotImplementedError, "MongodbAdapter does not support explain yet"
121
121
  end
122
122
 
123
+ def describe_query(query)
124
+ "find collection=#{query.collection} filter=#{query.filter.inspect} projection=#{query.projection.inspect}"
125
+ rescue => e
126
+ "<unavailable: #{e.class}: #{e.message}>"
127
+ end
128
+
123
129
  def output_extension
124
130
  'jsonl'
125
131
  end
@@ -57,7 +57,7 @@ module Exwiw
57
57
  def self.stringify_value(value)
58
58
  case value
59
59
  when nil then nil
60
- when String then value
60
+ when String then normalize_encoding(value)
61
61
  when Time
62
62
  # Emit fractional seconds only when present. A Time can't tell us the
63
63
  # column's declared precision, so a zero fraction on a DATETIME(6)
@@ -76,6 +76,21 @@ module Exwiw
76
76
  end
77
77
  end
78
78
 
79
+ # Re-tag a value string as UTF-8 when it comes back as ASCII-8BIT (BINARY).
80
+ # Both drivers tag values from binary-collation / VARBINARY / BLOB columns
81
+ # as ASCII-8BIT even when the bytes are really UTF-8 text. When exwiw runs
82
+ # inside a host process whose Encoding.default_internal is UTF-8 (e.g. a
83
+ # Rails app, or RUBYOPT=-EUTF-8), IO#write enables conversion, so writing
84
+ # such a binary string carrying multi-byte bytes (e.g. Japanese "\xE4...")
85
+ # to the INSERT file raises "\xE4 from ASCII-8BIT to UTF-8"
86
+ # (Encoding::UndefinedConversionError). Re-tagging makes that write a
87
+ # UTF-8 -> UTF-8 no-op; only the tag changes, the bytes pass through.
88
+ def self.normalize_encoding(str)
89
+ return str unless str.encoding == Encoding::ASCII_8BIT
90
+
91
+ str.dup.force_encoding(Encoding::UTF_8)
92
+ end
93
+
79
94
  attr_reader :driver
80
95
 
81
96
  # `driver:` is mainly a test seam to force a specific driver; in normal use
@@ -92,7 +107,8 @@ module Exwiw
92
107
  case @driver
93
108
  when :mysql2
94
109
  res = raw.query(sql, cast: false, as: :array)
95
- Result.new(res.fields, res.to_a)
110
+ rows = res.to_a.map { |row| row.map { |value| self.class.stringify_value(value) } }
111
+ Result.new(res.fields, rows)
96
112
  when :trilogy
97
113
  res = raw.query(sql)
98
114
  rows = res.rows.map { |row| row.map { |value| self.class.stringify_value(value) } }
data/lib/exwiw/adapter.rb CHANGED
@@ -139,6 +139,23 @@ module Exwiw
139
139
  def commented_sql(query_ast)
140
140
  "#{sql_query_comment(query_ast)} #{compile_ast(query_ast)}"
141
141
  end
142
+
143
+ # One-line, human-readable description of the extraction query, used by the
144
+ # Runner in error messages so a failure during INSERT/COPY generation (or
145
+ # query execution) can be traced back to the query that produced the data.
146
+ # SQL adapters expose the compiled, comment-prefixed SELECT; non-SQL
147
+ # adapters (e.g. MongodbAdapter) override or fall back to the query object's
148
+ # own inspect output. Best-effort: never raise from here, since it runs on
149
+ # an error path.
150
+ def describe_query(query_ast)
151
+ if respond_to?(:compile_ast)
152
+ commented_sql(query_ast)
153
+ else
154
+ query_ast.inspect
155
+ end
156
+ rescue => e
157
+ "<unavailable: #{e.class}: #{e.message}>"
158
+ end
142
159
  end
143
160
 
144
161
  # @params [Exwiw::QueryAst] query_ast
data/lib/exwiw/runner.rb CHANGED
@@ -60,51 +60,67 @@ module Exwiw
60
60
  @logger.info("Processing table '#{table_name}'... (#{idx + 1}/#{total_size})")
61
61
 
62
62
  query_ast = adapter.build_query(table, @dump_target, table_by_name)
63
- results = adapter.execute(query_ast)
64
- record_num = results.size
65
63
 
66
- if record_num.zero?
67
- @logger.info(" No records matched. skip this table.")
68
- next
69
- end
70
- insert_idx = (idx + 1).to_s.rjust(3, '0')
71
-
72
- if @output_format == 'copy'
73
- @logger.debug(" Generate COPY statement...")
74
- copy_sql = adapter.to_copy_from_stdin(results, table)
75
- @logger.info(" Generated COPY statement for #{record_num} records.")
76
-
77
- File.open(File.join(@output_dir, "insert-#{insert_idx}-#{table_name}.#{adapter.output_extension}"), 'w') do |file|
78
- file.puts(copy_sql)
79
- post = adapter.post_insert_sql(table)
80
- file.puts(post) if post
64
+ # Track which phase we are in so that, if an error is raised while
65
+ # turning the fetched rows into SQL/JSONL, the rescue below can report
66
+ # both the failing step and the exact extraction query that produced the
67
+ # data being processed.
68
+ phase = "executing extraction query"
69
+ begin
70
+ results = adapter.execute(query_ast)
71
+ record_num = results.size
72
+
73
+ if record_num.zero?
74
+ @logger.info(" No records matched. skip this table.")
75
+ next
81
76
  end
82
- else
83
- @logger.debug(" Generate INSERT statement...")
84
- chunk_size = table.bulk_insert_chunk_size
85
- chunks = chunk_size ? results.each_slice(chunk_size).to_a : [results]
86
- insert_sql = chunks.map { |chunk_rows| adapter.to_bulk_insert(chunk_rows, table) }.join("\n")
87
-
88
- @logger.info(" Generated INSERT statement for #{record_num} records (#{chunks.size} statement(s)).")
89
- File.open(File.join(@output_dir, "insert-#{insert_idx}-#{table_name}.#{adapter.output_extension}"), 'w') do |file|
90
- file.puts(insert_sql)
91
- post = adapter.post_insert_sql(table)
92
- file.puts(post) if post
93
- end
94
- end
95
-
96
- if adapter.supports_bulk_delete? && !@insert_only && !(table.respond_to?(:rails_managed?) && table.rails_managed?)
97
- @logger.debug(" Generate DELETE statement...")
98
- delete_sql = adapter.to_bulk_delete(query_ast, table)
99
- if @logger.debug?
100
- @logger.debug(" Generated DELETE statement:\n#{delete_sql}")
77
+ insert_idx = (idx + 1).to_s.rjust(3, '0')
78
+
79
+ if @output_format == 'copy'
80
+ phase = "generating COPY statement"
81
+ @logger.debug(" Generate COPY statement...")
82
+ copy_sql = adapter.to_copy_from_stdin(results, table)
83
+ @logger.info(" Generated COPY statement for #{record_num} records.")
84
+
85
+ File.open(File.join(@output_dir, "insert-#{insert_idx}-#{table_name}.#{adapter.output_extension}"), 'w') do |file|
86
+ file.puts(copy_sql)
87
+ post = adapter.post_insert_sql(table)
88
+ file.puts(post) if post
89
+ end
101
90
  else
102
- @logger.info(" Generated DELETE statement.")
91
+ phase = "generating INSERT statement"
92
+ @logger.debug(" Generate INSERT statement...")
93
+ chunk_size = table.bulk_insert_chunk_size
94
+ chunks = chunk_size ? results.each_slice(chunk_size).to_a : [results]
95
+ insert_sql = chunks.map { |chunk_rows| adapter.to_bulk_insert(chunk_rows, table) }.join("\n")
96
+
97
+ @logger.info(" Generated INSERT statement for #{record_num} records (#{chunks.size} statement(s)).")
98
+ File.open(File.join(@output_dir, "insert-#{insert_idx}-#{table_name}.#{adapter.output_extension}"), 'w') do |file|
99
+ file.puts(insert_sql)
100
+ post = adapter.post_insert_sql(table)
101
+ file.puts(post) if post
102
+ end
103
103
  end
104
- delete_idx = (total_size - idx).to_s.rjust(3, '0')
105
- File.open(File.join(@output_dir, "delete-#{delete_idx}-#{table_name}.#{adapter.output_extension}"), 'w') do |file|
106
- file.puts(delete_sql)
104
+
105
+ if adapter.supports_bulk_delete? && !@insert_only && !(table.respond_to?(:rails_managed?) && table.rails_managed?)
106
+ phase = "generating DELETE statement"
107
+ @logger.debug(" Generate DELETE statement...")
108
+ delete_sql = adapter.to_bulk_delete(query_ast, table)
109
+ if @logger.debug?
110
+ @logger.debug(" Generated DELETE statement:\n#{delete_sql}")
111
+ else
112
+ @logger.info(" Generated DELETE statement.")
113
+ end
114
+ delete_idx = (total_size - idx).to_s.rjust(3, '0')
115
+ File.open(File.join(@output_dir, "delete-#{delete_idx}-#{table_name}.#{adapter.output_extension}"), 'w') do |file|
116
+ file.puts(delete_sql)
117
+ end
107
118
  end
119
+ rescue => e
120
+ @logger.error("Error while #{phase} for table '#{table_name}' (#{idx + 1}/#{total_size}): #{e.class}: #{e.message}")
121
+ @logger.error(" Extraction query that produced the data being processed:")
122
+ @logger.error(" #{adapter.describe_query(query_ast)}")
123
+ raise
108
124
  end
109
125
  end
110
126
 
data/lib/exwiw/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Exwiw
4
- VERSION = "0.3.4"
4
+ VERSION = "0.3.6"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: exwiw
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.4
4
+ version: 0.3.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shia