exwiw 0.3.4 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/lib/exwiw/adapter/mongodb_adapter.rb +6 -0
- data/lib/exwiw/adapter/mysql_client.rb +18 -2
- data/lib/exwiw/adapter.rb +17 -0
- data/lib/exwiw/runner.rb +56 -40
- data/lib/exwiw/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7c7566e034ea908557d839b53908b3c847444c8097e3b77baa4725603bbf4e52
|
|
4
|
+
data.tar.gz: 3ca68cf8173d39e3c59f7d7551549ad2a7b6456c1bd16db4f489d11c7bbd9efa
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 0b9c68109f7fc4c9abbd814099d38def0aae44fd732de5ec792094bd848dd948b27c17e7ad0139b486eaf658fb5f18422e0ca4cb2b8d26e66098147cbd3a309c
|
|
7
|
+
data.tar.gz: b3a7ee638ec36b7e9d03bf074b5f35110d7c0fd77db89a079e887c8b2b55bff058cd31d19a4b9479e36cafe59d3b3e55dad175725bdc7c34ed2467c52f20ef08
|
data/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [0.3.6] - 2026-06-01
|
|
6
|
+
|
|
7
|
+
## [0.3.5] - 2026-06-01
|
|
8
|
+
|
|
9
|
+
### Fixed
|
|
10
|
+
|
|
11
|
+
- MySQL export no longer crashes with `IO#write: "\xE4" from ASCII-8BIT to UTF-8 (Encoding::UndefinedConversionError)` when a value comes from a binary-collation / `VARBINARY` / `BLOB` column but holds UTF-8 text (e.g. Japanese). Both the `mysql2` and `trilogy` drivers tag such values as `ASCII-8BIT`; writing them to the UTF-8 INSERT file failed inside host processes whose `Encoding.default_internal` is UTF-8 (a Rails app, or `RUBYOPT=-EUTF-8`). The driver-returned strings are now re-tagged UTF-8 (bytes unchanged) so the write is a no-op conversion.
|
|
12
|
+
|
|
5
13
|
## [0.3.4] - 2026-05-31
|
|
6
14
|
|
|
7
15
|
### Changed
|
|
@@ -120,6 +120,12 @@ module Exwiw
|
|
|
120
120
|
raise NotImplementedError, "MongodbAdapter does not support explain yet"
|
|
121
121
|
end
|
|
122
122
|
|
|
123
|
+
# Builds a single-line summary of a MongoDB find query for error messages.
#
# @param query [#collection, #filter, #projection] the query to describe
# @return [String] "find collection=... filter=... projection=...", or an
#   "<unavailable: ...>" placeholder when reading the query itself raises
def describe_query(query)
  begin
    collection = query.collection
    filter = query.filter.inspect
    projection = query.projection.inspect

    "find collection=#{collection} filter=#{filter} projection=#{projection}"
  rescue => e
    # Swallow the error and return a placeholder string instead.
    "<unavailable: #{e.class}: #{e.message}>"
  end
end
|
|
128
|
+
|
|
123
129
|
def output_extension
|
|
124
130
|
'jsonl'
|
|
125
131
|
end
|
|
@@ -57,7 +57,7 @@ module Exwiw
|
|
|
57
57
|
def self.stringify_value(value)
|
|
58
58
|
case value
|
|
59
59
|
when nil then nil
|
|
60
|
-
when String then value
|
|
60
|
+
when String then normalize_encoding(value)
|
|
61
61
|
when Time
|
|
62
62
|
# Emit fractional seconds only when present. A Time can't tell us the
|
|
63
63
|
# column's declared precision, so a zero fraction on a DATETIME(6)
|
|
@@ -76,6 +76,21 @@ module Exwiw
|
|
|
76
76
|
end
|
|
77
77
|
end
|
|
78
78
|
|
|
79
|
+
# Re-tag a value string as UTF-8 when it comes back as ASCII-8BIT (BINARY).
|
|
80
|
+
# Both drivers tag values from binary-collation / VARBINARY / BLOB columns
|
|
81
|
+
# as ASCII-8BIT even when the bytes are really UTF-8 text. When exwiw runs
|
|
82
|
+
# inside a host process whose Encoding.default_internal is UTF-8 (e.g. a
|
|
83
|
+
# Rails app, or RUBYOPT=-EUTF-8), IO#write enables conversion, so writing
|
|
84
|
+
# such a binary string carrying multi-byte bytes (e.g. Japanese "\xE4...")
|
|
85
|
+
# to the INSERT file raises "\xE4 from ASCII-8BIT to UTF-8"
|
|
86
|
+
# (Encoding::UndefinedConversionError). Re-tagging makes that write a
|
|
87
|
+
# UTF-8 -> UTF-8 no-op; only the tag changes, the bytes pass through.
|
|
88
|
+
# Returns +str+ tagged as UTF-8 when it is tagged ASCII-8BIT (BINARY).
#
# @param str [String] a value string
# @return [String] +str+ itself when its encoding is anything other than
#   ASCII-8BIT; otherwise a copy carrying the same bytes with a UTF-8 tag
def self.normalize_encoding(str)
  if str.encoding == Encoding::ASCII_8BIT
    # Work on a copy: the argument may be frozen, and the caller's string
    # must keep its original tag.
    retagged = str.dup
    retagged.force_encoding(Encoding::UTF_8)
  else
    str
  end
end
|
|
93
|
+
|
|
79
94
|
attr_reader :driver
|
|
80
95
|
|
|
81
96
|
# `driver:` is mainly a test seam to force a specific driver; in normal use
|
|
@@ -92,7 +107,8 @@ module Exwiw
|
|
|
92
107
|
case @driver
|
|
93
108
|
when :mysql2
|
|
94
109
|
res = raw.query(sql, cast: false, as: :array)
|
|
95
|
-
|
|
110
|
+
rows = res.to_a.map { |row| row.map { |value| self.class.stringify_value(value) } }
|
|
111
|
+
Result.new(res.fields, rows)
|
|
96
112
|
when :trilogy
|
|
97
113
|
res = raw.query(sql)
|
|
98
114
|
rows = res.rows.map { |row| row.map { |value| self.class.stringify_value(value) } }
|
data/lib/exwiw/adapter.rb
CHANGED
|
@@ -139,6 +139,23 @@ module Exwiw
|
|
|
139
139
|
def commented_sql(query_ast)
|
|
140
140
|
"#{sql_query_comment(query_ast)} #{compile_ast(query_ast)}"
|
|
141
141
|
end
|
|
142
|
+
|
|
143
|
+
# One-line, human-readable description of the extraction query, used by the
|
|
144
|
+
# Runner in error messages so a failure during INSERT/COPY generation (or
|
|
145
|
+
# query execution) can be traced back to the query that produced the data.
|
|
146
|
+
# SQL adapters expose the compiled, comment-prefixed SELECT; non-SQL
|
|
147
|
+
# adapters (e.g. MongodbAdapter) override or fall back to the query object's
|
|
148
|
+
# own inspect output. Best-effort: never raise from here, since it runs on
|
|
149
|
+
# an error path.
|
|
150
|
+
# Best-effort description of the extraction query for error messages.
#
# @param query_ast [Object] the query object built for the table
# @return [String] the comment-prefixed compiled SQL when this adapter can
#   compile an AST, otherwise +query_ast.inspect+; an "<unavailable: ...>"
#   placeholder if producing the description itself raises
def describe_query(query_ast)
  # include_all = true: commented_sql calls compile_ast with an implicit
  # receiver, so an adapter may legitimately keep compile_ast private or
  # protected. A plain respond_to? would miss it and silently degrade the
  # description to the AST's inspect output.
  if respond_to?(:compile_ast, true)
    commented_sql(query_ast)
  else
    query_ast.inspect
  end
rescue => e
  # Never raise from here: this already runs while reporting another error.
  "<unavailable: #{e.class}: #{e.message}>"
end
|
|
142
159
|
end
|
|
143
160
|
|
|
144
161
|
# @param [Exwiw::QueryAst] query_ast
|
data/lib/exwiw/runner.rb
CHANGED
|
@@ -60,51 +60,67 @@ module Exwiw
|
|
|
60
60
|
@logger.info("Processing table '#{table_name}'... (#{idx + 1}/#{total_size})")
|
|
61
61
|
|
|
62
62
|
query_ast = adapter.build_query(table, @dump_target, table_by_name)
|
|
63
|
-
results = adapter.execute(query_ast)
|
|
64
|
-
record_num = results.size
|
|
65
63
|
|
|
66
|
-
if
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
file.puts(copy_sql)
|
|
79
|
-
post = adapter.post_insert_sql(table)
|
|
80
|
-
file.puts(post) if post
|
|
64
|
+
# Track which phase we are in so that, if an error is raised while
|
|
65
|
+
# turning the fetched rows into SQL/JSONL, the rescue below can report
|
|
66
|
+
# both the failing step and the exact extraction query that produced the
|
|
67
|
+
# data being processed.
|
|
68
|
+
phase = "executing extraction query"
|
|
69
|
+
begin
|
|
70
|
+
results = adapter.execute(query_ast)
|
|
71
|
+
record_num = results.size
|
|
72
|
+
|
|
73
|
+
if record_num.zero?
|
|
74
|
+
@logger.info(" No records matched. skip this table.")
|
|
75
|
+
next
|
|
81
76
|
end
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
if adapter.supports_bulk_delete? && !@insert_only && !(table.respond_to?(:rails_managed?) && table.rails_managed?)
|
|
97
|
-
@logger.debug(" Generate DELETE statement...")
|
|
98
|
-
delete_sql = adapter.to_bulk_delete(query_ast, table)
|
|
99
|
-
if @logger.debug?
|
|
100
|
-
@logger.debug(" Generated DELETE statement:\n#{delete_sql}")
|
|
77
|
+
insert_idx = (idx + 1).to_s.rjust(3, '0')
|
|
78
|
+
|
|
79
|
+
if @output_format == 'copy'
|
|
80
|
+
phase = "generating COPY statement"
|
|
81
|
+
@logger.debug(" Generate COPY statement...")
|
|
82
|
+
copy_sql = adapter.to_copy_from_stdin(results, table)
|
|
83
|
+
@logger.info(" Generated COPY statement for #{record_num} records.")
|
|
84
|
+
|
|
85
|
+
File.open(File.join(@output_dir, "insert-#{insert_idx}-#{table_name}.#{adapter.output_extension}"), 'w') do |file|
|
|
86
|
+
file.puts(copy_sql)
|
|
87
|
+
post = adapter.post_insert_sql(table)
|
|
88
|
+
file.puts(post) if post
|
|
89
|
+
end
|
|
101
90
|
else
|
|
102
|
-
|
|
91
|
+
phase = "generating INSERT statement"
|
|
92
|
+
@logger.debug(" Generate INSERT statement...")
|
|
93
|
+
chunk_size = table.bulk_insert_chunk_size
|
|
94
|
+
chunks = chunk_size ? results.each_slice(chunk_size).to_a : [results]
|
|
95
|
+
insert_sql = chunks.map { |chunk_rows| adapter.to_bulk_insert(chunk_rows, table) }.join("\n")
|
|
96
|
+
|
|
97
|
+
@logger.info(" Generated INSERT statement for #{record_num} records (#{chunks.size} statement(s)).")
|
|
98
|
+
File.open(File.join(@output_dir, "insert-#{insert_idx}-#{table_name}.#{adapter.output_extension}"), 'w') do |file|
|
|
99
|
+
file.puts(insert_sql)
|
|
100
|
+
post = adapter.post_insert_sql(table)
|
|
101
|
+
file.puts(post) if post
|
|
102
|
+
end
|
|
103
103
|
end
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
104
|
+
|
|
105
|
+
if adapter.supports_bulk_delete? && !@insert_only && !(table.respond_to?(:rails_managed?) && table.rails_managed?)
|
|
106
|
+
phase = "generating DELETE statement"
|
|
107
|
+
@logger.debug(" Generate DELETE statement...")
|
|
108
|
+
delete_sql = adapter.to_bulk_delete(query_ast, table)
|
|
109
|
+
if @logger.debug?
|
|
110
|
+
@logger.debug(" Generated DELETE statement:\n#{delete_sql}")
|
|
111
|
+
else
|
|
112
|
+
@logger.info(" Generated DELETE statement.")
|
|
113
|
+
end
|
|
114
|
+
delete_idx = (total_size - idx).to_s.rjust(3, '0')
|
|
115
|
+
File.open(File.join(@output_dir, "delete-#{delete_idx}-#{table_name}.#{adapter.output_extension}"), 'w') do |file|
|
|
116
|
+
file.puts(delete_sql)
|
|
117
|
+
end
|
|
107
118
|
end
|
|
119
|
+
rescue => e
|
|
120
|
+
@logger.error("Error while #{phase} for table '#{table_name}' (#{idx + 1}/#{total_size}): #{e.class}: #{e.message}")
|
|
121
|
+
@logger.error(" Extraction query that produced the data being processed:")
|
|
122
|
+
@logger.error(" #{adapter.describe_query(query_ast)}")
|
|
123
|
+
raise
|
|
108
124
|
end
|
|
109
125
|
end
|
|
110
126
|
|
data/lib/exwiw/version.rb
CHANGED