exwiw 0.5.3 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +1 -1
- data/docs/mongodb-scoping-fullscan-notes.md +145 -0
- data/docs/optimize-mongodb-export-with-native-ext.md +31 -11
- data/docs/sql-dump-optimization-notes.md +278 -0
- data/ext/exwiw/ext_json/ext_json.c +274 -0
- data/ext/exwiw/ext_json/extconf.rb +8 -0
- data/lib/exwiw/adapter/mongodb_adapter.rb +90 -22
- data/lib/exwiw/adapter/mysql_adapter.rb +70 -18
- data/lib/exwiw/adapter/mysql_client.rb +43 -0
- data/lib/exwiw/adapter/postgresql_adapter.rb +85 -15
- data/lib/exwiw/adapter/sql_bulk_insert.rb +71 -0
- data/lib/exwiw/adapter/sqlite_adapter.rb +75 -18
- data/lib/exwiw/adapter.rb +28 -0
- data/lib/exwiw/ext_json.rb +33 -0
- data/lib/exwiw/runner.rb +10 -16
- data/lib/exwiw/version.rb +1 -1
- data/lib/exwiw.rb +2 -0
- metadata +9 -2
|
@@ -3,15 +3,91 @@
|
|
|
3
3
|
module Exwiw
|
|
4
4
|
module Adapter
|
|
5
5
|
class PostgresqlAdapter < Base
|
|
6
|
+
include SqlBulkInsert
|
|
7
|
+
|
|
8
|
+
# A lazy, streaming stand-in for the materialized rows #execute used to
|
|
9
|
+
# return (`connection.exec(sql).values`). It pulls rows off the wire one
|
|
10
|
+
# at a time via libpq's single-row mode instead of buffering the whole
|
|
11
|
+
# result set, so the dump's dominant memory cost — a Ruby array as large
|
|
12
|
+
# as the table — never materializes. The Runner drives it exactly like the
|
|
13
|
+
# old Array: #size to skip empty tables and log the count, then a single
|
|
14
|
+
# streaming pass (SqlBulkInsert#write_inserts -> each_slice) to write the
|
|
15
|
+
# INSERT.
|
|
16
|
+
#
|
|
17
|
+
# Mirrors MongodbAdapter::StreamingResult; two SQL-specific differences:
|
|
18
|
+
# - #size cannot be answered cheaply from the cursor, so it runs a
|
|
19
|
+
# separate `SELECT COUNT(*)` of the same query. (MongoDB uses
|
|
20
|
+
# count_documents, an index-only walk; the SQL COUNT re-runs the query
|
|
21
|
+
# plan but transfers no row data — Postgres prunes the unused
|
|
22
|
+
# projection of the wrapped subquery.) This keeps the Runner contract
|
|
23
|
+
# unchanged, so MongoDB and the other SQL adapters are untouched.
|
|
24
|
+
# - the streaming pass ties up the connection until fully drained. The
|
|
25
|
+
# Runner always drains it (write_inserts) before issuing any further
|
|
26
|
+
# query (post_insert_sql / DELETE) on the same connection, so the
|
|
27
|
+
# ordering invariant holds.
|
|
28
|
+
class StreamingResult
|
|
29
|
+
include Enumerable
|
|
30
|
+
|
|
31
|
+
def initialize(connection:, data_sql:, count_sql:)
|
|
32
|
+
@connection = connection
|
|
33
|
+
@data_sql = data_sql
|
|
34
|
+
@count_sql = count_sql
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def size
|
|
38
|
+
@size ||= @connection.exec(@count_sql).getvalue(0, 0).to_i
|
|
39
|
+
end
|
|
40
|
+
alias length size
|
|
41
|
+
|
|
42
|
+
# Stream the result set row by row. Each row is an Array of String|nil
|
|
43
|
+
# in libpq's text format — byte-identical to what `#exec(sql).values`
|
|
44
|
+
# produced, so the generated INSERT is unchanged.
|
|
45
|
+
def each
|
|
46
|
+
return enum_for(:each) { size } unless block_given?
|
|
47
|
+
|
|
48
|
+
@connection.send_query(@data_sql)
|
|
49
|
+
@connection.set_single_row_mode
|
|
50
|
+
begin
|
|
51
|
+
while (result = @connection.get_result)
|
|
52
|
+
begin
|
|
53
|
+
result.check
|
|
54
|
+
result.each_row { |row| yield row }
|
|
55
|
+
ensure
|
|
56
|
+
result.clear
|
|
57
|
+
end
|
|
58
|
+
end
|
|
59
|
+
rescue StandardError
|
|
60
|
+
# If iteration is abandoned mid-stream (a SQL error surfaced by
|
|
61
|
+
# #check, or the consumer raised), drain any results still queued so
|
|
62
|
+
# a later query on this same connection does not fail with "another
|
|
63
|
+
# command is already in progress".
|
|
64
|
+
drain
|
|
65
|
+
raise
|
|
66
|
+
end
|
|
67
|
+
self
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
private def drain
|
|
71
|
+
while (result = @connection.get_result)
|
|
72
|
+
result.clear
|
|
73
|
+
end
|
|
74
|
+
rescue PG::Error
|
|
75
|
+
# Connection already errored/clean; nothing left to drain.
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
|
6
79
|
def build_query(table, dump_target, table_by_name)
|
|
7
80
|
Exwiw::QueryAstBuilder.run(table.name, table_by_name, dump_target, @logger)
|
|
8
81
|
end
|
|
9
82
|
|
|
10
83
|
def execute(query_ast)
|
|
11
|
-
|
|
84
|
+
data_sql = commented_sql(query_ast)
|
|
85
|
+
# Count via the same query (wrapped as a subquery) so the Runner can
|
|
86
|
+
# skip empty tables and log the row count without draining the stream.
|
|
87
|
+
count_sql = "#{sql_query_comment(query_ast)} SELECT COUNT(*) FROM (#{compile_ast(query_ast)}) AS exwiw_count_src"
|
|
12
88
|
|
|
13
|
-
@logger.debug(" Executing SQL: \n#{
|
|
14
|
-
|
|
89
|
+
@logger.debug(" Executing SQL (single-row stream): \n#{data_sql}")
|
|
90
|
+
StreamingResult.new(connection: connection, data_sql: data_sql, count_sql: count_sql)
|
|
15
91
|
end
|
|
16
92
|
|
|
17
93
|
def explain(query_ast)
|
|
@@ -97,22 +173,16 @@ module Exwiw
|
|
|
97
173
|
@logger.info(" Wrote schema for #{table_names.size} table(s) to #{output_path}.")
|
|
98
174
|
end
|
|
99
175
|
|
|
100
|
-
|
|
176
|
+
# The INSERT header for this adapter. PostgreSQL uses bare identifiers.
|
|
177
|
+
# #to_bulk_insert / #write_inserts (SqlBulkInsert) append the value tuples
|
|
178
|
+
# and the trailing `;`.
|
|
179
|
+
private def insert_header(table)
|
|
101
180
|
table_name = table.name
|
|
102
|
-
|
|
103
|
-
value_list = results.map do |row|
|
|
104
|
-
quoted_values = row.map do |value|
|
|
105
|
-
escape_value(value)
|
|
106
|
-
end
|
|
107
|
-
"(" + quoted_values.join(', ') + ")"
|
|
108
|
-
end
|
|
109
|
-
values = value_list.join(",\n")
|
|
110
|
-
|
|
111
181
|
if table.rails_managed?
|
|
112
|
-
"INSERT INTO #{table_name} VALUES\n
|
|
182
|
+
"INSERT INTO #{table_name} VALUES\n"
|
|
113
183
|
else
|
|
114
184
|
column_names = table.columns.map(&:name).join(', ')
|
|
115
|
-
"INSERT INTO #{table_name} (#{column_names}) VALUES\n
|
|
185
|
+
"INSERT INTO #{table_name} (#{column_names}) VALUES\n"
|
|
116
186
|
end
|
|
117
187
|
end
|
|
118
188
|
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Exwiw
|
|
4
|
+
module Adapter
|
|
5
|
+
# Shared bulk-INSERT construction for the SQL adapters (mysql / postgresql /
|
|
6
|
+
# sqlite). They produce the same `INSERT INTO ... VALUES (...),(...);` shape
|
|
7
|
+
# and differ only in the header's identifier quoting (see #insert_header) and
|
|
8
|
+
# in #escape_value, so both the in-memory builder (#to_bulk_insert) and the
|
|
9
|
+
# bounded-memory streaming writer (#write_inserts) live here.
|
|
10
|
+
#
|
|
11
|
+
# Each including adapter must provide two private methods:
|
|
12
|
+
# - insert_header(table) -> the "INSERT INTO ... VALUES\n" prefix
|
|
13
|
+
# - escape_value(value) -> the SQL literal for one column value
|
|
14
|
+
module SqlBulkInsert
|
|
15
|
+
# How many rows' tuples to build-and-flush at a time when streaming. Bounds
|
|
16
|
+
# peak memory to this many tuples (plus their joined string) instead of the
|
|
17
|
+
# whole table's INSERT string, while keeping each flush a single fast
|
|
18
|
+
# Array#map + Array#join (the same C-level path #to_bulk_insert uses) so it
|
|
19
|
+
# stays close to whole-string speed — far faster than a naive row-at-a-time
|
|
20
|
+
# IO#print (see script/bench_sql_dump.rb / docs/sql-dump-optimization-notes.md).
|
|
21
|
+
# Mirrors MongoDB's default chunk size: bounded work per flush, but the SQL
|
|
22
|
+
# adapters still emit ONE statement (byte-identical to the un-chunked build).
|
|
23
|
+
STREAM_FLUSH_ROWS = 2_000
|
|
24
|
+
|
|
25
|
+
# Build the whole INSERT statement as a single String. Kept for callers
|
|
26
|
+
# that want the string form (and as the readable definition of the exact
|
|
27
|
+
# bytes #write_inserts streams).
|
|
28
|
+
def to_bulk_insert(results, table)
|
|
29
|
+
value_list = results.map { |row| insert_tuple(row) }
|
|
30
|
+
"#{insert_header(table)}#{value_list.join(",\n")};"
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Stream the bulk INSERT(s) straight to `io` instead of materializing the
|
|
34
|
+
# whole statement string first. Byte-for-byte identical to writing
|
|
35
|
+
# #to_bulk_insert per chunk joined by "\n" (verified by
|
|
36
|
+
# insert_output_snapshot_spec), but only one STREAM_FLUSH_ROWS-row buffer is
|
|
37
|
+
# resident at a time rather than the entire table's INSERT string. Returns
|
|
38
|
+
# the number of statements written.
|
|
39
|
+
def write_inserts(io, results, table, chunk_size)
|
|
40
|
+
chunks = chunk_size ? results.each_slice(chunk_size) : [results]
|
|
41
|
+
statement_count = 0
|
|
42
|
+
chunks.each do |chunk_rows|
|
|
43
|
+
io.print("\n") if statement_count.positive?
|
|
44
|
+
stream_single_insert(io, chunk_rows, table)
|
|
45
|
+
statement_count += 1
|
|
46
|
+
end
|
|
47
|
+
statement_count
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
# Emit one `INSERT INTO ... VALUES <tuples>;` statement to `io`, building
|
|
51
|
+
# and flushing the value tuples STREAM_FLUSH_ROWS at a time so the full
|
|
52
|
+
# statement text is never held in memory at once. Each slice is one fast
|
|
53
|
+
# map+join; the ",\n" between slices reproduces the same separator
|
|
54
|
+
# #to_bulk_insert puts between every tuple, so the bytes are identical.
|
|
55
|
+
private def stream_single_insert(io, rows, table)
|
|
56
|
+
io.print(insert_header(table))
|
|
57
|
+
first = true
|
|
58
|
+
rows.each_slice(STREAM_FLUSH_ROWS) do |slice|
|
|
59
|
+
io.print(",\n") unless first
|
|
60
|
+
first = false
|
|
61
|
+
io.print(slice.map { |row| insert_tuple(row) }.join(",\n"))
|
|
62
|
+
end
|
|
63
|
+
io.print(";")
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
private def insert_tuple(row)
|
|
67
|
+
"(" + row.map { |value| escape_value(value) }.join(', ') + ")"
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
end
|
|
@@ -3,15 +3,72 @@
|
|
|
3
3
|
module Exwiw
|
|
4
4
|
module Adapter
|
|
5
5
|
class SqliteAdapter < Base
|
|
6
|
+
include SqlBulkInsert
|
|
7
|
+
|
|
8
|
+
# A lazy, streaming stand-in for the materialized rows #execute used to
|
|
9
|
+
# return (`connection.execute(sql)`). It walks the result one row at a time
|
|
10
|
+
# via SQLite's statement cursor (Statement#each -> sqlite3_step) instead of
|
|
11
|
+
# buffering the whole result set, so the dump's dominant memory cost — a
|
|
12
|
+
# Ruby array as large as the table — never materializes. The Runner drives
|
|
13
|
+
# it exactly like the old Array: #size to skip empty tables and log the
|
|
14
|
+
# count, then a single streaming pass (SqlBulkInsert#write_inserts ->
|
|
15
|
+
# each_slice) to write the INSERT.
|
|
16
|
+
#
|
|
17
|
+
# Mirrors Mysql/PostgresqlAdapter::StreamingResult, with two SQLite
|
|
18
|
+
# specifics:
|
|
19
|
+
# - #size runs a separate `SELECT COUNT(*)` of the same query with the
|
|
20
|
+
# projection replaced by COUNT(*) (compile_ast(count_only: true)) —
|
|
21
|
+
# exact because exwiw's extraction queries have no DISTINCT/GROUP
|
|
22
|
+
# BY/LIMIT, so the row count is independent of the projection. (Unlike
|
|
23
|
+
# MySQL, SQLite tolerates a duplicate-column subquery wrap too, but the
|
|
24
|
+
# count_only form is shared with MySQL and avoids the extra subquery.)
|
|
25
|
+
# - SQLite is an embedded, single-connection engine that allows several
|
|
26
|
+
# active prepared statements at once, so the #size COUNT and the data
|
|
27
|
+
# cursor do not contend. The statement is closed in an ensure block so
|
|
28
|
+
# an abandoned mid-stream iteration still releases the cursor.
|
|
29
|
+
class StreamingResult
|
|
30
|
+
include Enumerable
|
|
31
|
+
|
|
32
|
+
def initialize(connection:, data_sql:, count_sql:)
|
|
33
|
+
@connection = connection
|
|
34
|
+
@data_sql = data_sql
|
|
35
|
+
@count_sql = count_sql
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
def size
|
|
39
|
+
@size ||= @connection.execute(@count_sql).dig(0, 0).to_i
|
|
40
|
+
end
|
|
41
|
+
alias length size
|
|
42
|
+
|
|
43
|
+
# Stream the result set row by row. Each row is an Array of values in
|
|
44
|
+
# SQLite's native type mapping — byte-identical to what
|
|
45
|
+
# `connection.execute(sql)` produced, so the generated INSERT is unchanged.
|
|
46
|
+
def each
|
|
47
|
+
return enum_for(:each) { size } unless block_given?
|
|
48
|
+
|
|
49
|
+
statement = @connection.prepare(@data_sql)
|
|
50
|
+
begin
|
|
51
|
+
statement.each { |row| yield row }
|
|
52
|
+
ensure
|
|
53
|
+
statement.close
|
|
54
|
+
end
|
|
55
|
+
self
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
6
59
|
def build_query(table, dump_target, table_by_name)
|
|
7
60
|
Exwiw::QueryAstBuilder.run(table.name, table_by_name, dump_target, @logger)
|
|
8
61
|
end
|
|
9
62
|
|
|
10
63
|
def execute(query_ast)
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
64
|
+
data_sql = commented_sql(query_ast)
|
|
65
|
+
# Count via the same FROM/JOIN/WHERE (projection replaced by COUNT(*)) so
|
|
66
|
+
# the Runner can skip empty tables and log the row count without draining
|
|
67
|
+
# the cursor. See StreamingResult for why this is exact.
|
|
68
|
+
count_sql = "#{sql_query_comment(query_ast)} #{compile_ast(query_ast, count_only: true)}"
|
|
69
|
+
|
|
70
|
+
@logger.debug(" Executing SQL (cursor stream): \n#{data_sql}")
|
|
71
|
+
StreamingResult.new(connection: connection, data_sql: data_sql, count_sql: count_sql)
|
|
15
72
|
end
|
|
16
73
|
|
|
17
74
|
def explain(query_ast)
|
|
@@ -63,22 +120,16 @@ module Exwiw
|
|
|
63
120
|
stmt.end_with?(';') ? stmt : "#{stmt};"
|
|
64
121
|
end
|
|
65
122
|
|
|
66
|
-
|
|
123
|
+
# The INSERT header for this adapter. SQLite uses bare identifiers.
|
|
124
|
+
# #to_bulk_insert / #write_inserts (SqlBulkInsert) append the value tuples
|
|
125
|
+
# and the trailing `;`.
|
|
126
|
+
private def insert_header(table)
|
|
67
127
|
table_name = table.name
|
|
68
|
-
|
|
69
|
-
value_list = results.map do |row|
|
|
70
|
-
quoted_values = row.map do |value|
|
|
71
|
-
escape_value(value)
|
|
72
|
-
end
|
|
73
|
-
"(" + quoted_values.join(', ') + ")"
|
|
74
|
-
end
|
|
75
|
-
values = value_list.join(",\n")
|
|
76
|
-
|
|
77
128
|
if table.rails_managed?
|
|
78
|
-
"INSERT INTO #{table_name} VALUES\n
|
|
129
|
+
"INSERT INTO #{table_name} VALUES\n"
|
|
79
130
|
else
|
|
80
131
|
column_names = table.columns.map(&:name).join(', ')
|
|
81
|
-
"INSERT INTO #{table_name} (#{column_names}) VALUES\n
|
|
132
|
+
"INSERT INTO #{table_name} (#{column_names}) VALUES\n"
|
|
82
133
|
end
|
|
83
134
|
end
|
|
84
135
|
|
|
@@ -140,11 +191,17 @@ module Exwiw
|
|
|
140
191
|
sql
|
|
141
192
|
end
|
|
142
193
|
|
|
143
|
-
|
|
194
|
+
# @param count_only [Boolean] emit `SELECT COUNT(*)` instead of the
|
|
195
|
+
# projected columns (used by StreamingResult#size). Safe because exwiw's
|
|
196
|
+
# extraction queries have no DISTINCT/GROUP BY/LIMIT, so the count does
|
|
197
|
+
# not depend on the projection.
|
|
198
|
+
def compile_ast(query_ast, count_only: false)
|
|
144
199
|
raise NotImplementedError unless query_ast.is_a?(Exwiw::QueryAst::Select)
|
|
145
200
|
|
|
146
201
|
sql = "SELECT "
|
|
147
|
-
sql += if
|
|
202
|
+
sql += if count_only
|
|
203
|
+
"COUNT(*)"
|
|
204
|
+
elsif query_ast.select_all
|
|
148
205
|
"*"
|
|
149
206
|
else
|
|
150
207
|
query_ast.columns.map { |col| compile_column_name(query_ast, col) }.join(', ')
|
data/lib/exwiw/adapter.rb
CHANGED
|
@@ -123,6 +123,34 @@ module Exwiw
|
|
|
123
123
|
nil
|
|
124
124
|
end
|
|
125
125
|
|
|
126
|
+
# Write the bulk INSERT/JSONL output for `results` to the open `io`,
|
|
127
|
+
# returning the number of statements written. The Runner calls this once
|
|
128
|
+
# per table for the non-COPY path.
|
|
129
|
+
#
|
|
130
|
+
# Default: build each chunk's output as a full string via #to_bulk_insert
|
|
131
|
+
# and write it, separating statements with "\n" — exactly what the Runner
|
|
132
|
+
# used to inline. This keeps the dominant memory cost at one chunk's
|
|
133
|
+
# serialized string (bounded by `chunk_size`), which is why MongoDB sets a
|
|
134
|
+
# positive default chunk size. Adapters whose output is a single large
|
|
135
|
+
# statement (the SQL adapters, where chunk_size is nil) override this to
|
|
136
|
+
# stream the statement to `io` in bounded buffers instead of holding the
|
|
137
|
+
# whole thing in memory.
|
|
138
|
+
#
|
|
139
|
+
# @param io [IO] open output file
|
|
140
|
+
# @param results [Enumerable] rows/documents from #execute
|
|
141
|
+
# @param table the table/collection config
|
|
142
|
+
# @param chunk_size [Integer, nil] rows per statement (nil => one statement)
|
|
143
|
+
def write_inserts(io, results, table, chunk_size)
|
|
144
|
+
chunks = chunk_size ? results.each_slice(chunk_size) : [results]
|
|
145
|
+
statement_count = 0
|
|
146
|
+
chunks.each do |chunk_rows|
|
|
147
|
+
io.print("\n") if statement_count.positive?
|
|
148
|
+
io.print(to_bulk_insert(chunk_rows, table))
|
|
149
|
+
statement_count += 1
|
|
150
|
+
end
|
|
151
|
+
statement_count
|
|
152
|
+
end
|
|
153
|
+
|
|
126
154
|
# Run the database-specific EXPLAIN for the given query and return the
|
|
127
155
|
# output as a single string for `explain` subcommand to print.
|
|
128
156
|
# SQL adapters override; MongodbAdapter currently raises.
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "json"
|
|
4
|
+
|
|
5
|
+
module Exwiw
|
|
6
|
+
# MongoDB Relaxed Extended JSON encoder for a single dumped document.
|
|
7
|
+
#
|
|
8
|
+
# `encode` is the one entry point. When the optional native extension is compiled
|
|
9
|
+
# (the common case once `gem install exwiw` builds it), it emits the line in a
|
|
10
|
+
# single C tree-walk; otherwise it falls back to the pure-Ruby path. Both are
|
|
11
|
+
# byte-for-byte identical — the native path delegates every value it does not
|
|
12
|
+
# format itself back to `encode_fragment` (see ext/exwiw/ext_json/ext_json.c).
|
|
13
|
+
module ExtJson
|
|
14
|
+
module_function
|
|
15
|
+
|
|
16
|
+
# Pure-Ruby encoder for one value, identical to the historical
|
|
17
|
+
# `JSON.generate(doc.as_extended_json(mode: :relaxed))`. Used both as the
|
|
18
|
+
# whole-document fallback and as the native path's per-value delegate, so the
|
|
19
|
+
# two paths cannot diverge.
|
|
20
|
+
def encode_fragment(value)
|
|
21
|
+
JSON.generate(value.respond_to?(:as_extended_json) ? value.as_extended_json(mode: :relaxed) : value)
|
|
22
|
+
end
|
|
23
|
+
|
|
24
|
+
begin
|
|
25
|
+
require "exwiw/ext_json_native" # defines Exwiw::ExtJson.encode_native
|
|
26
|
+
def encode(doc) = encode_native(doc)
|
|
27
|
+
rescue LoadError
|
|
28
|
+
# No compiled extension (JRuby/TruffleRuby, or a host where the build
|
|
29
|
+
# failed): keep exwiw working as a pure-Ruby gem.
|
|
30
|
+
def encode(doc) = encode_fragment(doc)
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
data/lib/exwiw/runner.rb
CHANGED
|
@@ -97,30 +97,24 @@ module Exwiw
|
|
|
97
97
|
else
|
|
98
98
|
phase = "generating INSERT statement"
|
|
99
99
|
@logger.debug(" Generate INSERT statement...")
|
|
100
|
-
#
|
|
101
|
-
# table's
|
|
102
|
-
#
|
|
103
|
-
#
|
|
104
|
-
#
|
|
105
|
-
# full alongside the already-large in-memory result set.
|
|
100
|
+
# Let the adapter write the INSERT/JSONL output straight to the file
|
|
101
|
+
# instead of building the whole table's output as one string first,
|
|
102
|
+
# so only a bounded amount of serialized text is resident at a time —
|
|
103
|
+
# important for large tables/collections whose one-shot output would
|
|
104
|
+
# otherwise be held in full alongside the already-large result set.
|
|
106
105
|
#
|
|
107
106
|
# The chunk size falls back to the adapter's default when the table
|
|
108
|
-
# config does not set one (SQL adapters: nil -> one statement,
|
|
109
|
-
#
|
|
110
|
-
#
|
|
111
|
-
#
|
|
107
|
+
# config does not set one (SQL adapters: nil -> one statement, but
|
|
108
|
+
# streamed in bounded buffers; MongoDB: a positive default so the
|
|
109
|
+
# JSONL is chunked). #write_inserts emits bytes identical to the
|
|
110
|
+
# previous inline chunk loop and returns the statement count.
|
|
112
111
|
chunk_size = table.bulk_insert_chunk_size || adapter.default_bulk_insert_chunk_size
|
|
113
|
-
chunks = chunk_size ? results.each_slice(chunk_size) : [results]
|
|
114
112
|
|
|
115
113
|
statement_count = 0
|
|
116
114
|
File.open(File.join(@output_dir, "insert-#{insert_idx}-#{table_name}.#{adapter.output_extension}"), 'w') do |file|
|
|
117
115
|
pre = adapter.pre_insert_sql(table)
|
|
118
116
|
file.puts(pre) if pre
|
|
119
|
-
|
|
120
|
-
file.print("\n") if statement_count.positive?
|
|
121
|
-
file.print(adapter.to_bulk_insert(chunk_rows, table))
|
|
122
|
-
statement_count += 1
|
|
123
|
-
end
|
|
117
|
+
statement_count = adapter.write_inserts(file, results, table, chunk_size)
|
|
124
118
|
file.print("\n")
|
|
125
119
|
post = adapter.post_insert_sql(table)
|
|
126
120
|
file.puts(post) if post
|
data/lib/exwiw/version.rb
CHANGED
data/lib/exwiw.rb
CHANGED
|
@@ -5,6 +5,7 @@ require_relative "exwiw/version"
|
|
|
5
5
|
require "json"
|
|
6
6
|
require "serdes"
|
|
7
7
|
|
|
8
|
+
require_relative "exwiw/ext_json"
|
|
8
9
|
require_relative "exwiw/belongs_to"
|
|
9
10
|
require_relative "exwiw/table_column"
|
|
10
11
|
require_relative "exwiw/table_config"
|
|
@@ -13,6 +14,7 @@ require_relative "exwiw/mongodb_field"
|
|
|
13
14
|
require_relative "exwiw/mongodb_collection_config"
|
|
14
15
|
require_relative "exwiw/ddl_postprocessor"
|
|
15
16
|
require_relative "exwiw/adapter"
|
|
17
|
+
require_relative "exwiw/adapter/sql_bulk_insert"
|
|
16
18
|
require_relative "exwiw/adapter/sqlite_adapter"
|
|
17
19
|
require_relative "exwiw/adapter/mysql_client"
|
|
18
20
|
require_relative "exwiw/adapter/mysql_adapter"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: exwiw
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.6.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Shia
|
|
@@ -29,12 +29,14 @@ email:
|
|
|
29
29
|
- rise.shia@gmail.com
|
|
30
30
|
executables:
|
|
31
31
|
- exwiw
|
|
32
|
-
extensions:
|
|
32
|
+
extensions:
|
|
33
|
+
- ext/exwiw/ext_json/extconf.rb
|
|
33
34
|
extra_rdoc_files: []
|
|
34
35
|
files:
|
|
35
36
|
- CHANGELOG.md
|
|
36
37
|
- LICENSE.txt
|
|
37
38
|
- README.md
|
|
39
|
+
- docs/mongodb-scoping-fullscan-notes.md
|
|
38
40
|
- docs/optimization-notes.md
|
|
39
41
|
- docs/optimize-mongodb-export-with-native-ext.md
|
|
40
42
|
- docs/plans/2026-05-15-insert-000-schema-file.md
|
|
@@ -44,13 +46,17 @@ files:
|
|
|
44
46
|
- docs/plans/2026-05-29-rails-managed-tables.md
|
|
45
47
|
- docs/plans/2026-05-31-ids-column-for-sql-adapters.md
|
|
46
48
|
- docs/plans/2026-06-19-mongodb-export-remove-parallelism-native-ext.md
|
|
49
|
+
- docs/sql-dump-optimization-notes.md
|
|
47
50
|
- exe/exwiw
|
|
51
|
+
- ext/exwiw/ext_json/ext_json.c
|
|
52
|
+
- ext/exwiw/ext_json/extconf.rb
|
|
48
53
|
- lib/exwiw.rb
|
|
49
54
|
- lib/exwiw/adapter.rb
|
|
50
55
|
- lib/exwiw/adapter/mongodb_adapter.rb
|
|
51
56
|
- lib/exwiw/adapter/mysql_adapter.rb
|
|
52
57
|
- lib/exwiw/adapter/mysql_client.rb
|
|
53
58
|
- lib/exwiw/adapter/postgresql_adapter.rb
|
|
59
|
+
- lib/exwiw/adapter/sql_bulk_insert.rb
|
|
54
60
|
- lib/exwiw/adapter/sqlite_adapter.rb
|
|
55
61
|
- lib/exwiw/after_insert_hook.rb
|
|
56
62
|
- lib/exwiw/belongs_to.rb
|
|
@@ -59,6 +65,7 @@ files:
|
|
|
59
65
|
- lib/exwiw/determine_table_processing_order.rb
|
|
60
66
|
- lib/exwiw/embedded_in.rb
|
|
61
67
|
- lib/exwiw/explain_runner.rb
|
|
68
|
+
- lib/exwiw/ext_json.rb
|
|
62
69
|
- lib/exwiw/mongo_query.rb
|
|
63
70
|
- lib/exwiw/mongodb_collection_config.rb
|
|
64
71
|
- lib/exwiw/mongodb_field.rb
|