e621_export_downloader 0.0.13 → 0.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/e621/csv_importable.rb +73 -13
- data/lib/e621_export_downloader/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 89d525162aa1da43623f66a8bab4618ac17910c2c3e0db7a272e8149be91efb8
|
|
4
|
+
data.tar.gz: 6152f2a56d9ad995dd5868f2762d4a95c38e2b4d95a289b8806ef74870b50ea7
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 494ccb09c9727311ec327cdc8926816bd0957f9d0a9455b6b5789364b30d4728d95763a99707b5861fb1d858092acaed1c220abd97dfb22f3f66e9de05fc6eaf
|
|
7
|
+
data.tar.gz: a0dbdb4a693365f88b5986ffcdef1a23190a3c6ce85e98297d344bf74b91bf4794700d5f0cdf4ab087cb695e0017f6464d21dfe5013419a29930d438e1786f43
|
data/lib/e621/csv_importable.rb
CHANGED
|
@@ -1,8 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
# typed: true
|
|
3
3
|
|
|
4
|
-
require("csv")
|
|
5
|
-
|
|
6
4
|
module E621
|
|
7
5
|
module CsvImportable
|
|
8
6
|
extend(T::Sig)
|
|
@@ -17,22 +15,84 @@ module E621
|
|
|
17
15
|
E621::RowCount.set(T.unsafe(self).table_name.split(".").last, count)
|
|
18
16
|
end
|
|
19
17
|
|
|
20
|
-
|
|
21
|
-
|
|
18
|
+
# Loads a CSV export into this model's table via PostgreSQL COPY.
|
|
19
|
+
#
|
|
20
|
+
# The file's raw bytes are streamed straight into COPY's native CSV parser
|
|
21
|
+
# (only the header line is read, for the column list) rather than parsing
|
|
22
|
+
# each row into a CSV::Row and re-serializing it in Ruby. That avoids a
|
|
23
|
+
# full single-threaded parse + re-encode of every row and preserves the
|
|
24
|
+
# exact bytes of fields containing embedded newlines/quotes. The row count
|
|
25
|
+
# is taken from COPY itself (PG::Result#cmd_tuples).
|
|
26
|
+
#
|
|
27
|
+
# truncate: empty the table before loading (full reload).
|
|
28
|
+
# recreate_indexes: drop every secondary (non-PK, non-constraint) index
|
|
29
|
+
# before the COPY and rebuild it afterward. Building the
|
|
30
|
+
# GIN/btree indexes once over the finished table is far
|
|
31
|
+
# cheaper than maintaining them row-by-row during COPY,
|
|
32
|
+
# and CREATE INDEX can use parallel workers.
|
|
33
|
+
#
|
|
34
|
+
# Physical-storage policy (UNLOGGED) and session tuning
|
|
35
|
+
# (maintenance_work_mem, max_parallel_maintenance_workers, ...) are the
|
|
36
|
+
# caller's responsibility — set them around this call.
|
|
37
|
+
sig do
|
|
38
|
+
params(
|
|
39
|
+
csv_path: String,
|
|
40
|
+
truncate: T::Boolean,
|
|
41
|
+
recreate_indexes: T::Boolean,
|
|
42
|
+
chunk_bytes: Integer,
|
|
43
|
+
).returns(Integer)
|
|
44
|
+
end
|
|
45
|
+
def import_from_csv(csv_path, truncate: false, recreate_indexes: false, chunk_bytes: 1 << 20)
|
|
46
|
+
model = T.unsafe(self)
|
|
47
|
+
indexes = recreate_indexes ? secondary_index_definitions : {}
|
|
48
|
+
|
|
49
|
+
model.connection.execute("TRUNCATE #{model.quoted_table_name}") if truncate
|
|
50
|
+
indexes.each_key { |name| model.connection.execute("DROP INDEX IF EXISTS #{name}") }
|
|
51
|
+
|
|
52
|
+
count = copy_csv(csv_path, chunk_bytes: chunk_bytes)
|
|
53
|
+
|
|
54
|
+
indexes.each_value { |ddl| model.connection.execute(ddl) }
|
|
55
|
+
self.row_count = count
|
|
56
|
+
count
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
private
|
|
60
|
+
|
|
61
|
+
sig { params(csv_path: String, chunk_bytes: Integer).returns(Integer) }
|
|
62
|
+
def copy_csv(csv_path, chunk_bytes:)
|
|
22
63
|
model = T.unsafe(self)
|
|
23
|
-
|
|
24
|
-
columns =
|
|
64
|
+
header = File.open(csv_path, "rb", &:readline)
|
|
65
|
+
columns = header.chomp.split(",").map { |h| model.connection.quote_column_name(h.strip) }.join(", ")
|
|
25
66
|
|
|
26
|
-
count = 0
|
|
27
67
|
raw = model.connection.raw_connection
|
|
28
|
-
raw.copy_data("COPY #{model.quoted_table_name} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
68
|
+
result = raw.copy_data("COPY #{model.quoted_table_name} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
|
|
69
|
+
File.open(csv_path, "rb") do |io|
|
|
70
|
+
io.readline # skip header row
|
|
71
|
+
while (chunk = io.read(chunk_bytes))
|
|
72
|
+
raw.put_copy_data(chunk)
|
|
73
|
+
end
|
|
32
74
|
end
|
|
33
75
|
end
|
|
34
|
-
|
|
35
|
-
|
|
76
|
+
result.cmd_tuples
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
# Every non-primary-key, non-constraint-backed index on the table, paired
|
|
80
|
+
# with the DDL needed to recreate it.
|
|
81
|
+
sig { returns(T::Hash[String, String]) }
|
|
82
|
+
def secondary_index_definitions
|
|
83
|
+
model = T.unsafe(self)
|
|
84
|
+
conn = model.connection
|
|
85
|
+
sql = <<~SQL
|
|
86
|
+
SELECT i.indexrelid::regclass::text AS name,
|
|
87
|
+
pg_get_indexdef(i.indexrelid) AS ddl
|
|
88
|
+
FROM pg_index i
|
|
89
|
+
WHERE i.indrelid = #{conn.quote(model.quoted_table_name)}::regclass
|
|
90
|
+
AND NOT i.indisprimary
|
|
91
|
+
AND NOT EXISTS (
|
|
92
|
+
SELECT 1 FROM pg_constraint c WHERE c.conindid = i.indexrelid
|
|
93
|
+
)
|
|
94
|
+
SQL
|
|
95
|
+
conn.exec_query(sql).to_h { |r| [r["name"], r["ddl"]] }
|
|
36
96
|
end
|
|
37
97
|
end
|
|
38
98
|
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: e621_export_downloader
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.0.13
|
|
4
|
+
version: 0.0.14
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Donovan_DMC
|
|
8
8
|
bindir: exe
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 2026-06-
|
|
10
|
+
date: 2026-06-14 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: csv
|