e621_export_downloader 0.0.12 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 71464892548f5aea175d642767ec9c081992971fa05cfa591786e30f05849fc2
4
- data.tar.gz: 6dab31bc8938e5289981d952e2e674bf32e407e10f83d4fc5bef754174e21750
3
+ metadata.gz: 89d525162aa1da43623f66a8bab4618ac17910c2c3e0db7a272e8149be91efb8
4
+ data.tar.gz: 6152f2a56d9ad995dd5868f2762d4a95c38e2b4d95a289b8806ef74870b50ea7
5
5
  SHA512:
6
- metadata.gz: e5f44050f050ef991ac4ab7fecef628c665f449c46208a6d9e3734dbec95e9f088169adad6f3b81ab9133aef4afecce9f36109c25c7f007a3e2e8d2cb2d4807b
7
- data.tar.gz: 453c7a2885eebc6681ec5954ae86430147f336806c2f34052cecf862cb6926626ec039df7a1f798b2f467e50a184883f4dc7c0bd28e3dc0fe37ca3d927ab57ad
6
+ metadata.gz: 494ccb09c9727311ec327cdc8926816bd0957f9d0a9455b6b5789364b30d4728d95763a99707b5861fb1d858092acaed1c220abd97dfb22f3f66e9de05fc6eaf
7
+ data.tar.gz: a0dbdb4a693365f88b5986ffcdef1a23190a3c6ce85e98297d344bf74b91bf4794700d5f0cdf4ab087cb695e0017f6464d21dfe5013419a29930d438e1786f43
@@ -5,17 +5,94 @@ module E621
5
5
  module CsvImportable
6
6
  extend(T::Sig)
7
7
 
8
- sig { params(csv_path: String).returns(T.untyped) }
9
- def import_from_csv(csv_path)
8
+ sig { returns(Integer) }
9
+ def row_count
10
+ E621::RowCount[T.unsafe(self).table_name.split(".").last]
11
+ end
12
+
13
+ sig { params(count: Integer).returns(T.untyped) }
14
+ def row_count=(count)
15
+ E621::RowCount.set(T.unsafe(self).table_name.split(".").last, count)
16
+ end
17
+
18
+ # Loads a CSV export into this model's table via PostgreSQL COPY.
19
+ #
20
+ # The file's raw bytes are streamed straight into COPY's native CSV parser
21
+ # (only the header line is read, for the column list) rather than parsing
22
+ # each row into a CSV::Row and re-serializing it in Ruby. That avoids a
23
+ # full single-threaded parse + re-encode of every row and preserves the
24
+ # exact bytes of fields containing embedded newlines/quotes. The row count
25
+ # is taken from COPY itself (PG::Result#cmd_tuples).
26
+ #
27
+ # truncate: empty the table before loading (full reload).
28
+ # recreate_indexes: drop every secondary (non-PK, non-constraint) index
29
+ # before the COPY and rebuild it afterward. Building the
30
+ # GIN/btree indexes once over the finished table is far
31
+ # cheaper than maintaining them row-by-row during COPY,
32
+ # and CREATE INDEX can use parallel workers.
33
+ #
34
+ # Physical-storage policy (UNLOGGED) and session tuning
35
+ # (maintenance_work_mem, max_parallel_maintenance_workers, ...) are the
36
+ # caller's responsibility — set them around this call.
37
+ sig do
38
+ params(
39
+ csv_path: String,
40
+ truncate: T::Boolean,
41
+ recreate_indexes: T::Boolean,
42
+ chunk_bytes: Integer,
43
+ ).returns(Integer)
44
+ end
45
+ def import_from_csv(csv_path, truncate: false, recreate_indexes: false, chunk_bytes: 1 << 20)
46
+ model = T.unsafe(self)
47
+ indexes = recreate_indexes ? secondary_index_definitions : {}
48
+
49
+ model.connection.execute("TRUNCATE #{model.quoted_table_name}") if truncate
50
+ indexes.each_key { |name| model.connection.execute("DROP INDEX IF EXISTS #{name}") }
51
+
52
+ count = copy_csv(csv_path, chunk_bytes: chunk_bytes)
53
+
54
+ indexes.each_value { |ddl| model.connection.execute(ddl) }
55
+ self.row_count = count
56
+ count
57
+ end
58
+
59
+ private
60
+
61
+ sig { params(csv_path: String, chunk_bytes: Integer).returns(Integer) }
62
+ def copy_csv(csv_path, chunk_bytes:)
10
63
  model = T.unsafe(self)
11
- headers = File.open(csv_path, "rb", &:readline).chomp
12
- columns = headers.split(",").map { |h| model.connection.quote_column_name(h.strip) }.join(", ")
64
+ header = File.open(csv_path, "rb", &:readline)
65
+ columns = header.chomp.split(",").map { |h| model.connection.quote_column_name(h.strip) }.join(", ")
66
+
13
67
  raw = model.connection.raw_connection
14
- raw.copy_data("COPY #{model.quoted_table_name} (#{columns}) FROM STDIN WITH (FORMAT CSV, HEADER TRUE)") do
15
- File.open(csv_path, "rb") do |f|
16
- raw.put_copy_data(f.read(65_536)) until f.eof?
68
+ result = raw.copy_data("COPY #{model.quoted_table_name} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
69
+ File.open(csv_path, "rb") do |io|
70
+ io.readline # skip header row
71
+ while (chunk = io.read(chunk_bytes))
72
+ raw.put_copy_data(chunk)
73
+ end
17
74
  end
18
75
  end
76
+ result.cmd_tuples
77
+ end
78
+
79
+ # Every non-primary-key, non-constraint-backed index on the table, paired
80
+ # with the DDL needed to recreate it.
81
+ sig { returns(T::Hash[String, String]) }
82
+ def secondary_index_definitions
83
+ model = T.unsafe(self)
84
+ conn = model.connection
85
+ sql = <<~SQL
86
+ SELECT i.indexrelid::regclass::text AS name,
87
+ pg_get_indexdef(i.indexrelid) AS ddl
88
+ FROM pg_index i
89
+ WHERE i.indrelid = #{conn.quote(model.quoted_table_name)}::regclass
90
+ AND NOT i.indisprimary
91
+ AND NOT EXISTS (
92
+ SELECT 1 FROM pg_constraint c WHERE c.conindid = i.indexrelid
93
+ )
94
+ SQL
95
+ conn.exec_query(sql).to_h { |r| [r["name"], r["ddl"]] }
19
96
  end
20
97
  end
21
98
  end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+ # typed: true
3
+
4
+ require("active_record")
5
+
6
+ module E621
7
+ class RowCount < ActiveRecord::Base
8
+ extend(T::Sig)
9
+
10
+ self.table_name = "e621.row_counts"
11
+ self.primary_key = "table_name"
12
+ self.record_timestamps = false
13
+
14
+ sig { params(table_name: String).returns(Integer) }
15
+ def self.[](table_name)
16
+ find_by(table_name: table_name)&.count || 0
17
+ end
18
+
19
+ sig { params(table_name: String, count: Integer).returns(T.untyped) }
20
+ def self.set(table_name, count)
21
+ upsert({ table_name: table_name, count: count }, unique_by: :table_name)
22
+ end
23
+
24
+ sig { params(table_name: String, by: Integer).returns(T.untyped) }
25
+ def self.increment(table_name, by)
26
+ where(table_name: table_name).update_all("count = count + #{by.to_i}")
27
+ end
28
+ end
29
+ end
@@ -6,6 +6,7 @@ require_relative("../e621/pool")
6
6
  require_relative("../e621/post")
7
7
  require_relative("../e621/post_replacement")
8
8
  require_relative("../e621/post_version")
9
+ require_relative("../e621/row_count")
9
10
  require_relative("../e621/tag")
10
11
  require_relative("../e621/tag_alias")
11
12
  require_relative("../e621/tag_implication")
@@ -4,7 +4,7 @@
4
4
  # loaded by bundler
5
5
  module E621ExportDownloader
6
6
  module Constants
7
- VERSION = "0.0.12"
7
+ VERSION = "0.0.14"
8
8
  WEBSITE = "https://github.com/DonovanDMC/E621ExportDownloader.rb"
9
9
  end
10
10
  end
@@ -2,6 +2,8 @@
2
2
 
3
3
  class CreateE621Tables < ActiveRecord::Migration<%= migration_version %>
4
4
  def change
5
+ enable_extension("pg_trgm")
6
+
5
7
  <% if use_schema? -%>
6
8
  reversible do |r|
7
9
  r.up { execute("CREATE SCHEMA <%= schema %>") }
@@ -15,22 +17,27 @@ class CreateE621Tables < ActiveRecord::Migration<%= migration_version %>
15
17
  t.string(:group_name)
16
18
  t.boolean(:is_active, null: false)
17
19
  t.boolean(:is_locked, null: false)
18
- t.bigint(:linked_user_id)
20
+ t.bigint(:linked_user_id, index: true)
19
21
  t.string(:name, null: false, index: { unique: true })
20
22
  t.text(:other_names, array: true, null: false, default: [])
21
23
  t.datetime(:updated_at, null: false)
22
- t.text(:urls, null: false)
24
+ t.text(:urls)
25
+ t.index("group_name gin_trgm_ops", using: :gin)
26
+ t.index("name gin_trgm_ops", using: :gin)
23
27
  end
24
28
 
25
29
  create_table(<%= table_ref("bulk_update_requests") %>, id: :bigint, default: nil, force: :cascade) do |t|
26
- t.bigint(:approver_id)
30
+ t.bigint(:approver_id, index: true)
27
31
  t.datetime(:created_at, null: false)
28
- t.bigint(:forum_topic_id)
32
+ t.bigint(:forum_topic_id, index: true)
33
+ t.bigint(:forum_post_id, index: true)
29
34
  t.text(:script, null: false)
30
35
  t.string(:status, null: false, index: true)
31
36
  t.string(:title)
32
37
  t.datetime(:updated_at, null: false)
33
38
  t.bigint(:user_id, null: false, index: true)
39
+ t.index("script gin_trgm_ops", using: :gin)
40
+ t.index("title gin_trgm_ops", using: :gin)
34
41
  end
35
42
 
36
43
  create_table(<%= table_ref("pools") %>, id: :bigint, default: nil, force: :cascade) do |t|
@@ -40,12 +47,14 @@ class CreateE621Tables < ActiveRecord::Migration<%= migration_version %>
40
47
  t.text(:description, null: false)
41
48
  t.boolean(:is_active, null: false)
42
49
  t.string(:name, null: false, index: true)
43
- t.bigint(:post_ids, array: true, null: false, default: [])
50
+ t.bigint(:post_ids, array: true, null: false, default: [], index: { using: :gin })
44
51
  t.datetime(:updated_at)
52
+ t.index("description gin_trgm_ops", using: :gin)
53
+ t.index("name gin_trgm_ops", using: :gin)
45
54
  end
46
55
 
47
56
  create_table(<%= table_ref("posts") %>, id: :bigint, default: nil, force: :cascade) do |t|
48
- t.bigint(:approver_id)
57
+ t.bigint(:approver_id, index: true)
49
58
  t.bigint(:change_seq, null: false)
50
59
  t.integer(:comment_count, null: false)
51
60
  t.datetime(:created_at, null: false)
@@ -58,60 +67,70 @@ class CreateE621Tables < ActiveRecord::Migration<%= migration_version %>
58
67
  t.integer(:image_height, null: false)
59
68
  t.integer(:image_width, null: false)
60
69
  t.boolean(:is_deleted, null: false, index: true)
61
- t.boolean(:is_flagged, null: false)
62
- t.boolean(:is_note_locked, null: false)
63
- t.boolean(:is_pending, null: false)
64
- t.boolean(:is_rating_locked, null: false)
65
- t.boolean(:is_status_locked, null: false)
70
+ t.boolean(:is_flagged, null: false, index: true)
71
+ t.boolean(:is_note_locked, null: false, index: true)
72
+ t.boolean(:is_pending, null: false, index: true)
73
+ t.boolean(:is_rating_locked, null: false, index: true)
74
+ t.boolean(:is_status_locked, null: false, index: true)
66
75
  t.text(:locked_tags, null: false)
67
76
  t.string(:md5, index: { unique: true }, null: false)
68
- t.bigint(:parent_id)
77
+ t.bigint(:parent_id, index: true)
69
78
  t.string(:rating, null: false, index: true)
70
79
  t.integer(:score, null: false)
71
- t.text(:sources, array: true, null: false, default: [])
72
- t.text(:tags, array: true, null: false, default: [])
80
+ t.text(:source, null: false)
81
+ t.text(:tag_string, null: false)
73
82
  t.integer(:up_score, null: false)
74
- t.datetime(:updated_at)
83
+ t.datetime(:updated_at, index: true)
75
84
  t.bigint(:uploader_id, index: true)
85
+ t.index("description gin_trgm_ops", using: :gin)
86
+ t.index("source gin_trgm_ops", using: :gin)
87
+ t.index("tag_string gin_trgm_ops", using: :gin)
76
88
  end
77
89
 
78
90
  create_table(<%= table_ref("post_replacements") %>, id: :bigint, default: nil, force: :cascade) do |t|
79
- t.bigint(:approver_id)
91
+ t.bigint(:approver_id, index: true)
80
92
  t.datetime(:created_at, null: false)
81
93
  t.bigint(:creator_id, null: false, index: true)
82
- t.string(:file_ext, null: false)
83
- t.string(:file_name, null: false)
84
- t.bigint(:file_size, null: false)
85
- t.integer(:image_height, null: false)
86
- t.integer(:image_width, null: false)
87
- t.string(:md5, null: false)
94
+ t.string(:file_ext, null: false, index: true)
95
+ t.string(:file_name, null: false, index: true)
96
+ t.bigint(:file_size, null: false, index: true)
97
+ t.integer(:image_height, null: false, index: true)
98
+ t.integer(:image_width, null: false, index: true)
99
+ t.string(:md5, null: false, index: true)
88
100
  t.bigint(:post_id, null: false, index: true)
89
101
  t.text(:reason, null: false)
90
102
  t.text(:source)
91
103
  t.string(:status, null: false, index: true)
92
104
  t.datetime(:updated_at, null: false)
105
+ t.index("reason gin_trgm_ops", using: :gin)
106
+ t.index("source gin_trgm_ops", using: :gin)
93
107
  end
94
108
 
95
109
  create_table(<%= table_ref("post_versions") %>, id: :bigint, default: nil, force: :cascade) do |t|
96
110
  t.text(:added_locked_tags, array: true, null: false, default: [])
97
111
  t.text(:added_tags, array: true, null: false, default: [])
98
112
  t.text(:description)
99
- t.boolean(:description_changed, null: false)
113
+ t.boolean(:description_changed, null: false, index: true)
100
114
  t.text(:locked_tags)
101
- t.boolean(:parent_changed, null: false)
115
+ t.boolean(:parent_changed, null: false, index: true)
102
116
  t.bigint(:parent_id)
103
- t.bigint(:post_id, null: false)
117
+ t.bigint(:post_id, null: false, index: true)
104
118
  t.string(:rating)
105
- t.boolean(:rating_changed, null: false)
119
+ t.boolean(:rating_changed, null: false, index: true)
106
120
  t.text(:reason)
107
121
  t.text(:removed_locked_tags, array: true, null: false, default: [])
108
122
  t.text(:removed_tags, array: true, null: false, default: [])
109
123
  t.text(:source)
110
- t.boolean(:source_changed, null: false)
124
+ t.boolean(:source_changed, null: false, index: true)
111
125
  t.text(:tags)
112
126
  t.datetime(:updated_at, null: false, index: true)
113
- t.bigint(:updater_id, null: false, index: true)
114
- t.integer(:version, null: false)
127
+ t.bigint(:updater_id, index: true)
128
+ t.integer(:version, null: false, index: true)
129
+ t.index("description gin_trgm_ops", using: :gin)
130
+ t.index("locked_tags gin_trgm_ops", using: :gin)
131
+ t.index("reason gin_trgm_ops", using: :gin)
132
+ t.index("source gin_trgm_ops", using: :gin)
133
+ t.index("tags gin_trgm_ops", using: :gin)
115
134
  end
116
135
 
117
136
  create_table(<%= table_ref("tag_aliases") %>, id: :bigint, default: nil, force: :cascade) do |t|
@@ -119,6 +138,9 @@ class CreateE621Tables < ActiveRecord::Migration<%= migration_version %>
119
138
  t.string(:consequent_name, null: false, index: true)
120
139
  t.datetime(:created_at)
121
140
  t.string(:status, null: false, index: true)
141
+ t.index(%i[status antecedent_name])
142
+ t.index("antecedent_name gin_trgm_ops", using: :gin)
143
+ t.index("consequent_name gin_trgm_ops", using: :gin)
122
144
  end
123
145
 
124
146
  create_table(<%= table_ref("tag_implications") %>, id: :bigint, default: nil, force: :cascade) do |t|
@@ -126,22 +148,34 @@ class CreateE621Tables < ActiveRecord::Migration<%= migration_version %>
126
148
  t.string(:consequent_name, null: false, index: true)
127
149
  t.datetime(:created_at)
128
150
  t.string(:status, null: false, index: true)
151
+ t.index(%i[status consequent_name])
152
+ t.index(%i[status antecedent_name])
153
+ t.index("antecedent_name gin_trgm_ops", using: :gin)
154
+ t.index("consequent_name gin_trgm_ops", using: :gin)
129
155
  end
130
156
 
131
157
  create_table(<%= table_ref("tags") %>, id: :bigint, default: nil, force: :cascade) do |t|
132
158
  t.string(:category, null: false, index: true)
133
159
  t.string(:name, null: false, index: { unique: true })
134
- t.integer(:post_count, null: false)
160
+ t.integer(:post_count, null: false, index: { order: { post_count: :desc } })
161
+ t.index(%i[category post_count], order: { post_count: :desc })
162
+ t.index("name gin_trgm_ops", using: :gin)
135
163
  end
136
164
 
137
165
  create_table(<%= table_ref("wiki_pages") %>, id: :bigint, default: nil, force: :cascade) do |t|
138
- t.text(:body, null: false)
166
+ t.text(:body, null: false, index: true)
139
167
  t.datetime(:created_at, null: false)
140
- t.bigint(:creator_id, index: true)
141
- t.boolean(:is_locked, null: false)
168
+ t.bigint(:creator_id, null: false, index: true)
169
+ t.boolean(:is_locked, null: false, index: true)
142
170
  t.string(:title, null: false, index: { unique: true })
143
- t.datetime(:updated_at)
144
- t.bigint(:uploader_id)
171
+ t.datetime(:updated_at, null: false)
172
+ t.bigint(:updater_id, index: true)
173
+ t.index("body gin_trgm_ops", using: :gin)
174
+ t.index("title gin_trgm_ops", using: :gin)
175
+ end
176
+
177
+ create_table(<%= table_ref("row_counts") %>, primary_key: :table_name, id: :string, force: :cascade) do |t|
178
+ t.bigint(:count, null: false, default: 0)
145
179
  end
146
180
  end
147
181
  end
@@ -13,5 +13,14 @@ module ActiveRecord
13
13
 
14
14
  sig { params(attributes: T::Array[T::Hash[Symbol, T.untyped]], kwargs: T.untyped).returns(T.untyped) }
15
15
  def self.upsert_all(attributes, **kwargs); end
16
+
17
+ sig { params(value: T.untyped).void }
18
+ def self.primary_key=(value); end
19
+
20
+ sig { params(kwargs: T.untyped).returns(T.untyped) }
21
+ def self.find_by(**kwargs); end
22
+
23
+ sig { params(kwargs: T.untyped).returns(T.untyped) }
24
+ def self.where(**kwargs); end
16
25
  end
17
26
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: e621_export_downloader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.12
4
+ version: 0.0.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Donovan_DMC
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2026-06-13 00:00:00.000000000 Z
10
+ date: 2026-06-14 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: csv
@@ -86,6 +86,7 @@ files:
86
86
  - lib/e621/post.rb
87
87
  - lib/e621/post_replacement.rb
88
88
  - lib/e621/post_version.rb
89
+ - lib/e621/row_count.rb
89
90
  - lib/e621/tag.rb
90
91
  - lib/e621/tag_alias.rb
91
92
  - lib/e621/tag_implication.rb