RubyGems - e621_export_downloader - Versions diffs - 0.0.13 → 0.0.14 - Mend

e621_export_downloader 0.0.13 → 0.0.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

checksums.yaml +4 -4
data/lib/e621/csv_importable.rb +73 -13
data/lib/e621_export_downloader/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: a133742764b3b599b2cfd4c3494391538730be19daae10e9df43956aa1913709
-  data.tar.gz: ec28fc8889a2ad4bef174f365985e89868068fe2f206b7325c65a3c881716942
+  metadata.gz: 89d525162aa1da43623f66a8bab4618ac17910c2c3e0db7a272e8149be91efb8
+  data.tar.gz: 6152f2a56d9ad995dd5868f2762d4a95c38e2b4d95a289b8806ef74870b50ea7
 SHA512:
-  metadata.gz: 74262c9fc4b22df8847f75e80abb3ac9951fa7e265a306b168dcf087c64946849c2df5432d75e77a1ae8b0cc5d5869addb2348bd3b217abc7dcf02395e316521
-  data.tar.gz: 61dfc91a23e7c34c69562534f9a42583f362558391264d12c4761b3f2368b9633f8d0f356939efcb78a3a27d71d5a2cefbf8047d536526e02dc8d41ec2b04853
+  metadata.gz: 494ccb09c9727311ec327cdc8926816bd0957f9d0a9455b6b5789364b30d4728d95763a99707b5861fb1d858092acaed1c220abd97dfb22f3f66e9de05fc6eaf
+  data.tar.gz: a0dbdb4a693365f88b5986ffcdef1a23190a3c6ce85e98297d344bf74b91bf4794700d5f0cdf4ab087cb695e0017f6464d21dfe5013419a29930d438e1786f43

data/lib/e621/csv_importable.rb CHANGED Viewed

@@ -1,8 +1,6 @@
 # frozen_string_literal: true
 # typed: true
-require("csv")
 module E621
   module CsvImportable
     extend(T::Sig)
@@ -17,22 +15,84 @@ module E621
       E621::RowCount.set(T.unsafe(self).table_name.split(".").last, count)
     end
-    sig { params(csv_path: String).returns(Integer) }
-    def import_from_csv(csv_path)
+    # Loads a CSV export into this model's table via PostgreSQL COPY.
+    #
+    # The file's raw bytes are streamed straight into COPY's native CSV parser
+    # (only the header line is read, for the column list) rather than parsing
+    # each row into a CSV::Row and re-serializing it in Ruby. That avoids a
+    # full single-threaded parse + re-encode of every row and preserves the
+    # exact bytes of fields containing embedded newlines/quotes. The row count
+    # is taken from COPY itself (PG::Result#cmd_tuples).
+    #
+    # truncate:         empty the table before loading (full reload).
+    # recreate_indexes: drop every secondary (non-PK, non-constraint) index
+    #                   before the COPY and rebuild it afterward. Building the
+    #                   GIN/btree indexes once over the finished table is far
+    #                   cheaper than maintaining them row-by-row during COPY,
+    #                   and CREATE INDEX can use parallel workers.
+    #
+    # Physical-storage policy (UNLOGGED) and session tuning
+    # (maintenance_work_mem, max_parallel_maintenance_workers, ...) are the
+    # caller's responsibility — set them around this call.
+    sig do
+      params(
+        csv_path:         String,
+        truncate:         T::Boolean,
+        recreate_indexes: T::Boolean,
+        chunk_bytes:      Integer,
+      ).returns(Integer)
+    end
+    def import_from_csv(csv_path, truncate: false, recreate_indexes: false, chunk_bytes: 1 << 20)
+      model = T.unsafe(self)
+      indexes = recreate_indexes ? secondary_index_definitions : {}
+      model.connection.execute("TRUNCATE #{model.quoted_table_name}") if truncate
+      indexes.each_key { |name| model.connection.execute("DROP INDEX IF EXISTS #{name}") }
+      count = copy_csv(csv_path, chunk_bytes: chunk_bytes)
+      indexes.each_value { |ddl| model.connection.execute(ddl) }
+      self.row_count = count
+      count
+    end
+    private
+    sig { params(csv_path: String, chunk_bytes: Integer).returns(Integer) }
+    def copy_csv(csv_path, chunk_bytes:)
       model = T.unsafe(self)
-      csv_headers = File.open(csv_path, "rb", &:readline).chomp.split(",").map(&:strip)
-      columns = csv_headers.map { |h| model.connection.quote_column_name(h) }.join(", ")
+      header = File.open(csv_path, "rb", &:readline)
+      columns = header.chomp.split(",").map { |h| model.connection.quote_column_name(h.strip) }.join(", ")
-      count = 0
       raw = model.connection.raw_connection
-      raw.copy_data("COPY #{model.quoted_table_name} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
-        CSV.foreach(csv_path, headers: true) do |row|
-          raw.put_copy_data(CSV.generate_line(T.cast(row, CSV::Row).fields))
-          count += 1
+      result = raw.copy_data("COPY #{model.quoted_table_name} (#{columns}) FROM STDIN WITH (FORMAT CSV)") do
+        File.open(csv_path, "rb") do |io|
+          io.readline # skip header row
+          while (chunk = io.read(chunk_bytes))
+            raw.put_copy_data(chunk)
+          end
         end
       end
-      self.row_count = count
-      count
+      result.cmd_tuples
+    end
+    # Every non-primary-key, non-constraint-backed index on the table, paired
+    # with the DDL needed to recreate it.
+    sig { returns(T::Hash[String, String]) }
+    def secondary_index_definitions
+      model = T.unsafe(self)
+      conn = model.connection
+      sql = <<~SQL
+        SELECT i.indexrelid::regclass::text AS name,
+               pg_get_indexdef(i.indexrelid)  AS ddl
+        FROM pg_index i
+        WHERE i.indrelid = #{conn.quote(model.quoted_table_name)}::regclass
+          AND NOT i.indisprimary
+          AND NOT EXISTS (
+            SELECT 1 FROM pg_constraint c WHERE c.conindid = i.indexrelid
+          )
+      SQL
+      conn.exec_query(sql).to_h { |r| [r["name"], r["ddl"]] }
     end
   end
 end

data/lib/e621_export_downloader/version.rb CHANGED Viewed

@@ -4,7 +4,7 @@
 # loaded by bundler
 module E621ExportDownloader
   module Constants
-    VERSION = "0.0.13"
+    VERSION = "0.0.14"
     WEBSITE = "https://github.com/DonovanDMC/E621ExportDownloader.rb"
   end
 end

metadata CHANGED Viewed

@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: e621_export_downloader
 version: !ruby/object:Gem::Version
-  version: 0.0.13
+  version: 0.0.14
 platform: ruby
 authors:
 - Donovan_DMC
 bindir: exe
 cert_chain: []
-date: 2026-06-13 00:00:00.000000000 Z
+date: 2026-06-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: csv