data_drain 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 05fca5dc293bebcb71247b8659d52933d8a5baca6b73e38ba9d9b21236c02643
4
- data.tar.gz: c7d89c3fb45dc4deb3b871bd403a2e1b39172977893882df2658deb8eafcb22f
3
+ metadata.gz: a5a4767539f7a1e03be113cbdb218eba708228e2d4255734bd6150a913e414a0
4
+ data.tar.gz: d7a5d26b5ce4cb545a5e2132e09d72b65569aad6c2716b681f9c07dcfea80747
5
5
  SHA512:
6
- metadata.gz: fce9cc7ecc7008136c456ed3442bdd4b6c8171e0b28c37a1ba6bc897866b3e4469dfd7af8285a9b1022c70f00cf8e14a0e16a8fd280038f2a0b604418471be4b
7
- data.tar.gz: 766e0ea20985a90e39fd84727318b6b1a753bad695af44f78bbf94da088f3aef06c1b428811af27fdb46466c9c694a70974ef113f9ee9bdb90b3a616fe353f9c
6
+ metadata.gz: 5b2c335c98d509d951d6fdea54ad3068c2fd539a88033ef0d4be8e6c12127d019b24086b02eb60c31e63d65ba99bd51daa2d10e595b46c21a01ebdc1516082dc
7
+ data.tar.gz: 95a6fe6c0b97862c624668a8123383f0206744bfecf09f04e57bc17cc95eba27ea86f0f5d083dd8e948f9490eb640189f8bfcc7b9a39a2c924a419cbe54edda4
data/CHANGELOG.md CHANGED
@@ -1,8 +1,16 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.1.7] - 2026-03-16
4
+
5
+ - Se agregó soporte para idle_in_transaction_session_timeout.
6
+
7
+ ## [0.1.6] - 2026-03-16
8
+
9
+ - Se agregó el tmp_directory para duckdb.
10
+
3
11
  ## [0.1.5] - 2026-03-16
4
12
 
5
- - Se agrego el attacha para duckdb.
13
+ - Se agregó el attach para duckdb.
6
14
 
7
15
  ## [0.1.4] - 2026-03-16
8
16
 
data/README.md CHANGED
@@ -47,12 +47,26 @@ DataDrain.configure do |config|
47
47
  config.db_pass = ENV.fetch('DB_PASS', '')
48
48
  config.db_name = ENV.fetch('DB_NAME', 'core_production')
49
49
 
50
- # Rendimiento y Tuning
50
+ # Rendimiento y Tuning de Postgres
51
51
  config.batch_size = 5000 # Registros a borrar por transacción
52
52
  config.throttle_delay = 0.5 # Segundos de pausa entre borrados
53
+
54
+ # Timeout de inactividad de transacciones en PostgreSQL (en milisegundos).
55
+ # Útil establecerlo en 0 para evitar que la conexión se cierre prematuramente
56
+ # durante el borrado de grandes volúmenes de datos.
57
+ config.idle_in_transaction_session_timeout = 0
58
+
53
59
  config.logger = Rails.logger
54
60
 
61
+ # Tuning de DuckDB
62
+ # Límite máximo de RAM para las consultas en memoria de DuckDB (ej. '2GB', '512MB').
63
+ # Evita que el proceso falle por OOM (Out Of Memory) si el contenedor o servidor tiene memoria limitada.
55
64
  config.limit_ram = '2GB'
65
+
66
+ # Directorio temporal de DuckDB para desbordar memoria (spill to disk) durante
67
+ # transformaciones pesadas o creación de archivos Parquet masivos.
68
+ # Es muy recomendable que este directorio resida en un disco SSD/NVMe rápido.
69
+ config.tmp_directory = '/tmp/duckdb_work'
56
70
  end
57
71
  ```
58
72
 
@@ -8,7 +8,8 @@ module DataDrain
8
8
  attr_accessor :storage_mode, :aws_region,
9
9
  :aws_access_key_id, :aws_secret_access_key,
10
10
  :db_host, :db_port, :db_user, :db_pass, :db_name,
11
- :batch_size, :throttle_delay, :logger, :limit_ram
11
+ :batch_size, :throttle_delay, :logger, :limit_ram, :tmp_directory,
12
+ :idle_in_transaction_session_timeout
12
13
 
13
14
  def initialize
14
15
  @storage_mode = :local
@@ -17,6 +18,8 @@ module DataDrain
17
18
  @batch_size = 5000
18
19
  @throttle_delay = 0.5
19
20
  @limit_ram = nil # eg 2GB
21
+ @tmp_directory = nil # eg /tmp/duckdb_work
22
+ @idle_in_transaction_session_timeout = 0
20
23
  @logger = Logger.new($stdout)
21
24
  end
22
25
 
@@ -81,6 +81,7 @@ module DataDrain
81
81
  def setup_duckdb
82
82
  @duckdb.query("INSTALL postgres; LOAD postgres;")
83
83
  @duckdb.query("SET max_memory='#{@config.limit_ram}';") if @config.limit_ram.present?
84
+ @duckdb.query("SET temp_directory='#{@config.tmp_directory}'") if @config.tmp_directory.present?
84
85
  @duckdb.query("ATTACH '#{@config.duckdb_connection_string}' AS pg_source (TYPE POSTGRES, READ_ONLY)")
85
86
 
86
87
  # 💡 Magia del Adapter: Él sabe si cargar httpfs y setear credenciales o no hacer nada
@@ -156,6 +157,10 @@ module DataDrain
156
157
  dbname: @config.db_name
157
158
  )
158
159
 
160
+ if @config.idle_in_transaction_session_timeout.present?
161
+ conn.exec("SET idle_in_transaction_session_timeout = #{@config.idle_in_transaction_session_timeout};")
162
+ end
163
+
159
164
  loop do
160
165
  sql = <<~SQL
161
166
  DELETE FROM #{@table_name}
@@ -37,6 +37,9 @@ module DataDrain
37
37
  return false
38
38
  end
39
39
 
40
+ @duckdb.query("SET max_memory='#{@config.limit_ram}';") if @config.limit_ram.present?
41
+ @duckdb.query("SET temp_directory='#{@config.tmp_directory}'") if @config.tmp_directory.present?
42
+
40
43
  @adapter.setup_duckdb(@duckdb)
41
44
 
42
45
  # Determinamos la función lectora de DuckDB según la extensión del archivo
@@ -30,6 +30,11 @@ module DataDrain
30
30
  Thread.current[:data_drain_duckdb_conn] ||= begin
31
31
  db = DuckDB::Database.open(":memory:")
32
32
  conn = db.connect
33
+
34
+ config = DataDrain.configuration
35
+ conn.query("SET max_memory='#{config.limit_ram}';") if config.limit_ram.present?
36
+ conn.query("SET temp_directory='#{config.tmp_directory}'") if config.tmp_directory.present?
37
+
33
38
  DataDrain::Storage.adapter.setup_duckdb(conn)
34
39
  conn
35
40
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module DataDrain
4
- VERSION = "0.1.5"
4
+ VERSION = "0.1.7"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_drain
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.1.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gabriel