data_drain 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/lib/data_drain/configuration.rb +2 -1
- data/lib/data_drain/engine.rb +4 -2
- data/lib/data_drain/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: eb6e8dad588e5512397bb9ed8f99b50bb32dcacf6eb493fbef2dfcf0cb4f8289
|
|
4
|
+
data.tar.gz: 241eb6c7e340b25d95757f0e663489eba65cb1b9293426c81148c45c78ea534f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 04ca70eca218c7a9aadd292c4b8def9b4895c2b223a2d46f4297fdd4d078b1d8fd05153ce7410fda35f6783242dddfc1a25993c3b2b3db1aad81572da119cdf2
|
|
7
|
+
data.tar.gz: dd168e87460ac02365bbc7f1d9f80050c6a103de1f8f0820545933458cce7aa736c6edb4f9c5e8292f436253082ab43ba874182b14709aeb71430cece9a6f56b
|
data/CHANGELOG.md
CHANGED
(diff content for this file was not captured)
data/lib/data_drain/configuration.rb
CHANGED
|
@@ -8,7 +8,7 @@ module DataDrain
|
|
|
8
8
|
attr_accessor :storage_mode, :aws_region,
|
|
9
9
|
:aws_access_key_id, :aws_secret_access_key,
|
|
10
10
|
:db_host, :db_port, :db_user, :db_pass, :db_name,
|
|
11
|
-
:batch_size, :throttle_delay, :logger, :limit_ram
|
|
11
|
+
:batch_size, :throttle_delay, :logger, :limit_ram, :tmp_directory
|
|
12
12
|
|
|
13
13
|
def initialize
|
|
14
14
|
@storage_mode = :local
|
|
@@ -17,6 +17,7 @@ module DataDrain
|
|
|
17
17
|
@batch_size = 5000
|
|
18
18
|
@throttle_delay = 0.5
|
|
19
19
|
@limit_ram = nil # eg 2GB
|
|
20
|
+
@tmp_directory = nil # eg /tmp/duckdb_work
|
|
20
21
|
@logger = Logger.new($stdout)
|
|
21
22
|
end
|
|
22
23
|
|
data/lib/data_drain/engine.rb
CHANGED
|
@@ -81,6 +81,8 @@ module DataDrain
|
|
|
81
81
|
def setup_duckdb
|
|
82
82
|
@duckdb.query("INSTALL postgres; LOAD postgres;")
|
|
83
83
|
@duckdb.query("SET max_memory='#{@config.limit_ram}';") if @config.limit_ram.present?
|
|
84
|
+
@duckdb.query("SET temp_directory='#{@config.tmp_directory}'") if @config.tmp_directory.present?
|
|
85
|
+
@duckdb.query("ATTACH '#{@config.duckdb_connection_string}' AS pg_source (TYPE POSTGRES, READ_ONLY)")
|
|
84
86
|
|
|
85
87
|
# 💡 Magia del Adapter: Él sabe si cargar httpfs y setear credenciales o no hacer nada
|
|
86
88
|
@adapter.setup_duckdb(@duckdb)
|
|
@@ -91,7 +93,7 @@ module DataDrain
|
|
|
91
93
|
def get_postgres_count
|
|
92
94
|
pg_sql = "SELECT COUNT(*) AS row_count FROM public.#{@table_name} WHERE #{base_where_sql}"
|
|
93
95
|
pg_sql = pg_sql.gsub("'", "''")
|
|
94
|
-
query = "SELECT row_count FROM postgres_query('
|
|
96
|
+
query = "SELECT row_count FROM postgres_query('pg_source', '#{pg_sql}')"
|
|
95
97
|
@duckdb.query(query).first.first
|
|
96
98
|
end
|
|
97
99
|
|
|
@@ -109,7 +111,7 @@ module DataDrain
|
|
|
109
111
|
query = <<~SQL
|
|
110
112
|
COPY (
|
|
111
113
|
SELECT #{@select_sql}
|
|
112
|
-
FROM postgres_query('
|
|
114
|
+
FROM postgres_query('pg_source', '#{pg_sql}')
|
|
113
115
|
) TO '#{dest_path}'
|
|
114
116
|
(
|
|
115
117
|
FORMAT PARQUET,
|
data/lib/data_drain/version.rb
CHANGED