ingestr 0.12.11__py3-none-any.whl → 0.13.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/main.py +1 -1
- ingestr/src/destinations.py +80 -14
- ingestr/src/factory.py +3 -0
- ingestr/src/sources.py +1 -0
- ingestr/src/version.py +1 -1
- {ingestr-0.12.11.dist-info → ingestr-0.13.1.dist-info}/METADATA +4 -1
- {ingestr-0.12.11.dist-info → ingestr-0.13.1.dist-info}/RECORD +10 -10
- {ingestr-0.12.11.dist-info → ingestr-0.13.1.dist-info}/WHEEL +0 -0
- {ingestr-0.12.11.dist-info → ingestr-0.13.1.dist-info}/entry_points.txt +0 -0
- {ingestr-0.12.11.dist-info → ingestr-0.13.1.dist-info}/licenses/LICENSE.md +0 -0
ingestr/main.py
CHANGED
|
@@ -451,7 +451,7 @@ def ingest(
|
|
|
451
451
|
pipelines_dir = tempfile.mkdtemp()
|
|
452
452
|
is_pipelines_dir_temp = True
|
|
453
453
|
|
|
454
|
-
dlt_dest = destination.dlt_dest(uri=dest_uri)
|
|
454
|
+
dlt_dest = destination.dlt_dest(uri=dest_uri, dest_table=dest_table)
|
|
455
455
|
validate_loader_file_format(dlt_dest, loader_file_format)
|
|
456
456
|
|
|
457
457
|
if partition_by:
|
ingestr/src/destinations.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import base64
|
|
2
2
|
import csv
|
|
3
|
-
import gzip
|
|
4
3
|
import json
|
|
5
4
|
import os
|
|
6
5
|
import shutil
|
|
@@ -8,7 +7,11 @@ import tempfile
|
|
|
8
7
|
from urllib.parse import parse_qs, quote, urlparse
|
|
9
8
|
|
|
10
9
|
import dlt
|
|
10
|
+
import pyarrow.parquet # type: ignore
|
|
11
11
|
from dlt.common.configuration.specs import AwsCredentials
|
|
12
|
+
from dlt.destinations.impl.clickhouse.configuration import (
|
|
13
|
+
ClickHouseCredentials,
|
|
14
|
+
)
|
|
12
15
|
|
|
13
16
|
|
|
14
17
|
class GenericSqlDestination:
|
|
@@ -181,19 +184,17 @@ class CsvDestination(GenericSqlDestination):
|
|
|
181
184
|
if output_path.count("/") > 1:
|
|
182
185
|
os.makedirs(os.path.dirname(output_path), exist_ok=True)
|
|
183
186
|
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
csv_writer.writerow(json_obj)
|
|
196
|
-
|
|
187
|
+
table = pyarrow.parquet.read_table(first_file_path)
|
|
188
|
+
rows = table.to_pylist()
|
|
189
|
+
with open(output_path, "w", newline="") as csv_file:
|
|
190
|
+
csv_writer = None
|
|
191
|
+
for row in rows:
|
|
192
|
+
row = filter_keys(row)
|
|
193
|
+
if csv_writer is None:
|
|
194
|
+
csv_writer = csv.DictWriter(csv_file, fieldnames=row.keys())
|
|
195
|
+
csv_writer.writeheader()
|
|
196
|
+
|
|
197
|
+
csv_writer.writerow(row)
|
|
197
198
|
shutil.rmtree(self.temp_path)
|
|
198
199
|
|
|
199
200
|
|
|
@@ -261,3 +262,68 @@ class AthenaDestination:
|
|
|
261
262
|
|
|
262
263
|
def post_load(self):
|
|
263
264
|
pass
|
|
265
|
+
|
|
266
|
+
|
|
267
|
+
class ClickhouseDestination:
    """Destination adapter that turns a ClickHouse URI into a dlt destination."""

    def dlt_dest(self, uri: str, **kwargs):
        """Build the dlt ClickHouse destination described by ``uri``.

        Args:
            uri: Connection URI carrying username, password, host and port.
                An optional ``http_port`` query parameter overrides the
                default HTTP port of 8123.
            **kwargs: May contain ``dest_table``. When it is a non-empty
                string, the part before the first ``.`` is used as the
                database name; otherwise the database is taken from the
                URI path.

        Returns:
            The object returned by ``dlt.destinations.clickhouse``.

        Raises:
            ValueError: If the username, password, host or port is missing
                from the URI.
        """
        parsed_uri = urlparse(uri)

        # The caller may pass dest_table=None explicitly, so test the value
        # rather than mere key presence; otherwise None.split() would raise
        # AttributeError and the URI-path fallback could never be reached.
        dest_table = kwargs.get("dest_table")
        if dest_table:
            database = dest_table.split(".")[0]
        else:
            database = parsed_uri.path.lstrip("/")

        username = parsed_uri.username
        if not username:
            raise ValueError(
                "A username is required to connect to the ClickHouse database."
            )

        password = parsed_uri.password
        if not password:
            raise ValueError(
                "A password is required to authenticate with the ClickHouse database."
            )

        host = parsed_uri.hostname
        if not host:
            raise ValueError(
                "The hostname or IP address of the ClickHouse server is required to establish a connection."
            )

        port = parsed_uri.port
        if not port:
            raise ValueError(
                "The TCP port of the ClickHouse server is required to establish a connection."
            )

        query_params = parse_qs(parsed_uri.query)
        http_port = (
            int(query_params["http_port"][0]) if "http_port" in query_params else 8123
        )

        credentials = ClickHouseCredentials(
            {
                "host": host,
                "port": port,
                "username": username,
                "password": password,
                "database": database,
                "http_port": http_port,
                # NOTE(review): hard-coded; presumably disables TLS --
                # confirm against the dlt ClickHouse credentials docs.
                "secure": 0,
            }
        )

        return dlt.destinations.clickhouse(credentials=credentials)

    def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
        """Return the run parameters for loading into ``table``.

        Args:
            uri: Destination URI (not used by this method).
            table: Qualified table name in the form ``<schema>.<table>``.

        Returns:
            A dict whose ``table_name`` entry is the unqualified table name.

        Raises:
            ValueError: If ``table`` does not consist of exactly two
                dot-separated parts.
        """
        table_fields = table.split(".")
        if len(table_fields) != 2:
            raise ValueError("Table name must be in the format <schema>.<table>")
        return {
            "table_name": table_fields[-1],
        }

    def post_load(self):
        """No post-load work is performed for this destination."""
        pass
|
ingestr/src/factory.py
CHANGED
|
@@ -6,6 +6,7 @@ from dlt.common.destination import Destination
|
|
|
6
6
|
from ingestr.src.destinations import (
|
|
7
7
|
AthenaDestination,
|
|
8
8
|
BigQueryDestination,
|
|
9
|
+
ClickhouseDestination,
|
|
9
10
|
CsvDestination,
|
|
10
11
|
DatabricksDestination,
|
|
11
12
|
DuckDBDestination,
|
|
@@ -146,6 +147,8 @@ class SourceDestinationFactory:
|
|
|
146
147
|
"synapse": SynapseDestination,
|
|
147
148
|
"csv": CsvDestination,
|
|
148
149
|
"athena": AthenaDestination,
|
|
150
|
+
"clickhouse+native": ClickhouseDestination,
|
|
151
|
+
"clickhouse": ClickhouseDestination,
|
|
149
152
|
}
|
|
150
153
|
|
|
151
154
|
def __init__(self, source_uri: str, destination_uri: str):
|
ingestr/src/sources.py
CHANGED
ingestr/src/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.12.11"
|
|
1
|
+
__version__ = "0.13.1"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.12.11
|
|
3
|
+
Version: 0.13.1
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -15,6 +15,9 @@ Classifier: Programming Language :: Python :: 3
|
|
|
15
15
|
Classifier: Topic :: Database
|
|
16
16
|
Requires-Python: >=3.9
|
|
17
17
|
Requires-Dist: asana==3.2.3
|
|
18
|
+
Requires-Dist: clickhouse-connect==0.8.14
|
|
19
|
+
Requires-Dist: clickhouse-driver==0.2.9
|
|
20
|
+
Requires-Dist: clickhouse-sqlalchemy==0.2.7
|
|
18
21
|
Requires-Dist: confluent-kafka>=2.6.1
|
|
19
22
|
Requires-Dist: databricks-sql-connector==2.9.3
|
|
20
23
|
Requires-Dist: dataclasses-json==0.6.7
|
|
@@ -1,14 +1,14 @@
|
|
|
1
|
-
ingestr/main.py,sha256=
|
|
1
|
+
ingestr/main.py,sha256=ufn8AcM2ID80ChUApJzYDjnQaurMXOkYfTm6GzAggSQ,24746
|
|
2
2
|
ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
|
|
3
3
|
ingestr/src/blob.py,sha256=XDk_XqmU_He4sQ1brY3ceoZgpq_ZBZihz1gHW9MzqUk,1381
|
|
4
|
-
ingestr/src/destinations.py,sha256=
|
|
4
|
+
ingestr/src/destinations.py,sha256=tY9-1mOyGkXl5dbSiE-eOsK-jnbYIu4EaoqhKDZ5JuU,10829
|
|
5
5
|
ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
|
|
6
|
-
ingestr/src/factory.py,sha256=
|
|
6
|
+
ingestr/src/factory.py,sha256=3XM2rilA69vkkOCHNzUt1XqCOc3gLMnOnlQmW5d1V5s,4870
|
|
7
7
|
ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
|
|
8
|
-
ingestr/src/sources.py,sha256=
|
|
8
|
+
ingestr/src/sources.py,sha256=VBuD6ngMHKaCLeYZ9Oe9tw67578hPc1dP_5iBNtEJdM,61683
|
|
9
9
|
ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
|
|
10
10
|
ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
|
|
11
|
-
ingestr/src/version.py,sha256=
|
|
11
|
+
ingestr/src/version.py,sha256=Zg3oo58_HXe_ieb_PwWnYkKGH2zTvu6G2jly-7GnPGo,23
|
|
12
12
|
ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
|
|
13
13
|
ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
|
|
14
14
|
ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
|
|
@@ -100,8 +100,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
|
|
|
100
100
|
ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
|
|
101
101
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
102
102
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
103
|
-
ingestr-0.
|
|
104
|
-
ingestr-0.
|
|
105
|
-
ingestr-0.
|
|
106
|
-
ingestr-0.
|
|
107
|
-
ingestr-0.
|
|
103
|
+
ingestr-0.13.1.dist-info/METADATA,sha256=c2bUEbUHRYoKiHXNgHSbzNsQL3M2dLiCZK1fzbjzylU,8252
|
|
104
|
+
ingestr-0.13.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
105
|
+
ingestr-0.13.1.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
106
|
+
ingestr-0.13.1.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
107
|
+
ingestr-0.13.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|