ingestr 0.12.11__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/main.py CHANGED
@@ -451,7 +451,7 @@ def ingest(
451
451
  pipelines_dir = tempfile.mkdtemp()
452
452
  is_pipelines_dir_temp = True
453
453
 
454
- dlt_dest = destination.dlt_dest(uri=dest_uri)
454
+ dlt_dest = destination.dlt_dest(uri=dest_uri, dest_table=dest_table)
455
455
  validate_loader_file_format(dlt_dest, loader_file_format)
456
456
 
457
457
  if partition_by:
@@ -1,6 +1,5 @@
1
1
  import base64
2
2
  import csv
3
- import gzip
4
3
  import json
5
4
  import os
6
5
  import shutil
@@ -8,7 +7,11 @@ import tempfile
8
7
  from urllib.parse import parse_qs, quote, urlparse
9
8
 
10
9
  import dlt
10
+ import pyarrow.parquet # type: ignore
11
11
  from dlt.common.configuration.specs import AwsCredentials
12
+ from dlt.destinations.impl.clickhouse.configuration import (
13
+ ClickHouseCredentials,
14
+ )
12
15
 
13
16
 
14
17
  class GenericSqlDestination:
@@ -181,19 +184,17 @@ class CsvDestination(GenericSqlDestination):
181
184
  if output_path.count("/") > 1:
182
185
  os.makedirs(os.path.dirname(output_path), exist_ok=True)
183
186
 
184
- with gzip.open(first_file_path, "rt", encoding="utf-8") as jsonl_file: # type: ignore
185
- with open(output_path, "w", newline="") as csv_file:
186
- csv_writer = None
187
- for line in jsonl_file:
188
- json_obj = filter_keys(json.loads(line))
189
- if csv_writer is None:
190
- csv_writer = csv.DictWriter(
191
- csv_file, fieldnames=json_obj.keys()
192
- )
193
- csv_writer.writeheader()
194
-
195
- csv_writer.writerow(json_obj)
196
-
187
+ table = pyarrow.parquet.read_table(first_file_path)
188
+ rows = table.to_pylist()
189
+ with open(output_path, "w", newline="") as csv_file:
190
+ csv_writer = None
191
+ for row in rows:
192
+ row = filter_keys(row)
193
+ if csv_writer is None:
194
+ csv_writer = csv.DictWriter(csv_file, fieldnames=row.keys())
195
+ csv_writer.writeheader()
196
+
197
+ csv_writer.writerow(row)
197
198
  shutil.rmtree(self.temp_path)
198
199
 
199
200
 
@@ -261,3 +262,68 @@ class AthenaDestination:
261
262
 
262
263
  def post_load(self):
263
264
  pass
265
+
266
+
267
class ClickhouseDestination:
    """dlt destination wrapper for ClickHouse, configured from a clickhouse:// URI."""

    def dlt_dest(self, uri: str, **kwargs):
        """Build a dlt ClickHouse destination from the given connection URI.

        The target database is taken from the ``dest_table`` keyword argument
        (expected as ``<database>.<table>``) when it is present and contains a
        dot; otherwise it falls back to the URI path. This guards against a
        ``None`` or dot-free ``dest_table``, which previously either crashed
        (AttributeError on ``.split``) or misused the table name as the
        database name.

        Raises:
            ValueError: if username, password, host, or port is missing
                from the URI.
        """
        parsed_uri = urlparse(uri)

        # Prefer the database encoded in dest_table ("<db>.<table>");
        # fall back to the URI path when dest_table is absent, None, or
        # has no dot to split on.
        dest_table = kwargs.get("dest_table")
        if dest_table and "." in dest_table:
            database = dest_table.split(".")[0]
        else:
            database = parsed_uri.path.lstrip("/")

        username = parsed_uri.username
        if not username:
            raise ValueError(
                "A username is required to connect to the ClickHouse database."
            )

        password = parsed_uri.password
        if not password:
            raise ValueError(
                "A password is required to authenticate with the ClickHouse database."
            )

        host = parsed_uri.hostname
        if not host:
            raise ValueError(
                "The hostname or IP address of the ClickHouse server is required to establish a connection."
            )

        port = parsed_uri.port
        if not port:
            raise ValueError(
                "The TCP port of the ClickHouse server is required to establish a connection."
            )

        query_params = parse_qs(parsed_uri.query)
        # ClickHouse's HTTP interface defaults to 8123 unless overridden
        # via a ?http_port=... query parameter on the URI.
        http_port = (
            int(query_params["http_port"][0]) if "http_port" in query_params else 8123
        )

        credentials = ClickHouseCredentials(
            {
                "host": host,
                "port": port,
                "username": username,
                "password": password,
                "database": database,
                "http_port": http_port,
                "secure": 0,
            }
        )

        return dlt.destinations.clickhouse(credentials=credentials)

    def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
        """Return dlt run parameters; ``table`` must be '<schema>.<table>'.

        Raises:
            ValueError: if ``table`` is not exactly two dot-separated fields.
        """
        table_fields = table.split(".")
        if len(table_fields) != 2:
            raise ValueError("Table name must be in the format <schema>.<table>")
        return {
            "table_name": table_fields[-1],
        }

    def post_load(self):
        """No post-load work is required for ClickHouse."""
        pass
ingestr/src/factory.py CHANGED
@@ -6,6 +6,7 @@ from dlt.common.destination import Destination
6
6
  from ingestr.src.destinations import (
7
7
  AthenaDestination,
8
8
  BigQueryDestination,
9
+ ClickhouseDestination,
9
10
  CsvDestination,
10
11
  DatabricksDestination,
11
12
  DuckDBDestination,
@@ -146,6 +147,8 @@ class SourceDestinationFactory:
146
147
  "synapse": SynapseDestination,
147
148
  "csv": CsvDestination,
148
149
  "athena": AthenaDestination,
150
+ "clickhouse+native": ClickhouseDestination,
151
+ "clickhouse": ClickhouseDestination,
149
152
  }
150
153
 
151
154
  def __init__(self, source_uri: str, destination_uri: str):
ingestr/src/sources.py CHANGED
@@ -1568,6 +1568,7 @@ class GCSSource:
1568
1568
 
1569
1569
  return readers(bucket_url, fs, path_to_file).with_resources(endpoint)
1570
1570
 
1571
+
1571
1572
  class GoogleAdsSource:
1572
1573
  def handles_incrementality(self) -> bool:
1573
1574
  return True
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.12.11"
1
+ __version__ = "0.13.1"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.12.11
3
+ Version: 0.13.1
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -15,6 +15,9 @@ Classifier: Programming Language :: Python :: 3
15
15
  Classifier: Topic :: Database
16
16
  Requires-Python: >=3.9
17
17
  Requires-Dist: asana==3.2.3
18
+ Requires-Dist: clickhouse-connect==0.8.14
19
+ Requires-Dist: clickhouse-driver==0.2.9
20
+ Requires-Dist: clickhouse-sqlalchemy==0.2.7
18
21
  Requires-Dist: confluent-kafka>=2.6.1
19
22
  Requires-Dist: databricks-sql-connector==2.9.3
20
23
  Requires-Dist: dataclasses-json==0.6.7
@@ -1,14 +1,14 @@
1
- ingestr/main.py,sha256=fRWnyoPzMvvxTa61EIAP_dsKu0B_0yOwoyt0Slq9WQU,24723
1
+ ingestr/main.py,sha256=ufn8AcM2ID80ChUApJzYDjnQaurMXOkYfTm6GzAggSQ,24746
2
2
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
3
  ingestr/src/blob.py,sha256=XDk_XqmU_He4sQ1brY3ceoZgpq_ZBZihz1gHW9MzqUk,1381
4
- ingestr/src/destinations.py,sha256=zcHJIIHAZmcD9sJomd6G1Bc-1KsxnBD2aByOSV_9L3g,8850
4
+ ingestr/src/destinations.py,sha256=tY9-1mOyGkXl5dbSiE-eOsK-jnbYIu4EaoqhKDZ5JuU,10829
5
5
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
6
- ingestr/src/factory.py,sha256=D__Oy029z6y2OsAUMGab5K5ZmYhRXxDbD_SDc21b9Eo,4746
6
+ ingestr/src/factory.py,sha256=3XM2rilA69vkkOCHNzUt1XqCOc3gLMnOnlQmW5d1V5s,4870
7
7
  ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
8
- ingestr/src/sources.py,sha256=jIq1qVj8_uOVbdrVuvs2uHkrLydd1i8XHMx5vhPVqAo,61682
8
+ ingestr/src/sources.py,sha256=VBuD6ngMHKaCLeYZ9Oe9tw67578hPc1dP_5iBNtEJdM,61683
9
9
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
10
10
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
11
- ingestr/src/version.py,sha256=92OWM_xUUgc7wxFngCUAzVKFahsSWsF4UXOgDEn2uVI,24
11
+ ingestr/src/version.py,sha256=Zg3oo58_HXe_ieb_PwWnYkKGH2zTvu6G2jly-7GnPGo,23
12
12
  ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
13
13
  ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
14
14
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
@@ -100,8 +100,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
100
100
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
101
101
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
102
102
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
103
- ingestr-0.12.11.dist-info/METADATA,sha256=fxNa7pb3GLEvLuUjHSOviflBwIBJto0ck1PyQp893jU,8127
104
- ingestr-0.12.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
105
- ingestr-0.12.11.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
106
- ingestr-0.12.11.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
107
- ingestr-0.12.11.dist-info/RECORD,,
103
+ ingestr-0.13.1.dist-info/METADATA,sha256=c2bUEbUHRYoKiHXNgHSbzNsQL3M2dLiCZK1fzbjzylU,8252
104
+ ingestr-0.13.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
105
+ ingestr-0.13.1.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
106
+ ingestr-0.13.1.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
107
+ ingestr-0.13.1.dist-info/RECORD,,