ingestr 0.13.60__py3-none-any.whl → 0.13.61__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic.
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/destinations.py +102 -45
- ingestr/src/factory.py +4 -0
- ingestr/src/isoc_pulse/__init__.py +1 -1
- ingestr/src/sources.py +1 -1
- {ingestr-0.13.60.dist-info → ingestr-0.13.61.dist-info}/METADATA +3 -2
- {ingestr-0.13.60.dist-info → ingestr-0.13.61.dist-info}/RECORD +10 -10
- {ingestr-0.13.60.dist-info → ingestr-0.13.61.dist-info}/WHEEL +0 -0
- {ingestr-0.13.60.dist-info → ingestr-0.13.61.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.60.dist-info → ingestr-0.13.61.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/buildinfo.py
CHANGED

@@ -1 +1 @@
-version = "v0.13.60"
+version = "v0.13.61"
ingestr/src/destinations.py
CHANGED

(Several removed lines are truncated in the upstream diff view; they are reproduced below only as far as they survive.)

@@ -1,3 +1,4 @@
+import abc
 import base64
 import csv
 import json
@@ -9,6 +10,7 @@ from urllib.parse import parse_qs, quote, urlparse
 import dlt
 import dlt.destinations.impl.filesystem.filesystem
 from dlt.common.configuration.specs import AwsCredentials
+from dlt.common.storages.configuration import FileSystemCredentials
 from dlt.destinations.impl.clickhouse.configuration import (
     ClickHouseCredentials,
 )
@@ -111,6 +113,14 @@ class BigQueryDestination:
     pass


+class CrateDBDestination(GenericSqlDestination):
+    def dlt_dest(self, uri: str, **kwargs):
+        uri = uri.replace("cratedb://", "postgres://")
+        import dlt_cratedb.impl.cratedb.factory
+
+        return dlt_cratedb.impl.cratedb.factory.cratedb(credentials=uri, **kwargs)
+
+
 class PostgresDestination(GenericSqlDestination):
     def dlt_dest(self, uri: str, **kwargs):
         return dlt.destinations.postgres(credentials=uri, **kwargs)
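CrateDB speaks the PostgreSQL wire protocol, which is why the new destination only rewrites the URI scheme before handing off to the dlt-cratedb factory. A minimal sketch of that rewrite, with placeholder host and credentials:

# Placeholder URI; mirrors the rewrite in CrateDBDestination.dlt_dest above.
uri = "cratedb://admin:secret@localhost:5432/doc"
rewritten = uri.replace("cratedb://", "postgres://")
assert rewritten == "postgres://admin:secret@localhost:5432/doc"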
@@ -386,43 +396,62 @@ class ClickhouseDestination:
     pass


-class
+class BlobFSClient(dlt.destinations.impl.filesystem.filesystem.FilesystemClient):
     @property
     def dataset_path(self):
         # override to remove dataset path
         return self.bucket_path


-class
+class BlobFS(dlt.destinations.filesystem):
     @property
     def client_class(self):
-        return
+        return BlobFSClient


-class
+class SqliteDestination(GenericSqlDestination):
     def dlt_dest(self, uri: str, **kwargs):
-        params = parse_qs(parsed_uri.query)
+        return dlt.destinations.sqlalchemy(credentials=uri)

+    def dlt_run_params(self, uri: str, table: str, **kwargs):
+        return {
+            # https://dlthub.com/docs/dlt-ecosystem/destinations/sqlalchemy#dataset-files
+            "dataset_name": "main",
+            "table_name": table,
+        }

-        secret_access_key = params.get("secret_access_key", [None])[0]
-        if secret_access_key is None:
-            raise MissingValueError("secret_access_key", "S3")

-        if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
-            raise ValueError("Invalid endpoint_url. Must be a valid URL.")
+class MySqlDestination(GenericSqlDestination):
+    def dlt_dest(self, uri: str, **kwargs):
+        return dlt.destinations.sqlalchemy(credentials=uri)

+    def dlt_run_params(self, uri: str, table: str, **kwargs):
+        parsed = urlparse(uri)
+        database = parsed.path.lstrip("/")
+        if not database:
+            raise ValueError("You need to specify a database")
+        return {
+            "dataset_name": database,
+            "table_name": table,
+        }
+
+
+class BlobStorageDestination(abc.ABC):
+    @abc.abstractmethod
+    def credentials(self, params: dict) -> FileSystemCredentials:
+        """Build credentials for the blob storage destination."""
+        pass
+
+    @property
+    @abc.abstractmethod
+    def protocol(self) -> str:
+        """The protocol used for the blob storage destination."""
+        pass
+
+    def dlt_dest(self, uri: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+        creds = self.credentials(params)

         dest_table = kwargs["dest_table"]
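The new BlobStorageDestination base class is a template method: subclasses supply only a protocol scheme and a credentials() builder, and inherit the shared URI parsing and filesystem wiring. A hypothetical sketch of a third backend plugging in; the class name, the "az" scheme, and the connection_string parameter are illustrative, not part of ingestr:

class AzureBlobDestination(BlobStorageDestination):
    @property
    def protocol(self) -> str:
        # Hypothetical scheme, chosen only to illustrate the pattern.
        return "az"

    def credentials(self, params: dict) -> FileSystemCredentials:
        # parse_qs produces lists, hence the [None] default and [0] index.
        connection_string = params.get("connection_string", [None])[0]
        if connection_string is None:
            raise MissingValueError("connection_string", "Azure Blob")
        return connection_string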
@@ -442,7 +471,7 @@ class S3Destination:
         base_path = "/".join(table_parts[:-1])

         opts = {
-            "bucket_url": f"
+            "bucket_url": f"{self.protocol}://{base_path}",
             "credentials": creds,
             # supresses dlt warnings about dataset name normalization.
             # we don't use dataset names in S3 so it's fine to disable this.
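After the refactor, the only backend-specific part of the bucket URL is the scheme: the destination table doubles as the object path, and everything before its last segment becomes the bucket URL. A small sketch, assuming table_parts is the slash-split destination table and using a hypothetical my-bucket/exports/users table:

table_parts = "my-bucket/exports/users".split("/")  # hypothetical dest table
base_path = "/".join(table_parts[:-1])              # "my-bucket/exports"
protocol = "s3"                                     # "gs" for GCSDestination
bucket_url = f"{protocol}://{base_path}"            # "s3://my-bucket/exports"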
@@ -452,7 +481,7 @@ class S3Destination:
         if layout is not None:
             opts["layout"] = layout

-        return
+        return BlobFS(**opts)  # type: ignore

     def validate_table(self, table: str):
         table = table.strip("/ ")
@@ -470,28 +499,56 @@ class S3Destination:
     pass


-class
-    def
-        "
-        "table_name": table,
-    }
+class S3Destination(BlobStorageDestination):
+    @property
+    def protocol(self) -> str:
+        return "s3"

+    def credentials(self, params: dict) -> FileSystemCredentials:
+        access_key_id = params.get("access_key_id", [None])[0]
+        if access_key_id is None:
+            raise MissingValueError("access_key_id", "S3")

+        secret_access_key = params.get("secret_access_key", [None])[0]
+        if secret_access_key is None:
+            raise MissingValueError("secret_access_key", "S3")

+        endpoint_url = params.get("endpoint_url", [None])[0]
+        if endpoint_url is not None:
+            parsed_endpoint = urlparse(endpoint_url)
+            if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
+                raise ValueError("Invalid endpoint_url. Must be a valid URL.")

+        return AwsCredentials(
+            aws_access_key_id=access_key_id,
+            aws_secret_access_key=secret_access_key,
+            endpoint_url=endpoint_url,
+        )
+
+
+class GCSDestination(BlobStorageDestination):
+    @property
+    def protocol(self) -> str:
+        return "gs"
+
+    def credentials(self, params: dict) -> FileSystemCredentials:
+        """Builds GCS credentials from the provided parameters."""
+        credentials_path = params.get("credentials_path")
+        credentials_base64 = params.get("credentials_base64")
+        credentials_available = any(
+            map(
+                lambda x: x is not None,
+                [credentials_path, credentials_base64],
+            )
+        )
+        if credentials_available is False:
+            raise MissingValueError("credentials_path or credentials_base64", "GCS")
+
+        credentials = None
+        if credentials_path:
+            with open(credentials_path[0], "r") as f:
+                credentials = json.load(f)
+        else:
+            credentials = json.loads(base64.b64decode(credentials_base64[0]).decode())  # type: ignore
+
+        return credentials
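GCSDestination accepts the service-account key either as a file path (credentials_path) or inline as base64 (credentials_base64), decoding the latter back into the JSON dict handed to dlt. A sketch of preparing the inline variant, assuming a service-account file at sa.json (hypothetical path):

import base64
import json
from urllib.parse import urlencode

# Encode the service-account JSON for use as a query parameter.
with open("sa.json", "rb") as f:
    encoded = base64.b64encode(f.read()).decode()

uri = "gs://my-bucket/exports?" + urlencode({"credentials_base64": encoded})

# What GCSDestination.credentials() recovers on the other side:
decoded = json.loads(base64.b64decode(encoded).decode())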
ingestr/src/factory.py
CHANGED

@@ -7,9 +7,11 @@ from ingestr.src.destinations import (
     AthenaDestination,
     BigQueryDestination,
     ClickhouseDestination,
+    CrateDBDestination,
     CsvDestination,
     DatabricksDestination,
     DuckDBDestination,
+    GCSDestination,
     MsSQLDestination,
     MySqlDestination,
     PostgresDestination,
@@ -181,6 +183,7 @@ class SourceDestinationFactory:
     }
     destinations: Dict[str, Type[DestinationProtocol]] = {
         "bigquery": BigQueryDestination,
+        "cratedb": CrateDBDestination,
         "databricks": DatabricksDestination,
         "duckdb": DuckDBDestination,
         "mssql": MsSQLDestination,
@@ -197,6 +200,7 @@ class SourceDestinationFactory:
         "clickhouse+native": ClickhouseDestination,
         "clickhouse": ClickhouseDestination,
         "s3": S3Destination,
+        "gs": GCSDestination,
         "sqlite": SqliteDestination,
         "mysql": MySqlDestination,
         "mysql+pymysql": MySqlDestination,
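The factory keys destinations off the URI scheme, so these two entries make cratedb:// and gs:// URIs routable end to end. A reduced sketch of that dispatch; resolve() is an illustrative helper, not ingestr API:

from urllib.parse import urlparse

from ingestr.src.destinations import CrateDBDestination, GCSDestination, S3Destination

# Subset of the scheme-to-class map registered above.
destinations = {
    "cratedb": CrateDBDestination,
    "s3": S3Destination,
    "gs": GCSDestination,
}

def resolve(uri: str):
    scheme = urlparse(uri).scheme
    if scheme not in destinations:
        raise ValueError(f"Unsupported destination scheme: {scheme}")
    return destinations[scheme]

resolve("gs://my-bucket/exports?credentials_path=/tmp/sa.json")  # -> GCSDestination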
ingestr/src/sources.py
CHANGED

@@ -1885,7 +1885,7 @@ class GCSSource:
             endpoint = blob.parse_endpoint(path_to_file)
         except blob.UnsupportedEndpointError:
             raise ValueError(
-                "
+                "GCS Source only supports specific formats files: csv, jsonl, parquet"
             )
         except Exception as e:
             raise ValueError(
{ingestr-0.13.60.dist-info → ingestr-0.13.61.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.60
+Version: 0.13.61
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -47,6 +47,7 @@ Requires-Dist: databricks-sqlalchemy==1.0.2
 Requires-Dist: dataclasses-json==0.6.7
 Requires-Dist: decorator==5.2.1
 Requires-Dist: deprecation==2.1.0
+Requires-Dist: dlt-cratedb==0.0.1
 Requires-Dist: dlt==1.10.0
 Requires-Dist: dnspython==2.7.0
 Requires-Dist: duckdb-engine==0.17.0
@@ -305,7 +306,7 @@ Pull requests are welcome. However, please open an issue first to discuss what y
     <tr>
         <td>CrateDB</td>
         <td>✅</td>
-        <td
+        <td>✅</td>
     </tr>
     <tr>
         <td>Databricks</td>
{ingestr-0.13.60.dist-info → ingestr-0.13.61.dist-info}/RECORD
CHANGED

@@ -2,16 +2,16 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
 ingestr/main.py,sha256=taDyHyaVSpB17iNLl8zA0gmr4CqDO-MSTQX1CaRBB9U,26364
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
-ingestr/src/buildinfo.py,sha256=
-ingestr/src/destinations.py,sha256=
+ingestr/src/buildinfo.py,sha256=slTtbF5rCzvz-7Z_L3XF0kUk-fOkQu3INJIPtywQb5Y,21
+ingestr/src/destinations.py,sha256=YU7c5cNqViCIJ9NDhgGuorh5jMvLi7yViPWpEJ57Xx0,18788
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=j_ANB1X5BUa_iUOLqreHMGWnDPFV4bGgd0lL35lZ6XE,6432
 ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
 ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=SZo44oi7DBTEJrAvVhbj0lpXWd4aGSQTMgwZ17lgO9c,99048
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -73,7 +73,7 @@ ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOIN
 ingestr/src/hubspot/__init__.py,sha256=wqHefhc_YRI5dNFCcpvH-UUilNThE49sbGouSBiHYsw,11776
 ingestr/src/hubspot/helpers.py,sha256=k2b-lhxqBNKHoOSHoHegFSsk8xxjjGA0I04V0XyX2b4,7883
 ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
-ingestr/src/isoc_pulse/__init__.py,sha256=
+ingestr/src/isoc_pulse/__init__.py,sha256=9b4eN4faatpiwTuRNPuYcEt1hEFDEjua9XhfakUigBk,4648
 ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
 ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
 ingestr/src/kinesis/__init__.py,sha256=YretSz4F28tbkcPhd55mBp2Xk7XE9unyWx0nmvl8iEc,6235
@@ -143,8 +143,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
 ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
-ingestr-0.13.60.dist-info/METADATA,sha256=
-ingestr-0.13.60.dist-info/WHEEL,sha256=
-ingestr-0.13.60.dist-info/entry_points.txt,sha256=
-ingestr-0.13.60.dist-info/licenses/LICENSE.md,sha256=
-ingestr-0.13.60.dist-info/RECORD,,
+ingestr-0.13.61.dist-info/METADATA,sha256=0VM6DuyeROIX3TUb8FBXRMiUg4KktwK8Gr95ZespXHE,15027
+ingestr-0.13.61.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.61.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.61.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.61.dist-info/RECORD,,
{ingestr-0.13.60.dist-info → ingestr-0.13.61.dist-info}/WHEEL
File without changes

{ingestr-0.13.60.dist-info → ingestr-0.13.61.dist-info}/entry_points.txt
File without changes

{ingestr-0.13.60.dist-info → ingestr-0.13.61.dist-info}/licenses/LICENSE.md
File without changes