ingestr 0.13.60__py3-none-any.whl → 0.13.61__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. More details are available on the original diff page (the link is not preserved in this text).

ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.13.60"
1
+ version = "v0.13.61"
ingestr/src/destinations.py CHANGED
@@ -1,3 +1,4 @@
1
+ import abc
1
2
  import base64
2
3
  import csv
3
4
  import json
@@ -9,6 +10,7 @@ from urllib.parse import parse_qs, quote, urlparse
9
10
  import dlt
10
11
  import dlt.destinations.impl.filesystem.filesystem
11
12
  from dlt.common.configuration.specs import AwsCredentials
13
+ from dlt.common.storages.configuration import FileSystemCredentials
12
14
  from dlt.destinations.impl.clickhouse.configuration import (
13
15
  ClickHouseCredentials,
14
16
  )
@@ -111,6 +113,14 @@ class BigQueryDestination:
111
113
  pass
112
114
 
113
115
 
116
+ class CrateDBDestination(GenericSqlDestination):
117
+ def dlt_dest(self, uri: str, **kwargs):
118
+ uri = uri.replace("cratedb://", "postgres://")
119
+ import dlt_cratedb.impl.cratedb.factory
120
+
121
+ return dlt_cratedb.impl.cratedb.factory.cratedb(credentials=uri, **kwargs)
122
+
123
+
114
124
  class PostgresDestination(GenericSqlDestination):
115
125
  def dlt_dest(self, uri: str, **kwargs):
116
126
  return dlt.destinations.postgres(credentials=uri, **kwargs)
@@ -386,43 +396,62 @@ class ClickhouseDestination:
386
396
  pass
387
397
 
388
398
 
389
- class S3FSClient(dlt.destinations.impl.filesystem.filesystem.FilesystemClient):
399
+ class BlobFSClient(dlt.destinations.impl.filesystem.filesystem.FilesystemClient):
390
400
  @property
391
401
  def dataset_path(self):
392
402
  # override to remove dataset path
393
403
  return self.bucket_path
394
404
 
395
405
 
396
- class S3FS(dlt.destinations.filesystem):
406
+ class BlobFS(dlt.destinations.filesystem):
397
407
  @property
398
408
  def client_class(self):
399
- return S3FSClient
409
+ return BlobFSClient
400
410
 
401
411
 
402
- class S3Destination:
412
+ class SqliteDestination(GenericSqlDestination):
403
413
  def dlt_dest(self, uri: str, **kwargs):
404
- parsed_uri = urlparse(uri)
405
- params = parse_qs(parsed_uri.query)
414
+ return dlt.destinations.sqlalchemy(credentials=uri)
406
415
 
407
- access_key_id = params.get("access_key_id", [None])[0]
408
- if access_key_id is None:
409
- raise MissingValueError("access_key_id", "S3")
416
+ def dlt_run_params(self, uri: str, table: str, **kwargs):
417
+ return {
418
+ # https://dlthub.com/docs/dlt-ecosystem/destinations/sqlalchemy#dataset-files
419
+ "dataset_name": "main",
420
+ "table_name": table,
421
+ }
410
422
 
411
- secret_access_key = params.get("secret_access_key", [None])[0]
412
- if secret_access_key is None:
413
- raise MissingValueError("secret_access_key", "S3")
414
423
 
415
- endpoint_url = params.get("endpoint_url", [None])[0]
416
- if endpoint_url is not None:
417
- parsed_endpoint = urlparse(endpoint_url)
418
- if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
419
- raise ValueError("Invalid endpoint_url. Must be a valid URL.")
424
+ class MySqlDestination(GenericSqlDestination):
425
+ def dlt_dest(self, uri: str, **kwargs):
426
+ return dlt.destinations.sqlalchemy(credentials=uri)
420
427
 
421
- creds = AwsCredentials(
422
- aws_access_key_id=access_key_id,
423
- aws_secret_access_key=secret_access_key,
424
- endpoint_url=endpoint_url,
425
- )
428
+ def dlt_run_params(self, uri: str, table: str, **kwargs):
429
+ parsed = urlparse(uri)
430
+ database = parsed.path.lstrip("/")
431
+ if not database:
432
+ raise ValueError("You need to specify a database")
433
+ return {
434
+ "dataset_name": database,
435
+ "table_name": table,
436
+ }
437
+
438
+
439
+ class BlobStorageDestination(abc.ABC):
440
+ @abc.abstractmethod
441
+ def credentials(self, params: dict) -> FileSystemCredentials:
442
+ """Build credentials for the blob storage destination."""
443
+ pass
444
+
445
+ @property
446
+ @abc.abstractmethod
447
+ def protocol(self) -> str:
448
+ """The protocol used for the blob storage destination."""
449
+ pass
450
+
451
+ def dlt_dest(self, uri: str, **kwargs):
452
+ parsed_uri = urlparse(uri)
453
+ params = parse_qs(parsed_uri.query)
454
+ creds = self.credentials(params)
426
455
 
427
456
  dest_table = kwargs["dest_table"]
428
457
 
@@ -442,7 +471,7 @@ class S3Destination:
442
471
  base_path = "/".join(table_parts[:-1])
443
472
 
444
473
  opts = {
445
- "bucket_url": f"s3://{base_path}",
474
+ "bucket_url": f"{self.protocol}://{base_path}",
446
475
  "credentials": creds,
447
476
  # supresses dlt warnings about dataset name normalization.
448
477
  # we don't use dataset names in S3 so it's fine to disable this.
@@ -452,7 +481,7 @@ class S3Destination:
452
481
  if layout is not None:
453
482
  opts["layout"] = layout
454
483
 
455
- return S3FS(**opts) # type: ignore
484
+ return BlobFS(**opts) # type: ignore
456
485
 
457
486
  def validate_table(self, table: str):
458
487
  table = table.strip("/ ")
@@ -470,28 +499,56 @@ class S3Destination:
470
499
  pass
471
500
 
472
501
 
473
- class SqliteDestination(GenericSqlDestination):
474
- def dlt_dest(self, uri: str, **kwargs):
475
- return dlt.destinations.sqlalchemy(credentials=uri)
502
+ class S3Destination(BlobStorageDestination):
503
+ @property
504
+ def protocol(self) -> str:
505
+ return "s3"
476
506
 
477
- def dlt_run_params(self, uri: str, table: str, **kwargs):
478
- return {
479
- # https://dlthub.com/docs/dlt-ecosystem/destinations/sqlalchemy#dataset-files
480
- "dataset_name": "main",
481
- "table_name": table,
482
- }
507
+ def credentials(self, params: dict) -> FileSystemCredentials:
508
+ access_key_id = params.get("access_key_id", [None])[0]
509
+ if access_key_id is None:
510
+ raise MissingValueError("access_key_id", "S3")
483
511
 
512
+ secret_access_key = params.get("secret_access_key", [None])[0]
513
+ if secret_access_key is None:
514
+ raise MissingValueError("secret_access_key", "S3")
484
515
 
485
- class MySqlDestination(GenericSqlDestination):
486
- def dlt_dest(self, uri: str, **kwargs):
487
- return dlt.destinations.sqlalchemy(credentials=uri)
516
+ endpoint_url = params.get("endpoint_url", [None])[0]
517
+ if endpoint_url is not None:
518
+ parsed_endpoint = urlparse(endpoint_url)
519
+ if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
520
+ raise ValueError("Invalid endpoint_url. Must be a valid URL.")
488
521
 
489
- def dlt_run_params(self, uri: str, table: str, **kwargs):
490
- parsed = urlparse(uri)
491
- database = parsed.path.lstrip("/")
492
- if not database:
493
- raise ValueError("You need to specify a database")
494
- return {
495
- "dataset_name": database,
496
- "table_name": table,
497
- }
522
+ return AwsCredentials(
523
+ aws_access_key_id=access_key_id,
524
+ aws_secret_access_key=secret_access_key,
525
+ endpoint_url=endpoint_url,
526
+ )
527
+
528
+
529
+ class GCSDestination(BlobStorageDestination):
530
+ @property
531
+ def protocol(self) -> str:
532
+ return "gs"
533
+
534
+ def credentials(self, params: dict) -> FileSystemCredentials:
535
+ """Builds GCS credentials from the provided parameters."""
536
+ credentials_path = params.get("credentials_path")
537
+ credentials_base64 = params.get("credentials_base64")
538
+ credentials_available = any(
539
+ map(
540
+ lambda x: x is not None,
541
+ [credentials_path, credentials_base64],
542
+ )
543
+ )
544
+ if credentials_available is False:
545
+ raise MissingValueError("credentials_path or credentials_base64", "GCS")
546
+
547
+ credentials = None
548
+ if credentials_path:
549
+ with open(credentials_path[0], "r") as f:
550
+ credentials = json.load(f)
551
+ else:
552
+ credentials = json.loads(base64.b64decode(credentials_base64[0]).decode()) # type: ignore
553
+
554
+ return credentials
ingestr/src/factory.py CHANGED
@@ -7,9 +7,11 @@ from ingestr.src.destinations import (
7
7
  AthenaDestination,
8
8
  BigQueryDestination,
9
9
  ClickhouseDestination,
10
+ CrateDBDestination,
10
11
  CsvDestination,
11
12
  DatabricksDestination,
12
13
  DuckDBDestination,
14
+ GCSDestination,
13
15
  MsSQLDestination,
14
16
  MySqlDestination,
15
17
  PostgresDestination,
@@ -181,6 +183,7 @@ class SourceDestinationFactory:
181
183
  }
182
184
  destinations: Dict[str, Type[DestinationProtocol]] = {
183
185
  "bigquery": BigQueryDestination,
186
+ "cratedb": CrateDBDestination,
184
187
  "databricks": DatabricksDestination,
185
188
  "duckdb": DuckDBDestination,
186
189
  "mssql": MsSQLDestination,
@@ -197,6 +200,7 @@ class SourceDestinationFactory:
197
200
  "clickhouse+native": ClickhouseDestination,
198
201
  "clickhouse": ClickhouseDestination,
199
202
  "s3": S3Destination,
203
+ "gs": GCSDestination,
200
204
  "sqlite": SqliteDestination,
201
205
  "mysql": MySqlDestination,
202
206
  "mysql+pymysql": MySqlDestination,
ingestr/src/isoc_pulse/__init__.py CHANGED
@@ -73,7 +73,7 @@ def pulse_source(
73
73
  "write_disposition": "merge",
74
74
  "primary_key": "date",
75
75
  },
76
- "resources": resources, # type:ignore
76
+ "resources": resources, # type:ignore
77
77
  }
78
78
  res = rest_api_resources(config)
79
79
  if metric == "net_loss":
ingestr/src/sources.py CHANGED
@@ -1885,7 +1885,7 @@ class GCSSource:
1885
1885
  endpoint = blob.parse_endpoint(path_to_file)
1886
1886
  except blob.UnsupportedEndpointError:
1887
1887
  raise ValueError(
1888
- "S3 Source only supports specific formats files: csv, jsonl, parquet"
1888
+ "GCS Source only supports specific formats files: csv, jsonl, parquet"
1889
1889
  )
1890
1890
  except Exception as e:
1891
1891
  raise ValueError(
ingestr-0.13.60.dist-info/METADATA → ingestr-0.13.61.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.60
3
+ Version: 0.13.61
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -47,6 +47,7 @@ Requires-Dist: databricks-sqlalchemy==1.0.2
47
47
  Requires-Dist: dataclasses-json==0.6.7
48
48
  Requires-Dist: decorator==5.2.1
49
49
  Requires-Dist: deprecation==2.1.0
50
+ Requires-Dist: dlt-cratedb==0.0.1
50
51
  Requires-Dist: dlt==1.10.0
51
52
  Requires-Dist: dnspython==2.7.0
52
53
  Requires-Dist: duckdb-engine==0.17.0
@@ -305,7 +306,7 @@ Pull requests are welcome. However, please open an issue first to discuss what y
305
306
  <tr>
306
307
  <td>CrateDB</td>
307
308
  <td>✅</td>
308
- <td>❌</td>
309
+ <td>✅</td>
309
310
  </tr>
310
311
  <tr>
311
312
  <td>Databricks</td>
ingestr-0.13.60.dist-info/RECORD → ingestr-0.13.61.dist-info/RECORD CHANGED
@@ -2,16 +2,16 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
2
2
  ingestr/main.py,sha256=taDyHyaVSpB17iNLl8zA0gmr4CqDO-MSTQX1CaRBB9U,26364
3
3
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
4
4
  ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
5
- ingestr/src/buildinfo.py,sha256=1sTup4WLO36DuLnh5cnxtmEDBjKKYxAOSisEvjELy1w,21
6
- ingestr/src/destinations.py,sha256=TcxM2rcwHfgMMP6U0yRNcfWKlEzkBbZbqCIDww7lkTY,16882
5
+ ingestr/src/buildinfo.py,sha256=slTtbF5rCzvz-7Z_L3XF0kUk-fOkQu3INJIPtywQb5Y,21
6
+ ingestr/src/destinations.py,sha256=YU7c5cNqViCIJ9NDhgGuorh5jMvLi7yViPWpEJ57Xx0,18788
7
7
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
8
- ingestr/src/factory.py,sha256=OKqjYqvHhgaOF48-eSNSabcfXt4Gmr1yZ8cFGizXh0g,6319
8
+ ingestr/src/factory.py,sha256=j_ANB1X5BUa_iUOLqreHMGWnDPFV4bGgd0lL35lZ6XE,6432
9
9
  ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
10
10
  ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
11
11
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
12
12
  ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
13
13
  ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
14
- ingestr/src/sources.py,sha256=sJmiiInFb-KCPsaIy4qus6lx59MDCOobWgxJ7lfKH08,99047
14
+ ingestr/src/sources.py,sha256=SZo44oi7DBTEJrAvVhbj0lpXWd4aGSQTMgwZ17lgO9c,99048
15
15
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
16
16
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
17
17
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -73,7 +73,7 @@ ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOIN
73
73
  ingestr/src/hubspot/__init__.py,sha256=wqHefhc_YRI5dNFCcpvH-UUilNThE49sbGouSBiHYsw,11776
74
74
  ingestr/src/hubspot/helpers.py,sha256=k2b-lhxqBNKHoOSHoHegFSsk8xxjjGA0I04V0XyX2b4,7883
75
75
  ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
76
- ingestr/src/isoc_pulse/__init__.py,sha256=WDgKBn15gyQheXE6oJ_2OuMUQwKPbAjflKAsnucu7u8,4647
76
+ ingestr/src/isoc_pulse/__init__.py,sha256=9b4eN4faatpiwTuRNPuYcEt1hEFDEjua9XhfakUigBk,4648
77
77
  ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
78
78
  ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
79
79
  ingestr/src/kinesis/__init__.py,sha256=YretSz4F28tbkcPhd55mBp2Xk7XE9unyWx0nmvl8iEc,6235
@@ -143,8 +143,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
143
143
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
144
144
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
145
145
  ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
146
- ingestr-0.13.60.dist-info/METADATA,sha256=FwdcfGIPPRKlSV8wJX1HAqHriGUZBl_XXi0Yco8O874,14993
147
- ingestr-0.13.60.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
148
- ingestr-0.13.60.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
149
- ingestr-0.13.60.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
150
- ingestr-0.13.60.dist-info/RECORD,,
146
+ ingestr-0.13.61.dist-info/METADATA,sha256=0VM6DuyeROIX3TUb8FBXRMiUg4KktwK8Gr95ZespXHE,15027
147
+ ingestr-0.13.61.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
148
+ ingestr-0.13.61.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
149
+ ingestr-0.13.61.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
150
+ ingestr-0.13.61.dist-info/RECORD,,