ingestr 0.13.59__py3-none-any.whl → 0.13.61__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/main.py +1 -0
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/destinations.py +102 -45
- ingestr/src/factory.py +4 -0
- ingestr/src/isoc_pulse/__init__.py +1 -1
- ingestr/src/sources.py +2 -1
- ingestr/src/stripe_analytics/__init__.py +1 -42
- ingestr/src/stripe_analytics/helpers.py +8 -62
- {ingestr-0.13.59.dist-info → ingestr-0.13.61.dist-info}/METADATA +3 -2
- {ingestr-0.13.59.dist-info → ingestr-0.13.61.dist-info}/RECORD +13 -13
- {ingestr-0.13.59.dist-info → ingestr-0.13.61.dist-info}/WHEEL +0 -0
- {ingestr-0.13.59.dist-info → ingestr-0.13.61.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.59.dist-info → ingestr-0.13.61.dist-info}/licenses/LICENSE.md +0 -0
ingestr/main.py
CHANGED

@@ -543,6 +543,7 @@ def ingest(
         sql_reflection_level=sql_reflection_level.value,
         sql_limit=sql_limit,
         sql_exclude_columns=sql_exclude_columns,
+        extract_parallelism=extract_parallelism,
     )

     resource.for_each(dlt_source, lambda x: x.add_map(cast_set_to_list))
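The only functional change in main.py is that the new extract_parallelism option is forwarded to the source factory as a keyword argument; sources that support it read it back out of kwargs with a default of 4 (see the sources.py hunk below). A minimal sketch of that pass-through pattern — build_source is a hypothetical stand-in, not the real factory function:

# Hedged sketch of the kwarg pass-through; build_source is illustrative only.
def build_source(**kwargs) -> dict:
    # Mirrors ingestr/src/sources.py: fall back to 4 workers when the option is absent.
    max_workers = kwargs.get("extract_parallelism", 4)
    return {"max_workers": max_workers}


print(build_source(extract_parallelism=8))  # {'max_workers': 8}
print(build_source())                       # {'max_workers': 4}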
ingestr/src/buildinfo.py
CHANGED

@@ -1 +1 @@
-version = "v0.13.59"
+version = "v0.13.61"
ingestr/src/destinations.py
CHANGED

@@ -1,3 +1,4 @@
+import abc
 import base64
 import csv
 import json
@@ -9,6 +10,7 @@ from urllib.parse import parse_qs, quote, urlparse
 import dlt
 import dlt.destinations.impl.filesystem.filesystem
 from dlt.common.configuration.specs import AwsCredentials
+from dlt.common.storages.configuration import FileSystemCredentials
 from dlt.destinations.impl.clickhouse.configuration import (
     ClickHouseCredentials,
 )
@@ -111,6 +113,14 @@ class BigQueryDestination:
         pass


+class CrateDBDestination(GenericSqlDestination):
+    def dlt_dest(self, uri: str, **kwargs):
+        uri = uri.replace("cratedb://", "postgres://")
+        import dlt_cratedb.impl.cratedb.factory
+
+        return dlt_cratedb.impl.cratedb.factory.cratedb(credentials=uri, **kwargs)
+
+
 class PostgresDestination(GenericSqlDestination):
     def dlt_dest(self, uri: str, **kwargs):
         return dlt.destinations.postgres(credentials=uri, **kwargs)
@@ -386,43 +396,62 @@ class ClickhouseDestination:
         pass


-class
+class BlobFSClient(dlt.destinations.impl.filesystem.filesystem.FilesystemClient):
     @property
     def dataset_path(self):
         # override to remove dataset path
         return self.bucket_path


-class
+class BlobFS(dlt.destinations.filesystem):
     @property
     def client_class(self):
-        return
+        return BlobFSClient


-class
+class SqliteDestination(GenericSqlDestination):
     def dlt_dest(self, uri: str, **kwargs):
-
-        params = parse_qs(parsed_uri.query)
+        return dlt.destinations.sqlalchemy(credentials=uri)

-
-
-
+    def dlt_run_params(self, uri: str, table: str, **kwargs):
+        return {
+            # https://dlthub.com/docs/dlt-ecosystem/destinations/sqlalchemy#dataset-files
+            "dataset_name": "main",
+            "table_name": table,
+        }

-        secret_access_key = params.get("secret_access_key", [None])[0]
-        if secret_access_key is None:
-            raise MissingValueError("secret_access_key", "S3")

-
-
-
-        if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
-            raise ValueError("Invalid endpoint_url. Must be a valid URL.")
+class MySqlDestination(GenericSqlDestination):
+    def dlt_dest(self, uri: str, **kwargs):
+        return dlt.destinations.sqlalchemy(credentials=uri)

-
-
-
-
-
+    def dlt_run_params(self, uri: str, table: str, **kwargs):
+        parsed = urlparse(uri)
+        database = parsed.path.lstrip("/")
+        if not database:
+            raise ValueError("You need to specify a database")
+        return {
+            "dataset_name": database,
+            "table_name": table,
+        }
+
+
+class BlobStorageDestination(abc.ABC):
+    @abc.abstractmethod
+    def credentials(self, params: dict) -> FileSystemCredentials:
+        """Build credentials for the blob storage destination."""
+        pass
+
+    @property
+    @abc.abstractmethod
+    def protocol(self) -> str:
+        """The protocol used for the blob storage destination."""
+        pass
+
+    def dlt_dest(self, uri: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+        creds = self.credentials(params)

         dest_table = kwargs["dest_table"]

@@ -442,7 +471,7 @@ class S3Destination:
         base_path = "/".join(table_parts[:-1])

         opts = {
-            "bucket_url": f"
+            "bucket_url": f"{self.protocol}://{base_path}",
             "credentials": creds,
             # supresses dlt warnings about dataset name normalization.
             # we don't use dataset names in S3 so it's fine to disable this.
@@ -452,7 +481,7 @@ class S3Destination:
         if layout is not None:
             opts["layout"] = layout

-        return
+        return BlobFS(**opts)  # type: ignore

     def validate_table(self, table: str):
         table = table.strip("/ ")
@@ -470,28 +499,56 @@ class S3Destination:
         pass


-class
-
-
+class S3Destination(BlobStorageDestination):
+    @property
+    def protocol(self) -> str:
+        return "s3"

-    def
-
-
-            "
-            "table_name": table,
-        }
+    def credentials(self, params: dict) -> FileSystemCredentials:
+        access_key_id = params.get("access_key_id", [None])[0]
+        if access_key_id is None:
+            raise MissingValueError("access_key_id", "S3")

+        secret_access_key = params.get("secret_access_key", [None])[0]
+        if secret_access_key is None:
+            raise MissingValueError("secret_access_key", "S3")

-
-
-
+        endpoint_url = params.get("endpoint_url", [None])[0]
+        if endpoint_url is not None:
+            parsed_endpoint = urlparse(endpoint_url)
+            if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
+                raise ValueError("Invalid endpoint_url. Must be a valid URL.")

-
-
-
-
-
-
-
-
-
+        return AwsCredentials(
+            aws_access_key_id=access_key_id,
+            aws_secret_access_key=secret_access_key,
+            endpoint_url=endpoint_url,
+        )
+
+
+class GCSDestination(BlobStorageDestination):
+    @property
+    def protocol(self) -> str:
+        return "gs"
+
+    def credentials(self, params: dict) -> FileSystemCredentials:
+        """Builds GCS credentials from the provided parameters."""
+        credentials_path = params.get("credentials_path")
+        credentials_base64 = params.get("credentials_base64")
+        credentials_available = any(
+            map(
+                lambda x: x is not None,
+                [credentials_path, credentials_base64],
+            )
+        )
+        if credentials_available is False:
+            raise MissingValueError("credentials_path or credentials_base64", "GCS")
+
+        credentials = None
+        if credentials_path:
+            with open(credentials_path[0], "r") as f:
+                credentials = json.load(f)
+        else:
+            credentials = json.loads(base64.b64decode(credentials_base64[0]).decode())  # type: ignore
+
+        return credentials
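The two blob destinations introduced above take their credentials from the query string of the destination URI: S3 expects access_key_id, secret_access_key, and an optional endpoint_url, while GCS expects either credentials_path or credentials_base64. A stdlib-only sketch of that query-string handling — the URI and key values are made up, and the real code wraps the result in dlt's AwsCredentials (or a decoded service-account JSON for GCS):

from urllib.parse import parse_qs, urlparse

# Hypothetical destination URI with made-up credentials.
uri = "s3://my-bucket/events?access_key_id=AKIAEXAMPLE&secret_access_key=SECRETEXAMPLE&endpoint_url=https://minio.local:9000"

params = parse_qs(urlparse(uri).query)

# parse_qs returns lists, hence the [None] default and [0] index used in destinations.py.
access_key_id = params.get("access_key_id", [None])[0]
secret_access_key = params.get("secret_access_key", [None])[0]
endpoint_url = params.get("endpoint_url", [None])[0]

if endpoint_url is not None:
    endpoint = urlparse(endpoint_url)
    if not endpoint.scheme or not endpoint.netloc:
        raise ValueError("Invalid endpoint_url. Must be a valid URL.")

print(access_key_id, secret_access_key, endpoint_url)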
ingestr/src/factory.py
CHANGED

@@ -7,9 +7,11 @@ from ingestr.src.destinations import (
     AthenaDestination,
     BigQueryDestination,
     ClickhouseDestination,
+    CrateDBDestination,
     CsvDestination,
     DatabricksDestination,
     DuckDBDestination,
+    GCSDestination,
     MsSQLDestination,
     MySqlDestination,
     PostgresDestination,
@@ -181,6 +183,7 @@ class SourceDestinationFactory:
     }
     destinations: Dict[str, Type[DestinationProtocol]] = {
         "bigquery": BigQueryDestination,
+        "cratedb": CrateDBDestination,
         "databricks": DatabricksDestination,
         "duckdb": DuckDBDestination,
         "mssql": MsSQLDestination,
@@ -197,6 +200,7 @@ class SourceDestinationFactory:
         "clickhouse+native": ClickhouseDestination,
         "clickhouse": ClickhouseDestination,
         "s3": S3Destination,
+        "gs": GCSDestination,
         "sqlite": SqliteDestination,
         "mysql": MySqlDestination,
         "mysql+pymysql": MySqlDestination,
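factory.py maps URI schemes to destination classes, so wiring up CrateDB and GCS amounts to two new dictionary entries. A simplified sketch of that scheme-based dispatch — the classes here are empty placeholders, not the real implementations from destinations.py:

from urllib.parse import urlparse


# Placeholder stand-ins for the real destination classes.
class CrateDBDestination: ...


class GCSDestination: ...


DESTINATIONS = {
    "cratedb": CrateDBDestination,
    "gs": GCSDestination,
}


def destination_for(uri: str):
    # Pick the destination class based on the URI scheme, as the factory does.
    scheme = urlparse(uri).scheme
    try:
        return DESTINATIONS[scheme]()
    except KeyError:
        raise ValueError(f"Unsupported destination scheme: {scheme}")


print(type(destination_for("cratedb://user:pass@localhost:5432/doc")).__name__)  # CrateDBDestination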
ingestr/src/sources.py
CHANGED

@@ -737,6 +737,7 @@ class StripeAnalyticsSource:
                 endpoint,
             ],
             stripe_secret_key=api_key[0],
+            max_workers=kwargs.get("extract_parallelism", 4),
         ).with_resources(endpoint)

         raise ValueError(
@@ -1884,7 +1885,7 @@ class GCSSource:
             endpoint = blob.parse_endpoint(path_to_file)
         except blob.UnsupportedEndpointError:
             raise ValueError(
-                "
+                "GCS Source only supports specific formats files: csv, jsonl, parquet"
             )
         except Exception as e:
             raise ValueError(
ingestr/src/stripe_analytics/__init__.py
CHANGED

@@ -10,7 +10,6 @@ from pendulum import DateTime
 from .helpers import (
     async_parallel_pagination,
     pagination,
-    parallel_pagination,
     transform_date,
 )

@@ -55,53 +54,13 @@ def stripe_source(
     )(endpoint)


-@dlt.source(max_table_nesting=0)
-def parallel_stripe_source(
-    endpoints: Tuple[str, ...],
-    stripe_secret_key: str = dlt.secrets.value,
-    start_date: Optional[DateTime] = None,
-    end_date: Optional[DateTime] = None,
-    max_workers: int = 12,
-) -> Iterable[DltResource]:
-    """
-    Retrieves data from the Stripe API for the specified endpoints using parallel pagination.
-
-    This source divides the date range across multiple workers to fetch data in parallel,
-    which can significantly speed up data retrieval for large date ranges.
-
-    Args:
-        endpoints (Tuple[str, ...]): A tuple of endpoint names to retrieve data from.
-        stripe_secret_key (str): The API access token for authentication. Defaults to the value in the `dlt.secrets` object.
-        start_date (Optional[DateTime]): An optional start date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Required for parallel processing.
-        end_date (Optional[DateTime]): An optional end date to limit the data retrieved. Format: datetime(YYYY, MM, DD). Required for parallel processing.
-        max_workers (int): Maximum number of worker threads for parallel fetching. Defaults to 4.
-
-    Returns:
-        Iterable[DltResource]: Resources with data that was created during the period greater than or equal to 'start_date' and less than 'end_date'.
-    """
-    stripe.api_key = stripe_secret_key
-    stripe.api_version = "2022-11-15"
-
-    def parallel_stripe_resource(
-        endpoint: str,
-    ) -> Generator[Dict[Any, Any], Any, None]:
-        yield from parallel_pagination(endpoint, start_date, end_date, max_workers)
-
-    for endpoint in endpoints:
-        yield dlt.resource(
-            parallel_stripe_resource,
-            name=endpoint,
-            write_disposition="replace",
-        )(endpoint)
-
-
 @dlt.source(max_table_nesting=0)
 def async_stripe_source(
     endpoints: Tuple[str, ...],
     stripe_secret_key: str = dlt.secrets.value,
     start_date: Optional[DateTime] = None,
     end_date: Optional[DateTime] = None,
-    max_workers: int =
+    max_workers: int = 4,
     rate_limit_delay: float = 0.03,
 ) -> Iterable[DltResource]:
     """
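The async source that remains still splits the requested date range into per-worker chunks before fetching (see `_create_time_chunks` in helpers.py below). A small, self-contained sketch of equal time-range chunking in the same spirit — the timestamps and chunk count are illustrative, not taken from ingestr:

from typing import List, Tuple


def create_time_chunks(start_ts: int, end_ts: int, num_chunks: int) -> List[Tuple[int, int]]:
    """Divide [start_ts, end_ts) into contiguous, non-overlapping ranges."""
    step = max((end_ts - start_ts) // num_chunks, 1)
    chunks = []
    chunk_start = start_ts
    while chunk_start < end_ts:
        chunk_end = min(chunk_start + step, end_ts)
        chunks.append((chunk_start, chunk_end))
        chunk_start = chunk_end
    return chunks


# Example: split one day of epoch seconds across 4 workers.
print(create_time_chunks(1_700_000_000, 1_700_086_400, 4))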
ingestr/src/stripe_analytics/helpers.py
CHANGED

@@ -43,67 +43,6 @@ def pagination(
             break


-def parallel_pagination(
-    endpoint: str,
-    start_date: Optional[Any] = None,
-    end_date: Optional[Any] = None,
-    max_workers: int = 4,
-) -> Iterable[TDataItem]:
-    """
-    Retrieves data from an endpoint with parallel pagination by dividing date ranges across workers.
-
-    Args:
-        endpoint (str): The endpoint to retrieve data from.
-        start_date (Optional[Any]): An optional start date to limit the data retrieved. Defaults to 2010-01-01 if None.
-        end_date (Optional[Any]): An optional end date to limit the data retrieved. Defaults to today if None.
-        max_workers (int): Maximum number of worker threads to use for parallel fetching. Defaults to 4.
-
-    Returns:
-        Iterable[TDataItem]: Data items retrieved from the endpoint.
-    """
-    # Set default date range if not provided: 2010 to today
-    if not start_date:
-        start_date = pendulum.datetime(2010, 1, 1)
-    if not end_date:
-        end_date = pendulum.now()
-
-    # Convert dates to timestamps for processing
-    start_ts = transform_date(start_date)
-    end_ts = transform_date(end_date)
-
-    # If date range is very small, use sequential pagination
-    date_range_days = (end_ts - start_ts) / (24 * 60 * 60)
-    if date_range_days < 30:  # Less than 30 days
-        yield from pagination(endpoint, start_date, end_date)
-        return
-
-    # Create time chunks with larger chunks for 2010s (less data expected)
-    time_chunks = _create_adaptive_time_chunks(start_ts, end_ts, max_workers)
-
-    # Use ThreadPoolExecutor to fetch data in parallel and yield as soon as ready
-    with ThreadPoolExecutor(max_workers=max_workers) as executor:
-        # Submit all tasks
-        future_to_chunk = {
-            executor.submit(
-                _fetch_chunk_data_streaming, endpoint, chunk_start, chunk_end
-            ): (chunk_start, chunk_end)
-            for chunk_start, chunk_end in time_chunks
-        }
-
-        # MAXIMUM SPEED - Yield results immediately as they complete
-        for future in as_completed(future_to_chunk):
-            chunk_start, chunk_end = future_to_chunk[future]
-            try:
-                chunk_data = future.result()
-                # Yield all batches from this chunk immediately - NO ORDERING
-                for batch in chunk_data:
-                    yield batch
-
-            except Exception as exc:
-                print(f"Chunk {chunk_start}-{chunk_end} generated an exception: {exc}")
-                raise exc
-
-
 def _create_time_chunks(start_ts: int, end_ts: int, num_chunks: int) -> List[tuple]:
     """
     Divide a time range into equal chunks for parallel processing.
@@ -295,7 +234,6 @@ async def async_parallel_pagination(

     async def fetch_chunk_with_semaphore(chunk_start: int, chunk_end: int):
         async with semaphore:
-            await asyncio.sleep(rate_limit_delay)
             return await _fetch_chunk_data_async_fast(endpoint, chunk_start, chunk_end)

     # Create all tasks
@@ -390,6 +328,10 @@ async def stripe_get_data_async(
     max_wait_time_ms = 10000

     while retry_count < max_retries:
+        # print(
+        #     f"Fetching {resource} from {datetime.fromtimestamp(start_date).strftime('%Y-%m-%d %H:%M:%S') if start_date else 'None'} to {datetime.fromtimestamp(end_date).strftime('%Y-%m-%d %H:%M:%S') if end_date else 'None'}, retry {retry_count} of {max_retries}",
+        #     flush=True,
+        # )
         try:
             resource_dict = await getattr(stripe, resource).list_async(
                 created={"gte": start_date, "lt": end_date}, limit=100, **kwargs
@@ -399,6 +341,10 @@ async def stripe_get_data_async(
             retry_count += 1
             if retry_count < max_retries:
                 wait_time = min(2**retry_count * 0.001, max_wait_time_ms)
+                print(
+                    f"Got rate limited, sleeping {wait_time} seconds before retrying...",
+                    flush=True,
+                )
                 await asyncio.sleep(wait_time)
             else:
                 # Re-raise the last exception if we've exhausted retries
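The new log line sits inside a capped exponential backoff: wait_time = min(2**retry_count * 0.001, max_wait_time_ms). A standalone sketch of that retry loop — flaky_call is a made-up stand-in for the Stripe request, and the retry/cap constants mirror the hunk above:

import time

max_retries = 5
max_wait_time_ms = 10000  # cap used in helpers.py


def flaky_call(attempt: int) -> str:
    # Hypothetical stand-in for the Stripe list call; fails on the first two attempts.
    if attempt < 2:
        raise RuntimeError("rate limited")
    return "ok"


retry_count = 0
while retry_count < max_retries:
    try:
        print(flaky_call(retry_count))
        break
    except RuntimeError:
        retry_count += 1
        if retry_count < max_retries:
            wait_time = min(2**retry_count * 0.001, max_wait_time_ms)
            print(f"Got rate limited, sleeping {wait_time} seconds before retrying...", flush=True)
            time.sleep(wait_time)
        else:
            raise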
{ingestr-0.13.59.dist-info → ingestr-0.13.61.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.59
+Version: 0.13.61
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -47,6 +47,7 @@ Requires-Dist: databricks-sqlalchemy==1.0.2
 Requires-Dist: dataclasses-json==0.6.7
 Requires-Dist: decorator==5.2.1
 Requires-Dist: deprecation==2.1.0
+Requires-Dist: dlt-cratedb==0.0.1
 Requires-Dist: dlt==1.10.0
 Requires-Dist: dnspython==2.7.0
 Requires-Dist: duckdb-engine==0.17.0
@@ -305,7 +306,7 @@ Pull requests are welcome. However, please open an issue first to discuss what y
     <tr>
         <td>CrateDB</td>
         <td>✅</td>
-        <td
+        <td>✅</td>
     </tr>
     <tr>
         <td>Databricks</td>
{ingestr-0.13.59.dist-info → ingestr-0.13.61.dist-info}/RECORD
CHANGED

@@ -1,17 +1,17 @@
 ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
-ingestr/main.py,sha256=
+ingestr/main.py,sha256=taDyHyaVSpB17iNLl8zA0gmr4CqDO-MSTQX1CaRBB9U,26364
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
-ingestr/src/buildinfo.py,sha256=
-ingestr/src/destinations.py,sha256=
+ingestr/src/buildinfo.py,sha256=slTtbF5rCzvz-7Z_L3XF0kUk-fOkQu3INJIPtywQb5Y,21
+ingestr/src/destinations.py,sha256=YU7c5cNqViCIJ9NDhgGuorh5jMvLi7yViPWpEJ57Xx0,18788
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=j_ANB1X5BUa_iUOLqreHMGWnDPFV4bGgd0lL35lZ6XE,6432
 ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
 ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=SZo44oi7DBTEJrAvVhbj0lpXWd4aGSQTMgwZ17lgO9c,99048
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -73,7 +73,7 @@ ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOIN
 ingestr/src/hubspot/__init__.py,sha256=wqHefhc_YRI5dNFCcpvH-UUilNThE49sbGouSBiHYsw,11776
 ingestr/src/hubspot/helpers.py,sha256=k2b-lhxqBNKHoOSHoHegFSsk8xxjjGA0I04V0XyX2b4,7883
 ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
-ingestr/src/isoc_pulse/__init__.py,sha256=
+ingestr/src/isoc_pulse/__init__.py,sha256=9b4eN4faatpiwTuRNPuYcEt1hEFDEjua9XhfakUigBk,4648
 ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
 ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
 ingestr/src/kinesis/__init__.py,sha256=YretSz4F28tbkcPhd55mBp2Xk7XE9unyWx0nmvl8iEc,6235
@@ -119,8 +119,8 @@ ingestr/src/solidgate/__init__.py,sha256=JdaXvAu5QGuf9-FY294vwCQCEmfrqIld9oqbzqC
 ingestr/src/solidgate/helpers.py,sha256=oePEc9nnvmN3IaKrfJCvyKL79xdGM0-gRTN3-8tY4Fc,4952
 ingestr/src/sql_database/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ingestr/src/sql_database/callbacks.py,sha256=sEFFmXxAURY3yeBjnawigDtq9LBCvi8HFqG4kLd7tMU,2002
-ingestr/src/stripe_analytics/__init__.py,sha256=
-ingestr/src/stripe_analytics/helpers.py,sha256=
+ingestr/src/stripe_analytics/__init__.py,sha256=mK8dGKAlMPVqGE9gG30XfbvOvgVD0yWhNpt-D3iavDY,6385
+ingestr/src/stripe_analytics/helpers.py,sha256=O5ow8xORcyLhw1Yn6vFm__tASfmPOgR0TMVU9gXmxcE,11828
 ingestr/src/stripe_analytics/settings.py,sha256=xt1-ljwP4nLTNUa8l3KwFbtK8FtQHgHpzGF5uPKfRsw,2246
 ingestr/src/telemetry/event.py,sha256=W7bs4uVfPakQ5otmiqgqu1l5SqjYx1p87wudnWXckBc,949
 ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
@@ -143,8 +143,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
 ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
+ingestr-0.13.61.dist-info/METADATA,sha256=0VM6DuyeROIX3TUb8FBXRMiUg4KktwK8Gr95ZespXHE,15027
+ingestr-0.13.61.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.61.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.61.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.61.dist-info/RECORD,,
{ingestr-0.13.59.dist-info → ingestr-0.13.61.dist-info}/WHEEL
File without changes

{ingestr-0.13.59.dist-info → ingestr-0.13.61.dist-info}/entry_points.txt
File without changes

{ingestr-0.13.59.dist-info → ingestr-0.13.61.dist-info}/licenses/LICENSE.md
File without changes