ingestr 0.13.31__py3-none-any.whl → 0.13.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/main.py +21 -51
- ingestr/src/adjust/adjust_helpers.py +1 -1
- ingestr/src/airtable/__init__.py +1 -1
- ingestr/src/applovin_max/__init__.py +0 -1
- ingestr/src/appsflyer/client.py +0 -1
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/destinations.py +51 -18
- ingestr/src/filters.py +2 -3
- ingestr/src/klaviyo/{_init_.py → __init__.py} +0 -1
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/partition.py +2 -1
- ingestr/src/sources.py +169 -90
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- {ingestr-0.13.31.dist-info → ingestr-0.13.33.dist-info}/METADATA +1 -6
- {ingestr-0.13.31.dist-info → ingestr-0.13.33.dist-info}/RECORD +20 -19
- {ingestr-0.13.31.dist-info → ingestr-0.13.33.dist-info}/WHEEL +0 -0
- {ingestr-0.13.31.dist-info → ingestr-0.13.33.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.31.dist-info → ingestr-0.13.33.dist-info}/licenses/LICENSE.md +0 -0
ingestr/main.py
CHANGED
@@ -3,15 +3,9 @@ from enum import Enum
 from typing import Optional
 
 import typer
-from dlt.common.runtime.collector import Collector
 from rich.console import Console
-from rich.status import Status
 from typing_extensions import Annotated
 
-import ingestr.src.partition as partition
-import ingestr.src.resource as resource
-from ingestr.src.destinations import AthenaDestination
-from ingestr.src.filters import cast_set_to_list, handle_mysql_empty_dates
 from ingestr.src.telemetry.event import track
 
 app = typer.Typer(
@@ -46,45 +40,6 @@ PARQUET_SUPPORTED_DESTINATIONS = [
 JSON_RETURNING_SOURCES = ["notion"]
 
 
-class SpinnerCollector(Collector):
-    status: Status
-    current_step: str
-    started: bool
-
-    def __init__(self) -> None:
-        self.status = Status("Ingesting data...", spinner="dots")
-        self.started = False
-
-    def update(
-        self,
-        name: str,
-        inc: int = 1,
-        total: Optional[int] = None,
-        message: Optional[str] = None,  # type: ignore
-        label: str = "",
-        **kwargs,
-    ) -> None:
-        self.status.update(self.current_step)
-
-    def _start(self, step: str) -> None:
-        self.current_step = self.__step_to_label(step)
-        self.status.start()
-
-    def __step_to_label(self, step: str) -> str:
-        verb = step.split(" ")[0].lower()
-        if verb.startswith("normalize"):
-            return "Normalizing the data"
-        elif verb.startswith("load"):
-            return "Loading the data to the destination"
-        elif verb.startswith("extract"):
-            return "Extracting the data from the source"
-
-        return f"{verb.capitalize()} the data"
-
-    def _stop(self) -> None:
-        self.status.stop()
-
-
 class IncrementalStrategy(str, Enum):
     create_replace = "replace"
     append = "append"
@@ -309,6 +264,13 @@ def ingest(
             envvar=["YIELD_LIMIT", "INGESTR_YIELD_LIMIT"],
         ),
     ] = None,  # type: ignore
+    staging_bucket: Annotated[
+        Optional[str],
+        typer.Option(
+            help="The staging bucket to be used for the ingestion, must be prefixed with 'gs://' or 's3://'",
+            envvar=["STAGING_BUCKET", "INGESTR_STAGING_BUCKET"],
+        ),
+    ] = None,  # type: ignore
 ):
     import hashlib
     import tempfile
@@ -317,14 +279,16 @@ def ingest(
     import dlt
     import humanize
     import typer
-    from dlt.common.data_types import TDataType
-    from dlt.common.destination import Destination
     from dlt.common.pipeline import LoadInfo
     from dlt.common.runtime.collector import Collector, LogCollector
     from dlt.common.schema.typing import TColumnSchema
 
+    import ingestr.src.partition as partition
+    import ingestr.src.resource as resource
+    from ingestr.src.collector.spinner import SpinnerCollector
+    from ingestr.src.destinations import AthenaDestination
     from ingestr.src.factory import SourceDestinationFactory
-    from ingestr.src.
+    from ingestr.src.filters import cast_set_to_list, handle_mysql_empty_dates
 
     def report_errors(run_info: LoadInfo):
         for load_package in run_info.load_packages:
@@ -359,7 +323,7 @@ def ingest(
         return (source_table, dest_table)
 
     def validate_loader_file_format(
-        dlt_dest
+        dlt_dest, loader_file_format: Optional[LoaderFileFormat]
     ):
         if (
             loader_file_format
@@ -371,9 +335,11 @@ def ingest(
             )
             raise typer.Abort()
 
-    def parse_columns(columns: list[str]) -> dict
+    def parse_columns(columns: list[str]) -> dict:
         from typing import cast, get_args
 
+        from dlt.common.data_types import TDataType
+
         possible_types = get_args(TDataType)
 
         types: dict[str, TDataType] = {}
@@ -406,6 +372,7 @@ def ingest(
     dlt.config["data_writer.file_max_items"] = loader_file_size
     dlt.config["extract.workers"] = extract_parallelism
     dlt.config["extract.max_parallel_items"] = extract_parallelism
+    dlt.config["load.raise_on_max_retries"] = 15
     if schema_naming != SchemaNaming.default:
         dlt.config["schema.naming"] = schema_naming.value
 
@@ -457,7 +424,9 @@ def ingest(
         pipelines_dir = tempfile.mkdtemp()
         is_pipelines_dir_temp = True
 
-    dlt_dest = destination.dlt_dest(
+    dlt_dest = destination.dlt_dest(
+        uri=dest_uri, dest_table=dest_table, staging_bucket=staging_bucket
+    )
     validate_loader_file_format(dlt_dest, loader_file_format)
 
     if partition_by:
@@ -606,6 +575,7 @@ def ingest(
             **destination.dlt_run_params(
                 uri=dest_uri,
                 table=dest_table,
+                staging_bucket=staging_bucket,
             ),
             write_disposition=write_disposition,  # type: ignore
             primary_key=(primary_key if primary_key and len(primary_key) > 0 else None),  # type: ignore
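Note: the new --staging-bucket option added above is threaded through to the destination in two places. A minimal sketch of the call shape after this change; the URI, table, and bucket values below are hypothetical placeholders:

dest_uri = "bigquery://my-project"          # hypothetical
dest_table = "raw.events"                   # hypothetical
staging_bucket = "gs://my-staging-bucket"   # hypothetical

# Built once, so the destination can wire staging configuration up front.
dlt_dest = destination.dlt_dest(
    uri=dest_uri, dest_table=dest_table, staging_bucket=staging_bucket
)

# Passed again at run time alongside the other run parameters.
run_params = destination.dlt_run_params(
    uri=dest_uri,
    table=dest_table,
    staging_bucket=staging_bucket,
)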
ingestr/src/adjust/adjust_helpers.py
CHANGED
@@ -36,7 +36,7 @@ class AdjustAPI:
     def __init__(self, api_key):
         self.api_key = api_key
         self.request_client = Client(
-            request_timeout=
+            request_timeout=1000,  # Adjust support recommends 1000 seconds of read timeout.
             raise_for_status=False,
             retry_condition=retry_on_limit,
             request_max_attempts=12,
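Note: the Client configured above is dlt's retrying HTTP client. A minimal sketch of an equivalent configuration, assuming retry_on_limit is this module's own predicate for rate-limited responses:

from dlt.sources.helpers.requests import Client


def retry_on_limit(response, exception) -> bool:
    # Retry when the Adjust API rate-limits the request.
    return response is not None and response.status_code == 429


request_client = Client(
    request_timeout=1000,  # long read timeout for slow report exports
    raise_for_status=False,
    retry_condition=retry_on_limit,
    request_max_attempts=12,
)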
ingestr/src/airtable/__init__.py
CHANGED
@@ -55,7 +55,7 @@ def airtable_resource(
             field for field in table["fields"] if field["id"] == primary_key_id
         ][0]
         table_name: str = table["name"]
-        primary_key: List[str] = [primary_key_field[
+        primary_key: List[str] = [f"fields__{primary_key_field['name']}"]
         air_table = api.table(base_id, table["id"])
 
         # Table.iterate() supports rich customization options, such as chunk size, fields, cell format, timezone, locale, and view
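Note: dlt flattens nested record fields into parent__child column names, which is why the primary key above now points at the flattened column rather than the raw Airtable field. A small illustration with a hypothetical field name:

# Hypothetical primary-key field as returned by the Airtable metadata API.
primary_key_field = {"id": "fldXYZ", "name": "id"}

primary_key = [f"fields__{primary_key_field['name']}"]
assert primary_key == ["fields__id"]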
ingestr/src/appsflyer/client.py
CHANGED
ingestr/src/buildinfo.py
CHANGED
@@ -1 +1 @@
-version = "v0.13.
+version = "v0.13.33"
ingestr/src/collector/spinner.py
ADDED
@@ -0,0 +1,43 @@
+from typing import Optional
+
+from dlt.common.runtime.collector import Collector
+from rich.status import Status
+
+
+class SpinnerCollector(Collector):
+    status: Status
+    current_step: str
+    started: bool
+
+    def __init__(self) -> None:
+        self.status = Status("Ingesting data...", spinner="dots")
+        self.started = False
+
+    def update(
+        self,
+        name: str,
+        inc: int = 1,
+        total: Optional[int] = None,
+        message: Optional[str] = None,  # type: ignore
+        label: str = "",
+        **kwargs,
+    ) -> None:
+        self.status.update(self.current_step)
+
+    def _start(self, step: str) -> None:
+        self.current_step = self.__step_to_label(step)
+        self.status.start()
+
+    def __step_to_label(self, step: str) -> str:
+        verb = step.split(" ")[0].lower()
+        if verb.startswith("normalize"):
+            return "Normalizing the data"
+        elif verb.startswith("load"):
+            return "Loading the data to the destination"
+        elif verb.startswith("extract"):
+            return "Extracting the data from the source"
+
+        return f"{verb.capitalize()} the data"
+
+    def _stop(self) -> None:
+        self.status.stop()
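Note: SpinnerCollector subclasses dlt's Collector, so it can be handed to a pipeline as its progress reporter. A minimal sketch, assuming the standard dlt.pipeline(progress=...) hook; the pipeline name and destination are hypothetical:

import dlt

from ingestr.src.collector.spinner import SpinnerCollector

pipeline = dlt.pipeline(
    pipeline_name="ingest_example",   # hypothetical
    destination="duckdb",             # hypothetical
    progress=SpinnerCollector(),      # single rich status line instead of log output
)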
ingestr/src/destinations.py
CHANGED
@@ -60,6 +60,22 @@ class BigQueryDestination:
             base64.b64decode(credentials_base64[0]).decode("utf-8")
         )
 
+        staging_bucket = kwargs.get("staging_bucket", None)
+        if staging_bucket:
+            if not staging_bucket.startswith("gs://"):
+                raise ValueError("Staging bucket must start with gs://")
+
+            os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = staging_bucket
+            os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__PROJECT_ID"] = (
+                credentials.get("project_id", None)
+            )
+            os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__PRIVATE_KEY"] = (
+                credentials.get("private_key", None)
+            )
+            os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__CLIENT_EMAIL"] = (
+                credentials.get("client_email", None)
+            )
+
         project_id = None
         if source_fields.hostname:
             project_id = source_fields.hostname
@@ -83,6 +99,10 @@ class BigQueryDestination:
             "table_name": table_fields[-1],
         }
 
+        staging_bucket = kwargs.get("staging_bucket", None)
+        if staging_bucket:
+            res["staging"] = "filesystem"
+
         return res
 
     def post_load(self):
@@ -223,34 +243,47 @@ class AthenaDestination:
         query_result_path = bucket
 
         access_key_id = source_params.get("access_key_id", [None])[0]
-        if not access_key_id:
-            raise ValueError("The AWS access_key_id is required to connect to Athena.")
-
         secret_access_key = source_params.get("secret_access_key", [None])[0]
-
-
+        session_token = source_params.get("session_token", [None])[0]
+        profile_name = source_params.get("profile", ["default"])[0]
+        region_name = source_params.get("region_name", [None])[0]
 
-
+        if not access_key_id and not secret_access_key:
+            import botocore.session  # type: ignore
+
+            session = botocore.session.Session(profile=profile_name)
+            default = session.get_credentials()
+            if not profile_name:
+                raise ValueError(
+                    "You have to either provide access_key_id and secret_access_key pair or a valid AWS profile name."
+                )
+            access_key_id = default.access_key
+            secret_access_key = default.secret_key
+            session_token = default.token
+            if region_name is None:
+                region_name = session.get_config_variable("region")
 
-        region_name = source_params.get("region_name", [None])[0]
         if not region_name:
             raise ValueError("The region_name is required to connect to Athena.")
 
         os.environ["DESTINATION__BUCKET_URL"] = bucket
-
-
-
-
+        if access_key_id and secret_access_key:
+            os.environ["DESTINATION__CREDENTIALS__AWS_ACCESS_KEY_ID"] = access_key_id
+            os.environ["DESTINATION__CREDENTIALS__AWS_SECRET_ACCESS_KEY"] = (
+                secret_access_key
+            )
+        if session_token:
+            os.environ["DESTINATION__CREDENTIALS__AWS_SESSION_TOKEN"] = session_token
 
-        credentials = AwsCredentials(
-            aws_access_key_id=access_key_id,
-            aws_secret_access_key=secret_access_key,
-            region_name=region_name,
-        )
         return dlt.destinations.athena(
             query_result_bucket=query_result_path,
-            athena_work_group=
-            credentials=
+            athena_work_group=source_params.get("workgroup", [None])[0],
+            credentials=AwsCredentials(
+                aws_access_key_id=access_key_id,  # type: ignore
+                aws_secret_access_key=secret_access_key,  # type: ignore
+                aws_session_token=session_token,
+                region_name=region_name,
+            ),
             destination_name=bucket,
         )
 
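Note: the Athena destination now falls back to the AWS default credential chain when no explicit key pair is supplied. A minimal sketch of that resolution path, mirroring the botocore calls used above (the profile name is hypothetical):

import botocore.session

# Resolves credentials the way the AWS CLI does: environment variables,
# the shared credentials file, SSO cache, instance metadata, and so on.
session = botocore.session.Session(profile="default")
creds = session.get_credentials()

access_key_id = creds.access_key
secret_access_key = creds.secret_key
session_token = creds.token                           # None for long-lived keys
region_name = session.get_config_variable("region")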
ingestr/src/filters.py
CHANGED
@@ -1,6 +1,3 @@
-from dlt.common.libs.sql_alchemy import Table
-
-
 def cast_set_to_list(row):
     # this handles just the sqlalchemy backend for now
     if isinstance(row, dict):
@@ -32,6 +29,8 @@ def handle_mysql_empty_dates(row):
 
 
 def table_adapter_exclude_columns(cols: list[str]):
+    from dlt.common.libs.sql_alchemy import Table
+
     def excluder(table: Table):
         cols_to_remove = [col for col in table._columns if col.name in cols]  # type: ignore
         for col in cols_to_remove:
ingestr/src/partition.py
CHANGED
@@ -1,7 +1,6 @@
 from typing import Dict
 
 from dlt.common.schema.typing import TColumnSchema
-from dlt.destinations.adapters import athena_adapter, athena_partition
 from dlt.sources import DltResource, DltSource
 
 import ingestr.src.resource as resource
@@ -12,6 +11,8 @@ def apply_athena_hints(
     partition_column: str,
     additional_hints: Dict[str, TColumnSchema] = {},
 ) -> None:
+    from dlt.destinations.adapters import athena_adapter, athena_partition
+
     def _apply_partition_hint(resource: DltResource) -> None:
         columns = resource.columns if resource.columns else {}
 
ingestr/src/sources.py
CHANGED
@@ -13,107 +13,37 @@ from typing import (
     List,
     Literal,
     Optional,
+    TypeAlias,
     Union,
 )
 from urllib.parse import ParseResult, parse_qs, quote, urlencode, urlparse
 
-import dlt
-import gcsfs  # type: ignore
 import pendulum
-import s3fs  # type: ignore
-from dlt.common.configuration.specs import (
-    AwsCredentials,
-)
-from dlt.common.libs.sql_alchemy import (
-    Engine,
-    MetaData,
-)
 from dlt.common.time import ensure_pendulum_datetime
-from dlt.common.typing import TDataItem, TSecretStrValue
 from dlt.extract import Incremental
+from dlt.sources import incremental as dlt_incremental
 from dlt.sources.credentials import (
     ConnectionStringCredentials,
 )
-from dlt.sources.sql_database import sql_table
-from dlt.sources.sql_database.helpers import TableLoader
-from dlt.sources.sql_database.schema_types import (
-    ReflectionLevel,
-    SelectAny,
-    Table,
-    TTypeAdapter,
-)
-from google.ads.googleads.client import GoogleAdsClient  # type: ignore
-from sqlalchemy import Column
-from sqlalchemy import types as sa
 
 from ingestr.src import blob
-from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
-from ingestr.src.adjust.adjust_helpers import parse_filters
-from ingestr.src.airtable import airtable_source
-from ingestr.src.applovin import applovin_source
-from ingestr.src.applovin_max import applovin_max_source
-from ingestr.src.appstore import app_store
-from ingestr.src.appstore.client import AppStoreConnectClient
-from ingestr.src.arrow import memory_mapped_arrow
-from ingestr.src.asana_source import asana_source
-from ingestr.src.chess import source
-from ingestr.src.dynamodb import dynamodb
 from ingestr.src.errors import (
     InvalidBlobTableError,
     MissingValueError,
     UnsupportedResourceError,
 )
-from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
-from ingestr.src.filesystem import readers
-from ingestr.src.filters import table_adapter_exclude_columns
-from ingestr.src.frankfurter import frankfurter_source
-from ingestr.src.frankfurter.helpers import validate_dates
-from ingestr.src.github import github_reactions, github_repo_events, github_stargazers
-from ingestr.src.google_ads import google_ads
-from ingestr.src.google_analytics import google_analytics
-from ingestr.src.google_sheets import google_spreadsheet
-from ingestr.src.gorgias import gorgias_source
-from ingestr.src.hubspot import hubspot
-from ingestr.src.kafka import kafka_consumer
-from ingestr.src.kafka.helpers import KafkaCredentials
-from ingestr.src.kinesis import kinesis_stream
-from ingestr.src.klaviyo._init_ import klaviyo_source
-from ingestr.src.linkedin_ads import linked_in_ads_source
-from ingestr.src.linkedin_ads.dimension_time_enum import (
-    Dimension,
-    TimeGranularity,
-)
-from ingestr.src.mongodb import mongodb_collection
-from ingestr.src.notion import notion_databases
-from ingestr.src.personio import personio_source
-from ingestr.src.pipedrive import pipedrive_source
-from ingestr.src.salesforce import salesforce_source
-from ingestr.src.shopify import shopify_source
-from ingestr.src.slack import slack_source
-from ingestr.src.sql_database.callbacks import (
-    chained_query_adapter_callback,
-    custom_query_variable_subsitution,
-    limit_callback,
-    type_adapter_callback,
-)
-from ingestr.src.stripe_analytics import stripe_source
 from ingestr.src.table_definition import TableDefinition, table_string_to_dataclass
-from ingestr.src.tiktok_ads import tiktok_source
-from ingestr.src.time import isotime
-from ingestr.src.zendesk import zendesk_chat, zendesk_support, zendesk_talk
-from ingestr.src.zendesk.helpers.credentials import (
-    ZendeskCredentialsOAuth,
-    ZendeskCredentialsToken,
-)
-
-TableBackend = Literal["sqlalchemy", "pyarrow", "pandas", "connectorx"]
-TQueryAdapter = Callable[[SelectAny, Table], SelectAny]
 
 
 class SqlSource:
     table_builder: Callable
 
-    def __init__(self, table_builder=
+    def __init__(self, table_builder=None) -> None:
+        if table_builder is None:
+            from dlt.sources.sql_database import sql_table
+
+            table_builder = sql_table
+
         self.table_builder = table_builder
 
     def handles_incrementality(self) -> bool:
@@ -128,7 +58,7 @@ class SqlSource:
         if kwargs.get("incremental_key"):
             start_value = kwargs.get("interval_start")
             end_value = kwargs.get("interval_end")
-            incremental =
+            incremental = dlt_incremental(
                 kwargs.get("incremental_key", ""),
                 initial_value=start_value,
                 end_value=end_value,
@@ -183,6 +113,27 @@ class SqlSource:
         if uri.startswith("db2://"):
             uri = uri.replace("db2://", "db2+ibm_db://")
 
+        from dlt.common.libs.sql_alchemy import (
+            Engine,
+            MetaData,
+        )
+        from dlt.sources.sql_database.schema_types import (
+            ReflectionLevel,
+            SelectAny,
+            Table,
+            TTypeAdapter,
+        )
+        from sqlalchemy import Column
+        from sqlalchemy import types as sa
+
+        from ingestr.src.filters import table_adapter_exclude_columns
+        from ingestr.src.sql_database.callbacks import (
+            chained_query_adapter_callback,
+            custom_query_variable_subsitution,
+            limit_callback,
+            type_adapter_callback,
+        )
+
         query_adapters = []
         if kwargs.get("sql_limit"):
             query_adapters.append(
@@ -201,6 +152,13 @@ class SqlSource:
             defer_table_reflect = True
             query_value = table.split(":", 1)[1]
 
+            TableBackend: TypeAlias = Literal[
+                "sqlalchemy", "pyarrow", "pandas", "connectorx"
+            ]
+            TQueryAdapter: TypeAlias = Callable[[SelectAny, Table], SelectAny]
+            import dlt
+            from dlt.common.typing import TDataItem
+
             # this is a very hacky version of the table_rows function. it is built this way to go around the dlt's table loader.
             # I didn't want to write a full fledged sqlalchemy source for now, and wanted to benefit from the existing stuff to begin with.
             # this is by no means a production ready solution, but it works for now.
@@ -251,6 +209,8 @@ class SqlSource:
                 *cols,
             )
 
+            from dlt.sources.sql_database.helpers import TableLoader
+
             loader = TableLoader(
                 engine,
                 backend,
@@ -293,7 +253,12 @@ class SqlSource:
 class ArrowMemoryMappedSource:
     table_builder: Callable
 
-    def __init__(self, table_builder=
+    def __init__(self, table_builder=None) -> None:
+        if table_builder is None:
+            from ingestr.src.arrow import memory_mapped_arrow
+
+            table_builder = memory_mapped_arrow
+
         self.table_builder = table_builder
 
     def handles_incrementality(self) -> bool:
@@ -305,7 +270,7 @@ class ArrowMemoryMappedSource:
         start_value = kwargs.get("interval_start")
         end_value = kwargs.get("interval_end")
 
-        incremental =
+        incremental = dlt_incremental(
             kwargs.get("incremental_key", ""),
             initial_value=start_value,
             end_value=end_value,
@@ -338,7 +303,12 @@ class ArrowMemoryMappedSource:
 class MongoDbSource:
     table_builder: Callable
 
-    def __init__(self, table_builder=
+    def __init__(self, table_builder=None) -> None:
+        if table_builder is None:
+            from ingestr.src.mongodb import mongodb_collection
+
+            table_builder = mongodb_collection
+
         self.table_builder = table_builder
 
     def handles_incrementality(self) -> bool:
@@ -352,7 +322,7 @@ class MongoDbSource:
         start_value = kwargs.get("interval_start")
         end_value = kwargs.get("interval_end")
 
-        incremental =
+        incremental = dlt_incremental(
             kwargs.get("incremental_key", ""),
             initial_value=start_value,
             end_value=end_value,
@@ -377,7 +347,7 @@ class LocalCsvSource:
 
     def dlt_source(self, uri: str, table: str, **kwargs):
         def csv_file(
-            incremental: Optional[
+            incremental: Optional[dlt_incremental[Any]] = None,
         ):
             file_path = uri.split("://")[1]
             myFile = open(file_path, "r")
@@ -419,11 +389,13 @@ class LocalCsvSource:
             if page:
                 yield page
 
-
+        from dlt import resource
+
+        return resource(
             csv_file,
             merge_key=kwargs.get("merge_key"),  # type: ignore
         )(
-            incremental=
+            incremental=dlt_incremental(
                 kwargs.get("incremental_key", ""),
                 initial_value=kwargs.get("interval_start"),
                 end_value=kwargs.get("interval_end"),
@@ -439,7 +411,12 @@ class LocalCsvSource:
 class NotionSource:
     table_builder: Callable
 
-    def __init__(self, table_builder=
+    def __init__(self, table_builder=None) -> None:
+        if table_builder is None:
+            from ingestr.src.notion import notion_databases
+
+            table_builder = notion_databases
+
         self.table_builder = table_builder
 
     def handles_incrementality(self) -> bool:
@@ -499,6 +476,8 @@ class ShopifySource:
                 f"Table name '{table}' is not supported for Shopify source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
             )
 
+        from ingestr.src.shopify import shopify_source
+
         return shopify_source(
             private_app_password=api_key[0],
             shop_url=f"https://{source_fields.netloc}",
@@ -543,6 +522,8 @@ class GorgiasSource:
         if kwargs.get("interval_end"):
            date_args["end_date"] = kwargs.get("interval_end")
 
+        from ingestr.src.gorgias import gorgias_source
+
         return gorgias_source(
             domain=source_fields.netloc,
             email=email[0],
@@ -554,7 +535,12 @@ class GorgiasSource:
 class GoogleSheetsSource:
     table_builder: Callable
 
-    def __init__(self, table_builder=
+    def __init__(self, table_builder=None) -> None:
+        if table_builder is None:
+            from ingestr.src.google_sheets import google_spreadsheet
+
+            table_builder = google_spreadsheet
+
         self.table_builder = table_builder
 
     def handles_incrementality(self) -> bool:
@@ -635,6 +621,8 @@ class ChessSource:
             f"Resource '{table}' is not supported for Chess source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
             )
 
+        from ingestr.src.chess import source
+
         return source(players=list_players, **date_args).with_resources(
             table_mapping[table]
         )
@@ -685,6 +673,8 @@ class StripeAnalyticsSource:
         if kwargs.get("interval_end"):
             date_args["end_date"] = kwargs.get("interval_end")
 
+        from ingestr.src.stripe_analytics import stripe_source
+
         return stripe_source(
             endpoints=[
                 endpoint,
@@ -717,6 +707,11 @@ class FacebookAdsSource:
                 "access_token and accound_id are required to connect to Facebook Ads."
             )
 
+        from ingestr.src.facebook_ads import (
+            facebook_ads_source,
+            facebook_insights_source,
+        )
+
         endpoint = None
         if table in ["campaigns", "ad_sets", "ad_creatives", "ads", "leads"]:
             endpoint = table
@@ -774,6 +769,8 @@ class SlackSource:
         if kwargs.get("interval_end"):
             date_args["end_date"] = kwargs.get("interval_end")
 
+        from ingestr.src.slack import slack_source
+
         return slack_source(
             access_token=api_key[0],
             table_per_channel=False,
@@ -803,6 +800,8 @@ class HubspotSource:
 
         endpoint = None
 
+        from ingestr.src.hubspot import hubspot
+
         if table.startswith("custom:"):
             fields = table.split(":", 2)
             if len(fields) != 2:
@@ -860,6 +859,8 @@ class AirtableSource:
                 "base_id and access_token in the URI are required to connect to Airtable"
             )
 
+        from ingestr.src.airtable import airtable_source
+
         return airtable_source(
             base_id=base_id[0], table_names=tables, access_token=access_token[0]
         )
@@ -907,6 +908,9 @@ class KlaviyoSource:
             )
 
         start_date = kwargs.get("interval_start") or "2000-01-01"
+
+        from ingestr.src.klaviyo import klaviyo_source
+
         return klaviyo_source(
             api_key=api_key[0],
             start_date=start_date,
@@ -940,6 +944,9 @@ class KafkaSource:
             raise ValueError("group_id in the URI is required to connect to kafka")
 
         start_date = kwargs.get("interval_start")
+        from ingestr.src.kafka import kafka_consumer
+        from ingestr.src.kafka.helpers import KafkaCredentials
+
         return kafka_consumer(
             topics=[table],
             credentials=KafkaCredentials(
@@ -995,6 +1002,9 @@ class AdjustSource:
         if kwargs.get("interval_end"):
             end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
 
+        from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
+        from ingestr.src.adjust.adjust_helpers import parse_filters
+
         dimensions = None
         metrics = None
         filters = []
@@ -1101,6 +1111,12 @@ class ZendeskSource:
         if not subdomain:
             raise ValueError("Subdomain is required to connect with Zendesk")
 
+        from ingestr.src.zendesk import zendesk_chat, zendesk_support, zendesk_talk
+        from ingestr.src.zendesk.helpers.credentials import (
+            ZendeskCredentialsOAuth,
+            ZendeskCredentialsToken,
+        )
+
         if not source_fields.username and source_fields.password:
             oauth_token = source_fields.password
             if not oauth_token:
@@ -1189,6 +1205,8 @@ class S3Source:
 
         bucket_url = f"s3://{bucket_name}/"
 
+        import s3fs  # type: ignore
+
         fs = s3fs.S3FileSystem(
             key=access_key_id[0],
             secret=secret_access_key[0],
@@ -1206,6 +1224,8 @@ class S3Source:
                 "S3 Source only supports specific formats files: csv, jsonl, parquet"
             )
 
+        from ingestr.src.filesystem import readers
+
         return readers(bucket_url, fs, path_to_file).with_resources(endpoint)
 
 
@@ -1300,6 +1320,8 @@ class TikTokSource:
             filter_name = list(filters.keys())[0]
             filter_value = list(map(int, filters[list(filters.keys())[0]]))
 
+        from ingestr.src.tiktok_ads import tiktok_source
+
         return tiktok_source(
             start_date=start_date,
             end_date=end_date,
@@ -1348,7 +1370,12 @@ class AsanaSource:
                 f"Resource '{table}' is not supported for Asana source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
             )
 
+        import dlt
+
+        from ingestr.src.asana_source import asana_source
+
         dlt.secrets["sources.asana_source.access_token"] = access_token[0]
+
         src = asana_source()
         src.workspaces.add_filter(lambda w: w["gid"] == workspace)
         return src.with_resources(table)
@@ -1394,6 +1421,9 @@ class DynamoDBSource:
         if not secret_key:
             raise ValueError("secret_access_key is required to connect to Dynamodb")
 
+        from dlt.common.configuration.specs import AwsCredentials
+        from dlt.common.typing import TSecretStrValue
+
         creds = AwsCredentials(
             aws_access_key_id=access_key[0],
             aws_secret_access_key=TSecretStrValue(secret_key[0]),
@@ -1404,8 +1434,11 @@ class DynamoDBSource:
         incremental = None
         incremental_key = kwargs.get("incremental_key")
 
+        from ingestr.src.dynamodb import dynamodb
+        from ingestr.src.time import isotime
+
         if incremental_key:
-            incremental =
+            incremental = dlt_incremental(
                 incremental_key.strip(),
                 initial_value=isotime(kwargs.get("interval_start")),
                 end_value=isotime(kwargs.get("interval_end")),
@@ -1474,6 +1507,8 @@ class GoogleAnalyticsSource:
         if kwargs.get("interval_end") is not None:
             end_date = pendulum.instance(kwargs.get("interval_end"))  # type: ignore
 
+        from ingestr.src.google_analytics import google_analytics
+
         return google_analytics(
             property_id=property_id[0],
             start_date=start_date,
@@ -1511,6 +1546,12 @@ class GitHubSource:
 
         access_token = source_fields.get("access_token", [""])[0]
 
+        from ingestr.src.github import (
+            github_reactions,
+            github_repo_events,
+            github_stargazers,
+        )
+
         if table in ["issues", "pull_requests"]:
             return github_reactions(
                 owner=owner, name=repo, access_token=access_token
@@ -1543,6 +1584,8 @@ class AppleAppStoreSource:
         else:
             key = base64.b64decode(key_base64[0]).decode()  # type: ignore
 
+        from ingestr.src.appstore.client import AppStoreConnectClient
+
         return AppStoreConnectClient(key.encode(), key_id, issuer_id)
 
     def dlt_source(self, uri: str, table: str, **kwargs):
@@ -1583,6 +1626,8 @@ class AppleAppStoreSource:
         if app_ids is None:
             raise MissingValueError("app_id", "App Store")
 
+        from ingestr.src.appstore import app_store
+
         src = app_store(
             client,
             app_ids,
@@ -1639,6 +1684,8 @@ class GCSSource:
         # (The RECOMMENDED way of passing service account credentials)
         # directly with gcsfs. As a workaround, we construct the GCSFileSystem
         # and pass it directly to filesystem.readers.
+        import gcsfs  # type: ignore
+
         fs = gcsfs.GCSFileSystem(
             token=credentials,
         )
@@ -1655,6 +1702,8 @@ class GCSSource:
                 "GCS Source only supports specific formats files: csv, jsonl, parquet"
             )
 
+        from ingestr.src.filesystem import readers
+
         return readers(bucket_url, fs, path_to_file).with_resources(endpoint)
 
 
@@ -1662,7 +1711,9 @@ class GoogleAdsSource:
     def handles_incrementality(self) -> bool:
         return True
 
-    def init_client(self, params: Dict[str, List[str]])
+    def init_client(self, params: Dict[str, List[str]]):
+        from google.ads.googleads.client import GoogleAdsClient  # type: ignore
+
         dev_token = params.get("dev_token")
         if dev_token is None or len(dev_token) == 0:
             raise MissingValueError("dev_token", "Google Ads")
@@ -1716,6 +1767,7 @@ class GoogleAdsSource:
             raise MissingValueError("customer_id", "Google Ads")
 
         params = parse_qs(parsed_uri.query)
+
         client = self.init_client(params)
 
         start_date = kwargs.get("interval_start") or datetime.now(
@@ -1737,6 +1789,8 @@ class GoogleAdsSource:
             report_spec = table
             table = "daily_report"
 
+        from ingestr.src.google_ads import google_ads
+
         src = google_ads(
             client,
             customer_id,
@@ -1801,6 +1855,12 @@ class LinkedInAdsSource:
                 "'date' or 'month' is required to connect to LinkedIn Ads, please provide at least one of these dimensions."
             )
 
+        from ingestr.src.linkedin_ads import linked_in_ads_source
+        from ingestr.src.linkedin_ads.dimension_time_enum import (
+            Dimension,
+            TimeGranularity,
+        )
+
         if "date" in dimensions:
             time_granularity = TimeGranularity.daily
             dimensions.remove("date")
@@ -1859,6 +1919,8 @@ class AppLovinSource:
             custom_report = table
             table = "custom_report"
 
+        from ingestr.src.applovin import applovin_source
+
         src = applovin_source(
             api_key[0],
             start_date.strftime("%Y-%m-%d"),
@@ -1926,6 +1988,8 @@ class ApplovinMaxSource:
 
         end_date = interval_end.date() if interval_end is not None else None
 
+        from ingestr.src.applovin_max import applovin_max_source
+
         return applovin_max_source(
             start_date=start_date,
             end_date=end_date,
@@ -1954,6 +2018,8 @@ class SalesforceSource:
             if v is None:
                 raise MissingValueError(k, "Salesforce")
 
+        from ingestr.src.salesforce import salesforce_source
+
         src = salesforce_source(**creds)  # type: ignore
 
         if table not in src.resources:
@@ -2001,6 +2067,8 @@ class PersonioSource:
         ]:
             raise UnsupportedResourceError(table, "Personio")
 
+        from ingestr.src.personio import personio_source
+
         return personio_source(
             client_id=client_id[0],
             client_secret=client_secret[0],
@@ -2035,11 +2103,17 @@ class KinesisSource:
         if start_date is not None:
             # the resource will read all messages after this timestamp.
             start_date = ensure_pendulum_datetime(start_date)
+
+        from dlt.common.configuration.specs import AwsCredentials
+
+        from ingestr.src.kinesis import kinesis_stream
+
         credentials = AwsCredentials(
             aws_access_key_id=aws_access_key_id[0],
             aws_secret_access_key=aws_secret_access_key[0],
             region_name=region_name[0],
         )
+
         return kinesis_stream(
             stream_name=table, credentials=credentials, initial_at_timestamp=start_date
         )
@@ -2073,6 +2147,8 @@ class PipedriveSource:
         ]:
             raise UnsupportedResourceError(table, "Pipedrive")
 
+        from ingestr.src.pipedrive import pipedrive_source
+
         return pipedrive_source(
             pipedrive_api_key=api_key, since_timestamp=start_date
         ).with_resources(table)
@@ -2085,6 +2161,9 @@ class FrankfurterSource:
     def dlt_source(self, uri: str, table: str, **kwargs):
         # start and end dates only assigned and validated for exchange_rates table
         # Note: if an end date but no start date is provided, start date and end date will be set to current date
+        from ingestr.src.frankfurter import frankfurter_source
+        from ingestr.src.frankfurter.helpers import validate_dates
+
         if table == "exchange_rates":
             if kwargs.get("interval_start"):
                 start_date = ensure_pendulum_datetime(str(kwargs.get("interval_start")))
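Note: the common thread in sources.py, as in filters.py, partition.py, and the telemetry module below, is moving heavy imports from module level into the methods that need them, so the CLI can start without importing every connector SDK. A minimal sketch of the pattern with a hypothetical source:

class ExampleSource:
    """Hypothetical source illustrating the deferred-import pattern used above."""

    def dlt_source(self, uri: str, table: str, **kwargs):
        # Imported only when this source is actually used, not at CLI start-up.
        from ingestr.src.example import example_source  # hypothetical module

        return example_source(uri=uri).with_resources(table)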
ingestr/src/telemetry/event.py
CHANGED
@@ -1,13 +1,4 @@
 import os
-import platform
-
-import machineid
-import rudderstack.analytics as rudder_analytics  # type: ignore
-
-from ingestr.src.version import __version__  # type: ignore
-
-rudder_analytics.write_key = "2cUr13DDQcX2x2kAfMEfdrKvrQa"
-rudder_analytics.dataPlaneUrl = "https://getbruinbumlky.dataplane.rudderstack.com"
 
 
 def track(event_name, event_properties: dict):
@@ -16,6 +7,16 @@ def track(event_name, event_properties: dict):
     ):
         return
 
+    import platform
+
+    import machineid
+    import rudderstack.analytics as rudder_analytics  # type: ignore
+
+    from ingestr.src.version import __version__  # type: ignore
+
+    rudder_analytics.write_key = "2cUr13DDQcX2x2kAfMEfdrKvrQa"
+    rudder_analytics.dataPlaneUrl = "https://getbruinbumlky.dataplane.rudderstack.com"
+
     try:
         if not event_properties:
             event_properties = {}
{ingestr-0.13.31.dist-info → ingestr-0.13.33.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.
+Version: 0.13.33
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -419,11 +419,6 @@ Pull requests are welcome. However, please open an issue first to discuss what y
         <td>Google Analytics</td>
         <td>✅</td>
         <td>-</td>
-    </tr>
-    <tr>
-        <td>Intercom</td>
-        <td>✅</td>
-        <td>-</td>
     </tr>
     <tr>
         <td>Klaviyo</td>
{ingestr-0.13.31.dist-info → ingestr-0.13.33.dist-info}/RECORD
CHANGED
@@ -1,26 +1,26 @@
 ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
-ingestr/main.py,sha256=
+ingestr/main.py,sha256=mRlGSqi2sHcZ2AKlwn5MqoMvFxXlSjcZxmPJr76rmRk,25187
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
-ingestr/src/buildinfo.py,sha256=
-ingestr/src/destinations.py,sha256=
+ingestr/src/buildinfo.py,sha256=0FHynPZSrkbfDnEUgBysgvemGcO2crTfn654je6lyHY,21
+ingestr/src/destinations.py,sha256=0fEwLY78SQDXbHcX4iz4Xc7H8FXN-QhVJL9uoUTZOs4,12924
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
 ingestr/src/factory.py,sha256=659h_sVRBhtPv2dvtOK8tf3PtUhlK3KsWLrb20_iQKw,5333
-ingestr/src/filters.py,sha256=
+ingestr/src/filters.py,sha256=C-_TIVkF_cxZBgG-Run2Oyn0TAhJgA8IWXZ-OPY3uek,1136
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
-ingestr/src/partition.py,sha256=
+ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=JiSqOyuvun0T3JQriYDmp_8EPe_2cepwo0wWtI-jOrU,76003
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
 ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
-ingestr/src/adjust/adjust_helpers.py,sha256=
-ingestr/src/airtable/__init__.py,sha256=
+ingestr/src/adjust/adjust_helpers.py,sha256=IHSS94A7enOWkZ8cP5iW3RdYt0Xl3qZGAmDc1Xy4qkI,3802
+ingestr/src/airtable/__init__.py,sha256=mdzeaq0g12HR8gbhtVR_aS_5GVWPZn6XD-zHUE5FunI,2788
 ingestr/src/applovin/__init__.py,sha256=X_YCLppPrnL8KXfYWICE_uDfMzHHH3JZ-DBGZ1RlaOI,6984
-ingestr/src/applovin_max/__init__.py,sha256=
+ingestr/src/applovin_max/__init__.py,sha256=ZrxOUSirGxkGDmM9wsQO3anwNVzqtoCwN_OuCXfPkXE,3285
 ingestr/src/appsflyer/__init__.py,sha256=QoK-B3cYYMD3bqzQaLWNH6FkJyjRbzRkBF2n6urxubs,8071
-ingestr/src/appsflyer/client.py,sha256=
+ingestr/src/appsflyer/client.py,sha256=E6xPW4KlbBnQZ0K4eq2Xgb3AmGrtrzIX9bX8EnQr-D4,3615
 ingestr/src/appstore/__init__.py,sha256=3P4VZH2WJF477QjW19jMTwu6L8DXcLkYSdutnvp3AmM,4742
 ingestr/src/appstore/client.py,sha256=qY9nBZPNIAveR-Dn-pW141Mr9xi9LMOz2HHfnfueHvE,3975
 ingestr/src/appstore/errors.py,sha256=KVpPWth5qlv6_QWEm3aJAt3cdf6miPJs0UDzxknx2Ms,481
@@ -33,6 +33,7 @@ ingestr/src/asana_source/settings.py,sha256=-2tpdkwh04RvLKFvwQodnFLYn9MaxOO1hseb
 ingestr/src/chess/__init__.py,sha256=y0Q8aKBigeKf3N7wuB_gadMQjVJzBPUT8Jhp1ObEWjk,6812
 ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
 ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
+ingestr/src/collector/spinner.py,sha256=_ZUqF5MI43hVIULdjF5s5mrAZbhEFXaiWirQmrv3Yk4,1201
 ingestr/src/dynamodb/__init__.py,sha256=swhxkeYBbJ35jn1IghCtvYWT2BM33KynVCh_oR4z28A,2264
 ingestr/src/facebook_ads/__init__.py,sha256=reEpSr4BaKA1wO3qVgCH51gW-TgWkbJ_g24UIhJWbac,9286
 ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
@@ -68,12 +69,12 @@ ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k
 ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
 ingestr/src/kinesis/__init__.py,sha256=u5ThH1y8uObZKXgIo71em1UnX6MsVHWOjcf1jKqKbE8,6205
 ingestr/src/kinesis/helpers.py,sha256=aF0GCDKSectaaW8XPdERY_6bUs0ky19dcBs24ZFn-o0,2473
-ingestr/src/klaviyo/
+ingestr/src/klaviyo/__init__.py,sha256=o_noUgbxLk36s4f9W56_ibPorF0n7kVapPUlV0p-jfA,7875
 ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
 ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
 ingestr/src/linkedin_ads/__init__.py,sha256=CAPWFyV24loziiphbLmODxZUXZJwm4JxlFkr56q0jfo,1855
 ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffWJxmAadZMbpTvf0xkQc,198
-ingestr/src/linkedin_ads/helpers.py,sha256=
+ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnbLfxzds,4498
 ingestr/src/mongodb/__init__.py,sha256=aMr1PFIDUMRv--ne61lR17HudsN-fsrzMeyxe9PqK2s,4335
 ingestr/src/mongodb/helpers.py,sha256=y9rYKR8eyIqam_eNsZmwSYevgi8mghh7Zp8qhTHl65s,5652
 ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
@@ -103,10 +104,10 @@ ingestr/src/sql_database/callbacks.py,sha256=sEFFmXxAURY3yeBjnawigDtq9LBCvi8HFqG
 ingestr/src/stripe_analytics/__init__.py,sha256=0HCL0qsrh_si1RR3a4k9XS94VWQ4v9aG7CqXF-V-57M,4593
 ingestr/src/stripe_analytics/helpers.py,sha256=iqZOyiGIOhOAhVXXU16DP0hkkTKcTrDu69vAJoTxgEo,1976
 ingestr/src/stripe_analytics/settings.py,sha256=rl9L5XumxO0pjkZf7MGesXHp4QLRgnz3RWLuDWDBKXo,380
-ingestr/src/telemetry/event.py,sha256=
+ingestr/src/telemetry/event.py,sha256=W7bs4uVfPakQ5otmiqgqu1l5SqjYx1p87wudnWXckBc,949
 ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
 ingestr/src/tiktok_ads/__init__.py,sha256=aEqCl3dTH6_d43s1jgAeG1UasEls_SlorORulYMwIL8,4590
-ingestr/src/tiktok_ads/tiktok_helpers.py,sha256=
+ingestr/src/tiktok_ads/tiktok_helpers.py,sha256=jmWHvZzN1Vt_PWrJkgq5a2wIwon-OBEzXoZx0jEy-74,3905
 ingestr/src/zendesk/__init__.py,sha256=tmJ_jdb6kpwmEKpcv6Im71-bOZI6h-Tcofe18OH4I24,17762
 ingestr/src/zendesk/settings.py,sha256=Vdj706nTJFQ-3KH4nO97iYCQuba3dV3E9gfnmLK6xwU,2294
 ingestr/src/zendesk/helpers/__init__.py,sha256=YTJejCiUjfIcsj9FrkY0l-JGYDI7RRte1Ydq5FDH_0c,888
@@ -121,8 +122,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
+ingestr-0.13.33.dist-info/METADATA,sha256=EZJ3I_1bEu7BeaeNBzWKNePpaLkw56jWoPiJBSUJF_Y,13574
+ingestr-0.13.33.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.33.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.33.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.33.dist-info/RECORD,,

{ingestr-0.13.31.dist-info → ingestr-0.13.33.dist-info}/WHEEL
File without changes
{ingestr-0.13.31.dist-info → ingestr-0.13.33.dist-info}/entry_points.txt
File without changes
{ingestr-0.13.31.dist-info → ingestr-0.13.33.dist-info}/licenses/LICENSE.md
File without changes