ingestr 0.9.1__py3-none-any.whl → 0.9.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ingestr might be problematic.
- ingestr/main.py +3 -1
- ingestr/src/adjust/__init__.py +100 -0
- ingestr/src/adjust/{helpers.py → adjust_helpers.py} +46 -23
- ingestr/src/arrow/__init__.py +77 -0
- ingestr/src/factory.py +3 -0
- ingestr/src/mongodb/__init__.py +1 -1
- ingestr/src/mongodb/helpers.py +1 -1
- ingestr/src/sources.py +109 -23
- ingestr/src/version.py +1 -1
- {ingestr-0.9.1.dist-info → ingestr-0.9.3.dist-info}/METADATA +3 -2
- {ingestr-0.9.1.dist-info → ingestr-0.9.3.dist-info}/RECORD +14 -13
- {ingestr-0.9.1.dist-info → ingestr-0.9.3.dist-info}/WHEEL +1 -1
- ingestr/src/adjust/_init_.py +0 -31
- {ingestr-0.9.1.dist-info → ingestr-0.9.3.dist-info}/entry_points.txt +0 -0
- {ingestr-0.9.1.dist-info → ingestr-0.9.3.dist-info}/licenses/LICENSE.md +0 -0
ingestr/main.py
CHANGED
@@ -323,10 +323,12 @@ def ingest(
        else "Platform-specific"
    )

+    source_table_print = source_table.split(":")[0]
+
    print()
    print("[bold green]Initiated the pipeline with the following:[/bold green]")
    print(
-        f"[bold yellow] Source:[/bold yellow] {factory.source_scheme} / {source_table}"
+        f"[bold yellow] Source:[/bold yellow] {factory.source_scheme} / {source_table_print}"
    )
    print(
        f"[bold yellow] Destination:[/bold yellow] {factory.destination_scheme} / {dest_table}"
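The new source_table_print simply drops everything after the first colon, so long custom table specs print compactly. A quick illustration (the table string here is invented for this note):

source_table = "custom:day,campaign:installs,cost"
print(source_table.split(":")[0])  # prints: custom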
ingestr/src/adjust/__init__.py
ADDED
@@ -0,0 +1,100 @@
+from typing import Optional, Sequence
+
+import dlt
+import pendulum
+from dlt.sources import DltResource
+
+from .adjust_helpers import DEFAULT_DIMENSIONS, DEFAULT_METRICS, AdjustAPI
+
+REQUIRED_CUSTOM_DIMENSIONS = [
+    "hour",
+    "day",
+    "week",
+    "month",
+    "quarter",
+    "year",
+]
+KNOWN_TYPE_HINTS = {
+    "hour": {"data_type": "timestamp"},
+    "day": {"data_type": "date"},
+    "week": {"data_type": "text"},
+    "month": {"data_type": "text"},
+    "quarter": {"data_type": "text"},
+    "year": {"data_type": "text"},
+    "campaign": {"data_type": "text"},
+    "adgroup": {"data_type": "text"},
+    "creative": {"data_type": "text"},
+    # metrics
+    "installs": {"data_type": "bigint"},
+    "clicks": {"data_type": "bigint"},
+    "cost": {"data_type": "decimal"},
+    "network_cost": {"data_type": "decimal"},
+    "impressions": {"data_type": "bigint"},
+    "ad_revenue": {"data_type": "decimal"},
+    "all_revenue": {"data_type": "decimal"},
+}
+
+
+@dlt.source(max_table_nesting=0)
+def adjust_source(
+    start_date: pendulum.DateTime,
+    end_date: pendulum.DateTime,
+    api_key: str,
+    dimensions: Optional[list[str]] = None,
+    metrics: Optional[list[str]] = None,
+    merge_key: Optional[str] = None,
+    filters: Optional[dict] = None,
+) -> Sequence[DltResource]:
+    @dlt.resource(write_disposition="merge", merge_key="day")
+    def campaigns():
+        adjust_api = AdjustAPI(api_key=api_key)
+        yield from adjust_api.fetch_report_data(
+            start_date=start_date,
+            end_date=end_date,
+            dimensions=DEFAULT_DIMENSIONS,
+            metrics=DEFAULT_METRICS,
+            filters=filters,
+        )
+
+    @dlt.resource(write_disposition="merge", merge_key="day")
+    def creatives():
+        adjust_api = AdjustAPI(api_key=api_key)
+        yield from adjust_api.fetch_report_data(
+            start_date=start_date,
+            end_date=end_date,
+            dimensions=DEFAULT_DIMENSIONS + ["adgroup", "creative"],
+            metrics=DEFAULT_METRICS,
+            filters=filters,
+        )
+
+    merge_key = merge_key
+    for dimension in REQUIRED_CUSTOM_DIMENSIONS:
+        if dimension in dimensions:
+            merge_key = dimension
+            break
+
+    type_hints = {}
+    for dimension in dimensions:
+        if dimension in KNOWN_TYPE_HINTS:
+            type_hints[dimension] = KNOWN_TYPE_HINTS[dimension]
+    for metric in metrics:
+        if metric in KNOWN_TYPE_HINTS:
+            type_hints[metric] = KNOWN_TYPE_HINTS[metric]
+
+    @dlt.resource(
+        write_disposition={"disposition": "merge", "strategy": "delete+insert"},
+        merge_key=merge_key,
+        primary_key=dimensions,
+        columns=type_hints,
+    )
+    def custom():
+        adjust_api = AdjustAPI(api_key=api_key)
+        yield from adjust_api.fetch_report_data(
+            start_date=start_date,
+            end_date=end_date,
+            dimensions=dimensions,
+            metrics=metrics,
+            filters=filters,
+        )
+
+    return campaigns, creatives, custom
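For context, a minimal sketch of driving the new adjust_source directly from dlt. The pipeline name, destination, and token are placeholder assumptions, not part of the diff; ingestr normally constructs this source through AdjustSource in sources.py.

import dlt
import pendulum

from ingestr.src.adjust import adjust_source

pipeline = dlt.pipeline(pipeline_name="adjust_demo", destination="duckdb")  # hypothetical names
source = adjust_source(
    start_date=pendulum.now().subtract(days=7),
    end_date=pendulum.now(),
    api_key="ADJUST_API_TOKEN",  # placeholder, not a real token
    dimensions=["day", "campaign"],  # "day" satisfies REQUIRED_CUSTOM_DIMENSIONS
    metrics=["installs", "cost"],
)
pipeline.run(source.with_resources("custom"))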
ingestr/src/adjust/{helpers.py → adjust_helpers.py}
RENAMED
@@ -1,3 +1,6 @@
+from typing import Optional
+
+import pendulum
 import requests
 from dlt.sources.helpers.requests import Client
 from requests.exceptions import HTTPError
@@ -32,33 +35,32 @@ class AdjustAPI:

     def fetch_report_data(
         self,
-        start_date,
-        end_date,
+        start_date: pendulum.DateTime,
+        end_date: pendulum.DateTime,
         dimensions=DEFAULT_DIMENSIONS,
         metrics=DEFAULT_METRICS,
-
-        ad_spend_mode="network",
-        attribution_source="first",
-        attribution_type="all",
-        cohort_maturity="immature",
-        reattributed="all",
-        sandbox="false",
+        filters: Optional[dict] = None,
     ):
         headers = {"Authorization": f"Bearer {self.api_key}"}
-
-
-
-
-
-
-
-
-
-        "
-
-
-
+        params = {}
+
+        if filters:
+            for key, value in filters.items():
+                if isinstance(value, list):
+                    params[key] = ",".join(value)
+                else:
+                    params[key] = value
+
+        params["date_period"] = (
+            f"{start_date.format('YYYY-MM-DD')}:{end_date.format('YYYY-MM-DD')}"
+        )
+        params["dimensions"] = ",".join(dimensions)
+        params["metrics"] = ",".join(metrics)
+
+        if start_date > end_date:
+            raise ValueError(
+                f"Invalid date range: Start date ({start_date}) must be earlier than end date ({end_date})."
+            )

     def retry_on_limit(
         response: requests.Response, exception: BaseException
@@ -80,3 +82,24 @@ class AdjustAPI:
             yield items
         else:
             raise HTTPError(f"Request failed with status code: {response.status_code}")
+
+
+def parse_filters(filters_raw: str) -> dict:
+    # Parse filter string like "key1=value1,key2=value2,value3,value4"
+    filters = {}
+    current_key = None
+
+    for item in filters_raw.split(","):
+        if "=" in item:
+            # Start of a new key-value pair
+            key, value = item.split("=")
+            filters[key] = [value]  # Always start with a list
+            current_key = key
+        elif current_key is not None:
+            # Additional value for the current key
+            filters[current_key].append(item)
+
+    # Convert single-item lists to simple values
+    filters = {k: v[0] if len(v) == 1 else v for k, v in filters.items()}
+
+    return filters
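As written above, parse_filters keeps comma-separated values that follow a key=value pair attached to that key, and collapses single-value lists back to scalars. Tracing the code on invented inputs:

from ingestr.src.adjust.adjust_helpers import parse_filters

parse_filters("country=us")                  # {'country': 'us'}
parse_filters("country=us,de,fr")            # {'country': ['us', 'de', 'fr']}
parse_filters("country=us,os_name=android")  # {'country': 'us', 'os_name': 'android'}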
ingestr/src/arrow/__init__.py
ADDED
@@ -0,0 +1,77 @@
+"""Source that loads tables form Airtable.
+Supports whitelisting of tables or loading of all tables from a specified base.
+"""
+
+from typing import Any, Optional
+
+import dlt
+from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns
+from dlt.extract.items import TTableHintTemplate
+
+
+def memory_mapped_arrow(
+    path: str,
+    columns: Optional[TTableSchemaColumns] = None,
+    primary_key: Optional[TTableHintTemplate[TColumnNames]] = None,
+    merge_key: Optional[TTableHintTemplate[TColumnNames]] = None,
+    incremental: Optional[dlt.sources.incremental[Any]] = None,
+):
+    @dlt.resource(
+        name="arrow_mmap",
+        columns=columns,  # type: ignore
+        primary_key=primary_key,  # type: ignore
+        merge_key=merge_key,  # type: ignore
+    )
+    def arrow_mmap(
+        incremental: Optional[dlt.sources.incremental[Any]] = incremental,
+    ):
+        import pyarrow as pa  # type: ignore
+        import pyarrow.ipc as ipc  # type: ignore
+
+        with pa.memory_map(path, "rb") as mmap:
+            reader: ipc.RecordBatchFileReader = ipc.open_file(mmap)
+            table = reader.read_all()
+
+            last_value = None
+            end_value = None
+            if incremental:
+                if incremental.cursor_path not in table.column_names:
+                    raise KeyError(
+                        f"Cursor column '{incremental.cursor_path}' does not exist in table"
+                    )
+
+                last_value = incremental.last_value
+                end_value = incremental.end_value
+
+            if last_value is not None:
+                # Check if the column is a date type
+                if pa.types.is_temporal(table.schema.field(incremental.cursor_path).type):  # type: ignore
+                    if not isinstance(last_value, pa.TimestampScalar):
+                        last_value = pa.scalar(last_value, type=pa.timestamp("ns"))
+
+                    table = table.filter(
+                        pa.compute.field(incremental.cursor_path) > last_value  # type: ignore
+                    )
+                else:
+                    # For non-date types, use direct comparison
+                    table = table.filter(
+                        pa.compute.field(incremental.cursor_path) > last_value  # type: ignore
+                    )
+
+            if end_value is not None:
+                if pa.types.is_timestamp(table.schema.field(incremental.cursor_path).type):  # type: ignore
+                    # Convert end_value to timestamp if it's not already
+                    if not isinstance(end_value, pa.TimestampScalar):
+                        end_value = pa.scalar(end_value, type=pa.timestamp("ns"))
+                    table = table.filter(
+                        pa.compute.field(incremental.cursor_path) < end_value  # type: ignore
+                    )
+                else:
+                    # For non-date types, use direct comparison
+                    table = table.filter(
+                        pa.compute.field(incremental.cursor_path) < end_value  # type: ignore
+                    )
+
+            yield table
+
+    return arrow_mmap
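A minimal sketch of exercising the new resource on its own, assuming a small Arrow IPC file written locally; the path, column names, and pipeline name are illustrative, not from the diff:

import dlt
import pyarrow as pa
import pyarrow.ipc as ipc

from ingestr.src.arrow import memory_mapped_arrow

# Write a tiny Arrow IPC file; pa.memory_map + ipc.open_file above expect this "file" format.
table = pa.table({"id": [1, 2, 3], "name": ["a", "b", "c"]})
with ipc.new_file("/tmp/events.arrow", table.schema) as writer:
    writer.write_table(table)

pipeline = dlt.pipeline(pipeline_name="arrow_demo", destination="duckdb")
pipeline.run(memory_mapped_arrow(path="/tmp/events.arrow"), table_name="events")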
ingestr/src/factory.py
CHANGED
@@ -18,6 +18,7 @@ from ingestr.src.sources import (
     AdjustSource,
     AirtableSource,
     AppsflyerSource,
+    ArrowMemoryMappedSource,
     ChessSource,
     FacebookAdsSource,
     GoogleSheetsSource,
@@ -136,6 +137,8 @@ class SourceDestinationFactory:
             return AdjustSource()
         elif self.source_scheme == "zendesk":
             return ZendeskSource()
+        elif self.source_scheme == "mmap":
+            return ArrowMemoryMappedSource()
         elif self.source_scheme == "s3":
             return S3Source()
         else:
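With the mmap scheme registered here, an ingestion from a memory-mapped Arrow file should presumably look something like `ingestr ingest --source-uri mmap:///tmp/events.arrow --source-table events --dest-uri duckdb:///local.db --dest-table raw.events` (paths illustrative; flags follow ingestr's existing CLI).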
ingestr/src/mongodb/__init__.py
CHANGED
@@ -65,7 +65,7 @@ def mongodb(
         sections=("sources", "mongodb"), spec=MongoDbCollectionResourceConfiguration
     )
     def mongodb_collection(
-        connection_url: str = dlt.secrets.value,
+        connection_url: str = dlt.config.value,
         database: Optional[str] = dlt.config.value,
         collection: str = dlt.config.value,
         incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
ingestr/src/mongodb/helpers.py
CHANGED
@@ -155,7 +155,7 @@ class MongoDbCollectionConfiguration(BaseConfiguration):

 @configspec
 class MongoDbCollectionResourceConfiguration(BaseConfiguration):
-    connection_url: str = dlt.secrets.value
+    connection_url: str = dlt.config.value
     database: Optional[str] = dlt.config.value
     collection: str = dlt.config.value
     incremental: Optional[dlt.sources.incremental] = None  # type: ignore[type-arg]
ingestr/src/sources.py
CHANGED
@@ -1,17 +1,21 @@
 import base64
 import csv
 import json
-from datetime import date, datetime
+from datetime import date
 from typing import Any, Callable, Optional
 from urllib.parse import parse_qs, urlparse

 import dlt
+import pendulum
 from dlt.common.configuration.specs import AwsCredentials
+from dlt.common.time import ensure_pendulum_datetime
 from dlt.common.typing import TSecretStrValue

-from ingestr.src.adjust._init_ import adjust_source
+from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
+from ingestr.src.adjust.adjust_helpers import parse_filters
 from ingestr.src.airtable import airtable_source
 from ingestr.src.appsflyer._init_ import appsflyer_source
+from ingestr.src.arrow import memory_mapped_arrow
 from ingestr.src.chess import source
 from ingestr.src.facebook_ads import facebook_ads_source, facebook_insights_source
 from ingestr.src.filesystem import readers
@@ -75,6 +79,51 @@ class SqlSource:
         return table_instance


+class ArrowMemoryMappedSource:
+    table_builder: Callable
+
+    def __init__(self, table_builder=memory_mapped_arrow) -> None:
+        self.table_builder = table_builder
+
+    def handles_incrementality(self) -> bool:
+        return False
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        import os
+
+        incremental = None
+        if kwargs.get("incremental_key"):
+            start_value = kwargs.get("interval_start")
+            end_value = kwargs.get("interval_end")
+
+            incremental = dlt.sources.incremental(
+                kwargs.get("incremental_key", ""),
+                initial_value=start_value,
+                end_value=end_value,
+            )
+
+        file_path = uri.split("://")[1]
+        if not os.path.exists(file_path):
+            raise ValueError(f"File at path {file_path} does not exist")
+
+        if os.path.isdir(file_path):
+            raise ValueError(
+                f"Path {file_path} is a directory, it should be an Arrow memory mapped file"
+            )
+
+        primary_key = kwargs.get("primary_key")
+        merge_key = kwargs.get("merge_key")
+
+        table_instance = self.table_builder(
+            path=file_path,
+            incremental=incremental,
+            merge_key=merge_key,
+            primary_key=primary_key,
+        )
+
+        return table_instance
+
+
 class MongoDbSource:
     table_builder: Callable

@@ -656,12 +705,12 @@ class KafkaSource:
             credentials=KafkaCredentials(
                 bootstrap_servers=bootstrap_servers[0],
                 group_id=group_id[0],
-                security_protocol=security_protocol[0]
-                if len(security_protocol) > 0
-                else None,  # type: ignore
-                sasl_mechanisms=sasl_mechanisms[0]
-                if len(sasl_mechanisms) > 0
-                else None,  # type: ignore
+                security_protocol=(
+                    security_protocol[0] if len(security_protocol) > 0 else None
+                ),  # type: ignore
+                sasl_mechanisms=(
+                    sasl_mechanisms[0] if len(sasl_mechanisms) > 0 else None
+                ),  # type: ignore
                 sasl_username=sasl_username[0] if len(sasl_username) > 0 else None,  # type: ignore
                 sasl_password=sasl_password[0] if len(sasl_password) > 0 else None,  # type: ignore
             ),
@@ -673,10 +722,10 @@ class KafkaSource:

 class AdjustSource:
     def handles_incrementality(self) -> bool:
-        return True
+        return False

     def dlt_source(self, uri: str, table: str, **kwargs):
-        if kwargs.get("incremental_key"):
+        if kwargs.get("incremental_key") and not table.startswith("custom:"):
             raise ValueError(
                 "Adjust takes care of incrementality on its own, you should not provide incremental_key"
             )
@@ -688,25 +737,62 @@ class AdjustSource:
         if not api_key:
             raise ValueError("api_key in the URI is required to connect to Adjust")

-
-        interval_end = kwargs.get("interval_end")
+        lookback_days = int(source_params.get("lookback_days", [30])[0])

         start_date = (
-
-
-
-            interval_end.strftime("%Y-%m-%d")
-            if interval_end
-            else datetime.now().strftime("%Y-%m-%d")
+            pendulum.now()
+            .replace(hour=0, minute=0, second=0, microsecond=0)
+            .subtract(days=lookback_days)
         )
+        if kwargs.get("interval_start"):
+            start_date = (
+                ensure_pendulum_datetime(str(kwargs.get("interval_start")))
+                .replace(hour=0, minute=0, second=0, microsecond=0)
+                .subtract(days=lookback_days)
+            )

-
-        if
-
+        end_date = pendulum.now()
+        if kwargs.get("interval_end"):
+            end_date = ensure_pendulum_datetime(str(kwargs.get("interval_end")))
+
+        dimensions = None
+        metrics = None
+        filters = []
+        if table.startswith("custom:"):
+            fields = table.split(":")
+            if len(fields) != 3 and len(fields) != 4:
+                raise ValueError(
+                    "Invalid Adjust custom table format. Expected format: custom:<dimensions>,<metrics> or custom:<dimensions>:<metrics>:<filters>"
+                )
+
+            dimensions = fields[1].split(",")
+            metrics = fields[2].split(",")
+            table = "custom"
+
+            found = False
+            for dimension in dimensions:
+                if dimension in REQUIRED_CUSTOM_DIMENSIONS:
+                    found = True
+                    break
+
+            if not found:
+                raise ValueError(
+                    f"At least one of the required dimensions is missing for custom Adjust report: {REQUIRED_CUSTOM_DIMENSIONS}"
+                )
+
+            if len(fields) == 4:
+                filters_raw = fields[3]
+                filters = parse_filters(filters_raw)

         return adjust_source(
-            start_date=start_date,
-
+            start_date=start_date,
+            end_date=end_date,
+            api_key=api_key[0],
+            dimensions=dimensions,
+            metrics=metrics,
+            merge_key=kwargs.get("merge_key"),
+            filters=filters,
+        ).with_resources(table)


 class AppsflyerSource:
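To make the new custom-table contract concrete, here is how dlt_source above decomposes an invented table argument:

table = "custom:day,campaign:installs,cost:country=us,de"
fields = table.split(":")
# fields == ["custom", "day,campaign", "installs,cost", "country=us,de"]

dimensions = fields[1].split(",")   # ["day", "campaign"]; "day" satisfies REQUIRED_CUSTOM_DIMENSIONS
metrics = fields[2].split(",")      # ["installs", "cost"]
filters = parse_filters(fields[3])  # {"country": ["us", "de"]}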
ingestr/src/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.9.1"
+__version__ = "0.9.3"
{ingestr-0.9.1.dist-info → ingestr-0.9.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: ingestr
-Version: 0.9.1
+Version: 0.9.3
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -28,7 +28,6 @@ Requires-Dist: py-machineid==0.5.1
 Requires-Dist: pyairtable==2.3.3
 Requires-Dist: pymongo==4.6.3
 Requires-Dist: pymysql==1.1.0
-Requires-Dist: pyodbc==5.1.0
 Requires-Dist: pyrate-limiter==3.6.1
 Requires-Dist: redshift-connector==2.1.0
 Requires-Dist: rich==13.7.1
@@ -44,6 +43,8 @@ Requires-Dist: stripe==10.7.0
 Requires-Dist: tqdm==4.66.2
 Requires-Dist: typer==0.12.3
 Requires-Dist: types-requests==2.32.0.20240907
+Provides-Extra: odbc
+Requires-Dist: pyodbc==5.1.0; extra == 'odbc'
 Provides-Extra: oracle
 Requires-Dist: cx-oracle==8.3.0; extra == 'oracle'
 Description-Content-Type: text/markdown
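The net effect of the METADATA change is that pyodbc is no longer a hard dependency: it moves behind a new odbc extra, so a plain install skips it and something like `pip install 'ingestr[odbc]'` would pull it back in.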
{ingestr-0.9.1.dist-info → ingestr-0.9.3.dist-info}/RECORD
CHANGED
@@ -1,15 +1,16 @@
-ingestr/main.py,sha256=
+ingestr/main.py,sha256=B8TAQotJoYSvmaQQm33o2lv99OVLYNz-1Aw_fgQahwE,17718
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/destinations.py,sha256=2SfPMjtTelPmzQmc3zNs8xGcKIPuGn_hoZFIBUuhjXI,6338
-ingestr/src/factory.py,sha256=
-ingestr/src/sources.py,sha256=
+ingestr/src/factory.py,sha256=ft81B-YJgvEROkHAZjMjTIS7IYvle-uZQv45b7-Wfk0,4947
+ingestr/src/sources.py,sha256=0eLrkd3oVsK9bWG7rp2Asu5QNrAUP2ZeMIIkduEuLL8,33756
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
-ingestr/src/version.py,sha256=
-ingestr/src/adjust/
-ingestr/src/adjust/
+ingestr/src/version.py,sha256=xKd3pzbczuMsdB08eLAOqZDUd_q1IRxwZ_ccAFL4c4A,22
+ingestr/src/adjust/__init__.py,sha256=oTM7XozDcMuUiCZ0w4gWEBXuCCtMZ0iBfkKdd2pVa1E,3007
+ingestr/src/adjust/adjust_helpers.py,sha256=-tmmxy9k3wms-ZEIgxmlp2cAQ2X_O1lgjY1128bbMu4,3224
 ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
 ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
 ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
+ingestr/src/arrow/__init__.py,sha256=AgU7S9Ra3ZeeG00Mf32zxO5sgMFfRnTdOSirUJ1Pu10,2976
 ingestr/src/chess/__init__.py,sha256=PaxT2DObudOGlhyoENE5LjR6rTdsxiqKKpAZeyzVLCA,6791
 ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
 ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
@@ -35,8 +36,8 @@ ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,
 ingestr/src/klaviyo/_init_.py,sha256=nq2T1p3Xc7yiwGabsZBp2Jy2fa8_n5oxqxBnUGhKOgg,6592
 ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
 ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
-ingestr/src/mongodb/__init__.py,sha256=
-ingestr/src/mongodb/helpers.py,sha256=
+ingestr/src/mongodb/__init__.py,sha256=aMr1PFIDUMRv--ne61lR17HudsN-fsrzMeyxe9PqK2s,4335
+ingestr/src/mongodb/helpers.py,sha256=y9rYKR8eyIqam_eNsZmwSYevgi8mghh7Zp8qhTHl65s,5652
 ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
 ingestr/src/notion/settings.py,sha256=MwQVZViJtnvOegfjXYc_pJ50oUYgSRPgwqu7TvpeMOA,82
 ingestr/src/notion/helpers/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -73,8 +74,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.9.
-ingestr-0.9.
-ingestr-0.9.
-ingestr-0.9.
-ingestr-0.9.
+ingestr-0.9.3.dist-info/METADATA,sha256=QWrj7bb9OT6Gyh-mviHNElRqV23OO2guc8YKrZ05yfU,7004
+ingestr-0.9.3.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
+ingestr-0.9.3.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.9.3.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.9.3.dist-info/RECORD,,
ingestr/src/adjust/_init_.py
DELETED
@@ -1,31 +0,0 @@
-from typing import Sequence
-
-import dlt
-from dlt.sources import DltResource
-
-from .helpers import DEFAULT_DIMENSIONS, AdjustAPI
-
-
-@dlt.source(max_table_nesting=0)
-def adjust_source(
-    start_date: str,
-    end_date: str,
-    api_key: str,
-) -> Sequence[DltResource]:
-    @dlt.resource(write_disposition="merge", merge_key="day")
-    def campaigns():
-        adjust_api = AdjustAPI(api_key=api_key)
-        yield from adjust_api.fetch_report_data(
-            start_date=start_date,
-            end_date=end_date,
-        )
-
-    @dlt.resource(write_disposition="merge", merge_key="day")
-    def creatives():
-        dimensions = DEFAULT_DIMENSIONS + ["adgroup", "creative"]
-        adjust_api = AdjustAPI(api_key=api_key)
-        yield from adjust_api.fetch_report_data(
-            start_date=start_date, end_date=end_date, dimensions=dimensions
-        )
-
-    return campaigns, creatives
{ingestr-0.9.1.dist-info → ingestr-0.9.3.dist-info}/entry_points.txt
File without changes
{ingestr-0.9.1.dist-info → ingestr-0.9.3.dist-info}/licenses/LICENSE.md
File without changes