ingestr 0.7.8__py3-none-any.whl → 0.8.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/main.py +1 -0
- ingestr/src/adjust/_init_.py +31 -0
- ingestr/src/adjust/helpers.py +82 -0
- ingestr/src/appsflyer/_init_.py +24 -0
- ingestr/src/appsflyer/client.py +106 -0
- ingestr/src/factory.py +6 -0
- ingestr/src/shopify/__init__.py +1767 -54
- ingestr/src/shopify/helpers.py +73 -32
- ingestr/src/sources.py +89 -7
- ingestr/src/version.py +1 -1
- {ingestr-0.7.8.dist-info → ingestr-0.8.2.dist-info}/METADATA +10 -1
- {ingestr-0.7.8.dist-info → ingestr-0.8.2.dist-info}/RECORD +15 -11
- {ingestr-0.7.8.dist-info → ingestr-0.8.2.dist-info}/WHEEL +0 -0
- {ingestr-0.7.8.dist-info → ingestr-0.8.2.dist-info}/entry_points.txt +0 -0
- {ingestr-0.7.8.dist-info → ingestr-0.8.2.dist-info}/licenses/LICENSE.md +0 -0
ingestr/main.py
CHANGED
|
@@ -259,6 +259,7 @@ def ingest(
|
|
|
259
259
|
},
|
|
260
260
|
)
|
|
261
261
|
|
|
262
|
+
dlt.config["data_writer.buffer_max_items"] = page_size
|
|
262
263
|
dlt.config["data_writer.file_max_items"] = loader_file_size
|
|
263
264
|
dlt.config["extract.workers"] = extract_parallelism
|
|
264
265
|
dlt.config["extract.max_parallel_items"] = extract_parallelism
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from typing import Sequence
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
from dlt.sources import DltResource
|
|
5
|
+
|
|
6
|
+
from .helpers import DEFAULT_DIMENSIONS, AdjustAPI
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dlt.source(max_table_nesting=0)
def adjust_source(
    start_date: str,
    end_date: str,
    api_key: str,
) -> Sequence[DltResource]:
    """Build the dlt source exposing Adjust report data.

    Two resources are returned, both merged on the ``day`` column:

    * ``campaigns`` -- the report with the dimensions and metrics that
      ``AdjustAPI.fetch_report_data`` uses by default.
    * ``creatives`` -- the same report with ``adgroup`` and ``creative``
      appended to ``DEFAULT_DIMENSIONS``.

    Args:
        start_date: First day of the report period, passed through to the API.
        end_date: Last day of the report period, passed through to the API.
        api_key: Adjust API token handed to ``AdjustAPI``.

    Returns:
        The ``campaigns`` and ``creatives`` resources.
    """

    @dlt.resource(write_disposition="merge", merge_key="day")
    def campaigns():
        # No dimensions given: the API helper falls back to its defaults.
        report_client = AdjustAPI(api_key=api_key)
        report_pages = report_client.fetch_report_data(
            start_date=start_date,
            end_date=end_date,
        )
        yield from report_pages

    @dlt.resource(write_disposition="merge", merge_key="day")
    def creatives():
        # Creative-level breakdown: default dimensions plus adgroup/creative.
        creative_dimensions = [*DEFAULT_DIMENSIONS, "adgroup", "creative"]
        report_client = AdjustAPI(api_key=api_key)
        report_pages = report_client.fetch_report_data(
            start_date=start_date,
            end_date=end_date,
            dimensions=creative_dimensions,
        )
        yield from report_pages

    return campaigns, creatives
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from dlt.sources.helpers.requests import Client
|
|
3
|
+
from requests.exceptions import HTTPError
|
|
4
|
+
|
|
5
|
+
# Dimensions requested from the report endpoint when the caller passes none.
DEFAULT_DIMENSIONS = ["campaign", "day", "app", "store_type", "channel", "country"]

# Metrics requested from the report endpoint when the caller passes none.
DEFAULT_METRICS = [
    "network_cost",
    "all_revenue_total_d0",
    "ad_revenue_total_d0",
    "revenue_total_d0",
    "all_revenue_total_d1",
    "ad_revenue_total_d1",
    "revenue_total_d1",
    "all_revenue_total_d3",
    "ad_revenue_total_d3",
    "revenue_total_d3",
    "all_revenue_total_d7",
    "ad_revenue_total_d7",
    "revenue_total_d7",
    "all_revenue_total_d14",
    "ad_revenue_total_d14",
    "revenue_total_d14",
    "all_revenue_total_d21",
]


class AdjustAPI:
    """Small client for the Adjust report-service endpoint."""

    def __init__(self, api_key):
        """Store the API key and the fixed report endpoint URI."""
        self.api_key = api_key
        self.uri = "https://automate.adjust.com/reports-service/report"

    @staticmethod
    def _retry_on_limit(
        response: "requests.Response | None",
        exception: "BaseException | None",
    ) -> bool:
        """Tell the retrying session whether to retry: only on HTTP 429.

        ``response`` may be ``None`` when the attempt ended in an exception
        rather than a response; in that case there is no status code to
        inspect, so this predicate must answer ``False`` instead of raising
        ``AttributeError`` and hiding the original error.
        """
        return response is not None and response.status_code == 429

    def fetch_report_data(
        self,
        start_date,
        end_date,
        dimensions=DEFAULT_DIMENSIONS,
        metrics=DEFAULT_METRICS,
        utc_offset="+00:00",
        ad_spend_mode="network",
        attribution_source="first",
        attribution_type="all",
        cohort_maturity="immature",
        reattributed="all",
        sandbox="false",
    ):
        """Request one report and yield its rows.

        Args:
            start_date: Start of the period; sent as the left half of
                ``date_period``.
            end_date: End of the period; sent as the right half of
                ``date_period``.
            dimensions: Dimension names, sent comma-separated. The default
                list is only read, never mutated.
            metrics: Metric names, sent comma-separated. The default list is
                only read, never mutated.
            utc_offset, ad_spend_mode, attribution_source, attribution_type,
            cohort_maturity, reattributed, sandbox: Forwarded unchanged as
                query parameters of the same name.

        Yields:
            A single item: the value of the ``rows`` key of the JSON response,
            or an empty list when that key is absent.

        Raises:
            HTTPError: If the final response status is not 200.
        """
        headers = {"Authorization": f"Bearer {self.api_key}"}
        params = {
            "date_period": f"{start_date}:{end_date}",
            "dimensions": ",".join(dimensions),
            "metrics": ",".join(metrics),
            "utc_offset": utc_offset,
            "ad_spend_mode": ad_spend_mode,
            "attribution_source": attribution_source,
            "attribution_type": attribution_type,
            "cohort_maturity": cohort_maturity,
            "reattributed": reattributed,
            "sandbox": sandbox,
        }

        # raise_for_status is off so the status code is checked explicitly
        # below; rate-limit responses are retried via _retry_on_limit.
        request_client = Client(
            request_timeout=8.0,
            raise_for_status=False,
            retry_condition=self._retry_on_limit,
            request_max_attempts=12,
            request_backoff_factor=2,
        ).session

        response = request_client.get(self.uri, headers=headers, params=params)
        if response.status_code != 200:
            raise HTTPError(f"Request failed with status code: {response.status_code}")

        result = response.json()
        yield result.get("rows", [])
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
from typing import Iterable
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
from dlt.common.typing import TDataItem
|
|
5
|
+
from dlt.sources import DltResource
|
|
6
|
+
|
|
7
|
+
from ingestr.src.appsflyer.client import AppsflyerClient
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dlt.source(max_table_nesting=0)
def appsflyer_source(
    api_key: str, start_date: str, end_date: str
) -> Iterable[DltResource]:
    """Build the dlt source exposing AppsFlyer aggregate data.

    One ``AppsflyerClient`` is created and shared by both resources. Each
    resource is merged on the ``install_time`` column.

    Args:
        api_key: Token handed to ``AppsflyerClient``.
        start_date: Start of the period, forwarded to the client.
        end_date: End of the period, forwarded to the client.

    Returns:
        The ``campaigns`` and ``creatives`` resources.
    """
    appsflyer = AppsflyerClient(api_key)

    @dlt.resource(write_disposition="merge", merge_key="install_time")
    def campaigns() -> Iterable[TDataItem]:
        # Delegates entirely to the client's campaign-level fetch.
        campaign_items = appsflyer.fetch_campaigns(start_date, end_date)
        yield from campaign_items

    @dlt.resource(write_disposition="merge", merge_key="install_time")
    def creatives() -> Iterable[TDataItem]:
        # Delegates entirely to the client's creative-level fetch.
        creative_items = appsflyer.fetch_creatives(start_date, end_date)
        yield from creative_items

    return campaigns, creatives
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
from typing import Optional
|
|
2
|
+
|
|
3
|
+
import requests
|
|
4
|
+
from dlt.sources.helpers.requests import Client
|
|
5
|
+
from requests.exceptions import HTTPError
|
|
6
|
+
|
|
7
|
+
# Groupings sent to the API when the caller does not override them.
DEFAULT_GROUPING = ["c", "geo", "app_id", "install_time"]

# KPIs sent to the API when the caller does not override them.
DEFAULT_KPIS = [
    "impressions",
    "clicks",
    "installs",
    "cost",
    "revenue",
    "average_ecpi",
    "loyal_users",
    "uninstalls",
    "roi",
]


class AppsflyerClient:
    """Small client for the AppsFlyer master aggregate data endpoint."""

    def __init__(self, api_key: str):
        """Store the API key and the fixed endpoint URI."""
        self.api_key = api_key
        self.uri = "https://hq1.appsflyer.com/api/master-agg-data/v4/app/all"

    def __get_headers(self):
        """Return the request headers: the raw API key and the accepted type."""
        return {
            "Authorization": f"{self.api_key}",
            "accept": "text/json",
        }

    def _fetch_data(
        self,
        from_date: str,
        to_date: str,
        maximum_rows=1000000,
        dimensions=DEFAULT_GROUPING,
        metrics=DEFAULT_KPIS,
    ):
        """Request one aggregate report and yield the decoded JSON body.

        Args:
            from_date: Sent as the ``from`` query parameter.
            to_date: Sent as the ``to`` query parameter.
            maximum_rows: Sent as the ``maximum_rows`` query parameter.
            dimensions: Grouping names, sent comma-separated as ``groupings``.
                The default list is only read, never mutated.
            metrics: KPI names, sent comma-separated as ``kpis``. The default
                list is only read, never mutated.

        Yields:
            A single item: the parsed JSON body of the response.

        Raises:
            HTTPError: If the request fails, the body cannot be decoded by
                ``requests``, or the final response status is not 200.
        """
        params = {
            "from": from_date,
            "to": to_date,
            "groupings": ",".join(dimensions),
            "kpis": ",".join(metrics),
            "format": "json",
            "maximum_rows": maximum_rows,
        }

        def retry_on_limit(
            response: Optional[requests.Response], exception: Optional[BaseException]
        ) -> bool:
            # Retry only on rate limiting; the isinstance check also covers
            # the case where no response object is available.
            return (
                isinstance(response, requests.Response) and response.status_code == 429
            )

        request_client = Client(
            request_timeout=10.0,
            raise_for_status=False,
            retry_condition=retry_on_limit,
            request_max_attempts=12,
            request_backoff_factor=2,
        ).session

        try:
            response = request_client.get(
                url=self.uri, headers=self.__get_headers(), params=params
            )
        except requests.RequestException as e:
            raise HTTPError(f"Request failed: {e}") from e

        # Checked outside the try block: HTTPError is itself a
        # RequestException, so raising it inside would be caught and wrapped
        # a second time ("Request failed: Request failed with ...").
        if response.status_code != 200:
            raise HTTPError(
                f"Request failed with status code: {response.status_code}"
            )

        try:
            result = response.json()
        except requests.RequestException as e:
            # Decode errors raised as RequestException subclasses keep being
            # reported as HTTPError, now with the cause attached.
            raise HTTPError(f"Request failed: {e}") from e

        yield result

    def fetch_campaigns(
        self,
        start_date: str,
        end_date: str,
    ):
        """Fetch the default groupings with the default KPIs plus cohort
        revenue-per-user and day-7 retention KPIs."""
        metrics = DEFAULT_KPIS + [
            "cohort_day_1_revenue_per_user",
            "cohort_day_1_total_revenue_per_user",
            "cohort_day_3_revenue_per_user",
            "cohort_day_3_total_revenue_per_user",
            "cohort_day_7_total_revenue_per_user",
            "cohort_day_7_revenue_per_user",
            "cohort_day_14_total_revenue_per_user",
            "cohort_day_14_revenue_per_user",
            "cohort_day_21_total_revenue_per_user",
            "cohort_day_21_revenue_per_user",
            "retention_day_7",
        ]
        return self._fetch_data(start_date, end_date, metrics=metrics)

    def fetch_creatives(
        self,
        start_date: str,
        end_date: str,
    ):
        """Fetch the default KPIs grouped additionally by ad set and ad id."""
        dimensions = DEFAULT_GROUPING + ["af_adset_id", "af_adset", "af_ad_id"]
        return self._fetch_data(start_date, end_date, dimensions=dimensions)
|
ingestr/src/factory.py
CHANGED
|
@@ -15,7 +15,9 @@ from ingestr.src.destinations import (
|
|
|
15
15
|
SynapseDestination,
|
|
16
16
|
)
|
|
17
17
|
from ingestr.src.sources import (
|
|
18
|
+
AdjustSource,
|
|
18
19
|
AirtableSource,
|
|
20
|
+
AppsflyerSource,
|
|
19
21
|
ChessSource,
|
|
20
22
|
FacebookAdsSource,
|
|
21
23
|
GoogleSheetsSource,
|
|
@@ -124,8 +126,12 @@ class SourceDestinationFactory:
|
|
|
124
126
|
return AirtableSource()
|
|
125
127
|
elif self.source_scheme == "klaviyo":
|
|
126
128
|
return KlaviyoSource()
|
|
129
|
+
elif self.source_scheme == "appsflyer":
|
|
130
|
+
return AppsflyerSource()
|
|
127
131
|
elif self.source_scheme == "kafka":
|
|
128
132
|
return KafkaSource()
|
|
133
|
+
elif self.source_scheme == "adjust":
|
|
134
|
+
return AdjustSource()
|
|
129
135
|
else:
|
|
130
136
|
raise ValueError(f"Unsupported source scheme: {self.source_scheme}")
|
|
131
137
|
|