ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/conftest.py +72 -0
- ingestr/main.py +134 -87
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/adjust/adjust_helpers.py +7 -3
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin/__init__.py +262 -0
- ingestr/src/applovin_max/__init__.py +117 -0
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -45
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/arrow/__init__.py +9 -1
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/attio/__init__.py +102 -0
- ingestr/src/attio/helpers.py +65 -0
- ingestr/src/blob.py +38 -11
- ingestr/src/buildinfo.py +1 -0
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/clickup/__init__.py +85 -0
- ingestr/src/clickup/helpers.py +47 -0
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +520 -33
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/__init__.py +80 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +47 -28
- ingestr/src/facebook_ads/helpers.py +59 -37
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +116 -2
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +46 -3
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -0
- ingestr/src/frankfurter/helpers.py +48 -0
- ingestr/src/freshdesk/__init__.py +89 -0
- ingestr/src/freshdesk/freshdesk_client.py +137 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/fundraiseup/__init__.py +95 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +41 -6
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_analytics/__init__.py +22 -4
- ingestr/src/google_analytics/helpers.py +124 -6
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/http_client.py +24 -0
- ingestr/src/hubspot/__init__.py +66 -23
- ingestr/src/hubspot/helpers.py +52 -22
- ingestr/src/hubspot/settings.py +14 -7
- ingestr/src/influxdb/__init__.py +46 -0
- ingestr/src/influxdb/client.py +34 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/isoc_pulse/__init__.py +159 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/kafka/__init__.py +4 -1
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +82 -0
- ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
- ingestr/src/linear/__init__.py +634 -0
- ingestr/src/linear/helpers.py +111 -0
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/loader.py +69 -0
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/mixpanel/__init__.py +62 -0
- ingestr/src/mixpanel/client.py +99 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +72 -8
- ingestr/src/mongodb/helpers.py +915 -38
- ingestr/src/partition.py +32 -0
- ingestr/src/personio/__init__.py +331 -0
- ingestr/src/personio/helpers.py +86 -0
- ingestr/src/phantombuster/__init__.py +65 -0
- ingestr/src/phantombuster/client.py +87 -0
- ingestr/src/pinterest/__init__.py +82 -0
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/quickbooks/__init__.py +117 -0
- ingestr/src/resource.py +40 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +156 -0
- ingestr/src/salesforce/helpers.py +64 -0
- ingestr/src/shopify/__init__.py +1 -17
- ingestr/src/smartsheets/__init__.py +82 -0
- ingestr/src/snapchat_ads/__init__.py +489 -0
- ingestr/src/snapchat_ads/client.py +72 -0
- ingestr/src/snapchat_ads/helpers.py +535 -0
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/solidgate/__init__.py +219 -0
- ingestr/src/solidgate/helpers.py +154 -0
- ingestr/src/sources.py +3132 -212
- ingestr/src/stripe_analytics/__init__.py +49 -21
- ingestr/src/stripe_analytics/helpers.py +286 -1
- ingestr/src/stripe_analytics/settings.py +62 -10
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/__init__.py +12 -6
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- ingestr/src/version.py +6 -1
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/src/zoom/__init__.py +99 -0
- ingestr/src/zoom/helpers.py +102 -0
- ingestr/tests/unit/test_smartsheets.py +133 -0
- ingestr-0.14.104.dist-info/METADATA +563 -0
- ingestr-0.14.104.dist-info/RECORD +203 -0
- ingestr/src/appsflyer/_init_.py +0 -24
- ingestr-0.13.2.dist-info/METADATA +0 -302
- ingestr-0.13.2.dist-info/RECORD +0 -107
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
from datetime import datetime, timedelta, timezone
|
|
2
|
+
from enum import Enum
|
|
3
|
+
from typing import Dict, List, Optional
|
|
4
|
+
|
|
5
|
+
import dlt
|
|
6
|
+
from dlt.sources.rest_api import EndpointResource, RESTAPIConfig, rest_api_resources
|
|
7
|
+
from requests import Response
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class InvalidCustomReportError(Exception):
    """Raised when a custom report spec does not have the expected shape.

    The constructor takes no arguments; the message is fixed and spells out
    the four colon-separated fields a spec must contain.
    """

    def __init__(self):
        # The original message opened a single quote before "custom:" but
        # never closed it; the closing quote is restored here.
        super().__init__(
            "Custom report should be in the format 'custom:{endpoint}:{report_type}:{dimensions}'"
        )
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class ClientError(Exception):
    """Error raised by ``http_error_handler`` for a response that is not OK."""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Column name -> column hint ({"data_type": ...}) for the report columns whose
# type is pinned. Columns not listed here get no hint from build_type_hints.
TYPE_HINTS = {
    column: {"data_type": data_type}
    for column, data_type in (
        ("application_is_hidden", "bool"),
        ("average_cpa", "double"),
        ("average_cpc", "double"),
        ("campaign_bid_goal", "double"),
        ("campaign_roas_goal", "double"),
        ("clicks", "bigint"),
        ("conversions", "bigint"),
        ("conversion_rate", "double"),
        ("cost", "double"),  # assuming float.
        ("ctr", "double"),
        ("day", "date"),
        ("first_purchase", "bigint"),
        ("ecpm", "double"),
        ("impressions", "bigint"),
        ("installs", "bigint"),
        ("revenue", "double"),
        ("redownloads", "bigint"),
        ("sales", "double"),  # assuming float.
    )
}
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class ReportType(Enum):
    """Report flavour; ``.value`` is sent as the ``report_type`` request parameter."""

    PUBLISHER = "publisher"
    ADVERTISER = "advertiser"
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
# Columns requested for each report type. The list is joined with commas into
# the "columns" request parameter and is also used to pick the type hints.
REPORT_SCHEMA: Dict[ReportType, List[str]] = {
    ReportType.PUBLISHER: [
        "ad_type",
        "application",
        "application_is_hidden",
        "bidding_integration",
        "clicks",
        "country",
        "ctr",
        "day",
        "device_type",
        "ecpm",
        "impressions",
        "package_name",
        "placement_type",
        "platform",
        "revenue",
        "size",
        "store_id",
        "zone",
        "zone_id",
    ],
    ReportType.ADVERTISER: [
        "ad",
        "ad_creative_type",
        "ad_id",
        "ad_type",
        "average_cpa",
        "average_cpc",
        "campaign",
        "campaign_ad_type",
        "campaign_bid_goal",
        "campaign_id_external",
        "campaign_package_name",
        "campaign_roas_goal",
        "campaign_store_id",
        "campaign_type",
        "clicks",
        "conversions",
        "conversion_rate",
        "cost",
        "country",
        "creative_set",
        "creative_set_id",
        "ctr",
        "custom_page_id",
        "day",
        "device_type",
        "external_placement_id",
        "first_purchase",
        "impressions",
        "installs",
        "optimization_day_target",
        "placement_type",
        "platform",
        "redownloads",
        "sales",
        "size",
        "target_event",
        "traffic_source",
    ],
}
|
|
110
|
+
|
|
111
|
+
# Advertiser columns that are left out of the request made to the
# "probabilisticReport" endpoint.
PROBABILISTIC_REPORT_EXCLUDE = [
    "installs",
    "redownloads",
]
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
@dlt.source
def applovin_source(
    api_key: str,
    start_date: str,
    end_date: Optional[str],
    custom: Optional[str],
):
    """Yield the AppLovin report resources described by a REST API config.

    Four fixed resources are always configured (publisher, advertiser,
    advertiser-probabilistic and advertiser-ska reports); a fifth one is added
    when ``custom`` is given.

    Args:
        api_key: Sent as the ``api_key`` query parameter.
        start_date: ISO date string used as the initial value of the ``day``
            cursor (request parameter ``start``).
        end_date: Value of the ``end`` request parameter. When ``None`` it is
            set to the later of yesterday (UTC) and ``start_date``.
        custom: Optional spec parsed by ``custom_report_from_spec``.
    """
    end_was_defaulted = end_date is None
    if end_was_defaulted:
        # use the greatest of yesterday and start_date
        yesterday = datetime.now(timezone.utc) - timedelta(days=1)
        first_day = datetime.fromisoformat(start_date).replace(tzinfo=timezone.utc)
        end_date = max(yesterday, first_day).strftime("%Y-%m-%d")

    advertiser_columns = REPORT_SCHEMA[ReportType.ADVERTISER]
    reports = [
        resource(
            "publisher-report",
            "report",
            REPORT_SCHEMA[ReportType.PUBLISHER],
            ReportType.PUBLISHER,
        ),
        resource(
            "advertiser-report",
            "report",
            advertiser_columns,
            ReportType.ADVERTISER,
        ),
        resource(
            "advertiser-probabilistic-report",
            "probabilisticReport",
            exclude(advertiser_columns, PROBABILISTIC_REPORT_EXCLUDE),
            ReportType.ADVERTISER,
        ),
        resource(
            "advertiser-ska-report",
            "skaReport",
            advertiser_columns,
            ReportType.ADVERTISER,
        ),
    ]
    if custom:
        reports.append(custom_report_from_spec(custom))

    config: RESTAPIConfig = {
        "client": {
            "base_url": "https://r.applovin.com/",
            "auth": {
                "type": "api_key",
                "name": "api_key",
                "location": "query",
                "api_key": api_key,
            },
        },
        "resource_defaults": {
            "write_disposition": "merge",
            "endpoint": {
                "incremental": {
                    "cursor_path": "day",
                    "start_param": "start",
                    "initial_value": start_date,
                    "range_start": "closed",
                    "range_end": "closed",
                },
                "params": {
                    "format": "json",
                    "end": end_date,
                },
                "paginator": "single_page",
                "response_actions": [
                    http_error_handler,
                ],
            },
        },
        "resources": reports,
    }

    # The cursor only gets an explicit end value when the end date was
    # computed above rather than supplied by the caller.
    if end_was_defaulted:
        config["resource_defaults"]["endpoint"]["incremental"]["end_value"] = end_date  # type: ignore

    yield from rest_api_resources(config)
|
|
202
|
+
|
|
203
|
+
|
|
204
|
+
def resource(
    name: str,
    endpoint: str,
    dimensions: List[str],
    report_type: ReportType,
) -> EndpointResource:
    """Build the resource definition for one report endpoint.

    Args:
        name: Resource name.
        endpoint: Path of the endpoint to call.
        dimensions: Column names; joined with commas for the ``columns``
            request parameter and used to select type hints.
        report_type: Its ``.value`` becomes the ``report_type`` parameter.

    Returns:
        A resource definition merged on ``day``.
    """
    column_hints = build_type_hints(dimensions)
    query = {
        "report_type": report_type.value,
        "columns": ",".join(dimensions),
    }
    return {
        "name": name,
        "columns": column_hints,
        "merge_key": "day",
        "endpoint": {"path": endpoint, "params": query},
    }
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
def custom_report_from_spec(spec: str) -> EndpointResource:
    """Turn a ``prefix:endpoint:report_type:dimensions`` spec into a resource.

    Args:
        spec: Four colon-separated fields. The first field is ignored.

    Returns:
        The resource named ``custom_report`` for the requested endpoint.

    Raises:
        InvalidCustomReportError: If the spec does not have exactly four fields.
        ValueError: If the report type is not a ``ReportType`` value.
    """
    fields = spec.split(":")
    if len(fields) != 4:
        raise InvalidCustomReportError()

    raw_endpoint, raw_report_type, raw_dimensions = fields[1:]
    report_type = ReportType(raw_report_type.strip())
    dimensions = validate_dimensions(raw_dimensions)

    return resource(
        name="custom_report",
        endpoint=raw_endpoint.strip(),
        dimensions=dimensions,
        report_type=report_type,
    )
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def validate_dimensions(dimensions: str) -> List[str]:
    """Split a comma-separated dimension string into a clean list.

    Each entry is stripped of surrounding whitespace. Empty entries (from an
    empty string, a doubled comma or a trailing comma) are dropped so that no
    blank column name is requested. ``"day"`` is appended when absent.

    Args:
        dimensions: Comma-separated dimension names.

    Returns:
        The dimension names in their original order, always containing
        ``"day"``.
    """
    dims = [dim.strip() for dim in dimensions.split(",")]
    # Drop blanks left behind by "", "a,,b" or "a,".
    dims = [dim for dim in dims if dim]

    if "day" not in dims:
        dims.append("day")

    return dims
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def exclude(source: List[str], exclude_list: List[str]) -> List[str]:
    """Return the items of ``source``, in order, that are not in ``exclude_list``."""
    banned = frozenset(exclude_list)
    return list(filter(lambda column: column not in banned, source))
|
|
254
|
+
|
|
255
|
+
|
|
256
|
+
def build_type_hints(cols: List[str]) -> dict:
    """Return the ``TYPE_HINTS`` entries for the given columns.

    Columns without an entry in ``TYPE_HINTS`` are left out of the result.
    """
    hints = {}
    for name in cols:
        if name in TYPE_HINTS:
            hints[name] = TYPE_HINTS[name]
    return hints
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
def http_error_handler(resp: Response):
    """Raise ``ClientError`` with the status code and body unless ``resp.ok``."""
    if resp.ok:
        return
    raise ClientError(f"HTTP Status {resp.status_code}: {resp.text}")
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
from datetime import timedelta
|
|
2
|
+
from typing import Iterator
|
|
3
|
+
|
|
4
|
+
import dlt
|
|
5
|
+
import pandas as pd # type: ignore[import-untyped]
|
|
6
|
+
import pendulum
|
|
7
|
+
import requests
|
|
8
|
+
from dlt.sources import DltResource
|
|
9
|
+
from dlt.sources.helpers.requests import Client
|
|
10
|
+
from pendulum.date import Date
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dlt.source(max_table_nesting=0)
def applovin_max_source(
    start_date: Date,
    applications: list[str],
    api_key: str,
    end_date: Date | None,
) -> DltResource:
    """Create the ``user_ad_revenue`` resource for the given applications.

    Args:
        start_date: Initial value of the ``partition_date`` cursor.
        applications: Application identifiers; each one is queried per day
            and per platform.
        api_key: Passed through to ``get_data``.
        end_date: End value of the cursor; when the cursor has no end value
            the report runs up to yesterday (UTC).

    Returns:
        The resource function, merged on ``partition_date``.
    """

    @dlt.resource(
        name="user_ad_revenue",
        write_disposition="merge",
        merge_key="partition_date",
        columns={
            "partition_date": {"data_type": "date", "partition": True},
        },
    )
    def fetch_ad_revenue_report(
        dateTime=dlt.sources.incremental(
            "partition_date",
            initial_value=start_date,
            end_value=end_date,
            range_start="closed",
            range_end="closed",
        ),
    ) -> Iterator[dict]:
        report_url = "https://r.applovin.com/max/userAdRevenueReport"
        first_day = dateTime.last_value

        if dateTime.end_value is not None:
            last_day = dateTime.end_value
        else:
            last_day = pendulum.yesterday("UTC").date()

        session = create_client()

        # One request per application, day and platform; combinations for
        # which get_data returns None are skipped.
        for app in applications:
            day = first_day
            while day <= last_day:
                for platform in ("ios", "android", "fireos"):
                    frame = get_data(
                        url=report_url,
                        current_date=day,
                        application=app,
                        api_key=api_key,
                        client=session,
                        platform=platform,
                    )
                    if frame is None:
                        continue
                    yield frame
                day = day + timedelta(days=1)

    return fetch_ad_revenue_report
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def create_client() -> requests.Session:
    """Return the session of a ``Client`` configured for this source.

    The client does not raise on error statuses, allows up to 12 attempts per
    request and uses ``retry_on_limit`` as its retry condition.
    """
    retrying_client = Client(
        request_max_attempts=12,
        retry_condition=retry_on_limit,
        raise_for_status=False,
    )
    return retrying_client.session
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def retry_on_limit(
    response: requests.Response | None, exception: BaseException | None
) -> bool:
    """Return True only when a response was received and its status is 429.

    ``exception`` is accepted but not inspected.
    """
    return response is not None and response.status_code == 429
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def get_data(
    url: str,
    current_date: Date,
    application: str,
    api_key: str,
    platform: str,
    client: requests.Session,
):
    """Fetch one day's report for one application and platform.

    The endpoint answers with JSON holding ``ad_revenue_report_url``; the CSV
    at that URL is loaded into a DataFrame.

    Returns:
        The DataFrame with ``Date`` parsed to datetimes plus added
        ``partition_date`` and ``platform`` columns, or ``None`` when the API
        answers 404 with "No Mediation App Id found for platform".

    Raises:
        requests.HTTPError: For any other non-200 response.
    """
    response = client.get(
        url=url,
        params={
            "api_key": api_key,
            "date": current_date.isoformat(),
            "platform": platform,
            "application": application,
            "aggregated": "false",
        },
    )

    if response.status_code != 200:
        app_not_on_platform = (
            response.status_code == 404
            and "No Mediation App Id found for platform" in response.text
        )
        if app_not_on_platform:
            return None
        raise requests.HTTPError(
            f"AppLovin MAX API error (status {response.status_code}): {response.text}"
        )

    report = pd.read_csv(response.json().get("ad_revenue_report_url"))
    report["Date"] = pd.to_datetime(report["Date"])
    report["partition_date"] = report["Date"].dt.date
    report["platform"] = platform
    return report
|
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
from typing import Iterable
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
import pendulum
|
|
5
|
+
from dlt.common.typing import TDataItem
|
|
6
|
+
from dlt.sources import DltResource
|
|
7
|
+
|
|
8
|
+
from ingestr.src.appsflyer.client import AppsflyerClient
|
|
9
|
+
|
|
10
|
+
# Request dimension name -> the name used when looking up hints and building
# primary keys (presumably the column name in the response; verify against
# the client).
DIMENSION_RESPONSE_MAPPING = {
    "c": "campaign",
    "af_adset_id": "adset_id",
    "af_adset": "adset",
    "af_ad_id": "ad_id",
}

# Column hints keyed by column name. Every decimal column shares precision 30
# and scale 5, so the table below lists only the type and nullability.
HINTS = {
    column: (
        {"data_type": "decimal", "precision": 30, "scale": 5, "nullable": nullable}
        if data_type == "decimal"
        else {"data_type": data_type, "nullable": nullable}
    )
    for column, data_type, nullable in (
        ("app_id", "text", False),
        ("campaign", "text", False),
        ("geo", "text", False),
        ("cost", "decimal", False),
        ("clicks", "bigint", False),
        ("impressions", "bigint", False),
        ("average_ecpi", "decimal", False),
        ("installs", "bigint", False),
        ("retention_day_7", "decimal", False),
        ("retention_day_14", "decimal", False),
        ("cohort_day_1_revenue_per_user", "decimal", True),
        ("cohort_day_1_total_revenue_per_user", "decimal", True),
        ("cohort_day_3_revenue_per_user", "decimal", True),
        ("cohort_day_3_total_revenue_per_user", "decimal", True),
        ("cohort_day_7_revenue_per_user", "decimal", True),
        ("cohort_day_7_total_revenue_per_user", "decimal", True),
        ("cohort_day_14_revenue_per_user", "decimal", True),
        ("cohort_day_14_total_revenue_per_user", "decimal", True),
        ("cohort_day_21_revenue_per_user", "decimal", True),
        ("cohort_day_21_total_revenue_per_user", "decimal", True),
        ("install_time", "date", False),
        ("loyal_users", "bigint", False),
        ("revenue", "decimal", True),
        ("roi", "decimal", True),
        ("uninstalls", "bigint", True),
    )
}

# Dimensions and metrics requested by the "campaigns" resource.
CAMPAIGNS_DIMENSIONS = ["c", "geo", "app_id", "install_time"]
CAMPAIGNS_METRICS = [
    "average_ecpi",
    "clicks",
    "cohort_day_1_revenue_per_user",
    "cohort_day_1_total_revenue_per_user",
    "cohort_day_14_revenue_per_user",
    "cohort_day_14_total_revenue_per_user",
    "cohort_day_21_revenue_per_user",
    "cohort_day_21_total_revenue_per_user",
    "cohort_day_3_revenue_per_user",
    "cohort_day_3_total_revenue_per_user",
    "cohort_day_7_revenue_per_user",
    "cohort_day_7_total_revenue_per_user",
    "cost",
    "impressions",
    "installs",
    "loyal_users",
    "retention_day_7",
    "revenue",
    "roi",
    "uninstalls",
]

# Dimensions and metrics requested by the "creatives" resource.
CREATIVES_DIMENSIONS = [
    "c",
    "geo",
    "app_id",
    "install_time",
    "af_adset_id",
    "af_adset",
    "af_ad_id",
]
CREATIVES_METRICS = [
    "impressions",
    "clicks",
    "installs",
    "cost",
    "revenue",
    "average_ecpi",
    "loyal_users",
    "uninstalls",
    "roi",
]
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@dlt.source(max_table_nesting=0)
def appsflyer_source(
    api_key: str,
    start_date: str,
    end_date: str,
    dimensions: list[str],
    metrics: list[str],
) -> Iterable[DltResource]:
    """Create the ``campaigns``, ``creatives`` and ``custom`` resources.

    All three resources use an incremental cursor on ``install_time`` and
    delegate the fetch to ``AppsflyerClient._fetch_data``.

    Args:
        api_key: Passed to ``AppsflyerClient``.
        start_date: Initial cursor value; when falsy, 30 days before today is
            used (formatted ``YYYY-MM-DD``).
        end_date: End value of the cursor; when the cursor has no end value,
            today's date is used as the upper bound of the fetch.
        dimensions: Dimensions for the ``custom`` resource. ``install_time``
            is added when missing. The caller's list is not modified.
        metrics: Metrics for the ``custom`` resource.

    Returns:
        The three resources, in the order campaigns, creatives, custom.
    """
    client = AppsflyerClient(api_key)

    # Same starting point for all three cursors.
    initial_value = (
        start_date
        if start_date
        else pendulum.today().subtract(days=30).format("YYYY-MM-DD")
    )

    @dlt.resource(
        write_disposition="merge",
        merge_key="install_time",
        columns=make_hints(CAMPAIGNS_DIMENSIONS, CAMPAIGNS_METRICS),
    )
    def campaigns(
        datetime=dlt.sources.incremental(
            "install_time",
            initial_value=initial_value,
            end_value=end_date,
            range_end="closed",
            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        end = (
            datetime.end_value
            if datetime.end_value
            else pendulum.now().format("YYYY-MM-DD")
        )

        yield from client._fetch_data(
            from_date=datetime.last_value,
            to_date=end,
            dimensions=CAMPAIGNS_DIMENSIONS,
            metrics=CAMPAIGNS_METRICS,
        )

    @dlt.resource(
        write_disposition="merge",
        merge_key="install_time",
        columns=make_hints(CREATIVES_DIMENSIONS, CREATIVES_METRICS),
    )
    def creatives(
        datetime=dlt.sources.incremental(
            "install_time",
            initial_value=initial_value,
            end_value=end_date,
            range_end="closed",
            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        end = (
            datetime.end_value
            if datetime.end_value
            else pendulum.now().format("YYYY-MM-DD")
        )
        yield from client._fetch_data(
            datetime.last_value,
            end,
            dimensions=CREATIVES_DIMENSIONS,
            metrics=CREATIVES_METRICS,
        )

    # Work on a copy: the original appended to the caller's list in place.
    custom_dimensions = list(dimensions)
    if "install_time" not in custom_dimensions:
        custom_dimensions.append("install_time")

    # One key per dimension, under its mapped name. The original added
    # "install_time" twice when it had to be appended; dict.fromkeys also
    # removes any other repeats while keeping order.
    primary_keys = list(
        dict.fromkeys(
            DIMENSION_RESPONSE_MAPPING.get(dimension, dimension)
            for dimension in custom_dimensions
        )
    )

    @dlt.resource(
        write_disposition="merge",
        primary_key=primary_keys,
        columns=make_hints(custom_dimensions, metrics),
    )
    def custom(
        datetime=dlt.sources.incremental(
            "install_time",
            initial_value=initial_value,
            end_value=end_date,
        ),
    ):
        end = (
            datetime.end_value
            if datetime.end_value
            else pendulum.now().format("YYYY-MM-DD")
        )
        res = client._fetch_data(
            from_date=datetime.last_value,
            to_date=end,
            dimensions=custom_dimensions,
            metrics=metrics,
        )
        yield from res

    return campaigns, creatives, custom
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def make_hints(dimensions: list[str], metrics: list[str]):
    """Collect the ``HINTS`` entries for the given dimensions and metrics.

    Dimension names are first translated through
    ``DIMENSION_RESPONSE_MAPPING``; metric names are looked up as they are.
    Names without an entry in ``HINTS`` are skipped.

    Returns:
        A dict of column name to hint, dimensions first and then metrics.
    """
    response_keys = [
        DIMENSION_RESPONSE_MAPPING.get(dimension, dimension)
        for dimension in dimensions
    ]
    return {
        key: HINTS[key]
        for key in (*response_keys, *metrics)
        if key in HINTS
    }
|