ingestr 0.13.4__py3-none-any.whl → 0.13.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/src/applovin/__init__.py +22 -43
- ingestr/src/applovin_max/__init__.py +99 -0
- ingestr/src/destinations.py +5 -0
- ingestr/src/factory.py +3 -1
- ingestr/src/sources.py +48 -2
- {ingestr-0.13.4.dist-info → ingestr-0.13.6.dist-info}/METADATA +1 -1
- {ingestr-0.13.4.dist-info → ingestr-0.13.6.dist-info}/RECORD +10 -9
- {ingestr-0.13.4.dist-info → ingestr-0.13.6.dist-info}/WHEEL +0 -0
- {ingestr-0.13.4.dist-info → ingestr-0.13.6.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.4.dist-info → ingestr-0.13.6.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/applovin/__init__.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
|
-
from datetime import datetime, timezone
|
|
1
|
+
from datetime import datetime, timezone, timedelta
|
|
2
2
|
from enum import Enum
|
|
3
3
|
from typing import Dict, List, Optional
|
|
4
|
+
from requests import Response
|
|
4
5
|
|
|
5
6
|
import dlt
|
|
6
7
|
from dlt.sources.rest_api import EndpointResource, RESTAPIConfig, rest_api_resources
|
|
@@ -12,11 +13,8 @@ class InvalidCustomReportError(Exception):
|
|
|
12
13
|
"Custom report should be in the format 'custom:{endpoint}:{report_type}:{dimensions}"
|
|
13
14
|
)
|
|
14
15
|
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
def __init__(self, dim: str, report_type: str):
|
|
18
|
-
super().__init__(f"Unknown dimension {dim} for report type {report_type}")
|
|
19
|
-
|
|
16
|
+
class ClientError(Exception):
|
|
17
|
+
pass
|
|
20
18
|
|
|
21
19
|
TYPE_HINTS = {
|
|
22
20
|
"application_is_hidden": {"data_type": "bool"},
|
|
@@ -108,32 +106,6 @@ REPORT_SCHEMA: Dict[ReportType, List[str]] = {
|
|
|
108
106
|
],
|
|
109
107
|
}
|
|
110
108
|
|
|
111
|
-
# NOTE(turtledev): These values are valid columns,
|
|
112
|
-
# but often don't produce a value. Find a way to either add
|
|
113
|
-
# a default value, or use an alternative strategy to de-duplicate
|
|
114
|
-
# OR make them nullable
|
|
115
|
-
SKA_REPORT_EXCLUDE = [
|
|
116
|
-
"ad",
|
|
117
|
-
"ad_id",
|
|
118
|
-
"ad_type",
|
|
119
|
-
"average_cpc",
|
|
120
|
-
"campaign_ad_type",
|
|
121
|
-
"clicks",
|
|
122
|
-
"conversions",
|
|
123
|
-
"conversion_rate",
|
|
124
|
-
"creative_set",
|
|
125
|
-
"creative_set_id",
|
|
126
|
-
"ctr",
|
|
127
|
-
"custom_page_id",
|
|
128
|
-
"device_type",
|
|
129
|
-
"first_purchase",
|
|
130
|
-
"impressions",
|
|
131
|
-
"placement_type",
|
|
132
|
-
"sales",
|
|
133
|
-
"size",
|
|
134
|
-
"traffic_source",
|
|
135
|
-
]
|
|
136
|
-
|
|
137
109
|
PROBABILISTIC_REPORT_EXCLUDE = [
|
|
138
110
|
"installs",
|
|
139
111
|
"redownloads",
|
|
@@ -147,19 +119,16 @@ def applovin_source(
|
|
|
147
119
|
end_date: Optional[str],
|
|
148
120
|
custom: Optional[str],
|
|
149
121
|
):
|
|
150
|
-
ska_report_columns = exclude(
|
|
151
|
-
REPORT_SCHEMA[ReportType.ADVERTISER],
|
|
152
|
-
SKA_REPORT_EXCLUDE,
|
|
153
|
-
)
|
|
154
122
|
|
|
155
|
-
probabilistic_report_columns = exclude(
|
|
156
|
-
REPORT_SCHEMA[ReportType.ADVERTISER],
|
|
157
|
-
PROBABILISTIC_REPORT_EXCLUDE,
|
|
158
|
-
)
|
|
159
123
|
backfill = False
|
|
160
124
|
if end_date is None:
|
|
161
125
|
backfill = True
|
|
162
|
-
|
|
126
|
+
|
|
127
|
+
# use the greatest of yesterday and start_date
|
|
128
|
+
end_date = max(
|
|
129
|
+
datetime.now(timezone.utc) - timedelta(days=1),
|
|
130
|
+
datetime.fromisoformat(start_date).replace(tzinfo=timezone.utc)
|
|
131
|
+
).strftime("%Y-%m-%d")
|
|
163
132
|
|
|
164
133
|
config: RESTAPIConfig = {
|
|
165
134
|
"client": {
|
|
@@ -186,6 +155,9 @@ def applovin_source(
|
|
|
186
155
|
"end": end_date,
|
|
187
156
|
},
|
|
188
157
|
"paginator": "single_page",
|
|
158
|
+
"response_actions": [
|
|
159
|
+
http_error_handler,
|
|
160
|
+
]
|
|
189
161
|
},
|
|
190
162
|
},
|
|
191
163
|
"resources": [
|
|
@@ -204,13 +176,16 @@ def applovin_source(
|
|
|
204
176
|
resource(
|
|
205
177
|
"advertiser-probabilistic-report",
|
|
206
178
|
"probabilisticReport",
|
|
207
|
-
|
|
179
|
+
exclude(
|
|
180
|
+
REPORT_SCHEMA[ReportType.ADVERTISER],
|
|
181
|
+
PROBABILISTIC_REPORT_EXCLUDE
|
|
182
|
+
),
|
|
208
183
|
ReportType.ADVERTISER,
|
|
209
184
|
),
|
|
210
185
|
resource(
|
|
211
186
|
"advertiser-ska-report",
|
|
212
187
|
"skaReport",
|
|
213
|
-
|
|
188
|
+
REPORT_SCHEMA[ReportType.ADVERTISER],
|
|
214
189
|
ReportType.ADVERTISER,
|
|
215
190
|
),
|
|
216
191
|
],
|
|
@@ -280,3 +255,7 @@ def exclude(source: List[str], exclude_list: List[str]) -> List[str]:
|
|
|
280
255
|
|
|
281
256
|
def build_type_hints(cols: List[str]) -> dict:
|
|
282
257
|
return {col: TYPE_HINTS[col] for col in cols if col in TYPE_HINTS}
|
|
258
|
+
|
|
259
|
+
def http_error_handler(resp: Response):
|
|
260
|
+
if not resp.ok:
|
|
261
|
+
raise ClientError(f"HTTP Status {resp.status_code}: {resp.text}")
|
|
ingestr/src/applovin_max/__init__.py
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
from typing import Iterator
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
import pandas as pd # type: ignore[import-untyped]
|
|
5
|
+
import pendulum
|
|
6
|
+
import requests
|
|
7
|
+
from dlt.sources import DltResource
|
|
8
|
+
from dlt.sources.helpers.requests import Client
|
|
9
|
+
from pendulum.date import Date
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@dlt.source(max_table_nesting=0)
|
|
13
|
+
def applovin_max_source(
|
|
14
|
+
start_date: str,
|
|
15
|
+
application: str,
|
|
16
|
+
api_key: str,
|
|
17
|
+
end_date: str | None,
|
|
18
|
+
) -> DltResource:
|
|
19
|
+
@dlt.resource(
|
|
20
|
+
name="ad_revenue",
|
|
21
|
+
write_disposition="merge",
|
|
22
|
+
merge_key="_partition_date",
|
|
23
|
+
)
|
|
24
|
+
def fetch_ad_revenue_report(
|
|
25
|
+
dateTime=(
|
|
26
|
+
dlt.sources.incremental(
|
|
27
|
+
"_partition_date",
|
|
28
|
+
initial_value=start_date,
|
|
29
|
+
end_value=end_date,
|
|
30
|
+
range_start="closed",
|
|
31
|
+
range_end="closed",
|
|
32
|
+
)
|
|
33
|
+
),
|
|
34
|
+
) -> Iterator[dict]:
|
|
35
|
+
url = "https://r.applovin.com/max/userAdRevenueReport"
|
|
36
|
+
start_date = pendulum.from_format(dateTime.last_value, "YYYY-MM-DD").date()
|
|
37
|
+
if dateTime.end_value is None:
|
|
38
|
+
end_date = (pendulum.yesterday("UTC")).date()
|
|
39
|
+
else:
|
|
40
|
+
end_date = pendulum.from_format(dateTime.end_value, "YYYY-MM-DD").date()
|
|
41
|
+
yield get_data(
|
|
42
|
+
url=url,
|
|
43
|
+
start_date=start_date,
|
|
44
|
+
end_date=end_date,
|
|
45
|
+
application=application,
|
|
46
|
+
api_key=api_key,
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
return fetch_ad_revenue_report
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def create_client() -> requests.Session:
|
|
53
|
+
return Client(
|
|
54
|
+
request_timeout=10.0,
|
|
55
|
+
raise_for_status=False,
|
|
56
|
+
retry_condition=retry_on_limit,
|
|
57
|
+
request_max_attempts=12,
|
|
58
|
+
).session
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def retry_on_limit(
|
|
62
|
+
response: requests.Response | None, exception: BaseException | None
|
|
63
|
+
) -> bool:
|
|
64
|
+
if response is None:
|
|
65
|
+
return False
|
|
66
|
+
return response.status_code == 429
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def get_data(
|
|
70
|
+
url: str, start_date: Date, end_date: Date, application: str, api_key: str
|
|
71
|
+
):
|
|
72
|
+
client = create_client()
|
|
73
|
+
platforms = ["ios", "android", "fireos"]
|
|
74
|
+
current_date = start_date
|
|
75
|
+
while current_date <= end_date:
|
|
76
|
+
for platform in platforms:
|
|
77
|
+
params = {
|
|
78
|
+
"api_key": api_key,
|
|
79
|
+
"date": current_date.strftime("%Y-%m-%d"),
|
|
80
|
+
"platform": platform,
|
|
81
|
+
"application": application,
|
|
82
|
+
"aggregated": "false",
|
|
83
|
+
}
|
|
84
|
+
|
|
85
|
+
response = client.get(url=url, params=params)
|
|
86
|
+
|
|
87
|
+
if response.status_code == 400:
|
|
88
|
+
raise ValueError(response.text)
|
|
89
|
+
|
|
90
|
+
if response.status_code != 200:
|
|
91
|
+
continue
|
|
92
|
+
|
|
93
|
+
response_url = response.json().get("ad_revenue_report_url")
|
|
94
|
+
df = pd.read_csv(response_url)
|
|
95
|
+
df["Date"] = pd.to_datetime(df["Date"])
|
|
96
|
+
df["_partition_date"] = df["Date"].dt.strftime("%Y-%m-%d")
|
|
97
|
+
yield df
|
|
98
|
+
|
|
99
|
+
current_date = current_date.add(days=1)
|
ingestr/src/destinations.py
CHANGED
|
@@ -60,9 +60,14 @@ class BigQueryDestination:
|
|
|
60
60
|
base64.b64decode(credentials_base64[0]).decode("utf-8")
|
|
61
61
|
)
|
|
62
62
|
|
|
63
|
+
project_id = None
|
|
64
|
+
if source_fields.hostname:
|
|
65
|
+
project_id = source_fields.hostname
|
|
66
|
+
|
|
63
67
|
return dlt.destinations.bigquery(
|
|
64
68
|
credentials=credentials, # type: ignore
|
|
65
69
|
location=location,
|
|
70
|
+
project_id=project_id,
|
|
66
71
|
**kwargs,
|
|
67
72
|
)
|
|
68
73
|
|
ingestr/src/factory.py
CHANGED
|
@@ -20,6 +20,7 @@ from ingestr.src.sources import (
|
|
|
20
20
|
AdjustSource,
|
|
21
21
|
AirtableSource,
|
|
22
22
|
AppleAppStoreSource,
|
|
23
|
+
ApplovinMaxSource,
|
|
23
24
|
AppLovinSource,
|
|
24
25
|
AppsflyerSource,
|
|
25
26
|
ArrowMemoryMappedSource,
|
|
@@ -67,9 +68,9 @@ SQL_SOURCE_SCHEMES = [
|
|
|
67
68
|
"oracle+cx_oracle",
|
|
68
69
|
"hana",
|
|
69
70
|
"clickhouse",
|
|
70
|
-
|
|
71
71
|
]
|
|
72
72
|
|
|
73
|
+
|
|
73
74
|
class SourceProtocol(Protocol):
|
|
74
75
|
def dlt_source(self, uri: str, table: str, **kwargs):
|
|
75
76
|
pass
|
|
@@ -134,6 +135,7 @@ class SourceDestinationFactory:
|
|
|
134
135
|
"gs": GCSSource,
|
|
135
136
|
"linkedinads": LinkedInAdsSource,
|
|
136
137
|
"applovin": AppLovinSource,
|
|
138
|
+
"applovinmax": ApplovinMaxSource,
|
|
137
139
|
}
|
|
138
140
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
139
141
|
"bigquery": BigQueryDestination,
|
ingestr/src/sources.py
CHANGED
|
@@ -51,6 +51,7 @@ from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
|
|
|
51
51
|
from ingestr.src.adjust.adjust_helpers import parse_filters
|
|
52
52
|
from ingestr.src.airtable import airtable_source
|
|
53
53
|
from ingestr.src.applovin import applovin_source
|
|
54
|
+
from ingestr.src.applovin_max import applovin_max_source
|
|
54
55
|
from ingestr.src.appsflyer._init_ import appsflyer_source
|
|
55
56
|
from ingestr.src.appstore import app_store
|
|
56
57
|
from ingestr.src.appstore.client import AppStoreConnectClient
|
|
@@ -132,11 +133,11 @@ class SqlSource:
|
|
|
132
133
|
|
|
133
134
|
if uri.startswith("mysql://"):
|
|
134
135
|
uri = uri.replace("mysql://", "mysql+pymysql://")
|
|
135
|
-
|
|
136
|
+
|
|
136
137
|
if uri.startswith("clickhouse://"):
|
|
137
138
|
uri = uri.replace("clickhouse://", "clickhouse+native://")
|
|
138
139
|
if "secure=" not in uri:
|
|
139
|
-
|
|
140
|
+
uri += "?secure=1"
|
|
140
141
|
|
|
141
142
|
query_adapters = []
|
|
142
143
|
if kwargs.get("sql_limit"):
|
|
@@ -1787,3 +1788,48 @@ class AppLovinSource:
|
|
|
1787
1788
|
raise UnsupportedResourceError(table, "AppLovin")
|
|
1788
1789
|
|
|
1789
1790
|
return src.with_resources(table)
|
|
1791
|
+
|
|
1792
|
+
|
|
1793
|
+
class ApplovinMaxSource:
|
|
1794
|
+
def handles_incrementality(self) -> bool:
|
|
1795
|
+
return True
|
|
1796
|
+
|
|
1797
|
+
def dlt_source(self, uri: str, table: str, **kwargs):
|
|
1798
|
+
parsed_uri = urlparse(uri)
|
|
1799
|
+
params = parse_qs(parsed_uri.query)
|
|
1800
|
+
|
|
1801
|
+
api_key = params.get("api_key")
|
|
1802
|
+
if api_key is None:
|
|
1803
|
+
raise ValueError("api_key is required to connect to AppLovin Max API.")
|
|
1804
|
+
|
|
1805
|
+
application = params.get("application")
|
|
1806
|
+
if application is None:
|
|
1807
|
+
raise ValueError("application is required to connect to AppLovin Max API.")
|
|
1808
|
+
|
|
1809
|
+
interval_start = kwargs.get("interval_start")
|
|
1810
|
+
interval_end = kwargs.get("interval_end")
|
|
1811
|
+
|
|
1812
|
+
if "ad_revenue" in table:
|
|
1813
|
+
table = "ad_revenue"
|
|
1814
|
+
else:
|
|
1815
|
+
raise ValueError(
|
|
1816
|
+
f"Table name '{table}' is not supported for AppLovin Max source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
|
|
1817
|
+
)
|
|
1818
|
+
|
|
1819
|
+
now = pendulum.now("UTC")
|
|
1820
|
+
default_start = now.subtract(days=30).date()
|
|
1821
|
+
|
|
1822
|
+
start_date = (
|
|
1823
|
+
interval_start if interval_start is not None else default_start
|
|
1824
|
+
).strftime("%Y-%m-%d")
|
|
1825
|
+
|
|
1826
|
+
end_date = (
|
|
1827
|
+
interval_end.strftime("%Y-%m-%d") if interval_end is not None else None
|
|
1828
|
+
)
|
|
1829
|
+
|
|
1830
|
+
return applovin_max_source(
|
|
1831
|
+
start_date=start_date,
|
|
1832
|
+
end_date=end_date,
|
|
1833
|
+
api_key=api_key[0],
|
|
1834
|
+
application=application[0],
|
|
1835
|
+
).with_resources(table)
|
|
{ingestr-0.13.4.dist-info → ingestr-0.13.6.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.13.4
|
|
3
|
+
Version: 0.13.6
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
{ingestr-0.13.4.dist-info → ingestr-0.13.6.dist-info}/RECORD
CHANGED
|
@@ -1,19 +1,20 @@
|
|
|
1
1
|
ingestr/main.py,sha256=ufn8AcM2ID80ChUApJzYDjnQaurMXOkYfTm6GzAggSQ,24746
|
|
2
2
|
ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
|
|
3
3
|
ingestr/src/blob.py,sha256=XDk_XqmU_He4sQ1brY3ceoZgpq_ZBZihz1gHW9MzqUk,1381
|
|
4
|
-
ingestr/src/destinations.py,sha256=
|
|
4
|
+
ingestr/src/destinations.py,sha256=vrGij4qMPCdXTMIimROWBJFqzOqCM4DFmgyubgSHejA,11279
|
|
5
5
|
ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
|
|
6
|
-
ingestr/src/factory.py,sha256=
|
|
6
|
+
ingestr/src/factory.py,sha256=XYwjy5dfG5mLIU1v-mS17Kwl0cxSs3MG7NtgPPwZ_0U,5009
|
|
7
7
|
ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
|
|
8
8
|
ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
|
|
9
|
-
ingestr/src/sources.py,sha256=
|
|
9
|
+
ingestr/src/sources.py,sha256=ljh__y_ZXj8NUT0v63ZAT42K1SZsEJEB88YtQHG0IXQ,64830
|
|
10
10
|
ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
|
|
11
11
|
ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
|
|
12
12
|
ingestr/src/version.py,sha256=l6zVm0GMMwnBlIOONWc6snhko9d8-HO1y6Jj1T1vsiQ,158
|
|
13
13
|
ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
|
|
14
14
|
ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
|
|
15
15
|
ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
|
|
16
|
-
ingestr/src/applovin/__init__.py,sha256=
|
|
16
|
+
ingestr/src/applovin/__init__.py,sha256=vtmYnRKnNOSzFWQIbKGbrcu6AcBdHuhPMsNruUvEIgg,7000
|
|
17
|
+
ingestr/src/applovin_max/__init__.py,sha256=1NUOeJzRyZZQ95KEirbrlSrk-8SNc9JrlM_5pGgBgHg,2878
|
|
17
18
|
ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
|
|
18
19
|
ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
|
|
19
20
|
ingestr/src/appstore/__init__.py,sha256=3P4VZH2WJF477QjW19jMTwu6L8DXcLkYSdutnvp3AmM,4742
|
|
@@ -102,8 +103,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
|
|
|
102
103
|
ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
|
|
103
104
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
104
105
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
105
|
-
ingestr-0.13.4.dist-info/METADATA,sha256=
|
|
106
|
-
ingestr-0.13.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
107
|
-
ingestr-0.13.4.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
108
|
-
ingestr-0.13.4.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
109
|
-
ingestr-0.13.4.dist-info/RECORD,,
|
|
106
|
+
ingestr-0.13.6.dist-info/METADATA,sha256=ylIPPjjCbE-qTxxYtx3OXydv-uOYuQvD-qNc13fs-Bk,8956
|
|
107
|
+
ingestr-0.13.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
108
|
+
ingestr-0.13.6.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
109
|
+
ingestr-0.13.6.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
110
|
+
ingestr-0.13.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|