ingestr 0.13.5__py3-none-any.whl → 0.13.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

@@ -1,6 +1,7 @@
1
- from datetime import datetime, timezone
1
+ from datetime import datetime, timezone, timedelta
2
2
  from enum import Enum
3
3
  from typing import Dict, List, Optional
4
+ from requests import Response
4
5
 
5
6
  import dlt
6
7
  from dlt.sources.rest_api import EndpointResource, RESTAPIConfig, rest_api_resources
@@ -12,11 +13,8 @@ class InvalidCustomReportError(Exception):
12
13
  "Custom report should be in the format 'custom:{endpoint}:{report_type}:{dimensions}"
13
14
  )
14
15
 
15
-
16
- class InvalidDimensionError(Exception):
17
- def __init__(self, dim: str, report_type: str):
18
- super().__init__(f"Unknown dimension {dim} for report type {report_type}")
19
-
16
+ class ClientError(Exception):
17
+ pass
20
18
 
21
19
  TYPE_HINTS = {
22
20
  "application_is_hidden": {"data_type": "bool"},
@@ -108,32 +106,6 @@ REPORT_SCHEMA: Dict[ReportType, List[str]] = {
108
106
  ],
109
107
  }
110
108
 
111
- # NOTE(turtledev): These values are valid columns,
112
- # but often don't produce a value. Find a way to either add
113
- # a default value, or use an alternative strategy to de-duplicate
114
- # OR make them nullable
115
- SKA_REPORT_EXCLUDE = [
116
- "ad",
117
- "ad_id",
118
- "ad_type",
119
- "average_cpc",
120
- "campaign_ad_type",
121
- "clicks",
122
- "conversions",
123
- "conversion_rate",
124
- "creative_set",
125
- "creative_set_id",
126
- "ctr",
127
- "custom_page_id",
128
- "device_type",
129
- "first_purchase",
130
- "impressions",
131
- "placement_type",
132
- "sales",
133
- "size",
134
- "traffic_source",
135
- ]
136
-
137
109
  PROBABILISTIC_REPORT_EXCLUDE = [
138
110
  "installs",
139
111
  "redownloads",
@@ -147,19 +119,16 @@ def applovin_source(
147
119
  end_date: Optional[str],
148
120
  custom: Optional[str],
149
121
  ):
150
- ska_report_columns = exclude(
151
- REPORT_SCHEMA[ReportType.ADVERTISER],
152
- SKA_REPORT_EXCLUDE,
153
- )
154
122
 
155
- probabilistic_report_columns = exclude(
156
- REPORT_SCHEMA[ReportType.ADVERTISER],
157
- PROBABILISTIC_REPORT_EXCLUDE,
158
- )
159
123
  backfill = False
160
124
  if end_date is None:
161
125
  backfill = True
162
- end_date = datetime.now(timezone.utc).date().strftime("%Y-%m-%d")
126
+
127
+ # use the greatest of yesterday and start_date
128
+ end_date = max(
129
+ datetime.now(timezone.utc) - timedelta(days=1),
130
+ datetime.fromisoformat(start_date).replace(tzinfo=timezone.utc)
131
+ ).strftime("%Y-%m-%d")
163
132
 
164
133
  config: RESTAPIConfig = {
165
134
  "client": {
@@ -186,6 +155,9 @@ def applovin_source(
186
155
  "end": end_date,
187
156
  },
188
157
  "paginator": "single_page",
158
+ "response_actions": [
159
+ http_error_handler,
160
+ ]
189
161
  },
190
162
  },
191
163
  "resources": [
@@ -204,13 +176,16 @@ def applovin_source(
204
176
  resource(
205
177
  "advertiser-probabilistic-report",
206
178
  "probabilisticReport",
207
- probabilistic_report_columns,
179
+ exclude(
180
+ REPORT_SCHEMA[ReportType.ADVERTISER],
181
+ PROBABILISTIC_REPORT_EXCLUDE
182
+ ),
208
183
  ReportType.ADVERTISER,
209
184
  ),
210
185
  resource(
211
186
  "advertiser-ska-report",
212
187
  "skaReport",
213
- ska_report_columns,
188
+ REPORT_SCHEMA[ReportType.ADVERTISER],
214
189
  ReportType.ADVERTISER,
215
190
  ),
216
191
  ],
@@ -280,3 +255,7 @@ def exclude(source: List[str], exclude_list: List[str]) -> List[str]:
280
255
 
281
256
  def build_type_hints(cols: List[str]) -> dict:
282
257
  return {col: TYPE_HINTS[col] for col in cols if col in TYPE_HINTS}
258
+
259
+ def http_error_handler(resp: Response):
260
+ if not resp.ok:
261
+ raise ClientError(f"HTTP Status {resp.status_code}: {resp.text}")
@@ -0,0 +1,99 @@
1
+ from typing import Iterator
2
+
3
+ import dlt
4
+ import pandas as pd # type: ignore[import-untyped]
5
+ import pendulum
6
+ import requests
7
+ from dlt.sources import DltResource
8
+ from dlt.sources.helpers.requests import Client
9
+ from pendulum.date import Date
10
+
11
+
12
+ @dlt.source(max_table_nesting=0)
13
+ def applovin_max_source(
14
+ start_date: str,
15
+ application: str,
16
+ api_key: str,
17
+ end_date: str | None,
18
+ ) -> DltResource:
19
+ @dlt.resource(
20
+ name="ad_revenue",
21
+ write_disposition="merge",
22
+ merge_key="_partition_date",
23
+ )
24
+ def fetch_ad_revenue_report(
25
+ dateTime=(
26
+ dlt.sources.incremental(
27
+ "_partition_date",
28
+ initial_value=start_date,
29
+ end_value=end_date,
30
+ range_start="closed",
31
+ range_end="closed",
32
+ )
33
+ ),
34
+ ) -> Iterator[dict]:
35
+ url = "https://r.applovin.com/max/userAdRevenueReport"
36
+ start_date = pendulum.from_format(dateTime.last_value, "YYYY-MM-DD").date()
37
+ if dateTime.end_value is None:
38
+ end_date = (pendulum.yesterday("UTC")).date()
39
+ else:
40
+ end_date = pendulum.from_format(dateTime.end_value, "YYYY-MM-DD").date()
41
+ yield get_data(
42
+ url=url,
43
+ start_date=start_date,
44
+ end_date=end_date,
45
+ application=application,
46
+ api_key=api_key,
47
+ )
48
+
49
+ return fetch_ad_revenue_report
50
+
51
+
52
+ def create_client() -> requests.Session:
53
+ return Client(
54
+ request_timeout=10.0,
55
+ raise_for_status=False,
56
+ retry_condition=retry_on_limit,
57
+ request_max_attempts=12,
58
+ ).session
59
+
60
+
61
+ def retry_on_limit(
62
+ response: requests.Response | None, exception: BaseException | None
63
+ ) -> bool:
64
+ if response is None:
65
+ return False
66
+ return response.status_code == 429
67
+
68
+
69
+ def get_data(
70
+ url: str, start_date: Date, end_date: Date, application: str, api_key: str
71
+ ):
72
+ client = create_client()
73
+ platforms = ["ios", "android", "fireos"]
74
+ current_date = start_date
75
+ while current_date <= end_date:
76
+ for platform in platforms:
77
+ params = {
78
+ "api_key": api_key,
79
+ "date": current_date.strftime("%Y-%m-%d"),
80
+ "platform": platform,
81
+ "application": application,
82
+ "aggregated": "false",
83
+ }
84
+
85
+ response = client.get(url=url, params=params)
86
+
87
+ if response.status_code == 400:
88
+ raise ValueError(response.text)
89
+
90
+ if response.status_code != 200:
91
+ continue
92
+
93
+ response_url = response.json().get("ad_revenue_report_url")
94
+ df = pd.read_csv(response_url)
95
+ df["Date"] = pd.to_datetime(df["Date"])
96
+ df["_partition_date"] = df["Date"].dt.strftime("%Y-%m-%d")
97
+ yield df
98
+
99
+ current_date = current_date.add(days=1)
ingestr/src/factory.py CHANGED
@@ -20,6 +20,7 @@ from ingestr.src.sources import (
20
20
  AdjustSource,
21
21
  AirtableSource,
22
22
  AppleAppStoreSource,
23
+ ApplovinMaxSource,
23
24
  AppLovinSource,
24
25
  AppsflyerSource,
25
26
  ArrowMemoryMappedSource,
@@ -134,6 +135,7 @@ class SourceDestinationFactory:
134
135
  "gs": GCSSource,
135
136
  "linkedinads": LinkedInAdsSource,
136
137
  "applovin": AppLovinSource,
138
+ "applovinmax": ApplovinMaxSource,
137
139
  }
138
140
  destinations: Dict[str, Type[DestinationProtocol]] = {
139
141
  "bigquery": BigQueryDestination,
ingestr/src/sources.py CHANGED
@@ -51,6 +51,7 @@ from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
51
51
  from ingestr.src.adjust.adjust_helpers import parse_filters
52
52
  from ingestr.src.airtable import airtable_source
53
53
  from ingestr.src.applovin import applovin_source
54
+ from ingestr.src.applovin_max import applovin_max_source
54
55
  from ingestr.src.appsflyer._init_ import appsflyer_source
55
56
  from ingestr.src.appstore import app_store
56
57
  from ingestr.src.appstore.client import AppStoreConnectClient
@@ -1787,3 +1788,48 @@ class AppLovinSource:
1787
1788
  raise UnsupportedResourceError(table, "AppLovin")
1788
1789
 
1789
1790
  return src.with_resources(table)
1791
+
1792
+
1793
+ class ApplovinMaxSource:
1794
+ def handles_incrementality(self) -> bool:
1795
+ return True
1796
+
1797
+ def dlt_source(self, uri: str, table: str, **kwargs):
1798
+ parsed_uri = urlparse(uri)
1799
+ params = parse_qs(parsed_uri.query)
1800
+
1801
+ api_key = params.get("api_key")
1802
+ if api_key is None:
1803
+ raise ValueError("api_key is required to connect to AppLovin Max API.")
1804
+
1805
+ application = params.get("application")
1806
+ if application is None:
1807
+ raise ValueError("application is required to connect to AppLovin Max API.")
1808
+
1809
+ interval_start = kwargs.get("interval_start")
1810
+ interval_end = kwargs.get("interval_end")
1811
+
1812
+ if "ad_revenue" in table:
1813
+ table = "ad_revenue"
1814
+ else:
1815
+ raise ValueError(
1816
+ f"Table name '{table}' is not supported for AppLovin Max source yet, if you are interested in it please create a GitHub issue at https://github.com/bruin-data/ingestr"
1817
+ )
1818
+
1819
+ now = pendulum.now("UTC")
1820
+ default_start = now.subtract(days=30).date()
1821
+
1822
+ start_date = (
1823
+ interval_start if interval_start is not None else default_start
1824
+ ).strftime("%Y-%m-%d")
1825
+
1826
+ end_date = (
1827
+ interval_end.strftime("%Y-%m-%d") if interval_end is not None else None
1828
+ )
1829
+
1830
+ return applovin_max_source(
1831
+ start_date=start_date,
1832
+ end_date=end_date,
1833
+ api_key=api_key[0],
1834
+ application=application[0],
1835
+ ).with_resources(table)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.5
3
+ Version: 0.13.6
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -3,17 +3,18 @@ ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
3
  ingestr/src/blob.py,sha256=XDk_XqmU_He4sQ1brY3ceoZgpq_ZBZihz1gHW9MzqUk,1381
4
4
  ingestr/src/destinations.py,sha256=vrGij4qMPCdXTMIimROWBJFqzOqCM4DFmgyubgSHejA,11279
5
5
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
6
- ingestr/src/factory.py,sha256=CG_CZox_vJOYfmlCJ1FH9Ipb5LDeESCzox5ZOAO-wjs,4944
6
+ ingestr/src/factory.py,sha256=XYwjy5dfG5mLIU1v-mS17Kwl0cxSs3MG7NtgPPwZ_0U,5009
7
7
  ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
8
8
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
9
- ingestr/src/sources.py,sha256=AxtWtqhn1OOFAYypLph2FhmD3CD7CDVBRvOIKb6mDIg,63263
9
+ ingestr/src/sources.py,sha256=ljh__y_ZXj8NUT0v63ZAT42K1SZsEJEB88YtQHG0IXQ,64830
10
10
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
11
11
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
12
12
  ingestr/src/version.py,sha256=l6zVm0GMMwnBlIOONWc6snhko9d8-HO1y6Jj1T1vsiQ,158
13
13
  ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
14
14
  ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
15
15
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
16
- ingestr/src/applovin/__init__.py,sha256=VwVTtVQetnSpUt3cgy6TuH3sYdTnQP63eO_qYqT1TEA,7387
16
+ ingestr/src/applovin/__init__.py,sha256=vtmYnRKnNOSzFWQIbKGbrcu6AcBdHuhPMsNruUvEIgg,7000
17
+ ingestr/src/applovin_max/__init__.py,sha256=1NUOeJzRyZZQ95KEirbrlSrk-8SNc9JrlM_5pGgBgHg,2878
17
18
  ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
18
19
  ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
19
20
  ingestr/src/appstore/__init__.py,sha256=3P4VZH2WJF477QjW19jMTwu6L8DXcLkYSdutnvp3AmM,4742
@@ -102,8 +103,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
102
103
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
103
104
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
104
105
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
105
- ingestr-0.13.5.dist-info/METADATA,sha256=5kk8N8xrnWAJvOL4n2Mh1Xv7sXOn6iJyybHVBEGNRcU,8956
106
- ingestr-0.13.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
107
- ingestr-0.13.5.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
108
- ingestr-0.13.5.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
109
- ingestr-0.13.5.dist-info/RECORD,,
106
+ ingestr-0.13.6.dist-info/METADATA,sha256=ylIPPjjCbE-qTxxYtx3OXydv-uOYuQvD-qNc13fs-Bk,8956
107
+ ingestr-0.13.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
108
+ ingestr-0.13.6.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
109
+ ingestr-0.13.6.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
110
+ ingestr-0.13.6.dist-info/RECORD,,