ingestr 0.13.2__py3-none-any.whl → 0.13.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

@@ -0,0 +1,282 @@
1
+ from datetime import datetime, timezone
2
+ from enum import Enum
3
+ from typing import Dict, List, Optional
4
+
5
+ import dlt
6
+ from dlt.sources.rest_api import EndpointResource, RESTAPIConfig, rest_api_resources
7
+
8
+
9
class InvalidCustomReportError(Exception):
    """Raised when a custom report spec does not have the expected format."""

    def __init__(self):
        # The format hint is quoted on both sides; the closing quote used to
        # be missing, which made the message look truncated.
        super().__init__(
            "Custom report should be in the format 'custom:{endpoint}:{report_type}:{dimensions}'"
        )
14
+
15
+
16
class InvalidDimensionError(Exception):
    """Signals a dimension name that is unknown for the given report type."""

    def __init__(self, dim: str, report_type: str):
        message = f"Unknown dimension {dim} for report type {report_type}"
        super().__init__(message)
19
+
20
+
21
# Explicit column type hints, keyed by report column name. Columns that are
# not listed here get no explicit hint. Every entry gets its own dict so that
# mutating one hint never affects another.
TYPE_HINTS = {
    column: {"data_type": data_type}
    for column, data_type in (
        ("application_is_hidden", "bool"),
        ("average_cpa", "double"),
        ("average_cpc", "double"),
        ("campaign_bid_goal", "double"),
        ("campaign_roas_goal", "double"),
        ("clicks", "bigint"),
        ("conversions", "bigint"),
        ("conversion_rate", "double"),
        ("cost", "double"),  # assuming float.
        ("ctr", "double"),
        ("day", "date"),
        ("first_purchase", "bigint"),
        ("ecpm", "double"),
        ("impressions", "bigint"),
        ("installs", "bigint"),
        ("revenue", "double"),
        ("redownloads", "bigint"),
        ("sales", "double"),  # assuming float.
    )
}
41
+
42
+
43
class ReportType(Enum):
    """Report families; the member value is sent as the `report_type` parameter."""

    # Member order is part of the contract: iteration yields PUBLISHER first.
    PUBLISHER = "publisher"
    ADVERTISER = "advertiser"
46
+
47
+
48
# Columns requested for publisher reports, in request order.
_PUBLISHER_COLUMNS: List[str] = [
    "ad_type",
    "application",
    "application_is_hidden",
    "bidding_integration",
    "clicks",
    "country",
    "ctr",
    "day",
    "device_type",
    "ecpm",
    "impressions",
    "package_name",
    "placement_type",
    "platform",
    "revenue",
    "size",
    "store_id",
    "zone",
    "zone_id",
]

# Columns requested for advertiser reports, in request order.
_ADVERTISER_COLUMNS: List[str] = [
    "ad",
    "ad_creative_type",
    "ad_id",
    "ad_type",
    "average_cpa",
    "average_cpc",
    "campaign",
    "campaign_ad_type",
    "campaign_bid_goal",
    "campaign_id_external",
    "campaign_package_name",
    "campaign_roas_goal",
    "campaign_store_id",
    "campaign_type",
    "clicks",
    "conversions",
    "conversion_rate",
    "cost",
    "country",
    "creative_set",
    "creative_set_id",
    "ctr",
    "custom_page_id",
    "day",
    "device_type",
    "external_placement_id",
    "first_purchase",
    "impressions",
    "installs",
    "optimization_day_target",
    "placement_type",
    "platform",
    "redownloads",
    "sales",
    "size",
    "target_event",
    "traffic_source",
]

# Full column list for each report type.
REPORT_SCHEMA: Dict[ReportType, List[str]] = {
    ReportType.PUBLISHER: _PUBLISHER_COLUMNS,
    ReportType.ADVERTISER: _ADVERTISER_COLUMNS,
}
110
+
111
# NOTE(turtledev): every name below is a valid column, but it often comes back
# without a value. Options to revisit: give these columns a default value,
# de-duplicate with a different strategy, or make them nullable.
SKA_REPORT_EXCLUDE = [
    "ad", "ad_id", "ad_type",
    "average_cpc",
    "campaign_ad_type",
    "clicks", "conversions", "conversion_rate",
    "creative_set", "creative_set_id",
    "ctr",
    "custom_page_id",
    "device_type",
    "first_purchase",
    "impressions",
    "placement_type",
    "sales", "size",
    "traffic_source",
]  # fmt: skip

# Advertiser columns left out of the probabilistic report.
PROBABILISTIC_REPORT_EXCLUDE = ["installs", "redownloads"]
141
+
142
+
143
@dlt.source
def applovin_source(
    api_key: str,
    start_date: str,
    end_date: Optional[str],
    custom: Optional[str],
):
    """Yield the AppLovin report resources described by a REST API config.

    Args:
        api_key: Sent as the `api_key` query parameter on every request.
        start_date: Initial value of the incremental cursor on `day`.
        end_date: Value of the `end` request parameter. When None, today's
            UTC date (YYYY-MM-DD) is used instead and is also stored as the
            cursor's `end_value`.
        custom: Optional custom report spec. When truthy, the resource built
            by `custom_report_from_spec` is appended after the built-in ones.
    """
    # Remember whether the caller left the end date open before defaulting it.
    backfill = end_date is None
    if end_date is None:
        end_date = datetime.now(timezone.utc).date().strftime("%Y-%m-%d")

    advertiser_columns = REPORT_SCHEMA[ReportType.ADVERTISER]
    publisher_columns = REPORT_SCHEMA[ReportType.PUBLISHER]

    # The SKA and probabilistic reports use the advertiser columns minus
    # their respective exclusion lists.
    ska_columns = exclude(advertiser_columns, SKA_REPORT_EXCLUDE)
    probabilistic_columns = exclude(advertiser_columns, PROBABILISTIC_REPORT_EXCLUDE)

    config: RESTAPIConfig = {
        "client": {
            "base_url": "https://r.applovin.com/",
            "auth": {
                "type": "api_key",
                "name": "api_key",
                "location": "query",
                "api_key": api_key,
            },
        },
        "resource_defaults": {
            "write_disposition": "merge",
            "endpoint": {
                "incremental": {
                    "cursor_path": "day",
                    "start_param": "start",
                    "initial_value": start_date,
                    "range_start": "closed",
                    "range_end": "closed",
                },
                "params": {
                    "format": "json",
                    "end": end_date,
                },
                "paginator": "single_page",
            },
        },
        "resources": [
            resource(
                "publisher-report",
                "report",
                publisher_columns,
                ReportType.PUBLISHER,
            ),
            resource(
                "advertiser-report",
                "report",
                advertiser_columns,
                ReportType.ADVERTISER,
            ),
            resource(
                "advertiser-probabilistic-report",
                "probabilisticReport",
                probabilistic_columns,
                ReportType.ADVERTISER,
            ),
            resource(
                "advertiser-ska-report",
                "skaReport",
                ska_columns,
                ReportType.ADVERTISER,
            ),
        ],
    }

    if custom:
        config["resources"].append(custom_report_from_spec(custom))

    if backfill:
        config["resource_defaults"]["endpoint"]["incremental"]["end_value"] = end_date  # type: ignore

    yield from rest_api_resources(config)
227
+
228
+
229
def resource(
    name: str,
    endpoint: str,
    dimensions: List[str],
    report_type: ReportType,
) -> EndpointResource:
    """Build the resource definition for a single report.

    Args:
        name: Resource name.
        endpoint: Request path of the report endpoint.
        dimensions: Columns to request; sent comma-joined as `columns`.
        report_type: Its value is sent as the `report_type` parameter.

    Returns:
        A resource dict that uses `day` as its merge key.
    """
    column_hints = build_type_hints(dimensions)
    query_params = {
        "report_type": report_type.value,
        "columns": ",".join(dimensions),
    }
    return {
        "name": name,
        "columns": column_hints,
        "merge_key": "day",
        "endpoint": {"path": endpoint, "params": query_params},
    }
247
+
248
+
249
def custom_report_from_spec(spec: str) -> EndpointResource:
    """Turn a `custom:{endpoint}:{report_type}:{dimensions}` spec into a resource.

    The first segment is ignored; only the number of segments is checked.

    Raises:
        InvalidCustomReportError: If the spec does not split into exactly
            four ':'-separated parts.
        ValueError: If the report type segment is not a `ReportType` value.
    """
    segments = spec.split(":")
    if len(segments) != 4:
        raise InvalidCustomReportError()

    endpoint_part, report_part, dimension_part = segments[1:]
    # Resolve the report type first so an unknown one fails before anything else.
    kind = ReportType(report_part.strip())

    return resource(
        name="custom_report",
        endpoint=endpoint_part.strip(),
        dimensions=validate_dimensions(dimension_part),
        report_type=kind,
    )
265
+
266
+
267
def validate_dimensions(dimensions: str) -> List[str]:
    """Parse a comma-separated dimension list and make sure it contains `day`.

    Names are stripped of surrounding whitespace. Empty entries, as produced
    by an empty string or by stray or trailing commas, are dropped so they
    never reach the request's column list.

    Args:
        dimensions: Comma-separated dimension names.

    Returns:
        The dimension names in their original order, with `day` appended
        when it was not already present.
    """
    stripped = (part.strip() for part in dimensions.split(","))
    dims = [name for name in stripped if name]

    if "day" not in dims:
        dims.append("day")

    return dims
274
+
275
+
276
def exclude(source: List[str], exclude_list: List[str]) -> List[str]:
    """Return the items of `source`, in order, that are not in `exclude_list`."""
    dropped = frozenset(exclude_list)
    return [column for column in source if column not in dropped]
279
+
280
+
281
def build_type_hints(cols: List[str]) -> dict:
    """Map each column in `cols` that has a `TYPE_HINTS` entry to that entry."""
    hints = {}
    for name in cols:
        if name in TYPE_HINTS:
            hints[name] = TYPE_HINTS[name]
    return hints
@@ -297,14 +297,16 @@ class ClickhouseDestination:
297
297
  raise ValueError(
298
298
  "The TCP port of the ClickHouse server is required to establish a connection."
299
299
  )
300
-
300
+
301
301
  query_params = parse_qs(parsed_uri.query)
302
302
  secure = int(query_params["secure"][0]) if "secure" in query_params else 1
303
303
 
304
304
  http_port = (
305
305
  int(query_params["http_port"][0])
306
306
  if "http_port" in query_params
307
- else 8443 if secure == 1 else 8123
307
+ else 8443
308
+ if secure == 1
309
+ else 8123
308
310
  )
309
311
 
310
312
  if secure not in (0, 1):
ingestr/src/factory.py CHANGED
@@ -20,6 +20,7 @@ from ingestr.src.sources import (
20
20
  AdjustSource,
21
21
  AirtableSource,
22
22
  AppleAppStoreSource,
23
+ AppLovinSource,
23
24
  AppsflyerSource,
24
25
  ArrowMemoryMappedSource,
25
26
  AsanaSource,
@@ -131,6 +132,7 @@ class SourceDestinationFactory:
131
132
  "appstore": AppleAppStoreSource,
132
133
  "gs": GCSSource,
133
134
  "linkedinads": LinkedInAdsSource,
135
+ "applovin": AppLovinSource,
134
136
  }
135
137
  destinations: Dict[str, Type[DestinationProtocol]] = {
136
138
  "bigquery": BigQueryDestination,
ingestr/src/sources.py CHANGED
@@ -50,6 +50,7 @@ from ingestr.src import blob
50
50
  from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
51
51
  from ingestr.src.adjust.adjust_helpers import parse_filters
52
52
  from ingestr.src.airtable import airtable_source
53
+ from ingestr.src.applovin import applovin_source
53
54
  from ingestr.src.appsflyer._init_ import appsflyer_source
54
55
  from ingestr.src.appstore import app_store
55
56
  from ingestr.src.appstore.client import AppStoreConnectClient
@@ -1737,3 +1738,47 @@ class LinkedInAdsSource:
1737
1738
  metrics=metrics,
1738
1739
  time_granularity=time_granularity,
1739
1740
  ).with_resources("custom_reports")
1741
+
1742
+
1743
class AppLovinSource:
    """Source adapter that builds an `applovin_source` from a URI and table name."""

    def handles_incrementality(self) -> bool:
        """Report that this source manages incremental loading itself."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Create the AppLovin source restricted to the requested table.

        Args:
            uri: Connection URI; its query string must carry `api_key`.
            table: Resource name, or a spec starting with `custom:`, which is
                passed on as the custom report and loaded as `custom_report`.
            **kwargs: Reads `incremental_key`, `interval_start` and
                `interval_end`.

        Raises:
            ValueError: If `incremental_key` is provided.
            MissingValueError: If the URI has no `api_key` query parameter.
            UnsupportedResourceError: If `table` is not a resource of the source.
        """
        if kwargs.get("incremental_key") is not None:
            # This message used to name "Google Ads" (copy-paste slip).
            raise ValueError(
                "AppLovin takes care of incrementality on its own, you should not provide incremental_key"
            )

        parsed_uri = urlparse(uri)
        params = parse_qs(parsed_uri.query)

        api_key = params.get("api_key", None)
        if api_key is None:
            raise MissingValueError("api_key", "AppLovin")

        # NOTE(review): both intervals are formatted with strftime below, so
        # they are presumably datetime/date objects; confirm against the caller.
        interval_start = kwargs.get("interval_start")
        interval_end = kwargs.get("interval_end")

        # Without an explicit start, begin one day before the current local time.
        now = datetime.now()
        start_date = (
            interval_start if interval_start is not None else now - timedelta(days=1)
        )
        end_date = interval_end

        custom_report = None
        if table.startswith("custom:"):
            custom_report = table
            table = "custom_report"

        src = applovin_source(
            api_key[0],  # parse_qs returns a list of values per key
            start_date.strftime("%Y-%m-%d"),
            end_date.strftime("%Y-%m-%d") if end_date else None,
            custom_report,
        )

        if table not in src.resources:
            raise UnsupportedResourceError(table, "AppLovin")

        return src.with_resources(table)
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.13.2"
1
+ __version__ = "0.13.3"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.2
3
+ Version: 0.13.3
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -1,17 +1,18 @@
1
1
  ingestr/main.py,sha256=ufn8AcM2ID80ChUApJzYDjnQaurMXOkYfTm6GzAggSQ,24746
2
2
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
3
  ingestr/src/blob.py,sha256=XDk_XqmU_He4sQ1brY3ceoZgpq_ZBZihz1gHW9MzqUk,1381
4
- ingestr/src/destinations.py,sha256=aMRlgsq5ANnpSQmGqqWY8diB8DsF_WHbb667GnJ56js,11178
4
+ ingestr/src/destinations.py,sha256=WxerdCx0gS4JveYAE-GzdJLbgP4t2QXXQhUoU3GvmLM,11194
5
5
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
6
- ingestr/src/factory.py,sha256=3XM2rilA69vkkOCHNzUt1XqCOc3gLMnOnlQmW5d1V5s,4870
6
+ ingestr/src/factory.py,sha256=b1Fg3lhTu6HoALi4AFrTOm3fbNPj5EB2lPigrjiY1so,4926
7
7
  ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
8
- ingestr/src/sources.py,sha256=VBuD6ngMHKaCLeYZ9Oe9tw67578hPc1dP_5iBNtEJdM,61683
8
+ ingestr/src/sources.py,sha256=YkBCyjI8DWovyA8CI68zOlHT4UKPsG8-UTCyuafI_WA,63075
9
9
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
10
10
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
11
- ingestr/src/version.py,sha256=blu6md2c3Nnj5gDBi8U36sYO3k8HcND8s7UoQBjfn3g,23
11
+ ingestr/src/version.py,sha256=aiCDTKDs80gZjvqiXYkxhq_MLi4Du0L9OByDr6ZEVV4,23
12
12
  ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
13
13
  ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
14
14
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
15
+ ingestr/src/applovin/__init__.py,sha256=VwVTtVQetnSpUt3cgy6TuH3sYdTnQP63eO_qYqT1TEA,7387
15
16
  ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
16
17
  ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
17
18
  ingestr/src/appstore/__init__.py,sha256=3P4VZH2WJF477QjW19jMTwu6L8DXcLkYSdutnvp3AmM,4742
@@ -100,8 +101,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
100
101
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
101
102
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
102
103
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
103
- ingestr-0.13.2.dist-info/METADATA,sha256=ivVRv68P1AR_inmOV4_yMW8tfTnTtE7EBnA-bKDiIL4,8252
104
- ingestr-0.13.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
105
- ingestr-0.13.2.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
106
- ingestr-0.13.2.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
107
- ingestr-0.13.2.dist-info/RECORD,,
104
+ ingestr-0.13.3.dist-info/METADATA,sha256=mVT47j7eOqecG3fnDHEkWzqmM4QHWDUixAWKpkGsoEk,8252
105
+ ingestr-0.13.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
106
+ ingestr-0.13.3.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
107
+ ingestr-0.13.3.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
108
+ ingestr-0.13.3.dist-info/RECORD,,