ingestr 0.13.2__py3-none-any.whl → 0.13.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of ingestr might be problematic.
- ingestr/src/applovin/__init__.py +282 -0
- ingestr/src/destinations.py +7 -6
- ingestr/src/factory.py +4 -1
- ingestr/src/loader.py +69 -0
- ingestr/src/sources.py +50 -0
- ingestr/src/version.py +6 -1
- {ingestr-0.13.2.dist-info → ingestr-0.13.4.dist-info}/METADATA +58 -19
- {ingestr-0.13.2.dist-info → ingestr-0.13.4.dist-info}/RECORD +11 -9
- {ingestr-0.13.2.dist-info → ingestr-0.13.4.dist-info}/WHEEL +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.13.4.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.13.4.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/applovin/__init__.py
ADDED

@@ -0,0 +1,282 @@
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Dict, List, Optional
+
+import dlt
+from dlt.sources.rest_api import EndpointResource, RESTAPIConfig, rest_api_resources
+
+
+class InvalidCustomReportError(Exception):
+    def __init__(self):
+        super().__init__(
+            "Custom report should be in the format 'custom:{endpoint}:{report_type}:{dimensions}"
+        )
+
+
+class InvalidDimensionError(Exception):
+    def __init__(self, dim: str, report_type: str):
+        super().__init__(f"Unknown dimension {dim} for report type {report_type}")
+
+
+TYPE_HINTS = {
+    "application_is_hidden": {"data_type": "bool"},
+    "average_cpa": {"data_type": "double"},
+    "average_cpc": {"data_type": "double"},
+    "campaign_bid_goal": {"data_type": "double"},
+    "campaign_roas_goal": {"data_type": "double"},
+    "clicks": {"data_type": "bigint"},
+    "conversions": {"data_type": "bigint"},
+    "conversion_rate": {"data_type": "double"},
+    "cost": {"data_type": "double"},  # assuming float.
+    "ctr": {"data_type": "double"},
+    "day": {"data_type": "date"},
+    "first_purchase": {"data_type": "bigint"},
+    "ecpm": {"data_type": "double"},
+    "impressions": {"data_type": "bigint"},
+    "installs": {"data_type": "bigint"},
+    "revenue": {"data_type": "double"},
+    "redownloads": {"data_type": "bigint"},
+    "sales": {"data_type": "double"},  # assuming float.
+}
+
+
+class ReportType(Enum):
+    PUBLISHER = "publisher"
+    ADVERTISER = "advertiser"
+
+
+REPORT_SCHEMA: Dict[ReportType, List[str]] = {
+    ReportType.PUBLISHER: [
+        "ad_type",
+        "application",
+        "application_is_hidden",
+        "bidding_integration",
+        "clicks",
+        "country",
+        "ctr",
+        "day",
+        "device_type",
+        "ecpm",
+        "impressions",
+        "package_name",
+        "placement_type",
+        "platform",
+        "revenue",
+        "size",
+        "store_id",
+        "zone",
+        "zone_id",
+    ],
+    ReportType.ADVERTISER: [
+        "ad",
+        "ad_creative_type",
+        "ad_id",
+        "ad_type",
+        "average_cpa",
+        "average_cpc",
+        "campaign",
+        "campaign_ad_type",
+        "campaign_bid_goal",
+        "campaign_id_external",
+        "campaign_package_name",
+        "campaign_roas_goal",
+        "campaign_store_id",
+        "campaign_type",
+        "clicks",
+        "conversions",
+        "conversion_rate",
+        "cost",
+        "country",
+        "creative_set",
+        "creative_set_id",
+        "ctr",
+        "custom_page_id",
+        "day",
+        "device_type",
+        "external_placement_id",
+        "first_purchase",
+        "impressions",
+        "installs",
+        "optimization_day_target",
+        "placement_type",
+        "platform",
+        "redownloads",
+        "sales",
+        "size",
+        "target_event",
+        "traffic_source",
+    ],
+}
+
+# NOTE(turtledev): These values are valid columns,
+# but often don't produce a value. Find a way to either add
+# a default value, or use an alternative strategy to de-duplicate
+# OR make them nullable
+SKA_REPORT_EXCLUDE = [
+    "ad",
+    "ad_id",
+    "ad_type",
+    "average_cpc",
+    "campaign_ad_type",
+    "clicks",
+    "conversions",
+    "conversion_rate",
+    "creative_set",
+    "creative_set_id",
+    "ctr",
+    "custom_page_id",
+    "device_type",
+    "first_purchase",
+    "impressions",
+    "placement_type",
+    "sales",
+    "size",
+    "traffic_source",
+]
+
+PROBABILISTIC_REPORT_EXCLUDE = [
+    "installs",
+    "redownloads",
+]
+
+
+@dlt.source
+def applovin_source(
+    api_key: str,
+    start_date: str,
+    end_date: Optional[str],
+    custom: Optional[str],
+):
+    ska_report_columns = exclude(
+        REPORT_SCHEMA[ReportType.ADVERTISER],
+        SKA_REPORT_EXCLUDE,
+    )
+
+    probabilistic_report_columns = exclude(
+        REPORT_SCHEMA[ReportType.ADVERTISER],
+        PROBABILISTIC_REPORT_EXCLUDE,
+    )
+    backfill = False
+    if end_date is None:
+        backfill = True
+        end_date = datetime.now(timezone.utc).date().strftime("%Y-%m-%d")
+
+    config: RESTAPIConfig = {
+        "client": {
+            "base_url": "https://r.applovin.com/",
+            "auth": {
+                "type": "api_key",
+                "name": "api_key",
+                "location": "query",
+                "api_key": api_key,
+            },
+        },
+        "resource_defaults": {
+            "write_disposition": "merge",
+            "endpoint": {
+                "incremental": {
+                    "cursor_path": "day",
+                    "start_param": "start",
+                    "initial_value": start_date,
+                    "range_start": "closed",
+                    "range_end": "closed",
+                },
+                "params": {
+                    "format": "json",
+                    "end": end_date,
+                },
+                "paginator": "single_page",
+            },
+        },
+        "resources": [
+            resource(
+                "publisher-report",
+                "report",
+                REPORT_SCHEMA[ReportType.PUBLISHER],
+                ReportType.PUBLISHER,
+            ),
+            resource(
+                "advertiser-report",
+                "report",
+                REPORT_SCHEMA[ReportType.ADVERTISER],
+                ReportType.ADVERTISER,
+            ),
+            resource(
+                "advertiser-probabilistic-report",
+                "probabilisticReport",
+                probabilistic_report_columns,
+                ReportType.ADVERTISER,
+            ),
+            resource(
+                "advertiser-ska-report",
+                "skaReport",
+                ska_report_columns,
+                ReportType.ADVERTISER,
+            ),
+        ],
+    }
+
+    if custom:
+        custom_report = custom_report_from_spec(custom)
+        config["resources"].append(custom_report)
+
+    if backfill:
+        config["resource_defaults"]["endpoint"]["incremental"]["end_value"] = end_date  # type: ignore
+
+    yield from rest_api_resources(config)
+
+
+def resource(
+    name: str,
+    endpoint: str,
+    dimensions: List[str],
+    report_type: ReportType,
+) -> EndpointResource:
+    return {
+        "name": name,
+        "columns": build_type_hints(dimensions),
+        "merge_key": "day",
+        "endpoint": {
+            "path": endpoint,
+            "params": {
+                "report_type": report_type.value,
+                "columns": ",".join(dimensions),
+            },
+        },
+    }
+
+
+def custom_report_from_spec(spec: str) -> EndpointResource:
+    parts = spec.split(":")
+    if len(parts) != 4:
+        raise InvalidCustomReportError()
+
+    _, endpoint, report, dims = parts
+    report_type = ReportType(report.strip())
+    dimensions = validate_dimensions(dims)
+    endpoint = endpoint.strip()
+
+    return resource(
+        name="custom_report",
+        endpoint=endpoint,
+        dimensions=dimensions,
+        report_type=report_type,
+    )
+
+
+def validate_dimensions(dimensions: str) -> List[str]:
+    dims = [dim.strip() for dim in dimensions.split(",")]
+
+    if "day" not in dims:
+        dims.append("day")
+
+    return dims
+
+
+def exclude(source: List[str], exclude_list: List[str]) -> List[str]:
+    excludes = set(exclude_list)
+    return [col for col in source if col not in excludes]
+
+
+def build_type_hints(cols: List[str]) -> dict:
+    return {col: TYPE_HINTS[col] for col in cols if col in TYPE_HINTS}
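The `custom` argument accepted by `applovin_source` is a spec of the form `custom:{endpoint}:{report_type}:{dimensions}`, which `custom_report_from_spec` turns into one extra REST resource. A minimal sketch of how that parsing plays out, using a made-up spec string:

    from ingestr.src.applovin import custom_report_from_spec

    # Hypothetical spec: endpoint "report", report type "advertiser", three columns.
    report = custom_report_from_spec("custom:report:advertiser:campaign,impressions,cost")

    report["name"]                            # "custom_report"
    report["endpoint"]["path"]                # "report"
    report["endpoint"]["params"]["columns"]   # "campaign,impressions,cost,day"
    # "day" is appended by validate_dimensions, so the "day" merge/cursor key is always present.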
ingestr/src/destinations.py
CHANGED

@@ -7,12 +7,13 @@ import tempfile
 from urllib.parse import parse_qs, quote, urlparse
 
 import dlt
-import pyarrow.parquet  # type: ignore
 from dlt.common.configuration.specs import AwsCredentials
 from dlt.destinations.impl.clickhouse.configuration import (
     ClickHouseCredentials,
 )
 
+from ingestr.src.loader import load_dlt_file
+
 
 class GenericSqlDestination:
     def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
@@ -184,11 +185,9 @@ class CsvDestination(GenericSqlDestination):
         if output_path.count("/") > 1:
             os.makedirs(os.path.dirname(output_path), exist_ok=True)
 
-        table = pyarrow.parquet.read_table(first_file_path)
-        rows = table.to_pylist()
         with open(output_path, "w", newline="") as csv_file:
             csv_writer = None
-            for row in rows:
+            for row in load_dlt_file(first_file_path):
                 row = filter_keys(row)
                 if csv_writer is None:
                     csv_writer = csv.DictWriter(csv_file, fieldnames=row.keys())
@@ -297,14 +296,16 @@ class ClickhouseDestination:
             raise ValueError(
                 "The TCP port of the ClickHouse server is required to establish a connection."
            )
-
+
         query_params = parse_qs(parsed_uri.query)
         secure = int(query_params["secure"][0]) if "secure" in query_params else 1
 
         http_port = (
             int(query_params["http_port"][0])
             if "http_port" in query_params
-            else 8443
+            else 8443
+            if secure == 1
+            else 8123
         )
 
         if secure not in (0, 1):
ingestr/src/factory.py
CHANGED

@@ -20,6 +20,7 @@ from ingestr.src.sources import (
     AdjustSource,
     AirtableSource,
     AppleAppStoreSource,
+    AppLovinSource,
     AppsflyerSource,
     ArrowMemoryMappedSource,
     AsanaSource,
@@ -65,9 +66,10 @@ SQL_SOURCE_SCHEMES = [
     "oracle",
     "oracle+cx_oracle",
     "hana",
+    "clickhouse",
+
 ]
 
-
 class SourceProtocol(Protocol):
     def dlt_source(self, uri: str, table: str, **kwargs):
         pass
@@ -131,6 +133,7 @@ class SourceDestinationFactory:
         "appstore": AppleAppStoreSource,
         "gs": GCSSource,
         "linkedinads": LinkedInAdsSource,
+        "applovin": AppLovinSource,
     }
     destinations: Dict[str, Type[DestinationProtocol]] = {
        "bigquery": BigQueryDestination,
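For orientation, a hedged sketch of what these registrations amount to; the URI and the dict lookup below are illustrative, since the factory's dispatch code itself is not part of this hunk:

    from urllib.parse import urlparse

    # A made-up source URI; "applovin" is the scheme registered above.
    scheme = urlparse("applovin://?api_key=EXAMPLE_KEY").scheme  # -> "applovin"
    # The factory's source mapping would now resolve this scheme to AppLovinSource,
    # while "clickhouse" URIs fall under SQL_SOURCE_SCHEMES and use the generic SqlSource path.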
ingestr/src/loader.py
ADDED

@@ -0,0 +1,69 @@
+import csv
+import gzip
+import json
+import subprocess
+from contextlib import contextmanager
+from typing import Generator
+
+from pyarrow.parquet import ParquetFile  # type: ignore
+
+PARQUET_BATCH_SIZE = 64
+
+
+class UnsupportedLoaderFileFormat(Exception):
+    pass
+
+
+def load_dlt_file(filepath: str) -> Generator:
+    """
+    load_dlt_file reads dlt loader files. It handles different loader file formats
+    automatically. It returns a generator that yield data items as a python dict
+    """
+    result = subprocess.run(
+        ["file", "-b", filepath],
+        check=True,
+        capture_output=True,
+        text=True,
+    )
+
+    filetype = result.stdout.strip()
+    with factory(filetype, filepath) as reader:
+        yield from reader
+
+
+def factory(filetype: str, filepath: str):
+    # ???(turtledev): can dlt produce non-gizpped jsonl files?
+    if filetype.startswith("gzip"):
+        return jsonlfile(filepath)
+    elif filetype.startswith("CSV"):
+        return csvfile(filepath)
+    elif filetype.startswith("Apache Parquet"):
+        return parquetfile(filepath)
+    else:
+        raise UnsupportedLoaderFileFormat(filetype)
+
+
+@contextmanager
+def jsonlfile(filepath: str):
+    def reader(fd):
+        for line in fd:
+            yield json.loads(line.decode().strip())
+
+    with gzip.open(filepath) as fd:
+        yield reader(fd)
+
+
+@contextmanager
+def csvfile(filepath: str):
+    with open(filepath, "r") as fd:
+        yield csv.DictReader(fd)
+
+
+@contextmanager
+def parquetfile(filepath: str):
+    def reader(pf: ParquetFile):
+        for batch in pf.iter_batches(PARQUET_BATCH_SIZE):
+            yield from batch.to_pylist()
+
+    with open(filepath, "rb") as fd:
+        yield reader(ParquetFile(fd))
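`load_dlt_file` sniffs the format by shelling out to the `file -b` command and then dispatches to a gzip-JSONL, CSV, or Parquet reader. A small usage sketch; the path below is made up, and real dlt load files carry generated names:

    from ingestr.src.loader import load_dlt_file

    # Works the same whether the load file is gzipped JSONL, CSV, or Parquet.
    for row in load_dlt_file("/tmp/load-package/1700000000.0.gz"):
        print(row)  # each item is a plain Python dict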
ingestr/src/sources.py
CHANGED

@@ -50,6 +50,7 @@ from ingestr.src import blob
 from ingestr.src.adjust import REQUIRED_CUSTOM_DIMENSIONS, adjust_source
 from ingestr.src.adjust.adjust_helpers import parse_filters
 from ingestr.src.airtable import airtable_source
+from ingestr.src.applovin import applovin_source
 from ingestr.src.appsflyer._init_ import appsflyer_source
 from ingestr.src.appstore import app_store
 from ingestr.src.appstore.client import AppStoreConnectClient
@@ -131,6 +132,11 @@ class SqlSource:
 
         if uri.startswith("mysql://"):
             uri = uri.replace("mysql://", "mysql+pymysql://")
+
+        if uri.startswith("clickhouse://"):
+            uri = uri.replace("clickhouse://", "clickhouse+native://")
+            if "secure=" not in uri:
+                uri += "?secure=1"
 
         query_adapters = []
         if kwargs.get("sql_limit"):
@@ -1737,3 +1743,47 @@ class LinkedInAdsSource:
             metrics=metrics,
             time_granularity=time_granularity,
         ).with_resources("custom_reports")
+
+
+class AppLovinSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        if kwargs.get("incremental_key") is not None:
+            raise ValueError(
+                "Google Ads takes care of incrementality on its own, you should not provide incremental_key"
+            )
+
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+
+        api_key = params.get("api_key", None)
+        if api_key is None:
+            raise MissingValueError("api_key", "AppLovin")
+
+        interval_start = kwargs.get("interval_start")
+        interval_end = kwargs.get("interval_end")
+
+        now = datetime.now()
+        start_date = (
+            interval_start if interval_start is not None else now - timedelta(days=1)
+        )
+        end_date = interval_end
+
+        custom_report = None
+        if table.startswith("custom:"):
+            custom_report = table
+            table = "custom_report"
+
+        src = applovin_source(
+            api_key[0],
+            start_date.strftime("%Y-%m-%d"),
+            end_date.strftime("%Y-%m-%d") if end_date else None,
+            custom_report,
+        )
+
+        if table not in src.resources:
+            raise UnsupportedResourceError(table, "AppLovin")
+
+        return src.with_resources(table)
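Two behaviours in this hunk are easy to miss: plain `clickhouse://` source URIs are rewritten to the native driver and default to a secure connection, and AppLovin tables prefixed with `custom:` are routed to the `custom_report` resource. A sketch of the URI rewrite, using a made-up connection string:

    uri = "clickhouse://user:pass@host:9440/mydb"  # made-up credentials
    uri = uri.replace("clickhouse://", "clickhouse+native://")
    if "secure=" not in uri:
        uri += "?secure=1"
    # -> "clickhouse+native://user:pass@host:9440/mydb?secure=1"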
ingestr/src/version.py
CHANGED

{ingestr-0.13.2.dist-info → ingestr-0.13.4.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.2
+Version: 0.13.4
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -132,7 +132,6 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 > After cloning `ingestr` make sure to run `make setup` to install githooks.
 
 ## Supported sources & destinations
-
 <table>
 <tr>
 <th></th>
@@ -142,33 +141,28 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 <tr>
 <td colspan="3" style='text-align:center;'><strong>Databases</strong></td>
 </tr>
-<tr>
-<td>Postgres</td>
-<td>✅</td>
-<td>✅</td>
-</tr>
 <tr>
 <td>BigQuery</td>
 <td>✅</td>
 <td>✅</td>
 </tr>
 <tr>
-<td>
-<td
+<td>ClickHouse</td>
+<td>❌</td>
 <td>✅</td>
 </tr>
 <tr>
-<td>
+<td>Databricks</td>
 <td>✅</td>
 <td>✅</td>
 </tr>
 <tr>
-<td>
+<td>DuckDB</td>
 <td>✅</td>
 <td>✅</td>
 </tr>
 <tr>
-<td>
+<td>Local CSV file</td>
 <td>✅</td>
 <td>✅</td>
 </tr>
@@ -178,12 +172,12 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 <td>✅</td>
 </tr>
 <tr>
-<td>
-<td>✅</td>
+<td>MongoDB</td>
 <td>✅</td>
+<td>❌</td>
 </tr>
 <tr>
-<td>
+<td>MySQL</td>
 <td>✅</td>
 <td>❌</td>
 </tr>
@@ -192,18 +186,28 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 <td>✅</td>
 <td>❌</td>
 </tr>
+<tr>
+<td>Postgres</td>
+<td>✅</td>
+<td>✅</td>
+</tr>
+<tr>
+<td>Redshift</td>
+<td>✅</td>
+<td>✅</td>
+</tr>
 <tr>
 <td>SAP Hana</td>
 <td>✅</td>
 <td>❌</td>
 </tr>
 <tr>
-<td>
+<td>Snowflake</td>
+<td>✅</td>
 <td>✅</td>
-<td>❌</td>
 </tr>
 <tr>
-<td>
+<td>SQLite</td>
 <td>✅</td>
 <td>❌</td>
 </tr>
@@ -218,11 +222,21 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 <td>✅</td>
 <td>-</td>
 </tr>
+<tr>
+<td>Apache Kafka</td>
+<td>✅</td>
+<td>-</td>
+</tr>
 <tr>
 <td>AppsFlyer</td>
 <td>✅</td>
 <td>-</td>
 </tr>
+<tr>
+<td>App Store</td>
+<td>✅</td>
+<td>-</td>
+</tr>
 <tr>
 <td>Asana</td>
 <td>✅</td>
@@ -243,6 +257,11 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 <td>✅</td>
 <td>-</td>
 </tr>
+<tr>
+<td>Github</td>
+<td>✅</td>
+<td>-</td>
+</tr>
 <tr>
 <td>Gorgias</td>
 <td>✅</td>
@@ -254,7 +273,17 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 <td>-</td>
 </tr>
 <tr>
-<td>
+<td>Google Ads</td>
+<td>✅</td>
+<td>-</td>
+</tr>
+<tr>
+<td>Google Analytics</td>
+<td>✅</td>
+<td>-</td>
+</tr>
+<tr>
+<td>Intercom</td>
 <td>✅</td>
 <td>-</td>
 </tr>
@@ -263,6 +292,11 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 <td>✅</td>
 <td>-</td>
 </tr>
+<tr>
+<td>LinkedIn Ads</td>
+<td>✅</td>
+<td>-</td>
+</tr>
 <tr>
 <td>Notion</td>
 <td>✅</td>
@@ -288,6 +322,11 @@ Pull requests are welcome. However, please open an issue first to discuss what y
 <td>✅</td>
 <td>-</td>
 </tr>
+<tr>
+<td>TikTok Ads</td>
+<td>✅</td>
+<td>-</td>
+</tr>
 <tr>
 <td>Zendesk</td>
 <td>✅</td>
{ingestr-0.13.2.dist-info → ingestr-0.13.4.dist-info}/RECORD
CHANGED

@@ -1,17 +1,19 @@
 ingestr/main.py,sha256=ufn8AcM2ID80ChUApJzYDjnQaurMXOkYfTm6GzAggSQ,24746
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=XDk_XqmU_He4sQ1brY3ceoZgpq_ZBZihz1gHW9MzqUk,1381
-ingestr/src/destinations.py,sha256=
+ingestr/src/destinations.py,sha256=v45jYRVGXCof1XEvl07XlkVkxeIMiGmBwTrZT8-rQAI,11134
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=_VkpHRKCLP7UGm2-iHbXZOHuuvfMduoXo_oNZSX-mSw,4948
 ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
-ingestr/src/
+ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
+ingestr/src/sources.py,sha256=PkAjowbUF1LxaLshn2x39Mi5lzIOVm9speX7gTMTFzE,63269
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
-ingestr/src/version.py,sha256=
+ingestr/src/version.py,sha256=l6zVm0GMMwnBlIOONWc6snhko9d8-HO1y6Jj1T1vsiQ,158
 ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
 ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
 ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
+ingestr/src/applovin/__init__.py,sha256=VwVTtVQetnSpUt3cgy6TuH3sYdTnQP63eO_qYqT1TEA,7387
 ingestr/src/appsflyer/_init_.py,sha256=ne2-9FQ654Drtd3GkKQv8Bwb6LEqCnJw49MfO5Jyzgs,739
 ingestr/src/appsflyer/client.py,sha256=TNmwakLzmO6DZW3wcfLfQRl7aNBHgFqSsk4ef-MmJ1w,3084
 ingestr/src/appstore/__init__.py,sha256=3P4VZH2WJF477QjW19jMTwu6L8DXcLkYSdutnvp3AmM,4742
@@ -100,8 +102,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
+ingestr-0.13.4.dist-info/METADATA,sha256=SVXv8u5Rz4HUCIx_sYfBwff4MmtSp7UyqPCU17O4Krc,8956
+ingestr-0.13.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.4.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.4.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.4.dist-info/RECORD,,
{ingestr-0.13.2.dist-info → ingestr-0.13.4.dist-info}/WHEEL
File without changes

{ingestr-0.13.2.dist-info → ingestr-0.13.4.dist-info}/entry_points.txt
File without changes

{ingestr-0.13.2.dist-info → ingestr-0.13.4.dist-info}/licenses/LICENSE.md
File without changes