ingestr 0.13.28__py3-none-any.whl → 0.13.30__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/main.py +10 -0
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -44
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/hubspot/__init__.py +8 -9
- ingestr/src/hubspot/helpers.py +20 -13
- ingestr/src/hubspot/settings.py +13 -7
- ingestr/src/sources.py +33 -19
- {ingestr-0.13.28.dist-info → ingestr-0.13.30.dist-info}/METADATA +1 -1
- {ingestr-0.13.28.dist-info → ingestr-0.13.30.dist-info}/RECORD +13 -13
- ingestr/src/appsflyer/_init_.py +0 -24
- {ingestr-0.13.28.dist-info → ingestr-0.13.30.dist-info}/WHEEL +0 -0
- {ingestr-0.13.28.dist-info → ingestr-0.13.30.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.28.dist-info → ingestr-0.13.30.dist-info}/licenses/LICENSE.md +0 -0
ingestr/main.py
CHANGED
@@ -302,6 +302,13 @@ def ingest(
             envvar=["COLUMNS", "INGESTR_COLUMNS"],
         ),
     ] = None,  # type: ignore
+    yield_limit: Annotated[
+        Optional[int],
+        typer.Option(
+            help="Limit the number of pages yielded from the source",
+            envvar=["YIELD_LIMIT", "INGESTR_YIELD_LIMIT"],
+        ),
+    ] = None,  # type: ignore
 ):
     import hashlib
     import tempfile
@@ -556,6 +563,9 @@ def ingest(
     if factory.source_scheme.startswith("mysql"):
         resource.for_each(dlt_source, lambda x: x.add_map(handle_mysql_empty_dates))
 
+    if yield_limit:
+        resource.for_each(dlt_source, lambda x: x.add_limit(yield_limit))
+
     def col_h(x):
         if column_hints:
             x.apply_hints(columns=column_hints)
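The new yield_limit option is applied to every resource through dlt's add_limit(), which caps how many times the underlying generator is allowed to yield. A minimal sketch of that behaviour, using a toy generator and direct iteration; nothing below is taken from the diff, only the INGESTR_YIELD_LIMIT environment variable name comes from it:

import dlt

@dlt.resource
def pages():
    # stand-in for a paginated source; each yield is one "page"
    for n in range(1000):
        yield {"page": n}

# roughly what yield_limit=2 (e.g. INGESTR_YIELD_LIMIT=2) applies to each resource
limited = pages().add_limit(2)
print(list(limited))  # only the first two pages are pulled from the generator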
ingestr/src/appsflyer/__init__.py
ADDED
@@ -0,0 +1,325 @@
+from typing import Iterable
+
+import dlt
+import pendulum
+from dlt.common.typing import TDataItem
+from dlt.sources import DltResource
+
+from ingestr.src.appsflyer.client import AppsflyerClient
+
+DIMENSION_RESPONSE_MAPPING = {
+    "c": "campaign",
+    "af_adset_id": "adset_id",
+    "af_adset": "adset",
+    "af_ad_id": "ad_id",
+}
+HINTS = {
+    "app_id": {
+        "data_type": "text",
+        "nullable": False,
+    },
+    "campaign": {
+        "data_type": "text",
+        "nullable": False,
+    },
+    "geo": {
+        "data_type": "text",
+        "nullable": False,
+    },
+    "cost": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": False,
+    },
+    "clicks": {
+        "data_type": "bigint",
+        "nullable": False,
+    },
+    "impressions": {
+        "data_type": "bigint",
+        "nullable": False,
+    },
+    "average_ecpi": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": False,
+    },
+    "installs": {
+        "data_type": "bigint",
+        "nullable": False,
+    },
+    "retention_day_7": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": False,
+    },
+    "retention_day_14": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": False,
+    },
+    "cohort_day_1_revenue_per_user": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": True,
+    },
+    "cohort_day_1_total_revenue_per_user": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": True,
+    },
+    "cohort_day_3_revenue_per_user": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": True,
+    },
+    "cohort_day_3_total_revenue_per_user": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": True,
+    },
+    "cohort_day_7_revenue_per_user": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": True,
+    },
+    "cohort_day_7_total_revenue_per_user": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": True,
+    },
+    "cohort_day_14_revenue_per_user": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": True,
+    },
+    "cohort_day_14_total_revenue_per_user": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": True,
+    },
+    "cohort_day_21_revenue_per_user": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": True,
+    },
+    "cohort_day_21_total_revenue_per_user": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": True,
+    },
+    "install_time": {
+        "data_type": "date",
+        "nullable": False,
+    },
+    "loyal_users": {
+        "data_type": "bigint",
+        "nullable": False,
+    },
+    "revenue": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": True,
+    },
+    "roi": {
+        "data_type": "decimal",
+        "precision": 30,
+        "scale": 5,
+        "nullable": True,
+    },
+    "uninstalls": {
+        "data_type": "bigint",
+        "nullable": True,
+    },
+}
+
+CAMPAIGNS_DIMENSIONS = ["c", "geo", "app_id", "install_time"]
+CAMPAIGNS_METRICS = [
+    "average_ecpi",
+    "clicks",
+    "cohort_day_1_revenue_per_user",
+    "cohort_day_1_total_revenue_per_user",
+    "cohort_day_14_revenue_per_user",
+    "cohort_day_14_total_revenue_per_user",
+    "cohort_day_21_revenue_per_user",
+    "cohort_day_21_total_revenue_per_user",
+    "cohort_day_3_revenue_per_user",
+    "cohort_day_3_total_revenue_per_user",
+    "cohort_day_7_revenue_per_user",
+    "cohort_day_7_total_revenue_per_user",
+    "cost",
+    "impressions",
+    "installs",
+    "loyal_users",
+    "retention_day_7",
+    "revenue",
+    "roi",
+    "uninstalls",
+]
+
+CREATIVES_DIMENSIONS = [
+    "c",
+    "geo",
+    "app_id",
+    "install_time",
+    "af_adset_id",
+    "af_adset",
+    "af_ad_id",
+]
+CREATIVES_METRICS = [
+    "impressions",
+    "clicks",
+    "installs",
+    "cost",
+    "revenue",
+    "average_ecpi",
+    "loyal_users",
+    "uninstalls",
+    "roi",
+]
+
+
+@dlt.source(max_table_nesting=0)
+def appsflyer_source(
+    api_key: str,
+    start_date: str,
+    end_date: str,
+    dimensions: list[str],
+    metrics: list[str],
+) -> Iterable[DltResource]:
+    client = AppsflyerClient(api_key)
+
+    @dlt.resource(
+        write_disposition="merge",
+        merge_key="install_time",
+        columns=make_hints(CAMPAIGNS_DIMENSIONS, CAMPAIGNS_METRICS),
+    )
+    def campaigns(
+        datetime=dlt.sources.incremental(
+            "install_time",
+            initial_value=(
+                start_date
+                if start_date
+                else pendulum.today().subtract(days=30).format("YYYY-MM-DD")
+            ),
+            end_value=end_date,
+            range_end="closed",
+            range_start="closed",
+        ),
+    ) -> Iterable[TDataItem]:
+        end = (
+            datetime.end_value
+            if datetime.end_value
+            else pendulum.now().format("YYYY-MM-DD")
+        )
+
+        yield from client._fetch_data(
+            from_date=datetime.last_value,
+            to_date=end,
+            dimensions=CAMPAIGNS_DIMENSIONS,
+            metrics=CAMPAIGNS_METRICS,
+        )
+
+    @dlt.resource(
+        write_disposition="merge",
+        merge_key="install_time",
+        columns=make_hints(CREATIVES_DIMENSIONS, CREATIVES_METRICS),
+    )
+    def creatives(
+        datetime=dlt.sources.incremental(
+            "install_time",
+            initial_value=(
+                start_date
+                if start_date
+                else pendulum.today().subtract(days=30).format("YYYY-MM-DD")
+            ),
+            end_value=end_date,
+            range_end="closed",
+            range_start="closed",
+        ),
+    ) -> Iterable[TDataItem]:
+        end = (
+            datetime.end_value
+            if datetime.end_value
+            else pendulum.now().format("YYYY-MM-DD")
+        )
+        yield from client._fetch_data(
+            datetime.last_value,
+            end,
+            dimensions=CREATIVES_DIMENSIONS,
+            metrics=CREATIVES_METRICS,
+        )
+
+    primary_keys = []
+    if "install_time" not in dimensions:
+        dimensions.append("install_time")
+        primary_keys.append("install_time")
+
+    for dimension in dimensions:
+        if dimension in DIMENSION_RESPONSE_MAPPING:
+            primary_keys.append(DIMENSION_RESPONSE_MAPPING[dimension])
+        else:
+            primary_keys.append(dimension)
+
+    @dlt.resource(
+        write_disposition="merge",
+        primary_key=primary_keys,
+        columns=make_hints(dimensions, metrics),
+    )
+    def custom(
+        datetime=dlt.sources.incremental(
+            "install_time",
+            initial_value=(
+                start_date
+                if start_date
+                else pendulum.today().subtract(days=30).format("YYYY-MM-DD")
+            ),
+            end_value=end_date,
+        ),
+    ):
+        end = (
+            datetime.end_value
+            if datetime.end_value
+            else pendulum.now().format("YYYY-MM-DD")
+        )
+        res = client._fetch_data(
+            from_date=datetime.last_value,
+            to_date=end,
+            dimensions=dimensions,
+            metrics=metrics,
+        )
+        yield from res
+
+    return campaigns, creatives, custom
+
+
+def make_hints(dimensions: list[str], metrics: list[str]):
+    campaign_hints = {}
+    for dimension in dimensions:
+        resp_key = dimension
+        if dimension in DIMENSION_RESPONSE_MAPPING:
+            resp_key = DIMENSION_RESPONSE_MAPPING[dimension]
+
+        if resp_key in HINTS:
+            campaign_hints[resp_key] = HINTS[resp_key]
+
+    for metric in metrics:
+        if metric in HINTS:
+            campaign_hints[metric] = HINTS[metric]
+
+    return campaign_hints
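A hedged sketch of how this rewritten source can be exercised directly with dlt; the API key, date range, duckdb destination, and dataset name are placeholders and not taken from this release:

import dlt
from ingestr.src.appsflyer import appsflyer_source

source = appsflyer_source(
    api_key="YOUR_APPSFLYER_API_KEY",  # placeholder
    start_date="2024-01-01",
    end_date="2024-01-31",
    dimensions=["c", "geo"],  # "c" is reported back as "campaign" via DIMENSION_RESPONSE_MAPPING
    metrics=["clicks", "installs", "cost"],
)

# install_time is appended to the dimensions automatically and becomes part of the merge key
pipeline = dlt.pipeline(pipeline_name="appsflyer", destination="duckdb", dataset_name="raw")
pipeline.run(source.with_resources("custom"))  # or "campaigns" / "creatives"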
ingestr/src/appsflyer/client.py
CHANGED
@@ -4,19 +4,6 @@ import requests
 from dlt.sources.helpers.requests import Client
 from requests.exceptions import HTTPError
 
-DEFAULT_GROUPING = ["c", "geo", "app_id", "install_time"]
-DEFAULT_KPIS = [
-    "impressions",
-    "clicks",
-    "installs",
-    "cost",
-    "revenue",
-    "average_ecpi",
-    "loyal_users",
-    "uninstalls",
-    "roi",
-]
-
 
 class AppsflyerClient:
     def __init__(self, api_key: str):
@@ -33,15 +20,20 @@ class AppsflyerClient:
         self,
         from_date: str,
         to_date: str,
+        dimensions: list[str],
+        metrics: list[str],
         maximum_rows=1000000,
-        dimensions=DEFAULT_GROUPING,
-        metrics=DEFAULT_KPIS,
     ):
+        excluded_metrics = exclude_metrics_for_date_range(metrics, from_date, to_date)
+        included_metrics = [
+            metric for metric in metrics if metric not in excluded_metrics
+        ]
+
         params = {
             "from": from_date,
             "to": to_date,
             "groupings": ",".join(dimensions),
-            "kpis": ",".join(
+            "kpis": ",".join(included_metrics),
             "format": "json",
             "maximum_rows": maximum_rows,
         }
@@ -68,39 +60,52 @@ class AppsflyerClient:
 
             if response.status_code == 200:
                 result = response.json()
-                yield result
+                yield standardize_keys(result, excluded_metrics)
             else:
                 raise HTTPError(
-                    f"Request failed with status code: {response.status_code}"
+                    f"Request failed with status code: {response.status_code}: {response.text}"
                 )
 
         except requests.RequestException as e:
             raise HTTPError(f"Request failed: {e}")
 
-    def fetch_campaigns(
-        self,
-        start_date: str,
-        end_date: str,
-    ):
-        metrics = DEFAULT_KPIS + [
-            "cohort_day_1_revenue_per_user",
-            "cohort_day_1_total_revenue_per_user",
-            "cohort_day_3_revenue_per_user",
-            "cohort_day_3_total_revenue_per_user",
-            "cohort_day_7_total_revenue_per_user",
-            "cohort_day_7_revenue_per_user",
-            "cohort_day_14_total_revenue_per_user",
-            "cohort_day_14_revenue_per_user",
-            "cohort_day_21_total_revenue_per_user",
-            "cohort_day_21_revenue_per_user",
-            "retention_day_7",
-        ]
-        return self._fetch_data(start_date, end_date, metrics=metrics)
 
-
-
-
-
-
-
-
+def standardize_keys(data: list[dict], excluded_metrics: list[str]) -> list[dict]:
+    def fix_key(key: str) -> str:
+        return key.lower().replace("-", "").replace(" ", "_").replace(" ", "_")
+
+    standardized = []
+    for item in data:
+        standardized_item = {}
+        for key, value in item.items():
+            standardized_item[fix_key(key)] = value
+
+        for metric in excluded_metrics:
+            if metric not in standardized_item:
+                standardized_item[fix_key(metric)] = None
+
+        standardized.append(standardized_item)
+
+    return standardized
+
+
+def exclude_metrics_for_date_range(
+    metrics: list[str], from_date: str, to_date: str
+) -> list[str]:
+    """
+    Some of the cohort metrics are not available if there hasn't been enough time to have data for that cohort.
+    This means if you request data for yesterday with cohort day 7 metrics, you will get an error because 7 days hasn't passed yet.
+    One would expect the API to handle this gracefully, but it doesn't.
+
+    This function will exclude the metrics that are not available for the given date range.
+    """
+    import pendulum
+
+    excluded_metrics = []
+    days_between_today_and_end = (pendulum.now() - pendulum.parse(to_date)).days  # type: ignore
+    for metric in metrics:
+        if "cohort_day_" in metric:
+            day_count = int(metric.split("_")[2])
+            if days_between_today_and_end <= day_count:
+                excluded_metrics.append(metric)
+    return excluded_metrics
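To make the date-range guard above concrete, a small illustration (assumed usage, not part of the package): with an end date only three days in the past, any cohort_day_N metric with N >= 3 is excluded from the request, and standardize_keys later backfills those columns with None so the output schema stays stable.

import pendulum

from ingestr.src.appsflyer.client import exclude_metrics_for_date_range

to_date = pendulum.now().subtract(days=3).format("YYYY-MM-DD")
metrics = ["clicks", "cohort_day_1_revenue_per_user", "cohort_day_7_revenue_per_user"]

excluded = exclude_metrics_for_date_range(metrics, "2024-01-01", to_date)
print(excluded)  # ['cohort_day_7_revenue_per_user'] -- seven days have not yet passed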
ingestr/src/buildinfo.py
CHANGED
@@ -1 +1 @@
-version = "v0.13.28"
+version = "v0.13.30"
ingestr/src/hubspot/__init__.py
CHANGED
@@ -169,7 +169,7 @@ def hubspot(
         api_key: str = api_key,
     ) -> Iterator[TDataItems]:
         """Hubspot schemas resource"""
-        yield from fetch_data(CRM_SCHEMAS_ENDPOINT, api_key,resource_name="schemas")
+        yield from fetch_data(CRM_SCHEMAS_ENDPOINT, api_key, resource_name="schemas")
 
     @dlt.resource(name="quotes", write_disposition="replace")
     def quotes(
@@ -192,8 +192,7 @@ def hubspot(
         api_key: str = api_key,
         custom_object_name: str = custom_object,
     ) -> Iterator[TDataItems]:
-
-        get_custom_object = schemas(api_key)
+        get_custom_object = schemas(api_key)
         object_type_id = None
         for custom_object in get_custom_object:
             if custom_object["name"] == custom_object_name.capitalize():
@@ -201,18 +200,18 @@ def hubspot(
                 break
         if object_type_id is None:
             raise ValueError(f"There is no such custom object as {custom_object_name}")
-        custom_object_properties= f"crm/v3/properties/{object_type_id}"
-
+        custom_object_properties = f"crm/v3/properties/{object_type_id}"
+
         props_pages = fetch_data(custom_object_properties, api_key)
         props = []
         for page in props_pages:
             props.extend([prop["name"] for prop in page])
         props = ",".join(sorted(list(set(props))))
-
-        custom_object_endpoint= f"crm/v3/objects/{object_type_id}/?properties={props}"
-
+
+        custom_object_endpoint = f"crm/v3/objects/{object_type_id}/?properties={props}"
+
         """Hubspot custom object details resource"""
-        yield from fetch_data(custom_object_endpoint, api_key,resource_name="custom")
+        yield from fetch_data(custom_object_endpoint, api_key, resource_name="custom")
 
     return companies, contacts, deals, tickets, products, quotes, schemas, custom
 
ingestr/src/hubspot/helpers.py
CHANGED
@@ -90,7 +90,10 @@ def fetch_property_history(
 
 
 def fetch_data(
-    endpoint: str,
+    endpoint: str,
+    api_key: str,
+    params: Optional[Dict[str, Any]] = None,
+    resource_name: str = None,
 ) -> Iterator[List[Dict[str, Any]]]:
     """
     Fetch data from HUBSPOT endpoint using a specified API key and yield the properties of each result.
@@ -133,15 +136,17 @@ def fetch_data(
             _objects: List[Dict[str, Any]] = []
             for _result in _data["results"]:
                 if resource_name == "schemas":
-                    _objects.append(
-
-
-
-
-
-
-
-
+                    _objects.append(
+                        {
+                            "name": _result["labels"].get("singular", ""),
+                            "objectTypeId": _result.get("objectTypeId", ""),
+                            "id": _result.get("id", ""),
+                            "fullyQualifiedName": _result.get("fullyQualifiedName", ""),
+                            "properties": _result.get("properties", ""),
+                            "createdAt": _result.get("createdAt", ""),
+                            "updatedAt": _result.get("updatedAt", ""),
+                        }
+                    )
                 elif resource_name == "custom":
                     _objects.append(
                         _result.get("properties", ""),
@@ -157,9 +162,11 @@ def fetch_data(
                             {
                                 "value": _obj["hs_object_id"],
                                 f"{association}_id": __r["id"],
-
-
-
+                            }
+                            for __r in _result["associations"][association][
+                                "results"
+                            ]
+                        ]
 
                         # remove duplicates from list of dicts
                         __values = [
ingestr/src/hubspot/settings.py
CHANGED
@@ -5,15 +5,21 @@ from dlt.common import pendulum
 STARTDATE = pendulum.datetime(year=2000, month=1, day=1)
 
 CRM_CONTACTS_ENDPOINT = (
-    "/crm/v3/objects/contacts?associations=deals,products,tickets,quotes"
+    "/crm/v3/objects/contacts?associations=companies,deals,products,tickets,quotes"
 )
-CRM_COMPANIES_ENDPOINT =
-
+CRM_COMPANIES_ENDPOINT = "/crm/v3/objects/companies?associations=products"
+CRM_DEALS_ENDPOINT = (
+    "/crm/v3/objects/deals?associations=companies,contacts,products,tickets,quotes"
+)
+CRM_PRODUCTS_ENDPOINT = (
+    "/crm/v3/objects/products?associations=companies,contacts,deals,tickets,quotes"
+)
+CRM_TICKETS_ENDPOINT = (
+    "/crm/v3/objects/tickets?associations=companies,contacts,deals,products,quotes"
+)
+CRM_QUOTES_ENDPOINT = (
+    "/crm/v3/objects/quotes?associations=companies,contacts,deals,products,tickets"
 )
-CRM_DEALS_ENDPOINT = "/crm/v3/objects/deals"
-CRM_PRODUCTS_ENDPOINT = "/crm/v3/objects/products"
-CRM_TICKETS_ENDPOINT = "/crm/v3/objects/tickets"
-CRM_QUOTES_ENDPOINT = "/crm/v3/objects/quotes"
 CRM_SCHEMAS_ENDPOINT = "/crm/v3/schemas"
 
 CRM_OBJECT_ENDPOINTS = {
ingestr/src/sources.py
CHANGED
@@ -52,7 +52,6 @@ from ingestr.src.adjust.adjust_helpers import parse_filters
 from ingestr.src.airtable import airtable_source
 from ingestr.src.applovin import applovin_source
 from ingestr.src.applovin_max import applovin_max_source
-from ingestr.src.appsflyer._init_ import appsflyer_source
 from ingestr.src.appstore import app_store
 from ingestr.src.appstore.client import AppStoreConnectClient
 from ingestr.src.arrow import memory_mapped_arrow
@@ -816,7 +815,15 @@ class HubspotSource:
                 custom_object=endpoint,
             ).with_resources("custom")
 
-        elif table in [
+        elif table in [
+            "contacts",
+            "companies",
+            "deals",
+            "tickets",
+            "products",
+            "quotes",
+            "schemas",
+        ]:
             endpoint = table
         else:
             raise ValueError(
@@ -1035,6 +1042,8 @@ class AppsflyerSource:
         return True
 
     def dlt_source(self, uri: str, table: str, **kwargs):
+        from ingestr.src.appsflyer import appsflyer_source
+
         if kwargs.get("incremental_key"):
             raise ValueError(
                 "Appsflyer_Source takes care of incrementality on its own, you should not provide incremental_key"
@@ -1047,22 +1056,27 @@ class AppsflyerSource:
         if not api_key:
             raise ValueError("api_key in the URI is required to connect to Appsflyer")
 
-
-
-
-
-
-
-        )
-
-
-
+        start_date = kwargs.get("interval_start")
+        end_date = kwargs.get("interval_end")
+        dimensions = []
+        metrics = []
+        if table.startswith("custom:"):
+            fields = table.split(":", 3)
+            if len(fields) != 3:
+                raise ValueError(
+                    "Invalid Adjust custom table format. Expected format: custom:<dimensions>:<metrics>"
+                )
+            dimensions = fields[1].split(",")
+            metrics = fields[2].split(",")
+            table = "custom"
 
         return appsflyer_source(
             api_key=api_key[0],
-            start_date=start_date,
-            end_date=end_date,
-
+            start_date=start_date.strftime("%Y-%m-%d") if start_date else None,  # type: ignore
+            end_date=end_date.strftime("%Y-%m-%d") if end_date else None,  # type: ignore
+            dimensions=dimensions,
+            metrics=metrics,
+        ).with_resources(table)
 
 
 class ZendeskSource:
@@ -1414,16 +1428,16 @@ class GoogleAnalyticsSource:
         cred_base64 = source_fields.get("credentials_base64")
 
         if not cred_path and not cred_base64:
-            raise ValueError(
+            raise ValueError(
+                "credentials_path or credentials_base64 is required to connect Google Analytics"
+            )
 
         credentials = {}
         if cred_path:
             with open(cred_path[0], "r") as f:
                 credentials = json.load(f)
         elif cred_base64:
-            credentials = json.loads(
-                base64.b64decode(cred_base64[0]).decode("utf-8")
-            )
+            credentials = json.loads(base64.b64decode(cred_base64[0]).decode("utf-8"))
 
         property_id = source_fields.get("property_id")
         if not property_id:
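The AppsflyerSource changes above introduce an Adjust-style table spec for arbitrary dimension/metric combinations. A self-contained sketch of the parsing it performs; the example table string is invented, and against a real deployment it would be the source table passed to ingestr for an AppsFlyer URI:

# "custom:<dimensions>:<metrics>" -> a parameterized "custom" resource
table = "custom:c,geo:clicks,installs,cost"  # hypothetical source table value

fields = table.split(":", 3)
if len(fields) != 3:
    raise ValueError("Expected format: custom:<dimensions>:<metrics>")

dimensions = fields[1].split(",")  # ['c', 'geo']
metrics = fields[2].split(",")     # ['clicks', 'installs', 'cost']
print(dimensions, metrics)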
{ingestr-0.13.28.dist-info → ingestr-0.13.30.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.28
+Version: 0.13.30
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
{ingestr-0.13.28.dist-info → ingestr-0.13.30.dist-info}/RECORD
CHANGED
@@ -1,8 +1,8 @@
 ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
-ingestr/main.py,sha256=
+ingestr/main.py,sha256=Vt5NFN59BUlkYrOwiF9xF5MCFYp9r-aVSnQ99f1ypRE,25964
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
-ingestr/src/buildinfo.py,sha256=
+ingestr/src/buildinfo.py,sha256=4uN8Z1CmXZ2T6X2YHEC3ZWFuS5uL_sTMUZiwgVyotHo,21
 ingestr/src/destinations.py,sha256=vrGij4qMPCdXTMIimROWBJFqzOqCM4DFmgyubgSHejA,11279
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
 ingestr/src/factory.py,sha256=659h_sVRBhtPv2dvtOK8tf3PtUhlK3KsWLrb20_iQKw,5333
@@ -10,7 +10,7 @@ ingestr/src/filters.py,sha256=5LNpBgm8FJXdrFHGyM7dLVyphKykSpPk7yuQAZ8GML4,1133
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/partition.py,sha256=E0WHqh1FTheQAIVK_-jWUx0dgyYZCD1VxlAm362gao4,964
 ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=oCqYQPGUrGVASJyZpDe7b4hz6SqwcaFtF6hXHxx4pzo,74650
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -19,8 +19,8 @@ ingestr/src/adjust/adjust_helpers.py,sha256=G_EvNuvA7CsaOtbV3g249iAyggMDMZYbtWOz
 ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
 ingestr/src/applovin/__init__.py,sha256=X_YCLppPrnL8KXfYWICE_uDfMzHHH3JZ-DBGZ1RlaOI,6984
 ingestr/src/applovin_max/__init__.py,sha256=CBMADQ23gi0oxAsxe-RV67GGb8I4EFOX_It45Vv9dj4,3315
-ingestr/src/appsflyer/
-ingestr/src/appsflyer/client.py,sha256=
+ingestr/src/appsflyer/__init__.py,sha256=QoK-B3cYYMD3bqzQaLWNH6FkJyjRbzRkBF2n6urxubs,8071
+ingestr/src/appsflyer/client.py,sha256=5lIbiVG_l1_n2uMZx6_rw3FyM0yZUM9-sOa20utNtYU,3649
 ingestr/src/appstore/__init__.py,sha256=3P4VZH2WJF477QjW19jMTwu6L8DXcLkYSdutnvp3AmM,4742
 ingestr/src/appstore/client.py,sha256=qY9nBZPNIAveR-Dn-pW141Mr9xi9LMOz2HHfnfueHvE,3975
 ingestr/src/appstore/errors.py,sha256=KVpPWth5qlv6_QWEm3aJAt3cdf6miPJs0UDzxknx2Ms,481
@@ -61,9 +61,9 @@ ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9
 ingestr/src/google_sheets/helpers/data_processing.py,sha256=RNt2MYfdJhk4bRahnQVezpNg2x9z0vx60YFq2ukZ8vI,11004
 ingestr/src/gorgias/__init__.py,sha256=_mFkMYwlY5OKEY0o_FK1OKol03A-8uk7bm1cKlmt5cs,21432
 ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
-ingestr/src/hubspot/__init__.py,sha256=
-ingestr/src/hubspot/helpers.py,sha256=
-ingestr/src/hubspot/settings.py,sha256=
+ingestr/src/hubspot/__init__.py,sha256=rSYmN8h6qqxhWCW6elD-pC7iqHXlIofb1F9wvTzziUE,10962
+ingestr/src/hubspot/helpers.py,sha256=fscilfO_K7HS2XQxzf7MeZwVXLTP0WdqnV-NhdeqQAA,7748
+ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
 ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
 ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
 ingestr/src/kinesis/__init__.py,sha256=u5ThH1y8uObZKXgIo71em1UnX6MsVHWOjcf1jKqKbE8,6205
@@ -121,8 +121,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
 ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
-ingestr-0.13.
+ingestr-0.13.30.dist-info/METADATA,sha256=h7uzURTF300XZEFjInqhpEjO6b-RFeBHWgS5jf4qCM0,13659
+ingestr-0.13.30.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.30.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.30.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.30.dist-info/RECORD,,
ingestr/src/appsflyer/_init_.py
DELETED
@@ -1,24 +0,0 @@
-from typing import Iterable
-
-import dlt
-from dlt.common.typing import TDataItem
-from dlt.sources import DltResource
-
-from ingestr.src.appsflyer.client import AppsflyerClient
-
-
-@dlt.source(max_table_nesting=0)
-def appsflyer_source(
-    api_key: str, start_date: str, end_date: str
-) -> Iterable[DltResource]:
-    client = AppsflyerClient(api_key)
-
-    @dlt.resource(write_disposition="merge", merge_key="install_time")
-    def campaigns() -> Iterable[TDataItem]:
-        yield from client.fetch_campaigns(start_date, end_date)
-
-    @dlt.resource(write_disposition="merge", merge_key="install_time")
-    def creatives() -> Iterable[TDataItem]:
-        yield from client.fetch_creatives(start_date, end_date)
-
-    return campaigns, creatives