ingestr 0.13.13__py3-none-any.whl → 0.14.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/conftest.py +72 -0
- ingestr/main.py +134 -87
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/adjust/adjust_helpers.py +7 -3
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin_max/__init__.py +6 -4
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -45
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/arrow/__init__.py +9 -1
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/attio/__init__.py +102 -0
- ingestr/src/attio/helpers.py +65 -0
- ingestr/src/blob.py +37 -10
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/clickup/__init__.py +85 -0
- ingestr/src/clickup/helpers.py +47 -0
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +508 -27
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/__init__.py +80 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +47 -28
- ingestr/src/facebook_ads/helpers.py +59 -37
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +107 -2
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +46 -3
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -0
- ingestr/src/frankfurter/helpers.py +48 -0
- ingestr/src/freshdesk/__init__.py +89 -0
- ingestr/src/freshdesk/freshdesk_client.py +137 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/fundraiseup/__init__.py +95 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +41 -6
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_analytics/__init__.py +22 -4
- ingestr/src/google_analytics/helpers.py +124 -6
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/http_client.py +24 -0
- ingestr/src/hubspot/__init__.py +66 -23
- ingestr/src/hubspot/helpers.py +52 -22
- ingestr/src/hubspot/settings.py +14 -7
- ingestr/src/influxdb/__init__.py +46 -0
- ingestr/src/influxdb/client.py +34 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/isoc_pulse/__init__.py +159 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/kafka/__init__.py +4 -1
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +82 -0
- ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
- ingestr/src/linear/__init__.py +634 -0
- ingestr/src/linear/helpers.py +111 -0
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/mixpanel/__init__.py +62 -0
- ingestr/src/mixpanel/client.py +99 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +72 -8
- ingestr/src/mongodb/helpers.py +915 -38
- ingestr/src/partition.py +32 -0
- ingestr/src/phantombuster/__init__.py +65 -0
- ingestr/src/phantombuster/client.py +87 -0
- ingestr/src/pinterest/__init__.py +82 -0
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/quickbooks/__init__.py +117 -0
- ingestr/src/resource.py +40 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +15 -8
- ingestr/src/shopify/__init__.py +1 -17
- ingestr/src/smartsheets/__init__.py +82 -0
- ingestr/src/snapchat_ads/__init__.py +489 -0
- ingestr/src/snapchat_ads/client.py +72 -0
- ingestr/src/snapchat_ads/helpers.py +535 -0
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/solidgate/__init__.py +219 -0
- ingestr/src/solidgate/helpers.py +154 -0
- ingestr/src/sources.py +2933 -245
- ingestr/src/stripe_analytics/__init__.py +49 -21
- ingestr/src/stripe_analytics/helpers.py +286 -1
- ingestr/src/stripe_analytics/settings.py +62 -10
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/__init__.py +12 -6
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/src/zoom/__init__.py +99 -0
- ingestr/src/zoom/helpers.py +102 -0
- ingestr/tests/unit/test_smartsheets.py +133 -0
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/METADATA +229 -19
- ingestr-0.14.104.dist-info/RECORD +203 -0
- ingestr/src/appsflyer/_init_.py +0 -24
- ingestr-0.13.13.dist-info/RECORD +0 -115
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
from typing import Iterable
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
import pendulum
|
|
5
|
+
from dlt.common.typing import TDataItem
|
|
6
|
+
from dlt.sources import DltResource
|
|
7
|
+
|
|
8
|
+
from ingestr.src.appsflyer.client import AppsflyerClient
|
|
9
|
+
|
|
10
|
+
# Maps AppsFlyer request-side dimension names (the values sent in the
# "groupings" query parameter) to the field names the API uses in its JSON
# response. Dimensions not listed here come back under their request name
# unchanged, so lookups fall through to the original name.
DIMENSION_RESPONSE_MAPPING = {
    "c": "campaign",
    "af_adset_id": "adset_id",
    "af_adset": "adset",
    "af_ad_id": "ad_id",
}
|
|
16
|
+
# dlt column hints keyed by the *response-side* column name (see
# DIMENSION_RESPONSE_MAPPING). make_hints() selects the relevant subset for a
# given dimensions/metrics request. Monetary and ratio metrics use
# decimal(30, 5); cohort metrics are nullable because the API omits them when
# the cohort window has not elapsed yet (see exclude_metrics_for_date_range
# in client.py).
HINTS = {
    "app_id": {
        "data_type": "text",
        "nullable": False,
    },
    "campaign": {
        "data_type": "text",
        "nullable": False,
    },
    "geo": {
        "data_type": "text",
        "nullable": False,
    },
    "cost": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": False,
    },
    "clicks": {
        "data_type": "bigint",
        "nullable": False,
    },
    "impressions": {
        "data_type": "bigint",
        "nullable": False,
    },
    "average_ecpi": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": False,
    },
    "installs": {
        "data_type": "bigint",
        "nullable": False,
    },
    "retention_day_7": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": False,
    },
    "retention_day_14": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": False,
    },
    "cohort_day_1_revenue_per_user": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": True,
    },
    "cohort_day_1_total_revenue_per_user": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": True,
    },
    "cohort_day_3_revenue_per_user": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": True,
    },
    "cohort_day_3_total_revenue_per_user": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": True,
    },
    "cohort_day_7_revenue_per_user": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": True,
    },
    "cohort_day_7_total_revenue_per_user": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": True,
    },
    "cohort_day_14_revenue_per_user": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": True,
    },
    "cohort_day_14_total_revenue_per_user": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": True,
    },
    "cohort_day_21_revenue_per_user": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": True,
    },
    "cohort_day_21_total_revenue_per_user": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": True,
    },
    "install_time": {
        "data_type": "date",
        "nullable": False,
    },
    "loyal_users": {
        "data_type": "bigint",
        "nullable": False,
    },
    "revenue": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": True,
    },
    "roi": {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": True,
    },
    "uninstalls": {
        "data_type": "bigint",
        "nullable": True,
    },
}
|
|
150
|
+
|
|
151
|
+
# Fixed dimension/metric sets for the predefined "campaigns" resource.
# Dimension names are request-side (see DIMENSION_RESPONSE_MAPPING);
# "install_time" must be present because it is the incremental cursor.
CAMPAIGNS_DIMENSIONS = ["c", "geo", "app_id", "install_time"]
CAMPAIGNS_METRICS = [
    "average_ecpi",
    "clicks",
    "cohort_day_1_revenue_per_user",
    "cohort_day_1_total_revenue_per_user",
    "cohort_day_14_revenue_per_user",
    "cohort_day_14_total_revenue_per_user",
    "cohort_day_21_revenue_per_user",
    "cohort_day_21_total_revenue_per_user",
    "cohort_day_3_revenue_per_user",
    "cohort_day_3_total_revenue_per_user",
    "cohort_day_7_revenue_per_user",
    "cohort_day_7_total_revenue_per_user",
    "cost",
    "impressions",
    "installs",
    "loyal_users",
    "retention_day_7",
    "revenue",
    "roi",
    "uninstalls",
]

# Fixed dimension/metric sets for the predefined "creatives" resource,
# which additionally breaks results down by adset and ad.
CREATIVES_DIMENSIONS = [
    "c",
    "geo",
    "app_id",
    "install_time",
    "af_adset_id",
    "af_adset",
    "af_ad_id",
]
CREATIVES_METRICS = [
    "impressions",
    "clicks",
    "installs",
    "cost",
    "revenue",
    "average_ecpi",
    "loyal_users",
    "uninstalls",
    "roi",
]
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@dlt.source(max_table_nesting=0)
def appsflyer_source(
    api_key: str,
    start_date: str,
    end_date: str,
    dimensions: list[str],
    metrics: list[str],
) -> Iterable[DltResource]:
    """dlt source exposing AppsFlyer aggregate-report resources.

    Resources:
      - ``campaigns``: campaign-level report with a fixed dimension/metric set.
      - ``creatives``: creative-level report with a fixed dimension/metric set.
      - ``custom``: report built from the caller-supplied ``dimensions`` and
        ``metrics``.

    ``start_date``/``end_date`` bound the incremental ``install_time`` cursor;
    a falsy ``start_date`` defaults the window to the last 30 days, and an
    open ``end_date`` is clamped to "now" at fetch time.
    """
    client = AppsflyerClient(api_key)

    @dlt.resource(
        write_disposition="merge",
        merge_key="install_time",
        columns=make_hints(CAMPAIGNS_DIMENSIONS, CAMPAIGNS_METRICS),
    )
    def campaigns(
        datetime=dlt.sources.incremental(
            "install_time",
            initial_value=(
                start_date
                if start_date
                else pendulum.today().subtract(days=30).format("YYYY-MM-DD")
            ),
            end_value=end_date,
            range_end="closed",
            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        # The API needs a concrete `to` date, so an open-ended window is
        # clamped to today.
        end = (
            datetime.end_value
            if datetime.end_value
            else pendulum.now().format("YYYY-MM-DD")
        )

        yield from client._fetch_data(
            from_date=datetime.last_value,
            to_date=end,
            dimensions=CAMPAIGNS_DIMENSIONS,
            metrics=CAMPAIGNS_METRICS,
        )

    @dlt.resource(
        write_disposition="merge",
        merge_key="install_time",
        columns=make_hints(CREATIVES_DIMENSIONS, CREATIVES_METRICS),
    )
    def creatives(
        datetime=dlt.sources.incremental(
            "install_time",
            initial_value=(
                start_date
                if start_date
                else pendulum.today().subtract(days=30).format("YYYY-MM-DD")
            ),
            end_value=end_date,
            range_end="closed",
            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        end = (
            datetime.end_value
            if datetime.end_value
            else pendulum.now().format("YYYY-MM-DD")
        )
        yield from client._fetch_data(
            datetime.last_value,
            end,
            dimensions=CREATIVES_DIMENSIONS,
            metrics=CREATIVES_METRICS,
        )

    # Build the dimension list actually sent to the API on a copy so the
    # caller's `dimensions` argument is not mutated. `install_time` is always
    # required because it drives the incremental cursor.
    request_dimensions = list(dimensions)
    if "install_time" not in request_dimensions:
        request_dimensions.append("install_time")

    # Primary key: the response-side name of every requested dimension.
    # Deriving it in one pass also fixes the previous behavior where
    # "install_time" could appear twice in the primary key list.
    primary_keys = [
        DIMENSION_RESPONSE_MAPPING.get(dimension, dimension)
        for dimension in request_dimensions
    ]

    @dlt.resource(
        write_disposition="merge",
        primary_key=primary_keys,
        columns=make_hints(request_dimensions, metrics),
    )
    def custom(
        datetime=dlt.sources.incremental(
            "install_time",
            initial_value=(
                start_date
                if start_date
                else pendulum.today().subtract(days=30).format("YYYY-MM-DD")
            ),
            end_value=end_date,
        ),
    ):
        end = (
            datetime.end_value
            if datetime.end_value
            else pendulum.now().format("YYYY-MM-DD")
        )
        # NOTE(review): unlike campaigns/creatives, this incremental does not
        # set range_start/range_end="closed" — confirm whether that asymmetry
        # is intentional before aligning them.
        yield from client._fetch_data(
            from_date=datetime.last_value,
            to_date=end,
            dimensions=request_dimensions,
            metrics=metrics,
        )

    return campaigns, creatives, custom
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def make_hints(dimensions: list[str], metrics: list[str]):
    """Select the dlt column hints for a dimensions/metrics request.

    Dimension names are translated to their response-side form via
    DIMENSION_RESPONSE_MAPPING before lookup; names without an entry in HINTS
    are silently skipped (dlt will infer their schema).
    """
    selected = [DIMENSION_RESPONSE_MAPPING.get(dim, dim) for dim in dimensions]
    selected.extend(metrics)
    return {name: HINTS[name] for name in selected if name in HINTS}
|
ingestr/src/appsflyer/client.py
CHANGED
|
@@ -4,19 +4,6 @@ import requests
|
|
|
4
4
|
from dlt.sources.helpers.requests import Client
|
|
5
5
|
from requests.exceptions import HTTPError
|
|
6
6
|
|
|
7
|
-
DEFAULT_GROUPING = ["c", "geo", "app_id", "install_time"]
|
|
8
|
-
DEFAULT_KPIS = [
|
|
9
|
-
"impressions",
|
|
10
|
-
"clicks",
|
|
11
|
-
"installs",
|
|
12
|
-
"cost",
|
|
13
|
-
"revenue",
|
|
14
|
-
"average_ecpi",
|
|
15
|
-
"loyal_users",
|
|
16
|
-
"uninstalls",
|
|
17
|
-
"roi",
|
|
18
|
-
]
|
|
19
|
-
|
|
20
7
|
|
|
21
8
|
class AppsflyerClient:
|
|
22
9
|
def __init__(self, api_key: str):
|
|
@@ -33,15 +20,20 @@ class AppsflyerClient:
|
|
|
33
20
|
self,
|
|
34
21
|
from_date: str,
|
|
35
22
|
to_date: str,
|
|
23
|
+
dimensions: list[str],
|
|
24
|
+
metrics: list[str],
|
|
36
25
|
maximum_rows=1000000,
|
|
37
|
-
dimensions=DEFAULT_GROUPING,
|
|
38
|
-
metrics=DEFAULT_KPIS,
|
|
39
26
|
):
|
|
27
|
+
excluded_metrics = exclude_metrics_for_date_range(metrics, from_date, to_date)
|
|
28
|
+
included_metrics = [
|
|
29
|
+
metric for metric in metrics if metric not in excluded_metrics
|
|
30
|
+
]
|
|
31
|
+
|
|
40
32
|
params = {
|
|
41
33
|
"from": from_date,
|
|
42
34
|
"to": to_date,
|
|
43
35
|
"groupings": ",".join(dimensions),
|
|
44
|
-
"kpis": ",".join(
|
|
36
|
+
"kpis": ",".join(included_metrics),
|
|
45
37
|
"format": "json",
|
|
46
38
|
"maximum_rows": maximum_rows,
|
|
47
39
|
}
|
|
@@ -54,7 +46,6 @@ class AppsflyerClient:
|
|
|
54
46
|
)
|
|
55
47
|
|
|
56
48
|
request_client = Client(
|
|
57
|
-
request_timeout=10.0,
|
|
58
49
|
raise_for_status=False,
|
|
59
50
|
retry_condition=retry_on_limit,
|
|
60
51
|
request_max_attempts=12,
|
|
@@ -68,39 +59,52 @@ class AppsflyerClient:
|
|
|
68
59
|
|
|
69
60
|
if response.status_code == 200:
|
|
70
61
|
result = response.json()
|
|
71
|
-
yield result
|
|
62
|
+
yield standardize_keys(result, excluded_metrics)
|
|
72
63
|
else:
|
|
73
64
|
raise HTTPError(
|
|
74
|
-
f"Request failed with status code: {response.status_code}"
|
|
65
|
+
f"Request failed with status code: {response.status_code}: {response.text}"
|
|
75
66
|
)
|
|
76
67
|
|
|
77
68
|
except requests.RequestException as e:
|
|
78
69
|
raise HTTPError(f"Request failed: {e}")
|
|
79
70
|
|
|
80
|
-
def fetch_campaigns(
|
|
81
|
-
self,
|
|
82
|
-
start_date: str,
|
|
83
|
-
end_date: str,
|
|
84
|
-
):
|
|
85
|
-
metrics = DEFAULT_KPIS + [
|
|
86
|
-
"cohort_day_1_revenue_per_user",
|
|
87
|
-
"cohort_day_1_total_revenue_per_user",
|
|
88
|
-
"cohort_day_3_revenue_per_user",
|
|
89
|
-
"cohort_day_3_total_revenue_per_user",
|
|
90
|
-
"cohort_day_7_total_revenue_per_user",
|
|
91
|
-
"cohort_day_7_revenue_per_user",
|
|
92
|
-
"cohort_day_14_total_revenue_per_user",
|
|
93
|
-
"cohort_day_14_revenue_per_user",
|
|
94
|
-
"cohort_day_21_total_revenue_per_user",
|
|
95
|
-
"cohort_day_21_revenue_per_user",
|
|
96
|
-
"retention_day_7",
|
|
97
|
-
]
|
|
98
|
-
return self._fetch_data(start_date, end_date, metrics=metrics)
|
|
99
71
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
72
|
+
def standardize_keys(data: list[dict], excluded_metrics: list[str]) -> list[dict]:
    """Normalize AppsFlyer response keys to snake_case and backfill excluded metrics.

    The aggregate-data API returns human-readable column names ("Campaign ID",
    "Average eCPI", ...). Keys are lowercased, dashes are dropped, and spaces
    (including non-breaking spaces, which these headers can contain) become
    underscores so the keys line up with the HINTS column names. Metrics that
    were excluded from the request (see exclude_metrics_for_date_range) are
    added back as None so every row exposes a consistent schema.
    """

    def fix_key(key: str) -> str:
        # "\xa0" is a non-breaking space; treat it exactly like a regular space.
        return key.lower().replace("-", "").replace(" ", "_").replace("\xa0", "_")

    standardized = []
    for item in data:
        standardized_item = {fix_key(key): value for key, value in item.items()}

        for metric in excluded_metrics:
            # Compare against the *normalized* metric name — the previous code
            # checked the raw name but inserted the normalized one, which could
            # overwrite or duplicate a column whose raw and fixed names differ.
            fixed = fix_key(metric)
            if fixed not in standardized_item:
                standardized_item[fixed] = None

        standardized.append(standardized_item)

    return standardized
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def exclude_metrics_for_date_range(
    metrics: list[str], from_date: str, to_date: str
) -> list[str]:
    """
    Some of the cohort metrics are not available if there hasn't been enough time to have data for that cohort.
    This means if you request data for yesterday with cohort day 7 metrics, you will get an error because 7 days hasn't passed yet.
    One would expect the API to handle this gracefully, but it doesn't.

    This function will exclude the metrics that are not available for the given date range.

    Returns the subset of ``metrics`` that must be dropped from the request.
    ``from_date`` is accepted for call-site symmetry but does not affect the
    result: availability only depends on how far in the past ``to_date`` is.
    """
    import pendulum

    excluded_metrics = []
    days_between_today_and_end = (pendulum.now() - pendulum.parse(to_date)).days  # type: ignore
    for metric in metrics:
        parts = metric.split("_")
        # Only "cohort_day_<N>_..." metrics have a maturity requirement. Guard
        # on the exact prefix and a numeric day token so metric names that
        # merely *contain* "cohort_day_" can no longer crash int().
        if len(parts) >= 3 and parts[0] == "cohort" and parts[1] == "day" and parts[2].isdigit():
            if days_between_today_and_end <= int(parts[2]):
                excluded_metrics.append(metric)
    return excluded_metrics
|
ingestr/src/appstore/__init__.py
CHANGED
ingestr/src/arrow/__init__.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from typing import Any, Optional
|
|
2
2
|
|
|
3
3
|
import dlt
|
|
4
|
+
import pyarrow as pa # type: ignore
|
|
4
5
|
from dlt.common.schema.typing import TColumnNames, TTableSchemaColumns
|
|
5
6
|
from dlt.extract.items import TTableHintTemplate
|
|
6
7
|
|
|
@@ -21,7 +22,6 @@ def memory_mapped_arrow(
|
|
|
21
22
|
def arrow_mmap(
|
|
22
23
|
incremental: Optional[dlt.sources.incremental[Any]] = incremental,
|
|
23
24
|
):
|
|
24
|
-
import pyarrow as pa # type: ignore
|
|
25
25
|
import pyarrow.ipc as ipc # type: ignore
|
|
26
26
|
|
|
27
27
|
with pa.memory_map(path, "rb") as mmap:
|
|
@@ -71,3 +71,11 @@ def memory_mapped_arrow(
|
|
|
71
71
|
yield table
|
|
72
72
|
|
|
73
73
|
return arrow_mmap
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
# Number of rows converted per chunk when materializing an Arrow table.
BATCH_SIZE = 1000


def as_list(table: pa.Table):
    """Lazily yield the rows of ``table`` as plain Python dicts.

    The table is walked in BATCH_SIZE-row record batches so only one batch is
    converted to Python objects at a time.
    """
    for record_batch in table.to_batches(BATCH_SIZE):
        rows = record_batch.to_pylist()
        yield from rows
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
from typing import Iterable, Iterator
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
from dlt.sources import DltResource
|
|
5
|
+
|
|
6
|
+
from .helpers import AttioClient
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dlt.source(max_table_nesting=0)
def attio_source(
    api_key: str,
    params: list[str],
) -> Iterable[DltResource]:
    """dlt source for the Attio CRM REST API (v2).

    ``params`` carries the extra segments of the requested table name (e.g.
    ``records:people`` yields ``["people"]``); each resource validates the
    arity it expects and raises ValueError otherwise.
    """
    attio_client = AttioClient(api_key)

    @dlt.resource(
        name="objects",
        write_disposition="replace",
        columns={
            "created_at": {"data_type": "timestamp", "partition": True},
        },
    )
    # https://docs.attio.com/rest-api/endpoint-reference/objects/list-objects - does not support pagination
    def fetch_objects() -> Iterator[dict]:
        # `objects` takes no extra table-name parameters.
        if len(params) != 0:
            raise ValueError("Objects table must be in the format `objects`")

        path = "objects"
        yield attio_client.fetch_all(path, "get")

    # https://docs.attio.com/rest-api/endpoint-reference/records/list-records
    @dlt.resource(
        name="records",
        write_disposition="replace",
        columns={
            "created_at": {"data_type": "timestamp", "partition": True},
        },
    )
    def fetch_records() -> Iterator[dict]:
        # Requires exactly one parameter: the object's API slug.
        if len(params) != 1:
            raise ValueError(
                "Records table must be in the format `records:{object_api_slug}`"
            )
        object_id = params[0]
        path = f"objects/{object_id}/records/query"

        yield attio_client.fetch_paginated(path, "post")

    # https://docs.attio.com/rest-api/endpoint-reference/lists/list-all-lists -- does not support pagination
    @dlt.resource(
        name="lists",
        write_disposition="replace",
        columns={
            "created_at": {"data_type": "timestamp", "partition": True},
        },
    )
    def fetch_lists() -> Iterator[dict]:
        path = "lists"
        yield attio_client.fetch_all(path, "get")

    # https://docs.attio.com/rest-api/endpoint-reference/entries/list-entries
    @dlt.resource(
        name="list_entries",
        write_disposition="replace",
        columns={
            "created_at": {"data_type": "timestamp", "partition": True},
        },
    )
    def fetch_list_entries() -> Iterator[dict]:
        # Requires exactly one parameter: the list id.
        if len(params) != 1:
            raise ValueError(
                "List entries table must be in the format `list_entries:{list_id}`"
            )
        path = f"lists/{params[0]}/entries/query"

        yield attio_client.fetch_paginated(path, "post")

    @dlt.resource(
        name="all_list_entries",
        write_disposition="replace",
        columns={
            "created_at": {"data_type": "timestamp", "partition": True},
        },
    )
    def fetch_all_list_entries() -> Iterator[dict]:
        # Entries from every list whose parent object matches params[0].
        if len(params) != 1:
            raise ValueError(
                "All list entries table must be in the format `all_list_entries:{object_api_slug}`"
            )
        path = "lists"
        for lst in attio_client.fetch_all(path, "get"):
            # NOTE(review): membership test — `parent_object` looks like a
            # collection of object slugs; confirm against the API response shape.
            if params[0] in lst["parent_object"]:
                # `path` is rebound here; fetch_all has already captured its
                # URL, so this does not affect the outer iteration.
                path = f"lists/{lst['id']['list_id']}/entries/query"
                yield from attio_client.fetch_paginated(path, "post")

    return (
        fetch_objects,
        fetch_records,
        fetch_lists,
        fetch_list_entries,
        fetch_all_list_entries,
    )
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
from ingestr.src.http_client import create_client
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class AttioClient:
    """Thin HTTP client for the Attio REST API (v2) with bearer-token auth."""

    def __init__(self, api_key: str):
        self.base_url = "https://api.attio.com/v2"
        self.headers = {
            "Accept": "application/json",
            "Authorization": f"Bearer {api_key}",
        }
        # Shared session from the project's http_client helper.
        self.client = create_client()

    def fetch_paginated(self, path: str, method: str, limit: int = 1000, params=None):
        """Yield flattened items from a limit/offset-paginated endpoint.

        ``method`` selects GET (pagination via query params) or POST
        (pagination via the JSON body, as the query endpoints require).
        Iteration stops when a page comes back shorter than ``limit``.
        Raises Exception on any non-200 response or a missing "data" field.
        """
        url = f"{self.base_url}/{path}"
        if params is None:
            params = {}
        offset = 0
        while True:
            query_params = {"limit": limit, "offset": offset, **params}
            if method == "get":
                response = self.client.get(
                    url, headers=self.headers, params=query_params
                )
            else:
                json_body = {**params, "limit": limit, "offset": offset}
                response = self.client.post(url, headers=self.headers, json=json_body)

            if response.status_code != 200:
                raise Exception(f"HTTP {response.status_code} error: {response.text}")

            response_data = response.json()
            if "data" not in response_data:
                raise Exception(
                    "Attio API returned a response without the expected data"
                )

            data = response_data["data"]
            for item in data:
                flat_item = flatten_item(item)
                yield flat_item
            # A short page means we've reached the end of the collection.
            if len(data) < limit:
                break

            offset += limit

    def fetch_all(self, path: str, method: str = "get", params=None):
        """Yield flattened items from a non-paginated endpoint in one request.

        Raises requests.HTTPError (via raise_for_status) on error responses;
        an absent "data" field yields nothing.
        """
        url = f"{self.base_url}/{path}"
        params = params or {}

        if method == "get":
            response = self.client.get(url, headers=self.headers, params=params)
        else:
            response = self.client.post(url, headers=self.headers, json=params)

        response.raise_for_status()
        data = response.json().get("data", [])
        for item in data:
            yield flatten_item(item)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def flatten_item(item: dict) -> dict:
    """Promote the fields of a nested Attio ``id`` object to the top level.

    Attio items carry composite identifiers such as
    ``{"id": {"workspace_id": ..., "list_id": ...}}``; copying those keys onto
    the item itself makes them addressable as flat columns. The original
    ``id`` key is kept, and the item is mutated in place and returned.
    """
    if "id" in item:
        item.update(item["id"])
    return item
|