ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146) hide show
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,262 @@
1
+ from datetime import datetime, timedelta, timezone
2
+ from enum import Enum
3
+ from typing import Dict, List, Optional
4
+
5
+ import dlt
6
+ from dlt.sources.rest_api import EndpointResource, RESTAPIConfig, rest_api_resources
7
+ from requests import Response
8
+
9
+
10
class InvalidCustomReportError(Exception):
    """Raised when a custom report spec does not have the expected shape.

    The spec is a colon-separated string of exactly four parts:
    ``custom:{endpoint}:{report_type}:{dimensions}``.
    """

    def __init__(self):
        # The quoted format example is now closed with a matching quote.
        super().__init__(
            "Custom report should be in the format "
            "'custom:{endpoint}:{report_type}:{dimensions}'"
        )
15
+
16
+
17
class ClientError(Exception):
    """Error raised by this module when an HTTP response is not OK."""
19
+
20
+
21
# (column name, data type) pairs for report columns that get an explicit
# column hint; columns not listed here receive no hint.
_COLUMN_DATA_TYPES = (
    ("application_is_hidden", "bool"),
    ("average_cpa", "double"),
    ("average_cpc", "double"),
    ("campaign_bid_goal", "double"),
    ("campaign_roas_goal", "double"),
    ("clicks", "bigint"),
    ("conversions", "bigint"),
    ("conversion_rate", "double"),
    ("cost", "double"),  # assuming float.
    ("ctr", "double"),
    ("day", "date"),
    ("first_purchase", "bigint"),
    ("ecpm", "double"),
    ("impressions", "bigint"),
    ("installs", "bigint"),
    ("revenue", "double"),
    ("redownloads", "bigint"),
    ("sales", "double"),  # assuming float.
)

# Column name -> hint dict, e.g. {"data_type": "double"}.
TYPE_HINTS = {
    column: {"data_type": data_type} for column, data_type in _COLUMN_DATA_TYPES
}
41
+
42
+
43
class ReportType(Enum):
    """Kinds of report; the member value is sent as the ``report_type`` parameter."""

    PUBLISHER = "publisher"
    ADVERTISER = "advertiser"
46
+
47
+
48
# Columns requested for publisher reports.
_PUBLISHER_COLUMNS: List[str] = [
    "ad_type",
    "application",
    "application_is_hidden",
    "bidding_integration",
    "clicks",
    "country",
    "ctr",
    "day",
    "device_type",
    "ecpm",
    "impressions",
    "package_name",
    "placement_type",
    "platform",
    "revenue",
    "size",
    "store_id",
    "zone",
    "zone_id",
]

# Columns requested for advertiser reports.
_ADVERTISER_COLUMNS: List[str] = [
    "ad",
    "ad_creative_type",
    "ad_id",
    "ad_type",
    "average_cpa",
    "average_cpc",
    "campaign",
    "campaign_ad_type",
    "campaign_bid_goal",
    "campaign_id_external",
    "campaign_package_name",
    "campaign_roas_goal",
    "campaign_store_id",
    "campaign_type",
    "clicks",
    "conversions",
    "conversion_rate",
    "cost",
    "country",
    "creative_set",
    "creative_set_id",
    "ctr",
    "custom_page_id",
    "day",
    "device_type",
    "external_placement_id",
    "first_purchase",
    "impressions",
    "installs",
    "optimization_day_target",
    "placement_type",
    "platform",
    "redownloads",
    "sales",
    "size",
    "target_event",
    "traffic_source",
]

# Report type -> full list of columns requested for that report type.
REPORT_SCHEMA: Dict[ReportType, List[str]] = {
    ReportType.PUBLISHER: _PUBLISHER_COLUMNS,
    ReportType.ADVERTISER: _ADVERTISER_COLUMNS,
}
110
+
111
# Advertiser columns that are left out when requesting the probabilistic report.
PROBABILISTIC_REPORT_EXCLUDE = ["installs", "redownloads"]
115
+
116
+
117
@dlt.source
def applovin_source(
    api_key: str,
    start_date: str,
    end_date: Optional[str],
    custom: Optional[str],
):
    """Build the AppLovin reporting resources from a REST API config.

    Args:
        api_key: API key, sent as the ``api_key`` query parameter.
        start_date: ISO-format date string used as the incremental initial value.
        end_date: Last day to request. When ``None``, the later of yesterday
            (UTC) and ``start_date`` is used, and that value is also set as
            the incremental ``end_value``.
        custom: Optional custom report spec in the form
            ``custom:{endpoint}:{report_type}:{dimensions}``.

    Yields:
        The resources produced by ``rest_api_resources`` for the config.
    """
    backfill = end_date is None
    if backfill:
        # use the greatest of yesterday and start_date
        yesterday = datetime.now(timezone.utc) - timedelta(days=1)
        requested_start = datetime.fromisoformat(start_date).replace(
            tzinfo=timezone.utc
        )
        end_date = max(yesterday, requested_start).strftime("%Y-%m-%d")

    advertiser_columns = REPORT_SCHEMA[ReportType.ADVERTISER]
    resources = [
        resource(
            "publisher-report",
            "report",
            REPORT_SCHEMA[ReportType.PUBLISHER],
            ReportType.PUBLISHER,
        ),
        resource(
            "advertiser-report",
            "report",
            advertiser_columns,
            ReportType.ADVERTISER,
        ),
        resource(
            "advertiser-probabilistic-report",
            "probabilisticReport",
            exclude(advertiser_columns, PROBABILISTIC_REPORT_EXCLUDE),
            ReportType.ADVERTISER,
        ),
        resource(
            "advertiser-ska-report",
            "skaReport",
            advertiser_columns,
            ReportType.ADVERTISER,
        ),
    ]
    if custom:
        resources.append(custom_report_from_spec(custom))

    incremental = {
        "cursor_path": "day",
        "start_param": "start",
        "initial_value": start_date,
        "range_start": "closed",
        "range_end": "closed",
    }
    if backfill:
        incremental["end_value"] = end_date

    config: RESTAPIConfig = {  # type: ignore
        "client": {
            "base_url": "https://r.applovin.com/",
            "auth": {
                "type": "api_key",
                "name": "api_key",
                "location": "query",
                "api_key": api_key,
            },
        },
        "resource_defaults": {
            "write_disposition": "merge",
            "endpoint": {
                "incremental": incremental,
                "params": {
                    "format": "json",
                    "end": end_date,
                },
                "paginator": "single_page",
                "response_actions": [
                    http_error_handler,
                ],
            },
        },
        "resources": resources,
    }

    yield from rest_api_resources(config)
202
+
203
+
204
def resource(
    name: str,
    endpoint: str,
    dimensions: List[str],
    report_type: ReportType,
) -> EndpointResource:
    """Describe one report resource for the REST API config.

    Args:
        name: Resource name.
        endpoint: Path of the report endpoint.
        dimensions: Columns to request; sent comma-joined as ``columns``.
        report_type: Report type whose value is sent as ``report_type``.

    Returns:
        A resource dict merged on ``day``, with column hints for every
        requested column that has a known type.
    """
    column_hints = build_type_hints(dimensions)
    query_params = {
        "report_type": report_type.value,
        "columns": ",".join(dimensions),
    }
    return {
        "name": name,
        "columns": column_hints,
        "merge_key": "day",
        "endpoint": {"path": endpoint, "params": query_params},
    }
222
+
223
+
224
def custom_report_from_spec(spec: str) -> EndpointResource:
    """Turn a ``custom:{endpoint}:{report_type}:{dimensions}`` spec into a resource.

    Raises:
        InvalidCustomReportError: if the spec does not split into exactly
            four colon-separated parts.
        ValueError: if the report type is not a valid ``ReportType`` value.
    """
    fields = spec.split(":")
    if len(fields) != 4:
        raise InvalidCustomReportError()

    # The first field is the literal prefix and is not used further.
    _prefix, raw_endpoint, raw_report_type, raw_dimensions = fields
    report_type = ReportType(raw_report_type.strip())
    dimensions = validate_dimensions(raw_dimensions)

    return resource(
        name="custom_report",
        endpoint=raw_endpoint.strip(),
        dimensions=dimensions,
        report_type=report_type,
    )
240
+
241
+
242
def validate_dimensions(dimensions: str) -> List[str]:
    """Parse a comma-separated dimension string into a clean list.

    Surrounding whitespace is stripped from each entry, and empty entries
    (from an empty string, doubled commas or a trailing comma) are dropped
    so they never reach the comma-joined ``columns`` request parameter.
    ``"day"`` is appended when it is not already present.

    Args:
        dimensions: Comma-separated dimension names.

    Returns:
        The non-empty dimension names in their original order, always
        containing ``"day"``.
    """
    stripped = (part.strip() for part in dimensions.split(","))
    dims = [name for name in stripped if name]

    if "day" not in dims:
        dims.append("day")

    return dims
249
+
250
+
251
def exclude(source: List[str], exclude_list: List[str]) -> List[str]:
    """Return the items of ``source``, in order, that are not in ``exclude_list``."""
    blocked = frozenset(exclude_list)
    kept: List[str] = []
    for column in source:
        if column in blocked:
            continue
        kept.append(column)
    return kept
254
+
255
+
256
def build_type_hints(cols: List[str]) -> dict:
    """Return the ``TYPE_HINTS`` entries for those of ``cols`` that have one."""
    hints = {}
    for column in cols:
        if column in TYPE_HINTS:
            hints[column] = TYPE_HINTS[column]
    return hints
258
+
259
+
260
def http_error_handler(resp: Response):
    """Raise ``ClientError`` with status and body when the response is not OK."""
    if resp.ok:
        return
    raise ClientError(f"HTTP Status {resp.status_code}: {resp.text}")
@@ -0,0 +1,117 @@
1
+ from datetime import timedelta
2
+ from typing import Iterator
3
+
4
+ import dlt
5
+ import pandas as pd # type: ignore[import-untyped]
6
+ import pendulum
7
+ import requests
8
+ from dlt.sources import DltResource
9
+ from dlt.sources.helpers.requests import Client
10
+ from pendulum.date import Date
11
+
12
+
13
@dlt.source(max_table_nesting=0)
def applovin_max_source(
    start_date: Date,
    applications: list[str],
    api_key: str,
    end_date: Date | None,
) -> DltResource:
    """Create the ``user_ad_revenue`` resource for AppLovin MAX.

    Args:
        start_date: Initial value of the ``partition_date`` incremental cursor.
        applications: Application identifiers to fetch reports for.
        api_key: API key passed with every report request.
        end_date: Optional last day; when the incremental has no end value,
            yesterday (UTC) is used instead.

    Returns:
        The resource that yields one data frame per application, day and
        platform for which a report was available.
    """

    @dlt.resource(
        name="user_ad_revenue",
        write_disposition="merge",
        merge_key="partition_date",
        columns={
            "partition_date": {"data_type": "date", "partition": True},
        },
    )
    def fetch_ad_revenue_report(
        dateTime=(
            dlt.sources.incremental(
                "partition_date",
                initial_value=start_date,
                end_value=end_date,
                range_start="closed",
                range_end="closed",
            )
        ),
    ) -> Iterator[dict]:
        first_day = dateTime.last_value
        last_day = dateTime.end_value
        if last_day is None:
            last_day = (pendulum.yesterday("UTC")).date()

        session = create_client()

        # Walk every application day by day, requesting each platform in turn.
        for app in applications:
            day = first_day
            while day <= last_day:
                for platform in ("ios", "android", "fireos"):
                    frame = get_data(
                        url="https://r.applovin.com/max/userAdRevenueReport",
                        current_date=day,
                        application=app,
                        api_key=api_key,
                        client=session,
                        platform=platform,
                    )
                    if frame is not None:
                        yield frame
                day = day + timedelta(days=1)

    return fetch_ad_revenue_report
67
+
68
+
69
def create_client() -> requests.Session:
    """Return a session that retries per ``retry_on_limit``, up to 12 attempts.

    ``raise_for_status`` is disabled, so callers inspect status codes themselves.
    """
    retrying_client = Client(
        raise_for_status=False,
        retry_condition=retry_on_limit,
        request_max_attempts=12,
    )
    return retrying_client.session
75
+
76
+
77
def retry_on_limit(
    response: requests.Response | None, exception: BaseException | None
) -> bool:
    """Tell the client to retry only when a response with status 429 arrived.

    ``exception`` is accepted to match the retry-condition signature but is
    not inspected.
    """
    return response is not None and response.status_code == 429
83
+
84
+
85
def get_data(
    url: str,
    current_date: Date,
    application: str,
    api_key: str,
    platform: str,
    client: requests.Session,
):
    """Fetch one day's user-level ad revenue report as a data frame.

    Args:
        url: Report endpoint URL.
        current_date: Day to request; sent in ISO format.
        application: Application identifier.
        api_key: API key sent as a query parameter.
        platform: Platform name; also written to the ``platform`` column.
        client: Session used for the request.

    Returns:
        The report with ``partition_date`` and ``platform`` columns added, or
        ``None`` when the API answers 404 with a "No Mediation App Id found
        for platform" message.

    Raises:
        requests.HTTPError: for any other non-200 response.
    """
    response = client.get(
        url=url,
        params={
            "api_key": api_key,
            "date": current_date.isoformat(),
            "platform": platform,
            "application": application,
            "aggregated": "false",
        },
    )

    if response.status_code != 200:
        no_mediation_app = (
            response.status_code == 404
            and "No Mediation App Id found for platform" in response.text
        )
        if no_mediation_app:
            return None
        raise requests.HTTPError(
            f"AppLovin MAX API error (status {response.status_code}): {response.text}"
        )

    # The JSON answer points at a CSV file that holds the actual report rows.
    report = pd.read_csv(response.json().get("ad_revenue_report_url"))
    report["Date"] = pd.to_datetime(report["Date"])
    report["partition_date"] = report["Date"].dt.date
    report["platform"] = platform
    return report
@@ -0,0 +1,325 @@
1
+ from typing import Iterable
2
+
3
+ import dlt
4
+ import pendulum
5
+ from dlt.common.typing import TDataItem
6
+ from dlt.sources import DltResource
7
+
8
+ from ingestr.src.appsflyer.client import AppsflyerClient
9
+
10
# Request dimension name -> key under which that dimension appears in responses.
DIMENSION_RESPONSE_MAPPING = dict(
    c="campaign",
    af_adset_id="adset_id",
    af_adset="adset",
    af_ad_id="ad_id",
)
16
def _plain_hint(data_type: str, nullable: bool) -> dict:
    """Build a column hint holding only a data type and nullability."""
    return {"data_type": data_type, "nullable": nullable}


def _decimal_hint(nullable: bool) -> dict:
    """Build a decimal column hint with precision 30 and scale 5."""
    return {
        "data_type": "decimal",
        "precision": 30,
        "scale": 5,
        "nullable": nullable,
    }


# Response column name -> column hint. Each entry is its own dict object.
HINTS = {
    "app_id": _plain_hint("text", False),
    "campaign": _plain_hint("text", False),
    "geo": _plain_hint("text", False),
    "cost": _decimal_hint(False),
    "clicks": _plain_hint("bigint", False),
    "impressions": _plain_hint("bigint", False),
    "average_ecpi": _decimal_hint(False),
    "installs": _plain_hint("bigint", False),
    "retention_day_7": _decimal_hint(False),
    "retention_day_14": _decimal_hint(False),
    # Cohort revenue columns for days 1, 3, 7, 14 and 21: all nullable decimals.
    **{
        f"cohort_day_{day}_{kind}_per_user": _decimal_hint(True)
        for day in (1, 3, 7, 14, 21)
        for kind in ("revenue", "total_revenue")
    },
    "install_time": _plain_hint("date", False),
    "loyal_users": _plain_hint("bigint", False),
    "revenue": _decimal_hint(True),
    "roi": _decimal_hint(True),
    "uninstalls": _plain_hint("bigint", True),
}
150
+
151
# Dimensions and metrics requested by the built-in "campaigns" resource.
CAMPAIGNS_DIMENSIONS = ["c", "geo", "app_id", "install_time"]
CAMPAIGNS_METRICS = [
    "average_ecpi",
    "clicks",
    # Cohort metrics, listed in the same order as the original literal.
    *(
        f"cohort_day_{day}_{kind}_per_user"
        for day in (1, 14, 21, 3, 7)
        for kind in ("revenue", "total_revenue")
    ),
    "cost",
    "impressions",
    "installs",
    "loyal_users",
    "retention_day_7",
    "revenue",
    "roi",
    "uninstalls",
]

# Dimensions and metrics requested by the built-in "creatives" resource:
# the campaign dimensions plus the ad set / ad identifiers.
CREATIVES_DIMENSIONS = [
    *CAMPAIGNS_DIMENSIONS,
    "af_adset_id",
    "af_adset",
    "af_ad_id",
]
CREATIVES_METRICS = [
    "impressions",
    "clicks",
    "installs",
    "cost",
    "revenue",
    "average_ecpi",
    "loyal_users",
    "uninstalls",
    "roi",
]
195
+
196
+
197
@dlt.source(max_table_nesting=0)
def appsflyer_source(
    api_key: str,
    start_date: str,
    end_date: str,
    dimensions: list[str],
    metrics: list[str],
) -> Iterable[DltResource]:
    """Build the AppsFlyer ``campaigns``, ``creatives`` and ``custom`` resources.

    Args:
        api_key: Key used to construct the ``AppsflyerClient``.
        start_date: Initial ``install_time`` cursor value (``YYYY-MM-DD``);
            when falsy, 30 days before today is used.
        end_date: Optional end value for the cursor; when falsy, the current
            date is used as the upper bound of each fetch.
        dimensions: Dimensions for the ``custom`` resource. The list is
            copied, so the caller's list is never modified.
        metrics: Metrics for the ``custom`` resource.

    Returns:
        The three resources, in the order ``campaigns``, ``creatives``,
        ``custom``.
    """
    client = AppsflyerClient(api_key)

    # Copy before adding "install_time": appending to the argument itself
    # would leak the change into the caller's list.
    dimensions = list(dimensions)
    if "install_time" not in dimensions:
        dimensions.append("install_time")

    # Primary keys are the response-side names of the dimensions.
    # dict.fromkeys de-duplicates while keeping order, so "install_time"
    # (always present at this point) is listed exactly once.
    primary_keys = list(
        dict.fromkeys(
            DIMENSION_RESPONSE_MAPPING.get(dimension, dimension)
            for dimension in dimensions
        )
    )

    @dlt.resource(
        write_disposition="merge",
        merge_key="install_time",
        columns=make_hints(CAMPAIGNS_DIMENSIONS, CAMPAIGNS_METRICS),
    )
    def campaigns(
        datetime=dlt.sources.incremental(
            "install_time",
            initial_value=(
                start_date
                if start_date
                else pendulum.today().subtract(days=30).format("YYYY-MM-DD")
            ),
            end_value=end_date,
            range_end="closed",
            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        end = (
            datetime.end_value
            if datetime.end_value
            else pendulum.now().format("YYYY-MM-DD")
        )

        yield from client._fetch_data(
            from_date=datetime.last_value,
            to_date=end,
            dimensions=CAMPAIGNS_DIMENSIONS,
            metrics=CAMPAIGNS_METRICS,
        )

    @dlt.resource(
        write_disposition="merge",
        merge_key="install_time",
        columns=make_hints(CREATIVES_DIMENSIONS, CREATIVES_METRICS),
    )
    def creatives(
        datetime=dlt.sources.incremental(
            "install_time",
            initial_value=(
                start_date
                if start_date
                else pendulum.today().subtract(days=30).format("YYYY-MM-DD")
            ),
            end_value=end_date,
            range_end="closed",
            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        end = (
            datetime.end_value
            if datetime.end_value
            else pendulum.now().format("YYYY-MM-DD")
        )
        yield from client._fetch_data(
            datetime.last_value,
            end,
            dimensions=CREATIVES_DIMENSIONS,
            metrics=CREATIVES_METRICS,
        )

    # NOTE(review): unlike campaigns/creatives, this incremental does not set
    # range_start/range_end to "closed" -- confirm whether that is intended.
    @dlt.resource(
        write_disposition="merge",
        primary_key=primary_keys,
        columns=make_hints(dimensions, metrics),
    )
    def custom(
        datetime=dlt.sources.incremental(
            "install_time",
            initial_value=(
                start_date
                if start_date
                else pendulum.today().subtract(days=30).format("YYYY-MM-DD")
            ),
            end_value=end_date,
        ),
    ):
        end = (
            datetime.end_value
            if datetime.end_value
            else pendulum.now().format("YYYY-MM-DD")
        )
        res = client._fetch_data(
            from_date=datetime.last_value,
            to_date=end,
            dimensions=dimensions,
            metrics=metrics,
        )
        yield from res

    return campaigns, creatives, custom
309
+
310
+
311
def make_hints(dimensions: list[str], metrics: list[str]):
    """Collect the column hints for the given dimensions and metrics.

    Dimensions are first translated to their response key through
    ``DIMENSION_RESPONSE_MAPPING``; metrics are looked up as-is. Names with
    no entry in ``HINTS`` are skipped.

    Returns:
        Dict of column name -> hint, dimensions first, then metrics.
    """
    response_keys = [
        DIMENSION_RESPONSE_MAPPING.get(dimension, dimension)
        for dimension in dimensions
    ]

    hints = {}
    for column in [*response_keys, *metrics]:
        if column in HINTS:
            hints[column] = HINTS[column]
    return hints