ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/snapchat_ads/helpers.py
@@ -0,0 +1,535 @@
+ from typing import Iterator
+
+ import requests
+
+ from .client import SnapchatAdsAPI, create_client
+
+
+ def client_side_date_filter(data: dict, start_date, end_date) -> bool:
+     """
+     Check if a data item falls within the specified date range based on updated_at.
+     """
+     if not start_date and not end_date:
+         return True
+
+     from dlt.common.time import ensure_pendulum_datetime
+
+     updated_at_str = data.get("updated_at")
+     if not updated_at_str:
+         return True
+
+     updated_at = ensure_pendulum_datetime(updated_at_str)
+
+     if start_date and updated_at < ensure_pendulum_datetime(start_date):
+         return False
+
+     if end_date and updated_at > ensure_pendulum_datetime(end_date):
+         return False
+
+     return True
+
+
+ def paginate(client: requests.Session, headers: dict, url: str, page_size: int = 1000):
+     """
+     Helper to paginate through Snapchat API responses.
+     """
+     from urllib.parse import parse_qs, urlparse
+
+     params: dict[str, int | str] = {"limit": page_size}
+
+     while url:
+         response = client.get(url, headers=headers, params=params)
+         response.raise_for_status()
+
+         result = response.json()
+
+         if result.get("request_status", "").upper() != "SUCCESS":
+             raise ValueError(
+                 f"Request failed: {result.get('request_status')} - {result}"
+             )
+
+         yield result
+
+         # Check for next page
+         paging = result.get("paging", {})
+         next_link = paging.get("next_link")
+
+         if next_link:
+             # Extract cursor from next_link
+             parsed = urlparse(next_link)
+             query_params = parse_qs(parsed.query)
+             cursor_list = query_params.get("cursor", [None])
+             cursor = cursor_list[0] if cursor_list else None
+
+             if cursor:
+                 params["cursor"] = cursor
+             else:
+                 break
+         else:
+             break
+
+
+ def get_account_ids(
+     api: "SnapchatAdsAPI",
+     ad_account_id: str | None,
+     organization_id: str | None,
+     base_url: str,
+     resource_name: str,
+     start_date=None,
+     end_date=None,
+ ) -> list[str]:
+     """
+     Get the list of account IDs to fetch data for.
+
+     If ad_account_id is provided, returns a list with that single account.
+     Otherwise, fetches all ad accounts for the organization.
+     """
+     if ad_account_id:
+         return [ad_account_id]
+
+     if not organization_id:
+         raise ValueError(
+             f"organization_id is required to fetch {resource_name} for all ad accounts"
+         )
+
+     accounts_url = f"{base_url}/organizations/{organization_id}/adaccounts"
+     # Don't filter accounts by date - we want all accounts, then filter stats by date
+     accounts_data = list(
+         fetch_snapchat_data(api, accounts_url, "adaccounts", "adaccount", None, None)
+     )
+     return [
+         account_id
+         for account in accounts_data
+         if (account_id := account.get("id")) is not None
+     ]
+
+
+ def fetch_snapchat_data(
+     api: "SnapchatAdsAPI",
+     url: str,
+     resource_key: str,
+     item_key: str,
+     start_date=None,
+     end_date=None,
+ ) -> Iterator[dict]:
+     """
+     Generic helper to fetch data from the Snapchat API.
+     """
+     client = create_client()
+     headers = api.get_headers()
+
+     response = client.get(url, headers=headers)
+     response.raise_for_status()
+
+     result = response.json()
+
+     if result.get("request_status", "").upper() != "SUCCESS":
+         raise ValueError(f"Request failed: {result.get('request_status')} - {result}")
+
+     items_data = result.get(resource_key, [])
+
+     for item in items_data:
+         if item.get("sub_request_status", "").upper() == "SUCCESS":
+             data = item.get(item_key, {})
+             if data:
+                 # Client-side filtering by updated_at
+                 if client_side_date_filter(data, start_date, end_date):
+                     yield data
+
+
+ def fetch_snapchat_data_with_params(
+     api: "SnapchatAdsAPI",
+     url: str,
+     resource_key: str,
+     item_key: str,
+     params: dict | None = None,
+ ) -> Iterator[dict]:
+     """
+     Generic helper to fetch data from the Snapchat API with query parameters.
+     """
+     client = create_client()
+     headers = api.get_headers()
+
+     response = client.get(url, headers=headers, params=params or {})
+     response.raise_for_status()
+
+     result = response.json()
+
+     if result.get("request_status", "").upper() != "SUCCESS":
+         raise ValueError(f"Request failed: {result.get('request_status')} - {result}")
+
+     items_data = result.get(resource_key, [])
+
+     for item in items_data:
+         if item.get("sub_request_status", "").upper() == "SUCCESS":
+             data = item.get(item_key, {})
+             if data:
+                 yield data
+
+
+ def fetch_account_id_resource(
+     api: "SnapchatAdsAPI",
+     ad_account_id: str | None,
+     organization_id: str | None,
+     base_url: str,
+     resource_name: str,
+     item_key: str,
+     start_date=None,
+     end_date=None,
+ ) -> Iterator[dict]:
+     """
+     Fetch resource data for ad accounts without pagination.
+
+     If ad_account_id is provided, fetches data for that specific account.
+     Otherwise, fetches all ad accounts and then fetches data for each account.
+     """
+     account_ids = get_account_ids(
+         api,
+         ad_account_id,
+         organization_id,
+         base_url,
+         resource_name,
+         start_date,
+         end_date,
+     )
+
+     for account_id in account_ids:
+         url = f"{base_url}/adaccounts/{account_id}/{resource_name}"
+         yield from fetch_snapchat_data(
+             api, url, resource_name, item_key, start_date, end_date
+         )
+
+
+ def fetch_with_paginate_account_id(
+     api: "SnapchatAdsAPI",
+     ad_account_id: str | None,
+     organization_id: str | None,
+     base_url: str,
+     resource_name: str,
+     item_key: str,
+     start_date=None,
+     end_date=None,
+ ) -> Iterator[dict]:
+     """
+     Fetch paginated resource data for ad accounts.
+
+     If ad_account_id is provided, fetches data for that specific account.
+     Otherwise, fetches all ad accounts and then fetches data for each account.
+     """
+     account_ids = get_account_ids(
+         api,
+         ad_account_id,
+         organization_id,
+         base_url,
+         resource_name,
+         start_date,
+         end_date,
+     )
+
+     client = create_client()
+     headers = api.get_headers()
+
+     for account_id in account_ids:
+         url = f"{base_url}/adaccounts/{account_id}/{resource_name}"
+
+         for result in paginate(client, headers, url, page_size=1000):
+             items_data = result.get(resource_name, [])
+
+             for item in items_data:
+                 if item.get("sub_request_status", "").upper() == "SUCCESS":
+                     data = item.get(item_key, {})
+                     if data:
+                         if client_side_date_filter(data, start_date, end_date):
+                             yield data
+
+
+ def build_stats_url(
+     base_url: str,
+     entity_type: str,
+     entity_id: str,
+ ) -> str:
+     """
+     Build the stats URL for a given entity type and ID.
+
+     Args:
+         base_url: Base API URL
+         entity_type: Type of entity (campaign, adsquad, ad, adaccount)
+         entity_id: ID of the entity
+
+     Returns:
+         Complete stats URL
+     """
+     entity_type_map = {
+         "campaign": "campaigns",
+         "adsquad": "adsquads",
+         "ad": "ads",
+         "adaccount": "adaccounts",
+     }
+
+     plural_entity = entity_type_map.get(entity_type)
+     if not plural_entity:
+         raise ValueError(
+             f"Invalid entity_type: {entity_type}. Must be one of: {list(entity_type_map.keys())}"
+         )
+
+     return f"{base_url}/{plural_entity}/{entity_id}/stats"
+
+
+ def fetch_stats_data(
+     api: "SnapchatAdsAPI",
+     url: str,
+     params: dict,
+     granularity: str,
+ ) -> Iterator[dict]:
+     """
+     Fetch stats data from the Snapchat API.
+
+     Args:
+         api: SnapchatAdsAPI instance
+         url: Stats endpoint URL
+         params: Query parameters
+         granularity: Granularity of stats (TOTAL, DAY, HOUR, LIFETIME)
+
+     Yields:
+         Flattened stats records
+     """
+     client = create_client()
+     headers = api.get_headers()
+
+     response = client.get(url, headers=headers, params=params)
+     if not response.ok:
+         raise ValueError(
+             f"Stats request failed: {response.status_code} - {response.text}"
+         )
+     response.raise_for_status()
+
+     result = response.json()
+
+     if result.get("request_status", "").upper() != "SUCCESS":
+         raise ValueError(f"Request failed: {result.get('request_status')} - {result}")
+
+     # Parse based on granularity
+     if granularity in ["TOTAL", "LIFETIME"]:
+         yield from parse_total_stats(result)
+     else:  # DAY or HOUR
+         yield from parse_timeseries_stats(result)
+
+
+ def parse_total_stats(result: dict) -> Iterator[dict]:
+     """
+     Parse TOTAL or LIFETIME granularity stats response.
+
+     Args:
+         result: API response JSON
+
+     Yields:
+         Flattened stats records
+     """
+     # Handle both total_stats and lifetime_stats response formats
+     total_stats = result.get("total_stats", []) or result.get("lifetime_stats", [])
+
+     for stat_item in total_stats:
+         if stat_item.get("sub_request_status", "").upper() == "SUCCESS":
+             # Handle both total_stat and lifetime_stat keys
+             total_stat = stat_item.get("total_stat", {}) or stat_item.get(
+                 "lifetime_stat", {}
+             )
+             if total_stat:
+                 # Flatten the stats object
+                 record = {
+                     "id": total_stat.get("id"),
+                     "type": total_stat.get("type"),
+                     "granularity": total_stat.get("granularity"),
+                     "start_time": total_stat.get("start_time"),
+                     "end_time": total_stat.get("end_time"),
+                     "finalized_data_end_time": total_stat.get(
+                         "finalized_data_end_time"
+                     ),
+                     "conversion_data_processed_end_time": total_stat.get(
+                         "conversion_data_processed_end_time"
+                     ),
+                     "swipe_up_attribution_window": total_stat.get(
+                         "swipe_up_attribution_window"
+                     ),
+                     "view_attribution_window": total_stat.get(
+                         "view_attribution_window"
+                     ),
+                 }
+
+                 # Flatten nested stats
+                 stats = total_stat.get("stats", {})
+                 for key, value in stats.items():
+                     record[key] = value
+
+                 # Handle breakdown_stats if present
+                 breakdown_stats = total_stat.get("breakdown_stats", {})
+                 if breakdown_stats:
+                     for breakdown_type, breakdown_items in breakdown_stats.items():
+                         for item in breakdown_items:
+                             breakdown_record = record.copy()
+                             breakdown_record["breakdown_type"] = breakdown_type
+                             breakdown_record["breakdown_id"] = item.get("id")
+                             breakdown_record["breakdown_entity_type"] = item.get("type")
+
+                             item_stats = item.get("stats", {})
+                             for key, value in item_stats.items():
+                                 breakdown_record[key] = value
+
+                             yield breakdown_record
+                 else:
+                     yield record
+
+
+ def parse_timeseries_stats(result: dict) -> Iterator[dict]:
+     """
+     Parse DAY or HOUR granularity stats response.
+
+     Args:
+         result: API response JSON
+
+     Yields:
+         Flattened stats records for each time period
+     """
+     timeseries_stats = result.get("timeseries_stats", [])
+
+     for stat_item in timeseries_stats:
+         if stat_item.get("sub_request_status", "").upper() == "SUCCESS":
+             timeseries_stat = stat_item.get("timeseries_stat", {})
+             if timeseries_stat:
+                 entity_id = timeseries_stat.get("id")
+                 entity_type = timeseries_stat.get("type")
+                 granularity = timeseries_stat.get("granularity")
+                 finalized_data_end_time = timeseries_stat.get("finalized_data_end_time")
+                 conversion_data_processed_end_time = timeseries_stat.get(
+                     "conversion_data_processed_end_time"
+                 )
+                 swipe_up_attribution_window = timeseries_stat.get(
+                     "swipe_up_attribution_window"
+                 )
+                 view_attribution_window = timeseries_stat.get("view_attribution_window")
+
+                 # Iterate through each time period
+                 timeseries = timeseries_stat.get("timeseries", [])
+                 for period in timeseries:
+                     record = {
+                         "id": entity_id,
+                         "type": entity_type,
+                         "granularity": granularity,
+                         "start_time": period.get("start_time"),
+                         "end_time": period.get("end_time"),
+                         "finalized_data_end_time": finalized_data_end_time,
+                         "conversion_data_processed_end_time": conversion_data_processed_end_time,
+                         "swipe_up_attribution_window": swipe_up_attribution_window,
+                         "view_attribution_window": view_attribution_window,
+                     }
+
+                     # Flatten nested stats
+                     stats = period.get("stats", {})
+                     for key, value in stats.items():
+                         record[key] = value
+
+                     yield record
+
+                 # Handle breakdown_stats if present in timeseries
+                 breakdown_stats = timeseries_stat.get("breakdown_stats", {})
+                 if breakdown_stats:
+                     for breakdown_type, breakdown_items in breakdown_stats.items():
+                         for item in breakdown_items:
+                             item_timeseries = item.get("timeseries", [])
+                             for period in item_timeseries:
+                                 breakdown_record = {
+                                     "id": entity_id,
+                                     "type": entity_type,
+                                     "granularity": granularity,
+                                     "start_time": period.get("start_time"),
+                                     "end_time": period.get("end_time"),
+                                     "finalized_data_end_time": finalized_data_end_time,
+                                     "conversion_data_processed_end_time": conversion_data_processed_end_time,
+                                     "swipe_up_attribution_window": swipe_up_attribution_window,
+                                     "view_attribution_window": view_attribution_window,
+                                     "breakdown_type": breakdown_type,
+                                     "breakdown_id": item.get("id"),
+                                     "breakdown_entity_type": item.get("type"),
+                                 }
+
+                                 item_stats = period.get("stats", {})
+                                 for key, value in item_stats.items():
+                                     breakdown_record[key] = value
+
+                                 yield breakdown_record
+
+
+ def fetch_entity_stats(
+     api: "SnapchatAdsAPI",
+     entity_type: str,
+     ad_account_id: str | None,
+     organization_id: str | None,
+     base_url: str,
+     params: dict,
+     granularity: str,
+     start_date=None,
+     end_date=None,
+ ) -> Iterator[dict]:
+     """
+     Fetch stats for all entities of a given type.
+
+     First fetches all entities (campaigns, ads, adsquads, or adaccounts),
+     then fetches stats for each entity.
+
+     Args:
+         api: SnapchatAdsAPI instance
+         entity_type: Type of entity (campaign, adsquad, ad, adaccount)
+         ad_account_id: Specific ad account ID (optional)
+         organization_id: Organization ID (required if ad_account_id not provided)
+         base_url: Base API URL
+         params: Query parameters for stats request
+         granularity: Granularity of stats (TOTAL, DAY, HOUR, LIFETIME)
+         start_date: Start date for filtering entities
+         end_date: End date for filtering entities
+
+     Yields:
+         Flattened stats records
+     """
+     # Get account IDs
+     account_ids = get_account_ids(
+         api, ad_account_id, organization_id, base_url, "stats", start_date, end_date
+     )
+
+     if not account_ids:
+         return
+
+     if entity_type == "adaccount":
+         # For ad accounts, fetch stats directly for each account
+         for account_id in account_ids:
+             url = f"{base_url}/adaccounts/{account_id}/stats"
+             yield from fetch_stats_data(api, url, params, granularity)
+     else:
+         # For campaign, adsquad, ad - first fetch entities, then stats
+         entity_type_map = {
+             "campaign": ("campaigns", "campaign"),
+             "adsquad": ("adsquads", "adsquad"),
+             "ad": ("ads", "ad"),
+         }
+
+         resource_name, item_key = entity_type_map[entity_type]
+         client = create_client()
+         headers = api.get_headers()
+
+         for account_id in account_ids:
+             url = f"{base_url}/adaccounts/{account_id}/{resource_name}"
+
+             for result in paginate(client, headers, url, page_size=1000):
+                 items_data = result.get(resource_name, [])
+
+                 for item in items_data:
+                     if item.get("sub_request_status", "").upper() == "SUCCESS":
+                         data = item.get(item_key, {})
+                         if data and data.get("id"):
+                             entity_id = data["id"]
+                             stats_url = build_stats_url(
+                                 base_url, entity_type, entity_id
+                             )
+                             yield from fetch_stats_data(
+                                 api, stats_url, params, granularity
+                             )
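
The helpers above compose into a two-phase pattern: enumerate ad accounts (and, for non-account entities, the entities inside each account), then pull and flatten stats per entity. Below is a minimal sketch of driving `fetch_entity_stats` for daily campaign stats; the `SnapchatAdsAPI` construction is not part of this diff and the base URL is an assumption, so treat the names and values as illustrative.

```python
# Sketch only: `api` construction and BASE_URL are assumptions;
# only the helper functions shown in this diff are real.
from ingestr.src.snapchat_ads.helpers import fetch_entity_stats

BASE_URL = "https://adsapi.snapchat.com/v1"  # assumed Marketing API root

def daily_campaign_stats(api, organization_id: str):
    # Query parameters sent to the stats endpoint; granularity is also
    # passed separately so the helper knows how to parse the response.
    params = {
        "granularity": "DAY",
        "start_time": "2024-01-01T00:00:00Z",  # example window
        "end_time": "2024-01-08T00:00:00Z",
    }
    # Yields one flattened record per campaign per day, across every
    # ad account in the organization.
    yield from fetch_entity_stats(
        api,
        entity_type="campaign",
        ad_account_id=None,
        organization_id=organization_id,
        base_url=BASE_URL,
        params=params,
        granularity="DAY",
    )
```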
ingestr/src/socrata_source/__init__.py
@@ -0,0 +1,83 @@
+ """A source loading data from the Socrata open data platform"""
+
+ from typing import Any, Dict, Iterator, Optional
+
+ import dlt
+
+ from .helpers import fetch_data
+
+
+ @dlt.source(name="socrata", max_table_nesting=0)
+ def source(
+     domain: str,
+     dataset_id: str,
+     app_token: Optional[str] = None,
+     username: Optional[str] = None,
+     password: Optional[str] = None,
+     incremental: Optional[Any] = None,
+     primary_key: Optional[str] = None,
+     write_disposition: Optional[str] = dlt.config.value,
+ ):
+     """
+     A dlt source for the Socrata open data platform.
+
+     Supports both full refresh (replace) and incremental loading (merge).
+
+     Args:
+         domain: The Socrata domain (e.g., "evergreen.data.socrata.com")
+         dataset_id: The dataset identifier (e.g., "6udu-fhnu")
+         app_token: Socrata app token for higher rate limits (recommended)
+         username: Username for authentication (if dataset is private)
+         password: Password for authentication (if dataset is private)
+         incremental: dlt incremental object for incremental loading
+         primary_key: Primary key field for merge operations (default: ":id")
+         write_disposition: Write disposition ("replace", "append", "merge").
+             If not provided, it is determined automatically from the incremental setting.
+
+     Returns:
+         A dlt source with a single "dataset" resource
+     """
+
+     @dlt.resource(
+         write_disposition=write_disposition or "replace",
+         primary_key=primary_key,  # type: ignore[call-overload]
+     )
+     def dataset(
+         incremental: Optional[dlt.sources.incremental] = incremental,  # type: ignore[type-arg]
+     ) -> Iterator[Dict[str, Any]]:
+         """
+         Yields records from a Socrata dataset.
+
+         Supports both full refresh (replace) and incremental loading (merge).
+         When incremental is provided, filters data server-side using a SoQL WHERE clause.
+
+         Yields:
+             Dict[str, Any]: Individual records from the dataset
+         """
+         fetch_kwargs: Dict[str, Any] = {
+             "domain": domain,
+             "dataset_id": dataset_id,
+             "app_token": app_token,
+             "username": username,
+             "password": password,
+         }
+
+         if incremental and incremental.cursor_path:
+             fetch_kwargs["incremental_key"] = incremental.cursor_path
+             fetch_kwargs["start_value"] = (
+                 str(incremental.last_value)
+                 if incremental.last_value is not None
+                 else None
+             )
+             if getattr(incremental, "end_value", None) is not None:
+                 ev = incremental.end_value  # type: ignore[attr-defined]
+                 fetch_kwargs["end_value"] = (
+                     ev.isoformat()  # type: ignore[union-attr]
+                     if hasattr(ev, "isoformat")
+                     else str(ev)
+                 )
+
+         # Fetch and yield records
+         yield from fetch_data(**fetch_kwargs)
+
+     return (dataset,)
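
For orientation, here is a hedged example of wiring this source into a dlt pipeline. The destination, dataset name, and cursor column are illustrative; in particular, `updated_on` is a hypothetical column in the target dataset, not something this diff prescribes, while the `:id` primary key and the example domain/dataset come from the docstrings above.

```python
# Illustrative only: destination, dataset, and the `updated_on` cursor
# column are examples; pick a cursor field that exists in your dataset.
import dlt

from ingestr.src.socrata_source import source as socrata_source

pipeline = dlt.pipeline(
    pipeline_name="socrata_example",
    destination="duckdb",
    dataset_name="open_data",
)

# Incremental merge keyed on Socrata's :id system field, filtering
# server-side on `updated_on` via the SoQL WHERE clause built in helpers.py.
info = pipeline.run(
    socrata_source(
        domain="data.seattle.gov",
        dataset_id="6udu-fhnu",
        incremental=dlt.sources.incremental("updated_on"),
        primary_key=":id",
        write_disposition="merge",
    )
)
print(info)
```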
ingestr/src/socrata_source/helpers.py
@@ -0,0 +1,85 @@
+ """Socrata API helpers"""
+
+ from typing import Any, Dict, Iterator, List, Optional
+
+ from dlt.sources.helpers import requests
+
+ from .settings import DEFAULT_PAGE_SIZE, REQUEST_TIMEOUT
+
+
+ def fetch_data(
+     domain: str,
+     dataset_id: str,
+     app_token: Optional[str] = None,
+     username: Optional[str] = None,
+     password: Optional[str] = None,
+     incremental_key: Optional[str] = None,
+     start_value: Optional[str] = None,
+     end_value: Optional[str] = None,
+ ) -> Iterator[List[Dict[str, Any]]]:
+     """
+     Fetch records from a Socrata dataset with pagination and optional filtering.
+
+     Uses offset-based pagination to get all records, not just the first 50,000.
+     Supports incremental loading via a SoQL WHERE clause for server-side filtering.
+
+     Args:
+         domain: Socrata domain (e.g., "data.seattle.gov")
+         dataset_id: Dataset identifier (e.g., "6udu-fhnu")
+         app_token: Socrata app token for higher rate limits
+         username: Username for authentication
+         password: Password for authentication
+         incremental_key: Field to filter and order by for incremental loading
+         start_value: Minimum value for incremental_key (inclusive)
+         end_value: Maximum value for incremental_key (exclusive)
+
+     Yields:
+         Lists of records (one list per page)
+
+     Raises:
+         requests.HTTPError: If an API request fails
+     """
+     url = f"https://{domain}/resource/{dataset_id}.json"
+
+     headers = {"Accept": "application/json"}
+     if app_token:
+         headers["X-App-Token"] = app_token
+
+     auth = (username, password) if username and password else None
+
+     limit = DEFAULT_PAGE_SIZE
+     offset = 0
+
+     while True:
+         params: Dict[str, Any] = {"$limit": limit, "$offset": offset}
+
+         if incremental_key and start_value:
+             start_value_iso = str(start_value).replace(" ", "T")
+             where_conditions = [f"{incremental_key} >= '{start_value_iso}'"]
+
+             if end_value:
+                 end_value_iso = str(end_value).replace(" ", "T")
+                 where_conditions.append(f"{incremental_key} < '{end_value_iso}'")
+
+             params["$where"] = " AND ".join(where_conditions)
+             params["$order"] = f"{incremental_key} ASC"
+
+         response = requests.get(
+             url,
+             headers=headers,
+             auth=auth,
+             params=params,
+             timeout=REQUEST_TIMEOUT,
+         )
+         response.raise_for_status()
+
+         data = response.json()
+
+         if not data:
+             break
+
+         yield data
+
+         if len(data) < limit:
+             break
+
+         offset += limit
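
For concreteness, on an incremental run the first page request that `fetch_data` builds is equivalent to the sketch below. The values are examples, and `DEFAULT_PAGE_SIZE` is assumed here to be 50000 (matching the cap mentioned in the docstring); the actual constant lives in `settings.py`, which this diff does not show.

```python
# Example of the first-page query fetch_data() builds for an incremental run
# (values illustrative; DEFAULT_PAGE_SIZE assumed to be 50000).
params = {
    "$limit": 50000,
    "$offset": 0,
    "$where": "updated_on >= '2024-01-01T00:00:00'",
    "$order": "updated_on ASC",
}
# GET https://data.seattle.gov/resource/6udu-fhnu.json with the params above;
# subsequent pages increment $offset by $limit until a short page is returned.
```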