omniload 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. omniload/conftest.py +72 -0
  2. omniload/main.py +810 -0
  3. omniload/src/.gitignore +10 -0
  4. omniload/src/adjust/__init__.py +108 -0
  5. omniload/src/adjust/adjust_helpers.py +122 -0
  6. omniload/src/airtable/__init__.py +84 -0
  7. omniload/src/allium/__init__.py +128 -0
  8. omniload/src/anthropic/__init__.py +277 -0
  9. omniload/src/anthropic/helpers.py +525 -0
  10. omniload/src/applovin/__init__.py +316 -0
  11. omniload/src/applovin_max/__init__.py +117 -0
  12. omniload/src/appsflyer/__init__.py +325 -0
  13. omniload/src/appsflyer/client.py +110 -0
  14. omniload/src/appstore/__init__.py +142 -0
  15. omniload/src/appstore/client.py +126 -0
  16. omniload/src/appstore/errors.py +15 -0
  17. omniload/src/appstore/models.py +117 -0
  18. omniload/src/appstore/resources.py +179 -0
  19. omniload/src/arrow/__init__.py +81 -0
  20. omniload/src/asana_source/__init__.py +281 -0
  21. omniload/src/asana_source/helpers.py +30 -0
  22. omniload/src/asana_source/settings.py +158 -0
  23. omniload/src/attio/__init__.py +102 -0
  24. omniload/src/attio/helpers.py +65 -0
  25. omniload/src/blob.py +95 -0
  26. omniload/src/bruin/__init__.py +76 -0
  27. omniload/src/chess/__init__.py +180 -0
  28. omniload/src/chess/helpers.py +35 -0
  29. omniload/src/chess/settings.py +18 -0
  30. omniload/src/clickup/__init__.py +85 -0
  31. omniload/src/clickup/helpers.py +47 -0
  32. omniload/src/collector/spinner.py +43 -0
  33. omniload/src/couchbase_source/__init__.py +118 -0
  34. omniload/src/couchbase_source/helpers.py +135 -0
  35. omniload/src/cursor/__init__.py +83 -0
  36. omniload/src/cursor/helpers.py +188 -0
  37. omniload/src/customer_io/__init__.py +486 -0
  38. omniload/src/customer_io/helpers.py +530 -0
  39. omniload/src/destinations.py +982 -0
  40. omniload/src/docebo/__init__.py +589 -0
  41. omniload/src/docebo/client.py +435 -0
  42. omniload/src/docebo/helpers.py +97 -0
  43. omniload/src/dune/__init__.py +104 -0
  44. omniload/src/dune/helpers.py +108 -0
  45. omniload/src/dynamodb/__init__.py +86 -0
  46. omniload/src/elasticsearch/__init__.py +80 -0
  47. omniload/src/elasticsearch/helpers.py +141 -0
  48. omniload/src/errors.py +26 -0
  49. omniload/src/facebook_ads/__init__.py +403 -0
  50. omniload/src/facebook_ads/exceptions.py +19 -0
  51. omniload/src/facebook_ads/helpers.py +296 -0
  52. omniload/src/facebook_ads/settings.py +224 -0
  53. omniload/src/facebook_ads/utils.py +53 -0
  54. omniload/src/factory.py +305 -0
  55. omniload/src/filesystem/__init__.py +133 -0
  56. omniload/src/filesystem/helpers.py +114 -0
  57. omniload/src/filesystem/readers.py +187 -0
  58. omniload/src/filters.py +62 -0
  59. omniload/src/fireflies/__init__.py +151 -0
  60. omniload/src/fireflies/helpers.py +753 -0
  61. omniload/src/fluxx/__init__.py +10013 -0
  62. omniload/src/fluxx/helpers.py +233 -0
  63. omniload/src/frankfurter/__init__.py +157 -0
  64. omniload/src/frankfurter/helpers.py +48 -0
  65. omniload/src/freshdesk/__init__.py +103 -0
  66. omniload/src/freshdesk/freshdesk_client.py +151 -0
  67. omniload/src/freshdesk/settings.py +23 -0
  68. omniload/src/fundraiseup/__init__.py +95 -0
  69. omniload/src/fundraiseup/client.py +81 -0
  70. omniload/src/github/__init__.py +202 -0
  71. omniload/src/github/helpers.py +207 -0
  72. omniload/src/github/queries.py +129 -0
  73. omniload/src/github/settings.py +24 -0
  74. omniload/src/google_ads/__init__.py +198 -0
  75. omniload/src/google_ads/field.py +17 -0
  76. omniload/src/google_ads/metrics.py +254 -0
  77. omniload/src/google_ads/predicates.py +37 -0
  78. omniload/src/google_ads/reports.py +411 -0
  79. omniload/src/google_ads/test_google_ads.py +184 -0
  80. omniload/src/google_analytics/__init__.py +144 -0
  81. omniload/src/google_analytics/helpers.py +312 -0
  82. omniload/src/google_sheets/README.md +95 -0
  83. omniload/src/google_sheets/__init__.py +166 -0
  84. omniload/src/google_sheets/helpers/__init__.py +15 -0
  85. omniload/src/google_sheets/helpers/api_calls.py +160 -0
  86. omniload/src/google_sheets/helpers/data_processing.py +316 -0
  87. omniload/src/gorgias/__init__.py +595 -0
  88. omniload/src/gorgias/helpers.py +166 -0
  89. omniload/src/hostaway/__init__.py +302 -0
  90. omniload/src/hostaway/client.py +288 -0
  91. omniload/src/http/__init__.py +38 -0
  92. omniload/src/http/readers.py +146 -0
  93. omniload/src/http_client.py +24 -0
  94. omniload/src/hubspot/__init__.py +800 -0
  95. omniload/src/hubspot/helpers.py +417 -0
  96. omniload/src/hubspot/settings.py +329 -0
  97. omniload/src/indeed/__init__.py +153 -0
  98. omniload/src/indeed/helpers.py +228 -0
  99. omniload/src/influxdb/__init__.py +46 -0
  100. omniload/src/influxdb/client.py +34 -0
  101. omniload/src/intercom/__init__.py +142 -0
  102. omniload/src/intercom/helpers.py +674 -0
  103. omniload/src/intercom/settings.py +279 -0
  104. omniload/src/isoc_pulse/__init__.py +159 -0
  105. omniload/src/jira_source/__init__.py +377 -0
  106. omniload/src/jira_source/helpers.py +510 -0
  107. omniload/src/jira_source/settings.py +184 -0
  108. omniload/src/kafka/__init__.py +120 -0
  109. omniload/src/kafka/helpers.py +241 -0
  110. omniload/src/kinesis/__init__.py +153 -0
  111. omniload/src/kinesis/helpers.py +96 -0
  112. omniload/src/klaviyo/__init__.py +237 -0
  113. omniload/src/klaviyo/client.py +212 -0
  114. omniload/src/klaviyo/helpers.py +19 -0
  115. omniload/src/linear/__init__.py +634 -0
  116. omniload/src/linear/helpers.py +111 -0
  117. omniload/src/linkedin_ads/__init__.py +266 -0
  118. omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
  119. omniload/src/linkedin_ads/helpers.py +246 -0
  120. omniload/src/loader.py +69 -0
  121. omniload/src/mailchimp/__init__.py +126 -0
  122. omniload/src/mailchimp/helpers.py +226 -0
  123. omniload/src/mailchimp/settings.py +164 -0
  124. omniload/src/masking.py +344 -0
  125. omniload/src/mixpanel/__init__.py +62 -0
  126. omniload/src/mixpanel/client.py +104 -0
  127. omniload/src/monday/__init__.py +246 -0
  128. omniload/src/monday/helpers.py +392 -0
  129. omniload/src/monday/settings.py +325 -0
  130. omniload/src/mongodb/__init__.py +281 -0
  131. omniload/src/mongodb/helpers.py +975 -0
  132. omniload/src/notion/__init__.py +69 -0
  133. omniload/src/notion/helpers/__init__.py +14 -0
  134. omniload/src/notion/helpers/client.py +178 -0
  135. omniload/src/notion/helpers/database.py +92 -0
  136. omniload/src/notion/settings.py +17 -0
  137. omniload/src/partition.py +32 -0
  138. omniload/src/personio/__init__.py +345 -0
  139. omniload/src/personio/helpers.py +100 -0
  140. omniload/src/phantombuster/__init__.py +65 -0
  141. omniload/src/phantombuster/client.py +87 -0
  142. omniload/src/pinterest/__init__.py +82 -0
  143. omniload/src/pipedrive/__init__.py +212 -0
  144. omniload/src/pipedrive/helpers/__init__.py +37 -0
  145. omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
  146. omniload/src/pipedrive/helpers/pages.py +129 -0
  147. omniload/src/pipedrive/settings.py +41 -0
  148. omniload/src/pipedrive/typing.py +17 -0
  149. omniload/src/plusvibeai/__init__.py +335 -0
  150. omniload/src/plusvibeai/helpers.py +544 -0
  151. omniload/src/plusvibeai/settings.py +252 -0
  152. omniload/src/primer/__init__.py +45 -0
  153. omniload/src/primer/helpers.py +79 -0
  154. omniload/src/quickbooks/__init__.py +117 -0
  155. omniload/src/reddit_ads/__init__.py +183 -0
  156. omniload/src/reddit_ads/helpers.py +232 -0
  157. omniload/src/resource.py +40 -0
  158. omniload/src/revenuecat/__init__.py +83 -0
  159. omniload/src/revenuecat/helpers.py +237 -0
  160. omniload/src/salesforce/__init__.py +170 -0
  161. omniload/src/salesforce/helpers.py +78 -0
  162. omniload/src/shopify/__init__.py +1953 -0
  163. omniload/src/shopify/exceptions.py +17 -0
  164. omniload/src/shopify/helpers.py +202 -0
  165. omniload/src/shopify/settings.py +19 -0
  166. omniload/src/slack/__init__.py +290 -0
  167. omniload/src/slack/helpers.py +218 -0
  168. omniload/src/slack/settings.py +36 -0
  169. omniload/src/smartsheets/__init__.py +82 -0
  170. omniload/src/snapchat_ads/__init__.py +455 -0
  171. omniload/src/snapchat_ads/client.py +72 -0
  172. omniload/src/snapchat_ads/helpers.py +630 -0
  173. omniload/src/snapchat_ads/settings.py +130 -0
  174. omniload/src/socrata_source/__init__.py +83 -0
  175. omniload/src/socrata_source/helpers.py +85 -0
  176. omniload/src/socrata_source/settings.py +8 -0
  177. omniload/src/solidgate/__init__.py +219 -0
  178. omniload/src/solidgate/helpers.py +154 -0
  179. omniload/src/sources.py +5408 -0
  180. omniload/src/sql_database/__init__.py +0 -0
  181. omniload/src/sql_database/callbacks.py +66 -0
  182. omniload/src/stripe_analytics/__init__.py +183 -0
  183. omniload/src/stripe_analytics/helpers.py +386 -0
  184. omniload/src/stripe_analytics/settings.py +80 -0
  185. omniload/src/table_definition.py +15 -0
  186. omniload/src/testdata/fakebqcredentials.json +14 -0
  187. omniload/src/tiktok_ads/__init__.py +150 -0
  188. omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
  189. omniload/src/time.py +11 -0
  190. omniload/src/trustpilot/__init__.py +48 -0
  191. omniload/src/trustpilot/client.py +48 -0
  192. omniload/src/version.py +6 -0
  193. omniload/src/wise/__init__.py +68 -0
  194. omniload/src/wise/client.py +63 -0
  195. omniload/src/zendesk/__init__.py +480 -0
  196. omniload/src/zendesk/helpers/__init__.py +39 -0
  197. omniload/src/zendesk/helpers/api_helpers.py +119 -0
  198. omniload/src/zendesk/helpers/credentials.py +68 -0
  199. omniload/src/zendesk/helpers/talk_api.py +132 -0
  200. omniload/src/zendesk/settings.py +71 -0
  201. omniload/src/zoom/__init__.py +99 -0
  202. omniload/src/zoom/helpers.py +102 -0
  203. omniload/testdata/.gitignore +2 -0
  204. omniload/testdata/create_replace.csv +21 -0
  205. omniload/testdata/delete_insert_expected.csv +6 -0
  206. omniload/testdata/delete_insert_part1.csv +5 -0
  207. omniload/testdata/delete_insert_part2.csv +6 -0
  208. omniload/testdata/merge_expected.csv +5 -0
  209. omniload/testdata/merge_part1.csv +4 -0
  210. omniload/testdata/merge_part2.csv +5 -0
  211. omniload/tests/unit/test_smartsheets.py +133 -0
  212. omniload-0.0.0.dev0.dist-info/METADATA +439 -0
  213. omniload-0.0.0.dev0.dist-info/RECORD +218 -0
  214. omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
  215. omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
  216. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
  217. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
  218. omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
@@ -0,0 +1,296 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Facebook ads source helpers"""
16
+
17
+ import functools
18
+ import itertools
19
+ import time
20
+ from datetime import datetime
21
+ from typing import Any, Iterator, Sequence
22
+
23
+ import humanize
24
+ import pendulum
25
+ from dlt.common import logger
26
+ from dlt.common.configuration.inject import with_config
27
+ from dlt.common.typing import DictStrAny, TDataItem, TDataItems
28
+ from dlt.sources.helpers import requests
29
+ from dlt.sources.helpers.requests import Client
30
+ from facebook_business import FacebookAdsApi
31
+ from facebook_business.adobjects.abstractcrudobject import AbstractCrudObject
32
+ from facebook_business.adobjects.abstractobject import AbstractObject
33
+ from facebook_business.adobjects.adaccount import AdAccount
34
+ from facebook_business.adobjects.user import User
35
+ from facebook_business.api import FacebookResponse
36
+
37
+ from .exceptions import InsightsJobTimeout
38
+ from .settings import (
39
+ INSIGHTS_PRIMARY_KEY,
40
+ TFbMethod,
41
+ )
42
+
43
+
44
def process_report_item(item: AbstractObject) -> DictStrAny:
    """Convert one insights report row into a plain dict.

    The "date_start" and "date_stop" values, when present, are parsed from
    "%Y-%m-%d" strings into `date` objects on `item` itself before the row is
    exported. Every key listed in INSIGHTS_PRIMARY_KEY that is missing from
    the exported dict is filled with the placeholder "no_<key>".

    Args:
        item (AbstractObject): Report row; it is mutated in place by the date conversion.

    Returns:
        DictStrAny: The exported row with all primary key columns present.
    """
    for date_key in ("date_start", "date_stop"):
        if date_key in item:
            item[date_key] = datetime.strptime(item[date_key], "%Y-%m-%d").date()

    row: DictStrAny = item.export_all_data()
    for key_name in INSIGHTS_PRIMARY_KEY:
        # only fills the gap; an existing value is never overwritten
        row.setdefault(key_name, "no_" + key_name)
    return row
55
+
56
+
57
def get_data_chunked(
    method: TFbMethod,
    fields: Sequence[str],
    states: Sequence[str],
    chunk_size: int,
    updated_since: int = None,
) -> Iterator[TDataItems]:
    """Call `method` once and yield its exported results in lists of at most `chunk_size`.

    Args:
        method (TFbMethod): Callable invoked with `fields=` and `params=` keyword arguments.
        fields (Sequence[str]): Passed through to `method` unchanged.
        states (Sequence[str]): When non-empty, sent as the "effective_status" parameter.
        chunk_size (int): Sent as the "limit" parameter and used as the size of each yielded list.
        updated_since (int): When truthy, sent as the "updated_since" parameter.

    Yields:
        TDataItems: Lists of dicts produced by `export_all_data()`; the last list may be shorter.
    """
    params: DictStrAny = {"limit": chunk_size}
    if states:
        params["effective_status"] = states
    if updated_since:
        params["updated_since"] = updated_since

    # the call to `method` happens right here; exporting each object stays lazy
    exported = (obj.export_all_data() for obj in method(fields=fields, params=params))
    while chunk := list(itertools.islice(exported, chunk_size)):
        yield chunk
78
+
79
+
80
def enrich_ad_objects(fb_obj_type: AbstractObject, fields: Sequence[str]) -> Any:
    """Build a transformation that adds extra `fields` to every item passing through a resource.

    In the example below "thumbnail_url" is added to all objects loaded by the `ad_creatives` resource:
    >>> fb_ads = facebook_ads_source()
    >>> fb_ads.ad_creatives.add_step(enrich_ad_objects(AdCreative, ["thumbnail_url"]))

    All lookups for one call are queued on a single Facebook batch request and executed together.

    Args:
        fb_obj_type (AbstractObject): A Facebook Business object type (Ad, Campaign, AdSet, AdCreative, Lead).
        fields (Sequence[str]): A list/tuple of fields to add to each object.

    Returns:
        ItemTransformFunctionWithMeta[TDataItems]: A transformation function to be added to a resource with `add_step` method
    """

    def _merge_response(target: TDataItem, response: FacebookResponse) -> None:
        # success callback: fold the fetched fields into the item in place
        target.update(response.json())

    def _raise_response_error(response: FacebookResponse) -> None:
        # failure callback: surface the API error instead of skipping the item
        raise response.error()

    def _wrap(items: TDataItems, meta: Any = None) -> TDataItems:
        batch = FacebookAdsApi.get_default_api().new_batch()
        for record in items:
            fb_object: AbstractCrudObject = fb_obj_type(record["id"])
            fb_object.api_get(
                fields=fields,
                batch=batch,
                success=functools.partial(_merge_response, record),
                failure=_raise_response_error,
            )
        batch.execute()
        return items

    return _wrap
118
+
119
+
120
# Advice text appended to the timeout messages raised by `execute_job`.
JOB_TIMEOUT_INFO = """This is an intermittent error and may resolve itself on subsequent queries to the Facebook API.
You should remove the fields in `fields` argument that are not necessary, as that may help improve the reliability of the Facebook API."""
122
+
123
+
124
def execute_job(
    job: AbstractCrudObject,
    insights_max_wait_to_start_seconds: int = 5 * 60,
    insights_max_wait_to_finish_seconds: int = 30 * 60,
    insights_max_async_sleep_seconds: int = 5 * 60,
) -> AbstractCrudObject:
    """Poll an async insights job until it reports "Job Completed".

    The job is refreshed with `api_get()` on every iteration. Between polls the
    function sleeps, starting at 3 seconds and doubling each time, but never
    longer than `insights_max_async_sleep_seconds` once doubling has started.

    Args:
        job (AbstractCrudObject): The async job to poll.
        insights_max_wait_to_start_seconds (int): Give up when the job is still at 0% after this long.
        insights_max_wait_to_finish_seconds (int): Give up when the job has not completed after this long.
        insights_max_async_sleep_seconds (int): Upper bound for the growing sleep between polls.

    Returns:
        AbstractCrudObject: The refreshed job object once its status is "Job Completed".

    Raises:
        InsightsJobTimeout: The job did not start or did not finish within the configured limits.
    """
    time_start = time.time()
    sleep_time = 3
    while True:
        duration = time.time() - time_start
        job = job.api_get()
        status: str = job["async_status"]
        percent_complete = job["async_percent_completion"]
        job_id = job["id"]
        logger.info("%s, %d%% done", status, percent_complete)

        if status == "Job Completed":
            return job

        # NOTE(review): any other status, including a failed job, keeps polling
        # until one of the timeouts below fires -- confirm that is intended.
        if duration > insights_max_wait_to_start_seconds and percent_complete == 0:
            pretty_error_message = (
                "Insights job {} did not start after {} seconds. " + JOB_TIMEOUT_INFO
            )
            raise InsightsJobTimeout(
                "facebook_insights",
                pretty_error_message.format(job_id, insights_max_wait_to_start_seconds),
            )
        if duration > insights_max_wait_to_finish_seconds:
            pretty_error_message = (
                "Insights job {} did not complete after {} seconds. " + JOB_TIMEOUT_INFO
            )
            raise InsightsJobTimeout(
                "facebook_insights",
                pretty_error_message.format(
                    job_id, insights_max_wait_to_finish_seconds
                ),
            )

        logger.info("sleeping for %d seconds until job is done", sleep_time)
        time.sleep(sleep_time)
        if sleep_time < insights_max_async_sleep_seconds:
            # plain doubling overshoots the limit (192 -> 384 with the 300 default),
            # so clamp the doubled value to the configured maximum
            sleep_time = min(2 * sleep_time, insights_max_async_sleep_seconds)
171
+
172
+
173
def _init_facebook_api(
    access_token: str, request_timeout: float, app_api_version: str
) -> None:
    """Initialize the default Facebook API with a retrying HTTP session.

    Emits the token expiration notice first, then builds a `Client` session
    that retries throttled requests and installs it as the session used by the
    object returned from `FacebookAdsApi.init`.

    Args:
        access_token (str): Token passed to `FacebookAdsApi.init` and added to the session's default params.
        request_timeout (float): Timeout handed to the retrying `Client`.
        app_api_version (str): API version passed to `FacebookAdsApi.init`.
    """
    notify_on_token_expiration()

    # Error codes treated as throttling / transient. The original listed 800008
    # and 800009, which read as typos of the 80008 / 80009 rate-limit codes
    # sitting next to 80000-80006 and 80014.
    retryable_codes = (
        1,
        2,
        4,
        17,
        341,
        32,
        613,
        *range(80000, 80007),
        80008,
        80009,
        80014,
    )

    def retry_on_limit(response: requests.Response, exception: BaseException) -> bool:
        # Best effort on purpose: a missing response, a non-JSON body or an
        # unexpected error shape all mean "do not retry" rather than a crash.
        try:
            error = response.json()["error"]
            code = error["code"]
            message = error["message"]
            should_retry = code in retryable_codes
            if should_retry:
                # lazy logger arguments: the message is only built when emitted
                logger.warning(
                    "facebook_ads source will retry due to %s with error code %i",
                    message,
                    code,
                )
            return should_retry
        except Exception:
            return False

    retry_session = Client(
        request_timeout=request_timeout,
        raise_for_status=False,
        retry_condition=retry_on_limit,
        request_max_attempts=12,
        request_backoff_factor=2,
    ).session
    retry_session.params.update({"access_token": access_token})  # type: ignore
    API = FacebookAdsApi.init(
        access_token=access_token,
        api_version=app_api_version,
    )
    # swap in the retrying session for the SDK's own one
    API._session.requests = retry_session
219
+
220
+
221
def get_ads_account(
    account_id: str, access_token: str, request_timeout: float, app_api_version: str
) -> AdAccount:
    """Initialize the Facebook API and return the ad account `act_<account_id>`."""
    _init_facebook_api(access_token, request_timeout, app_api_version)
    prefixed_account_id = f"act_{account_id}"
    return AdAccount(prefixed_account_id)
227
+
228
+
229
def get_all_ad_accounts(
    access_token: str, request_timeout: float, app_api_version: str
) -> list[AdAccount]:
    """Initialize the Facebook API and list the ad accounts of the "me" user."""
    _init_facebook_api(access_token, request_timeout, app_api_version)
    current_user = User(fbid="me")
    # materialize the result into a list for the caller
    return [*current_user.get_ad_accounts()]
236
+
237
+
238
@with_config(sections=("sources", "facebook_ads"))
def notify_on_token_expiration(access_token_expires_at: int = None) -> None:
    """Log a notice about the access token's expiration.

    Without `access_token_expires_at` a warning says the notification is
    disabled. Otherwise the value is read as a timestamp and an error is
    logged when it falls before "now + 7 days".

    Args:
        access_token_expires_at (int): Expiration timestamp; falsy disables the check.
    """
    if not access_token_expires_at:
        logger.warning(
            "Token expiration time notification disabled. Configure token expiration timestamp in access_token_expires_at config value"
        )
        return

    expires_at = pendulum.from_timestamp(access_token_expires_at)
    warning_horizon = pendulum.now().add(days=7)
    if expires_at < warning_horizon:
        logger.error(
            f"Access Token expires in {humanize.precisedelta(pendulum.now() - expires_at)}. Replace the token now!"
        )
251
+
252
+
253
def parse_insights_table_to_source_kwargs(table: str) -> DictStrAny:
    """Translate a `facebook_insights:breakdown_type[:metric1,metric2...]` table name into source kwargs.

    The second colon-separated segment is either one of the names in
    `TInsightsBreakdownOptions`, in which case its preset "breakdowns" and
    "fields" are used, or a comma-separated list of dimensions. In the second
    case one insights level (the last matching entry of `TInsightsLevels`) is
    pulled out of the list and returned under "level"; it is None if no level
    is present.

    When exactly three segments are given, the third is a comma-separated list
    of metrics that replaces "fields".

    Args:
        table (str): The table name to parse.

    Returns:
        DictStrAny: A dict with "dimensions" plus "fields" and/or "level".

    Raises:
        ValueError: A third segment is present but contains no metric names.
    """
    import typing

    from omniload.src.facebook_ads.settings import (
        INSIGHTS_BREAKDOWNS_OPTIONS,
        TInsightsBreakdownOptions,
        TInsightsLevels,
    )

    segments = table.split(":")
    source_kwargs = {}
    breakdown_type = segments[1]

    if breakdown_type in typing.get_args(TInsightsBreakdownOptions):
        preset = INSIGHTS_BREAKDOWNS_OPTIONS[breakdown_type]
        dimensions, fields = preset["breakdowns"], preset["fields"]
        source_kwargs["dimensions"] = dimensions
        source_kwargs["fields"] = fields
    else:
        dimensions = breakdown_type.split(",")
        # walk the levels from the last one backwards and take the first hit
        level = next(
            (
                candidate
                for candidate in reversed(typing.get_args(TInsightsLevels))
                if candidate in dimensions
            ),
            None,
        )
        if level is not None:
            dimensions.remove(level)

        source_kwargs["level"] = level
        source_kwargs["dimensions"] = dimensions

    # If custom metrics are provided, parse them
    if len(segments) == 3:
        fields = [name.strip() for name in segments[2].split(",") if name.strip()]
        if not fields:
            raise ValueError(
                "Custom metrics must be provided after the second colon in format: facebook_insights:breakdown_type:metric1,metric2..."
            )
        source_kwargs["fields"] = fields

    return source_kwargs
@@ -0,0 +1,224 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Facebook ads source settings and constants"""
16
+
17
+ from typing import Any, Callable, Dict, Iterator, Literal
18
+
19
+ from dlt.common.schema.typing import TTableSchemaColumns
20
+ from facebook_business.adobjects.abstractobject import AbstractObject
21
+
22
# Any callable that returns an iterator of Facebook objects; `get_data_chunked`
# in helpers.py invokes it with `fields=` and `params=` keyword arguments.
TFbMethod = Callable[..., Iterator[AbstractObject]]


# Field names shared by the campaign, ad and ad set defaults below.
DEFAULT_FIELDS = (
    "id",
    "updated_time",
    "created_time",
    "name",
    "status",
    "effective_status",
)

# Common fields plus campaign-specific ones.
DEFAULT_CAMPAIGN_FIELDS = DEFAULT_FIELDS + (
    "objective",
    "start_time",
    "stop_time",
    "daily_budget",
    "lifetime_budget",
)

# Common fields plus ad-specific ones.
DEFAULT_AD_FIELDS = DEFAULT_FIELDS + (
    "adset_id",
    "campaign_id",
    "creative",
    "targeting",
    "tracking_specs",
    "conversion_specs",
)

# Common fields plus ad-set-specific ones.
DEFAULT_ADSET_FIELDS = DEFAULT_FIELDS + (
    "campaign_id",
    "start_time",
    "end_time",
    "daily_budget",
    "lifetime_budget",
    "optimization_goal",
    "promoted_object",
    "billing_event",
    "bid_amount",
    "bid_strategy",
    "targeting",
)

# Stand-alone list: ad creatives do not build on DEFAULT_FIELDS.
DEFAULT_ADCREATIVE_FIELDS = (
    "id",
    "name",
    "status",
    "thumbnail_url",
    "object_story_spec",
    "effective_object_story_id",
    "call_to_action_type",
    "object_type",
    "template_url",
    "url_tags",
    "instagram_actor_id",
    "product_set_id",
)

# Stand-alone list: leads do not build on DEFAULT_FIELDS.
DEFAULT_LEAD_FIELDS = (
    "id",
    "created_time",
    "ad_id",
    "ad_name",
    "adset_id",
    "adset_name",
    "campaign_id",
    "campaign_name",
    "form_id",
    "field_data",
)
93
# Default field names for insights reports. Each name appears exactly once:
# the original tuple repeated "reach", "spend" and "website_ctr"; the repeats
# are dropped here and the order of first occurrence is kept.
# NOTE(review): "country" and "dma" are also listed in INVALID_INSIGHTS_FIELDS;
# presumably the consumer filters them out -- verify against the caller.
DEFAULT_INSIGHT_FIELDS = (
    "campaign_id",
    "adset_id",
    "ad_id",
    "date_start",
    "date_stop",
    "reach",
    "impressions",
    "frequency",
    "clicks",
    "unique_clicks",
    "ctr",
    "unique_ctr",
    "cpc",
    "cpm",
    "cpp",
    "spend",
    "actions",
    "action_values",
    "cost_per_action_type",
    "website_ctr",
    "account_currency",
    "ad_click_actions",
    "ad_name",
    "adset_name",
    "campaign_name",
    "country",
    "dma",
    "full_view_impressions",
    "full_view_reach",
    "inline_link_click_ctr",
    "outbound_clicks",
    "social_spend",
    "conversions",
    "video_thruplay_watched_actions",
)
132
+
133
# Accepted insights levels. `parse_insights_table_to_source_kwargs` in
# helpers.py scans this list in reverse order to pick a level out of the
# requested dimensions.
TInsightsLevels = Literal["account", "campaign", "adset", "ad"]

# Keys that `process_report_item` in helpers.py guarantees on every insights
# row, filling any missing one with the placeholder "no_<key>".
INSIGHTS_PRIMARY_KEY = ("campaign_id", "adset_id", "ad_id", "date_start")

# Status values grouped under the "effective_status" key.
# NOTE(review): presumably passed as the state filter for ad objects -- the
# consumer is outside this view.
ALL_STATES = {
    "effective_status": [
        "ACTIVE",
        "PAUSED",
        "DELETED",
        "PENDING_REVIEW",
        "DISAPPROVED",
        "PREAPPROVED",
        "PENDING_BILLING_INFO",
        "CAMPAIGN_PAUSED",
        "ARCHIVED",
        "ADSET_PAUSED",
    ]
}

# Names of the predefined breakdown presets; each is a key of
# INSIGHTS_BREAKDOWNS_OPTIONS.
TInsightsBreakdownOptions = Literal[
    "ads_insights",
    "ads_insights_age_and_gender",
    "ads_insights_country",
    "ads_insights_platform_and_device",
    "ads_insights_region",
    "ads_insights_dma",
    "ads_insights_hourly_advertiser",
]

# Attribution window identifiers (click and view, 1/7/28 days).
ALL_ACTION_ATTRIBUTION_WINDOWS = (
    "1d_click",
    "7d_click",
    "28d_click",
    "1d_view",
    "7d_view",
    "28d_view",
)

# Action breakdown identifiers.
ALL_ACTION_BREAKDOWNS = ("action_type", "action_target_id", "action_destination")
172
+
173
# Preset name -> the "breakdowns" and "fields" tuples it stands for. In every
# preset the two tuples hold the same names. Read by
# `parse_insights_table_to_source_kwargs` in helpers.py.
INSIGHTS_BREAKDOWNS_OPTIONS: Dict[TInsightsBreakdownOptions, Any] = {
    "ads_insights": {"breakdowns": (), "fields": ()},
    "ads_insights_age_and_gender": {
        "breakdowns": ("age", "gender"),
        "fields": ("age", "gender"),
    },
    "ads_insights_country": {"breakdowns": ("country",), "fields": ("country",)},
    "ads_insights_platform_and_device": {
        "breakdowns": ("publisher_platform", "platform_position", "impression_device"),
        "fields": ("publisher_platform", "platform_position", "impression_device"),
    },
    "ads_insights_region": {"breakdowns": ("region",), "fields": ("region",)},
    "ads_insights_dma": {"breakdowns": ("dma",), "fields": ("dma",)},
    "ads_insights_hourly_advertiser": {
        "breakdowns": ("hourly_stats_aggregated_by_advertiser_time_zone",),
        "fields": ("hourly_stats_aggregated_by_advertiser_time_zone",),
    },
}

# Column type hints for insights columns, keyed by column name.
# NOTE(review): "date_start"/"date_stop" are typed "timestamp" here while
# `process_report_item` converts them to `date` objects -- confirm the
# intended column type.
INSIGHT_FIELDS_TYPES: TTableSchemaColumns = {
    "campaign_id": {"data_type": "bigint"},
    "adset_id": {"data_type": "bigint"},
    "ad_id": {"data_type": "bigint"},
    "date_start": {"data_type": "timestamp"},
    "date_stop": {"data_type": "timestamp"},
    "reach": {"data_type": "bigint"},
    "impressions": {"data_type": "bigint"},
    "frequency": {"data_type": "decimal"},
    "clicks": {"data_type": "bigint"},
    "unique_clicks": {"data_type": "bigint"},
    "ctr": {"data_type": "decimal"},
    "unique_ctr": {"data_type": "decimal"},
    "cpc": {"data_type": "decimal"},
    "cpm": {"data_type": "decimal"},
    "cpp": {"data_type": "decimal"},
    "spend": {"data_type": "decimal"},
}

# Names that mostly coincide with the breakdown names used in
# INSIGHTS_BREAKDOWNS_OPTIONS.
# NOTE(review): presumably these must not be requested as insight fields --
# the code consuming this list is outside this view.
INVALID_INSIGHTS_FIELDS = [
    "impression_device",
    "publisher_platform",
    "platform_position",
    "age",
    "gender",
    "country",
    "placement",
    "region",
    "dma",
    "hourly_stats_aggregated_by_advertiser_time_zone",
]

FACEBOOK_INSIGHTS_RETENTION_PERIOD = 37  # months
@@ -0,0 +1,53 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ from typing import Dict
16
+
17
+ import dlt
18
+ from dlt.common.configuration.inject import with_config
19
+ from dlt.sources.helpers import requests
20
+
21
+
22
@with_config(sections=("sources", "facebook_ads"))
def debug_access_token(
    access_token: str = dlt.secrets.value,
    client_id: str = dlt.secrets.value,
    client_secret: str = dlt.secrets.value,
) -> Dict[str, object]:
    """Debug the `access_token`, providing info on expiration time, scopes etc.

    If arguments are not provided, `dlt` will inject them from configuration.

    Args:
        access_token (str): The token to inspect, sent as `input_token`.
        client_id (str): App id; combined with `client_secret` as "<id>|<secret>" to authorize the call.
        client_secret (str): App secret.

    Returns:
        Dict[str, object]: The "data" member of the `debug_token` response.

    Raises:
        Exception: The response contains an "error" member.
    """
    # Hand the values to `params` so they are URL-encoded by the HTTP library;
    # interpolating them into the URL sends reserved characters unescaped.
    response = requests.get(
        "https://graph.facebook.com/debug_token",
        params={
            "input_token": access_token,
            "access_token": f"{client_id}|{client_secret}",
        },
    )
    data: Dict[str, object] = response.json()

    if "error" in data:
        raise Exception(f"Error debugging token: {data['error']}")

    return data["data"]
37
+
38
+
39
@with_config(sections=("sources", "facebook_ads"))
def get_long_lived_token(
    access_token: str = dlt.secrets.value,
    client_id: str = dlt.secrets.value,
    client_secret: str = dlt.secrets.value,
) -> str:
    """Get the long lived access token (60 days) from `access_token`.

    If arguments are not provided, `dlt` will inject them from configuration.

    Args:
        access_token (str): The token to exchange, sent as `fb_exchange_token`.
        client_id (str): App id.
        client_secret (str): App secret.

    Returns:
        str: The "access_token" member of the exchange response.

    Raises:
        Exception: The response contains an "error" member.
    """
    # Hand the values to `params` so they are URL-encoded by the HTTP library;
    # interpolating them into the URL sends reserved characters unescaped.
    response = requests.get(
        "https://graph.facebook.com/v13.0/oauth/access_token",
        params={
            "grant_type": "fb_exchange_token",
            "client_id": client_id,
            "client_secret": client_secret,
            "fb_exchange_token": access_token,
        },
    )
    data: Dict[str, str] = response.json()

    if "error" in data:
        raise Exception(f"Error refreshing token: {data['error']}")

    return data["access_token"]