omniload 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. omniload/conftest.py +72 -0
  2. omniload/main.py +810 -0
  3. omniload/src/.gitignore +10 -0
  4. omniload/src/adjust/__init__.py +108 -0
  5. omniload/src/adjust/adjust_helpers.py +122 -0
  6. omniload/src/airtable/__init__.py +84 -0
  7. omniload/src/allium/__init__.py +128 -0
  8. omniload/src/anthropic/__init__.py +277 -0
  9. omniload/src/anthropic/helpers.py +525 -0
  10. omniload/src/applovin/__init__.py +316 -0
  11. omniload/src/applovin_max/__init__.py +117 -0
  12. omniload/src/appsflyer/__init__.py +325 -0
  13. omniload/src/appsflyer/client.py +110 -0
  14. omniload/src/appstore/__init__.py +142 -0
  15. omniload/src/appstore/client.py +126 -0
  16. omniload/src/appstore/errors.py +15 -0
  17. omniload/src/appstore/models.py +117 -0
  18. omniload/src/appstore/resources.py +179 -0
  19. omniload/src/arrow/__init__.py +81 -0
  20. omniload/src/asana_source/__init__.py +281 -0
  21. omniload/src/asana_source/helpers.py +30 -0
  22. omniload/src/asana_source/settings.py +158 -0
  23. omniload/src/attio/__init__.py +102 -0
  24. omniload/src/attio/helpers.py +65 -0
  25. omniload/src/blob.py +95 -0
  26. omniload/src/bruin/__init__.py +76 -0
  27. omniload/src/chess/__init__.py +180 -0
  28. omniload/src/chess/helpers.py +35 -0
  29. omniload/src/chess/settings.py +18 -0
  30. omniload/src/clickup/__init__.py +85 -0
  31. omniload/src/clickup/helpers.py +47 -0
  32. omniload/src/collector/spinner.py +43 -0
  33. omniload/src/couchbase_source/__init__.py +118 -0
  34. omniload/src/couchbase_source/helpers.py +135 -0
  35. omniload/src/cursor/__init__.py +83 -0
  36. omniload/src/cursor/helpers.py +188 -0
  37. omniload/src/customer_io/__init__.py +486 -0
  38. omniload/src/customer_io/helpers.py +530 -0
  39. omniload/src/destinations.py +982 -0
  40. omniload/src/docebo/__init__.py +589 -0
  41. omniload/src/docebo/client.py +435 -0
  42. omniload/src/docebo/helpers.py +97 -0
  43. omniload/src/dune/__init__.py +104 -0
  44. omniload/src/dune/helpers.py +108 -0
  45. omniload/src/dynamodb/__init__.py +86 -0
  46. omniload/src/elasticsearch/__init__.py +80 -0
  47. omniload/src/elasticsearch/helpers.py +141 -0
  48. omniload/src/errors.py +26 -0
  49. omniload/src/facebook_ads/__init__.py +403 -0
  50. omniload/src/facebook_ads/exceptions.py +19 -0
  51. omniload/src/facebook_ads/helpers.py +296 -0
  52. omniload/src/facebook_ads/settings.py +224 -0
  53. omniload/src/facebook_ads/utils.py +53 -0
  54. omniload/src/factory.py +305 -0
  55. omniload/src/filesystem/__init__.py +133 -0
  56. omniload/src/filesystem/helpers.py +114 -0
  57. omniload/src/filesystem/readers.py +187 -0
  58. omniload/src/filters.py +62 -0
  59. omniload/src/fireflies/__init__.py +151 -0
  60. omniload/src/fireflies/helpers.py +753 -0
  61. omniload/src/fluxx/__init__.py +10013 -0
  62. omniload/src/fluxx/helpers.py +233 -0
  63. omniload/src/frankfurter/__init__.py +157 -0
  64. omniload/src/frankfurter/helpers.py +48 -0
  65. omniload/src/freshdesk/__init__.py +103 -0
  66. omniload/src/freshdesk/freshdesk_client.py +151 -0
  67. omniload/src/freshdesk/settings.py +23 -0
  68. omniload/src/fundraiseup/__init__.py +95 -0
  69. omniload/src/fundraiseup/client.py +81 -0
  70. omniload/src/github/__init__.py +202 -0
  71. omniload/src/github/helpers.py +207 -0
  72. omniload/src/github/queries.py +129 -0
  73. omniload/src/github/settings.py +24 -0
  74. omniload/src/google_ads/__init__.py +198 -0
  75. omniload/src/google_ads/field.py +17 -0
  76. omniload/src/google_ads/metrics.py +254 -0
  77. omniload/src/google_ads/predicates.py +37 -0
  78. omniload/src/google_ads/reports.py +411 -0
  79. omniload/src/google_ads/test_google_ads.py +184 -0
  80. omniload/src/google_analytics/__init__.py +144 -0
  81. omniload/src/google_analytics/helpers.py +312 -0
  82. omniload/src/google_sheets/README.md +95 -0
  83. omniload/src/google_sheets/__init__.py +166 -0
  84. omniload/src/google_sheets/helpers/__init__.py +15 -0
  85. omniload/src/google_sheets/helpers/api_calls.py +160 -0
  86. omniload/src/google_sheets/helpers/data_processing.py +316 -0
  87. omniload/src/gorgias/__init__.py +595 -0
  88. omniload/src/gorgias/helpers.py +166 -0
  89. omniload/src/hostaway/__init__.py +302 -0
  90. omniload/src/hostaway/client.py +288 -0
  91. omniload/src/http/__init__.py +38 -0
  92. omniload/src/http/readers.py +146 -0
  93. omniload/src/http_client.py +24 -0
  94. omniload/src/hubspot/__init__.py +800 -0
  95. omniload/src/hubspot/helpers.py +417 -0
  96. omniload/src/hubspot/settings.py +329 -0
  97. omniload/src/indeed/__init__.py +153 -0
  98. omniload/src/indeed/helpers.py +228 -0
  99. omniload/src/influxdb/__init__.py +46 -0
  100. omniload/src/influxdb/client.py +34 -0
  101. omniload/src/intercom/__init__.py +142 -0
  102. omniload/src/intercom/helpers.py +674 -0
  103. omniload/src/intercom/settings.py +279 -0
  104. omniload/src/isoc_pulse/__init__.py +159 -0
  105. omniload/src/jira_source/__init__.py +377 -0
  106. omniload/src/jira_source/helpers.py +510 -0
  107. omniload/src/jira_source/settings.py +184 -0
  108. omniload/src/kafka/__init__.py +120 -0
  109. omniload/src/kafka/helpers.py +241 -0
  110. omniload/src/kinesis/__init__.py +153 -0
  111. omniload/src/kinesis/helpers.py +96 -0
  112. omniload/src/klaviyo/__init__.py +237 -0
  113. omniload/src/klaviyo/client.py +212 -0
  114. omniload/src/klaviyo/helpers.py +19 -0
  115. omniload/src/linear/__init__.py +634 -0
  116. omniload/src/linear/helpers.py +111 -0
  117. omniload/src/linkedin_ads/__init__.py +266 -0
  118. omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
  119. omniload/src/linkedin_ads/helpers.py +246 -0
  120. omniload/src/loader.py +69 -0
  121. omniload/src/mailchimp/__init__.py +126 -0
  122. omniload/src/mailchimp/helpers.py +226 -0
  123. omniload/src/mailchimp/settings.py +164 -0
  124. omniload/src/masking.py +344 -0
  125. omniload/src/mixpanel/__init__.py +62 -0
  126. omniload/src/mixpanel/client.py +104 -0
  127. omniload/src/monday/__init__.py +246 -0
  128. omniload/src/monday/helpers.py +392 -0
  129. omniload/src/monday/settings.py +325 -0
  130. omniload/src/mongodb/__init__.py +281 -0
  131. omniload/src/mongodb/helpers.py +975 -0
  132. omniload/src/notion/__init__.py +69 -0
  133. omniload/src/notion/helpers/__init__.py +14 -0
  134. omniload/src/notion/helpers/client.py +178 -0
  135. omniload/src/notion/helpers/database.py +92 -0
  136. omniload/src/notion/settings.py +17 -0
  137. omniload/src/partition.py +32 -0
  138. omniload/src/personio/__init__.py +345 -0
  139. omniload/src/personio/helpers.py +100 -0
  140. omniload/src/phantombuster/__init__.py +65 -0
  141. omniload/src/phantombuster/client.py +87 -0
  142. omniload/src/pinterest/__init__.py +82 -0
  143. omniload/src/pipedrive/__init__.py +212 -0
  144. omniload/src/pipedrive/helpers/__init__.py +37 -0
  145. omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
  146. omniload/src/pipedrive/helpers/pages.py +129 -0
  147. omniload/src/pipedrive/settings.py +41 -0
  148. omniload/src/pipedrive/typing.py +17 -0
  149. omniload/src/plusvibeai/__init__.py +335 -0
  150. omniload/src/plusvibeai/helpers.py +544 -0
  151. omniload/src/plusvibeai/settings.py +252 -0
  152. omniload/src/primer/__init__.py +45 -0
  153. omniload/src/primer/helpers.py +79 -0
  154. omniload/src/quickbooks/__init__.py +117 -0
  155. omniload/src/reddit_ads/__init__.py +183 -0
  156. omniload/src/reddit_ads/helpers.py +232 -0
  157. omniload/src/resource.py +40 -0
  158. omniload/src/revenuecat/__init__.py +83 -0
  159. omniload/src/revenuecat/helpers.py +237 -0
  160. omniload/src/salesforce/__init__.py +170 -0
  161. omniload/src/salesforce/helpers.py +78 -0
  162. omniload/src/shopify/__init__.py +1953 -0
  163. omniload/src/shopify/exceptions.py +17 -0
  164. omniload/src/shopify/helpers.py +202 -0
  165. omniload/src/shopify/settings.py +19 -0
  166. omniload/src/slack/__init__.py +290 -0
  167. omniload/src/slack/helpers.py +218 -0
  168. omniload/src/slack/settings.py +36 -0
  169. omniload/src/smartsheets/__init__.py +82 -0
  170. omniload/src/snapchat_ads/__init__.py +455 -0
  171. omniload/src/snapchat_ads/client.py +72 -0
  172. omniload/src/snapchat_ads/helpers.py +630 -0
  173. omniload/src/snapchat_ads/settings.py +130 -0
  174. omniload/src/socrata_source/__init__.py +83 -0
  175. omniload/src/socrata_source/helpers.py +85 -0
  176. omniload/src/socrata_source/settings.py +8 -0
  177. omniload/src/solidgate/__init__.py +219 -0
  178. omniload/src/solidgate/helpers.py +154 -0
  179. omniload/src/sources.py +5408 -0
  180. omniload/src/sql_database/__init__.py +0 -0
  181. omniload/src/sql_database/callbacks.py +66 -0
  182. omniload/src/stripe_analytics/__init__.py +183 -0
  183. omniload/src/stripe_analytics/helpers.py +386 -0
  184. omniload/src/stripe_analytics/settings.py +80 -0
  185. omniload/src/table_definition.py +15 -0
  186. omniload/src/testdata/fakebqcredentials.json +14 -0
  187. omniload/src/tiktok_ads/__init__.py +150 -0
  188. omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
  189. omniload/src/time.py +11 -0
  190. omniload/src/trustpilot/__init__.py +48 -0
  191. omniload/src/trustpilot/client.py +48 -0
  192. omniload/src/version.py +6 -0
  193. omniload/src/wise/__init__.py +68 -0
  194. omniload/src/wise/client.py +63 -0
  195. omniload/src/zendesk/__init__.py +480 -0
  196. omniload/src/zendesk/helpers/__init__.py +39 -0
  197. omniload/src/zendesk/helpers/api_helpers.py +119 -0
  198. omniload/src/zendesk/helpers/credentials.py +68 -0
  199. omniload/src/zendesk/helpers/talk_api.py +132 -0
  200. omniload/src/zendesk/settings.py +71 -0
  201. omniload/src/zoom/__init__.py +99 -0
  202. omniload/src/zoom/helpers.py +102 -0
  203. omniload/testdata/.gitignore +2 -0
  204. omniload/testdata/create_replace.csv +21 -0
  205. omniload/testdata/delete_insert_expected.csv +6 -0
  206. omniload/testdata/delete_insert_part1.csv +5 -0
  207. omniload/testdata/delete_insert_part2.csv +6 -0
  208. omniload/testdata/merge_expected.csv +5 -0
  209. omniload/testdata/merge_part1.csv +4 -0
  210. omniload/testdata/merge_part2.csv +5 -0
  211. omniload/tests/unit/test_smartsheets.py +133 -0
  212. omniload-0.0.0.dev0.dist-info/METADATA +439 -0
  213. omniload-0.0.0.dev0.dist-info/RECORD +218 -0
  214. omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
  215. omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
  216. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
  217. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
  218. omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
@@ -0,0 +1,417 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Hubspot source helpers"""
16
+
17
+ import urllib.parse
18
+ from typing import Any, Dict, Iterator, List, Optional
19
+
20
+ from dlt.sources.helpers import requests
21
+
22
+ from .settings import (
23
+ DEFAULT_LAST_MODIFIED_PROPERTY,
24
+ LAST_MODIFIED_PROPERTY,
25
+ OBJECT_TYPE_PLURAL,
26
+ )
27
+
28
+ BASE_URL = "https://api.hubapi.com/"
29
+
30
+
31
def get_url(endpoint: str) -> str:
    """Resolve ``endpoint`` against the HubSpot API base URL.

    Args:
        endpoint: Path (or URL) that is joined onto ``BASE_URL``.

    Returns:
        The value produced by ``urllib.parse.urljoin(BASE_URL, endpoint)``.
    """
    absolute_url = urllib.parse.urljoin(BASE_URL, endpoint)
    return absolute_url
34
+
35
+
36
+ def _get_headers(api_key: str) -> Dict[str, str]:
37
+ """
38
+ Return a dictionary of HTTP headers to use for API requests, including the specified API key.
39
+
40
+ Args:
41
+ api_key (str): The API key to use for authentication, as a string.
42
+
43
+ Returns:
44
+ dict: A dictionary of HTTP headers to include in API requests, with the `Authorization` header
45
+ set to the specified API key in the format `Bearer {api_key}`.
46
+
47
+ """
48
+ # Construct the dictionary of HTTP headers to use for API requests
49
+ return dict(authorization=f"Bearer {api_key}")
50
+
51
+
52
def extract_property_history(objects: List[Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
    """Flatten the ``propertiesWithHistory`` of each object into single rows.

    Args:
        objects: Dicts that may carry a ``propertiesWithHistory`` mapping of
            property name -> list of history entries (dicts).

    Yields:
        One dict per history entry, made of ``object_id`` (the object's
        ``id``), ``property_name`` and every key of the entry itself. Keys of
        the entry win if they collide with the two added keys.

    Raises:
        KeyError: If an object has at least one history entry but no ``id``.
    """
    for crm_object in objects:
        # Objects without (or with an empty) history contribute nothing.
        history_by_property = crm_object.get("propertiesWithHistory") or {}
        for property_name, versions in history_by_property.items():
            # A property may map to None / an empty list; treat both as empty.
            for version in versions or ():
                yield {
                    "object_id": crm_object["id"],
                    "property_name": property_name,
                    **version,
                }
63
+
64
+
65
def fetch_property_history(
    endpoint: str,
    api_key: str,
    props: str,
    params: Optional[Dict[str, Any]] = None,
) -> Iterator[List[Dict[str, Any]]]:
    """Page through a CRM endpoint and yield flattened property history.

    Args:
        endpoint: Endpoint to query; resolved with ``get_url``.
        api_key: Credential placed in the ``authorization`` header.
        props: Comma separated property names, sent as the
            ``propertiesWithHistory`` query parameter.
        params: Optional extra query parameters. The dict is copied, so the
            caller's object is not modified; ``propertiesWithHistory`` and
            ``limit`` (fixed at 50) override any values passed in.

    Yields:
        For every response page that contains ``results``, the list of
        history rows produced by ``extract_property_history``.
    """
    request_headers = _get_headers(api_key)
    # Build a fresh dict: caller-supplied params first, then the two forced keys.
    query: Dict[str, Any] = {
        **(params or {}),
        "propertiesWithHistory": props,
        "limit": 50,
    }

    first_response = requests.get(
        get_url(endpoint), headers=request_headers, params=query
    )
    page = first_response.json()

    while page is not None:
        if "results" in page:
            yield list(extract_property_history(page["results"]))

        # The next page is addressed by a ready-made link in the payload, so
        # the follow-up request is sent without the original query params.
        next_page = page.get("paging", {}).get("next", None)
        if not next_page:
            break
        page = requests.get(next_page["link"], headers=request_headers).json()
108
+
109
+
110
def fetch_data(
    endpoint: str,
    api_key: str,
    params: Optional[Dict[str, Any]] = None,
    resource_name: Optional[str] = None,
) -> Iterator[List[Dict[str, Any]]]:
    """
    Fetch data from a HubSpot endpoint and yield one list of objects per page.

    For paginated endpoints every page is fetched by following the
    ``paging.next.link`` URL found in the previous response.

    Args:
        endpoint (str): The endpoint to fetch data from; resolved with ``get_url``.
        api_key (str): The API key to use for authentication, as a string.
        params: Optional dict of query params for the first request. Follow-up
            page requests use the link from the response and do not resend them.
        resource_name: Optional selector for how each result is shaped:
            ``"schemas"`` and ``"owners"`` yield a fixed subset of fields
            (missing fields default to ``""``, ``teams`` to ``[]``); any other
            value yields the result's ``properties`` dict (or the result itself
            when it has none) with the top-level ``id`` copied in and one list
            per association type attached.

    Yields:
        A List of object dicts, one list per response page that contains
        ``results``.

    Raises:
        KeyError: For ``"schemas"`` results without ``labels``, or for results
            with non-empty associations whose object has no ``hs_object_id``.

    Notes:
        This function does not call ``raise_for_status`` and applies no retry
        logic of its own; whether HTTP errors raise or requests are retried
        depends on the imported ``requests`` helper.
        NOTE(review): presumably the dlt requests helper raises
        ``HTTPError`` on error status codes — confirm against its configuration.
    """
    # Construct the URL and headers for the API request
    url = get_url(endpoint)
    headers = _get_headers(api_key)

    r = requests.get(url, headers=headers, params=params)
    _data = r.json()

    while _data is not None:
        if "results" in _data:
            _objects: List[Dict[str, Any]] = []
            for _result in _data["results"]:
                if resource_name == "schemas":
                    _objects.append(
                        {
                            "name": _result["labels"].get("singular", ""),
                            "objectTypeId": _result.get("objectTypeId", ""),
                            "id": _result.get("id", ""),
                            "fullyQualifiedName": _result.get("fullyQualifiedName", ""),
                            "properties": _result.get("properties", ""),
                            "createdAt": _result.get("createdAt", ""),
                            "updatedAt": _result.get("updatedAt", ""),
                        }
                    )
                elif resource_name == "owners":
                    _objects.append(
                        {
                            "id": _result.get("id", ""),
                            "email": _result.get("email", ""),
                            "type": _result.get("type", ""),
                            "firstName": _result.get("firstName", ""),
                            "lastName": _result.get("lastName", ""),
                            "createdAt": _result.get("createdAt", ""),
                            "updatedAt": _result.get("updatedAt", ""),
                            "userId": _result.get("userId", ""),
                            "teams": _result.get("teams", []),
                        }
                    )
                else:
                    _obj = _result.get("properties", _result)
                    if "id" not in _obj and "id" in _result:
                        # Copy the top-level id into the yielded object
                        _obj["id"] = _result["id"]

                    if "associations" in _result:
                        for association in _result["associations"]:
                            linked_rows = [
                                {
                                    "value": _obj["hs_object_id"],
                                    f"{association}_id": linked["id"],
                                }
                                for linked in _result["associations"][association][
                                    "results"
                                ]
                            ]

                            # Drop duplicate rows while keeping first-seen
                            # order, so repeated runs emit identical output
                            # (a set would make the order arbitrary).
                            unique_rows = dict.fromkeys(
                                tuple(row.items()) for row in linked_rows
                            )
                            _obj[association] = [dict(pairs) for pairs in unique_rows]

                    _objects.append(_obj)
            yield _objects

        # Follow pagination links if they exist
        _next = _data.get("paging", {}).get("next", None)
        if _next:
            next_url = _next["link"]
            # Get the next page response
            r = requests.get(next_url, headers=headers)
            _data = r.json()
        else:
            _data = None
220
+
221
+
222
def _get_property_names(api_key: str, object_type: str) -> List[str]:
    """
    Collect the names of all properties defined for a HubSpot object type.

    Args:
        api_key: The API key passed through to ``fetch_data``.
        object_type: Key into ``OBJECT_TYPE_PLURAL``; the mapped value is used
            in the ``/crm/v3/properties/...`` endpoint path.

    Returns:
        The ``name`` of every property, across all pages, in response order.

    Raises:
        KeyError: If ``object_type`` is not in ``OBJECT_TYPE_PLURAL`` or a
            returned property has no ``name``.
    """
    endpoint = f"/crm/v3/properties/{OBJECT_TYPE_PLURAL[object_type]}"
    return [
        prop["name"]
        for page in fetch_data(endpoint, api_key)
        for prop in page
    ]
242
+
243
+
244
def _fetch_associations_batch(
    from_object_type: str,
    to_object_type: str,
    object_ids: List[str],
    api_key: str,
) -> Dict[str, List[str]]:
    """Read associations for a batch of objects from the v4 batch endpoint.

    Args:
        from_object_type: Object type used as the source side in the URL path.
        to_object_type: Object type used as the target side in the URL path.
        object_ids: Ids of the source objects; sent as ``{"id": ...}`` inputs.
        api_key: Credential placed in the ``authorization`` header.

    Returns:
        A dict mapping each source id (as ``str``) to the list of associated
        target ids (as ``str``). Sources with no targets are omitted. An empty
        dict is returned when ``object_ids`` is empty or the response status
        is 400 or 404.

    Raises:
        Whatever ``raise_for_status`` raises for other error statuses.
    """
    if not object_ids:
        return {}

    batch_url = get_url(
        f"/crm/v4/associations/{from_object_type}/{to_object_type}/batch/read"
    )
    payload = {"inputs": [{"id": oid} for oid in object_ids]}
    response = requests.post(batch_url, headers=_get_headers(api_key), json=payload)

    # 400/404 are treated as "association type not supported", not as failures.
    # NOTE(review): if the requests helper already raises on error statuses,
    # this check is never reached — confirm against its configuration.
    if response.status_code in (400, 404):
        return {}
    response.raise_for_status()

    associations: Dict[str, List[str]] = {}
    for entry in response.json().get("results", []):
        source_id = str(entry.get("from", {}).get("id", ""))
        if not source_id:
            continue
        target_ids = [
            str(link["toObjectId"])
            for link in entry.get("to", [])
            if link.get("toObjectId")
        ]
        if not target_ids:
            continue
        associations[source_id] = target_ids
    return associations
279
+
280
+
281
def fetch_data_search(
    object_type: str,
    api_key: str,
    properties: str,
    start_date_ms: str,
    end_date_ms: Optional[str] = None,
    association_types: Optional[List[str]] = None,
) -> Iterator[List[Dict[str, Any]]]:
    """Yield pages of CRM objects modified in a time window via the search API.

    Results are requested sorted by ``hs_object_id`` ascending, 100 per page.
    After 10,000 objects have been yielded for one query, the query is issued
    again with an extra ``hs_object_id > last_id`` filter so the scan resumes
    after the highest id seen so far.

    Args:
        object_type: Key into ``OBJECT_TYPE_PLURAL`` / ``LAST_MODIFIED_PROPERTY``.
        api_key: Credential placed in the ``authorization`` header.
        properties: Comma separated property names to request; empty items
            are dropped.
        start_date_ms: Lower bound (``GTE``) for the last-modified property.
        end_date_ms: Optional upper bound (``LTE``) for the same property.
        association_types: Optional association types; for each non-empty
            one, every yielded object gets a list of
            ``{"value": <object id>, "<type>_id": <associated id>}`` dicts
            under that key.

    Yields:
        One list of object dicts per response page that contains ``results``.

    Raises:
        ValueError: If a response body reports ``status == "error"``, or if an
            object id cannot be converted with ``int`` (e.g. a record with
            neither ``hs_object_id`` nor ``id``).
    """
    import logging

    logger = logging.getLogger("hubspot.search")

    plural_type = OBJECT_TYPE_PLURAL[object_type]
    search_url = get_url(f"/crm/v3/objects/{plural_type}/search")
    auth_headers = _get_headers(api_key)
    modified_prop = LAST_MODIFIED_PROPERTY.get(
        object_type, DEFAULT_LAST_MODIFIED_PROPERTY
    )
    requested_props = list(filter(None, properties.split(",")))

    def _id_of(record: Dict[str, Any]) -> str:
        # Prefer hs_object_id, fall back to id, else the empty string.
        return str(record.get("hs_object_id") or record.get("id") or "")

    last_id: Optional[str] = None

    while True:
        # Rebuild the filters on every restart so the id cursor is included.
        filters = [
            {"propertyName": modified_prop, "operator": "GTE", "value": start_date_ms}
        ]
        if end_date_ms is not None:
            filters.append(
                {"propertyName": modified_prop, "operator": "LTE", "value": end_date_ms}
            )
        if last_id is not None:
            filters.append(
                {"propertyName": "hs_object_id", "operator": "GT", "value": last_id}
            )

        logger.info(
            f"[hubspot] search {object_type}: "
            f"GTE={start_date_ms} LTE={end_date_ms} after_id={last_id}"
        )

        # A fresh body per restart also discards the previous "after" token.
        body: Dict[str, Any] = {
            "filterGroups": [{"filters": filters}],
            "properties": requested_props,
            "sorts": [{"propertyName": "hs_object_id", "direction": "ASCENDING"}],
            "limit": 100,
        }

        total_yielded = 0

        while True:
            response = requests.post(search_url, headers=auth_headers, json=body)
            response.raise_for_status()
            _data = response.json()

            if _data.get("status") == "error":
                raise ValueError(
                    f"HubSpot search error: {_data.get('message')} (correlationId: {_data.get('correlationId')})"
                )

            if "results" in _data:
                page_objects: List[Dict[str, Any]] = []
                for hit in _data["results"]:
                    record = hit.get("properties", hit)
                    if "id" not in record and "id" in hit:
                        record["id"] = hit["id"]
                    page_objects.append(record)

                    # Track the highest numeric id seen; it becomes the cursor
                    # for the next restart of the query.
                    record_id = _id_of(record)
                    if last_id is None or int(record_id) > int(last_id):
                        last_id = record_id

                if association_types and page_objects:
                    page_ids = [_id_of(record) for record in page_objects]
                    for assoc_type in association_types:
                        if not assoc_type:
                            continue
                        assoc_map = _fetch_associations_batch(
                            plural_type, assoc_type, page_ids, api_key
                        )
                        for record in page_objects:
                            owner_id = _id_of(record)
                            rows = [
                                {"value": owner_id, f"{assoc_type}_id": linked_id}
                                for linked_id in assoc_map.get(owner_id, [])
                            ]
                            # Round-trip through a set of item tuples to drop
                            # duplicate rows.
                            record[assoc_type] = [
                                dict(pairs)
                                for pairs in {tuple(row.items()) for row in rows}
                            ]

                total_yielded += len(page_objects)
                yield page_objects

            # Break BEFORE trying to fetch beyond the 10k limit — HubSpot's
            # search API hangs when paging past 10,000 results.
            if total_yielded >= 10000:
                break

            _next = _data.get("paging", {}).get("next", None)
            if not _next:
                break
            body["after"] = _next["after"]

        logger.info(
            f"[hubspot] search {object_type}: window done, "
            f"yielded={total_yielded} last_id={last_id}"
        )

        # HubSpot search API has a 10,000 result hard limit. Only when it was
        # hit do we go around again with hs_object_id > last_id; otherwise the
        # window is exhausted and the generator ends.
        if total_yielded < 10000:
            return
409
+
410
+
411
def fetch_data_raw(
    endpoint: str, api_key: str, params: Optional[Dict[str, Any]] = None
) -> Any:
    """Perform a single GET against a HubSpot endpoint and return the decoded JSON.

    Unlike the paginating fetchers this is a plain function, not a generator:
    it issues one request, follows no pagination links and returns the
    response body exactly as ``r.json()`` decodes it.

    Args:
        endpoint: Endpoint to query; resolved with ``get_url``.
        api_key: Credential placed in the ``authorization`` header.
        params: Optional dict of query params to include in the request.

    Returns:
        The decoded JSON body of the response.
    """
    url = get_url(endpoint)
    headers = _get_headers(api_key)
    r = requests.get(url, headers=headers, params=params)
    return r.json()