omniload 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omniload/conftest.py +72 -0
- omniload/main.py +810 -0
- omniload/src/.gitignore +10 -0
- omniload/src/adjust/__init__.py +108 -0
- omniload/src/adjust/adjust_helpers.py +122 -0
- omniload/src/airtable/__init__.py +84 -0
- omniload/src/allium/__init__.py +128 -0
- omniload/src/anthropic/__init__.py +277 -0
- omniload/src/anthropic/helpers.py +525 -0
- omniload/src/applovin/__init__.py +316 -0
- omniload/src/applovin_max/__init__.py +117 -0
- omniload/src/appsflyer/__init__.py +325 -0
- omniload/src/appsflyer/client.py +110 -0
- omniload/src/appstore/__init__.py +142 -0
- omniload/src/appstore/client.py +126 -0
- omniload/src/appstore/errors.py +15 -0
- omniload/src/appstore/models.py +117 -0
- omniload/src/appstore/resources.py +179 -0
- omniload/src/arrow/__init__.py +81 -0
- omniload/src/asana_source/__init__.py +281 -0
- omniload/src/asana_source/helpers.py +30 -0
- omniload/src/asana_source/settings.py +158 -0
- omniload/src/attio/__init__.py +102 -0
- omniload/src/attio/helpers.py +65 -0
- omniload/src/blob.py +95 -0
- omniload/src/bruin/__init__.py +76 -0
- omniload/src/chess/__init__.py +180 -0
- omniload/src/chess/helpers.py +35 -0
- omniload/src/chess/settings.py +18 -0
- omniload/src/clickup/__init__.py +85 -0
- omniload/src/clickup/helpers.py +47 -0
- omniload/src/collector/spinner.py +43 -0
- omniload/src/couchbase_source/__init__.py +118 -0
- omniload/src/couchbase_source/helpers.py +135 -0
- omniload/src/cursor/__init__.py +83 -0
- omniload/src/cursor/helpers.py +188 -0
- omniload/src/customer_io/__init__.py +486 -0
- omniload/src/customer_io/helpers.py +530 -0
- omniload/src/destinations.py +982 -0
- omniload/src/docebo/__init__.py +589 -0
- omniload/src/docebo/client.py +435 -0
- omniload/src/docebo/helpers.py +97 -0
- omniload/src/dune/__init__.py +104 -0
- omniload/src/dune/helpers.py +108 -0
- omniload/src/dynamodb/__init__.py +86 -0
- omniload/src/elasticsearch/__init__.py +80 -0
- omniload/src/elasticsearch/helpers.py +141 -0
- omniload/src/errors.py +26 -0
- omniload/src/facebook_ads/__init__.py +403 -0
- omniload/src/facebook_ads/exceptions.py +19 -0
- omniload/src/facebook_ads/helpers.py +296 -0
- omniload/src/facebook_ads/settings.py +224 -0
- omniload/src/facebook_ads/utils.py +53 -0
- omniload/src/factory.py +305 -0
- omniload/src/filesystem/__init__.py +133 -0
- omniload/src/filesystem/helpers.py +114 -0
- omniload/src/filesystem/readers.py +187 -0
- omniload/src/filters.py +62 -0
- omniload/src/fireflies/__init__.py +151 -0
- omniload/src/fireflies/helpers.py +753 -0
- omniload/src/fluxx/__init__.py +10013 -0
- omniload/src/fluxx/helpers.py +233 -0
- omniload/src/frankfurter/__init__.py +157 -0
- omniload/src/frankfurter/helpers.py +48 -0
- omniload/src/freshdesk/__init__.py +103 -0
- omniload/src/freshdesk/freshdesk_client.py +151 -0
- omniload/src/freshdesk/settings.py +23 -0
- omniload/src/fundraiseup/__init__.py +95 -0
- omniload/src/fundraiseup/client.py +81 -0
- omniload/src/github/__init__.py +202 -0
- omniload/src/github/helpers.py +207 -0
- omniload/src/github/queries.py +129 -0
- omniload/src/github/settings.py +24 -0
- omniload/src/google_ads/__init__.py +198 -0
- omniload/src/google_ads/field.py +17 -0
- omniload/src/google_ads/metrics.py +254 -0
- omniload/src/google_ads/predicates.py +37 -0
- omniload/src/google_ads/reports.py +411 -0
- omniload/src/google_ads/test_google_ads.py +184 -0
- omniload/src/google_analytics/__init__.py +144 -0
- omniload/src/google_analytics/helpers.py +312 -0
- omniload/src/google_sheets/README.md +95 -0
- omniload/src/google_sheets/__init__.py +166 -0
- omniload/src/google_sheets/helpers/__init__.py +15 -0
- omniload/src/google_sheets/helpers/api_calls.py +160 -0
- omniload/src/google_sheets/helpers/data_processing.py +316 -0
- omniload/src/gorgias/__init__.py +595 -0
- omniload/src/gorgias/helpers.py +166 -0
- omniload/src/hostaway/__init__.py +302 -0
- omniload/src/hostaway/client.py +288 -0
- omniload/src/http/__init__.py +38 -0
- omniload/src/http/readers.py +146 -0
- omniload/src/http_client.py +24 -0
- omniload/src/hubspot/__init__.py +800 -0
- omniload/src/hubspot/helpers.py +417 -0
- omniload/src/hubspot/settings.py +329 -0
- omniload/src/indeed/__init__.py +153 -0
- omniload/src/indeed/helpers.py +228 -0
- omniload/src/influxdb/__init__.py +46 -0
- omniload/src/influxdb/client.py +34 -0
- omniload/src/intercom/__init__.py +142 -0
- omniload/src/intercom/helpers.py +674 -0
- omniload/src/intercom/settings.py +279 -0
- omniload/src/isoc_pulse/__init__.py +159 -0
- omniload/src/jira_source/__init__.py +377 -0
- omniload/src/jira_source/helpers.py +510 -0
- omniload/src/jira_source/settings.py +184 -0
- omniload/src/kafka/__init__.py +120 -0
- omniload/src/kafka/helpers.py +241 -0
- omniload/src/kinesis/__init__.py +153 -0
- omniload/src/kinesis/helpers.py +96 -0
- omniload/src/klaviyo/__init__.py +237 -0
- omniload/src/klaviyo/client.py +212 -0
- omniload/src/klaviyo/helpers.py +19 -0
- omniload/src/linear/__init__.py +634 -0
- omniload/src/linear/helpers.py +111 -0
- omniload/src/linkedin_ads/__init__.py +266 -0
- omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
- omniload/src/linkedin_ads/helpers.py +246 -0
- omniload/src/loader.py +69 -0
- omniload/src/mailchimp/__init__.py +126 -0
- omniload/src/mailchimp/helpers.py +226 -0
- omniload/src/mailchimp/settings.py +164 -0
- omniload/src/masking.py +344 -0
- omniload/src/mixpanel/__init__.py +62 -0
- omniload/src/mixpanel/client.py +104 -0
- omniload/src/monday/__init__.py +246 -0
- omniload/src/monday/helpers.py +392 -0
- omniload/src/monday/settings.py +325 -0
- omniload/src/mongodb/__init__.py +281 -0
- omniload/src/mongodb/helpers.py +975 -0
- omniload/src/notion/__init__.py +69 -0
- omniload/src/notion/helpers/__init__.py +14 -0
- omniload/src/notion/helpers/client.py +178 -0
- omniload/src/notion/helpers/database.py +92 -0
- omniload/src/notion/settings.py +17 -0
- omniload/src/partition.py +32 -0
- omniload/src/personio/__init__.py +345 -0
- omniload/src/personio/helpers.py +100 -0
- omniload/src/phantombuster/__init__.py +65 -0
- omniload/src/phantombuster/client.py +87 -0
- omniload/src/pinterest/__init__.py +82 -0
- omniload/src/pipedrive/__init__.py +212 -0
- omniload/src/pipedrive/helpers/__init__.py +37 -0
- omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
- omniload/src/pipedrive/helpers/pages.py +129 -0
- omniload/src/pipedrive/settings.py +41 -0
- omniload/src/pipedrive/typing.py +17 -0
- omniload/src/plusvibeai/__init__.py +335 -0
- omniload/src/plusvibeai/helpers.py +544 -0
- omniload/src/plusvibeai/settings.py +252 -0
- omniload/src/primer/__init__.py +45 -0
- omniload/src/primer/helpers.py +79 -0
- omniload/src/quickbooks/__init__.py +117 -0
- omniload/src/reddit_ads/__init__.py +183 -0
- omniload/src/reddit_ads/helpers.py +232 -0
- omniload/src/resource.py +40 -0
- omniload/src/revenuecat/__init__.py +83 -0
- omniload/src/revenuecat/helpers.py +237 -0
- omniload/src/salesforce/__init__.py +170 -0
- omniload/src/salesforce/helpers.py +78 -0
- omniload/src/shopify/__init__.py +1953 -0
- omniload/src/shopify/exceptions.py +17 -0
- omniload/src/shopify/helpers.py +202 -0
- omniload/src/shopify/settings.py +19 -0
- omniload/src/slack/__init__.py +290 -0
- omniload/src/slack/helpers.py +218 -0
- omniload/src/slack/settings.py +36 -0
- omniload/src/smartsheets/__init__.py +82 -0
- omniload/src/snapchat_ads/__init__.py +455 -0
- omniload/src/snapchat_ads/client.py +72 -0
- omniload/src/snapchat_ads/helpers.py +630 -0
- omniload/src/snapchat_ads/settings.py +130 -0
- omniload/src/socrata_source/__init__.py +83 -0
- omniload/src/socrata_source/helpers.py +85 -0
- omniload/src/socrata_source/settings.py +8 -0
- omniload/src/solidgate/__init__.py +219 -0
- omniload/src/solidgate/helpers.py +154 -0
- omniload/src/sources.py +5408 -0
- omniload/src/sql_database/__init__.py +0 -0
- omniload/src/sql_database/callbacks.py +66 -0
- omniload/src/stripe_analytics/__init__.py +183 -0
- omniload/src/stripe_analytics/helpers.py +386 -0
- omniload/src/stripe_analytics/settings.py +80 -0
- omniload/src/table_definition.py +15 -0
- omniload/src/testdata/fakebqcredentials.json +14 -0
- omniload/src/tiktok_ads/__init__.py +150 -0
- omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
- omniload/src/time.py +11 -0
- omniload/src/trustpilot/__init__.py +48 -0
- omniload/src/trustpilot/client.py +48 -0
- omniload/src/version.py +6 -0
- omniload/src/wise/__init__.py +68 -0
- omniload/src/wise/client.py +63 -0
- omniload/src/zendesk/__init__.py +480 -0
- omniload/src/zendesk/helpers/__init__.py +39 -0
- omniload/src/zendesk/helpers/api_helpers.py +119 -0
- omniload/src/zendesk/helpers/credentials.py +68 -0
- omniload/src/zendesk/helpers/talk_api.py +132 -0
- omniload/src/zendesk/settings.py +71 -0
- omniload/src/zoom/__init__.py +99 -0
- omniload/src/zoom/helpers.py +102 -0
- omniload/testdata/.gitignore +2 -0
- omniload/testdata/create_replace.csv +21 -0
- omniload/testdata/delete_insert_expected.csv +6 -0
- omniload/testdata/delete_insert_part1.csv +5 -0
- omniload/testdata/delete_insert_part2.csv +6 -0
- omniload/testdata/merge_expected.csv +5 -0
- omniload/testdata/merge_part1.csv +4 -0
- omniload/testdata/merge_part2.csv +5 -0
- omniload/tests/unit/test_smartsheets.py +133 -0
- omniload-0.0.0.dev0.dist-info/METADATA +439 -0
- omniload-0.0.0.dev0.dist-info/RECORD +218 -0
- omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
- omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
- omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
|
@@ -0,0 +1,417 @@
|
|
|
1
|
+
# Copyright 2022-2025 ScaleVector
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Hubspot source helpers"""
|
|
16
|
+
|
|
17
|
+
import urllib.parse
|
|
18
|
+
from typing import Any, Dict, Iterator, List, Optional
|
|
19
|
+
|
|
20
|
+
from dlt.sources.helpers import requests
|
|
21
|
+
|
|
22
|
+
from .settings import (
|
|
23
|
+
DEFAULT_LAST_MODIFIED_PROPERTY,
|
|
24
|
+
LAST_MODIFIED_PROPERTY,
|
|
25
|
+
OBJECT_TYPE_PLURAL,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
# Root of the HubSpot REST API; get_url() joins endpoint paths onto this value.
BASE_URL = "https://api.hubapi.com/"
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def get_url(endpoint: str) -> str:
    """Resolve `endpoint` against `BASE_URL` and return the joined URL."""
    absolute_url = urllib.parse.urljoin(BASE_URL, endpoint)
    return absolute_url
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def _get_headers(api_key: str) -> Dict[str, str]:
    """
    Build the HTTP headers sent with HubSpot API requests.

    Args:
        api_key (str): The API key to authenticate with.

    Returns:
        dict: A single-entry mapping whose `authorization` value is the
            key in the format `Bearer {api_key}`.
    """
    bearer_token = f"Bearer {api_key}"
    return {"authorization": bearer_token}
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def extract_property_history(objects: List[Dict[str, Any]]) -> Iterator[Dict[str, Any]]:
    """Flatten the `propertiesWithHistory` of each object into individual entries.

    Objects without (or with an empty) `propertiesWithHistory`, and
    properties whose change list is empty or None, contribute nothing.

    Args:
        objects: Dicts that may carry a `propertiesWithHistory` mapping of
            property name -> list of change dicts. An object that yields at
            least one entry must have an `id` key.

    Yields:
        One dict per change: `object_id` and `property_name` followed by the
        change's own keys (which win on a name clash).
    """
    for record in objects:
        history_by_property = record.get("propertiesWithHistory") or {}
        for property_name, versions in history_by_property.items():
            for version in versions or ():
                yield {
                    "object_id": record["id"],
                    "property_name": property_name,
                    **version,
                }
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def fetch_property_history(
    endpoint: str,
    api_key: str,
    props: str,
    params: Optional[Dict[str, Any]] = None,
) -> Iterator[List[Dict[str, Any]]]:
    """Stream property history from the given CRM endpoint, one list per page.

    Args:
        endpoint: The endpoint to fetch from; resolved with `get_url`.
        api_key: The API key used to build the request headers.
        props: A comma separated list of properties to retrieve the history
            for; sent as the `propertiesWithHistory` query param.
        params: Optional dict of extra query params. It is copied rather than
            mutated, and any `propertiesWithHistory` / `limit` entries in it
            are overwritten.

    Yields:
        A list of flattened property history entries (dicts) for every page
        whose payload contains a `results` key.
    """
    url = get_url(endpoint)
    auth_headers = _get_headers(api_key)

    # Work on a copy so the caller's dict is left untouched.
    query = dict(params or {})
    query.update({"propertiesWithHistory": props, "limit": 50})

    page = requests.get(url, headers=auth_headers, params=query).json()
    while page is not None:
        if "results" in page:
            yield list(extract_property_history(page["results"]))

        # Follow-up pages are requested through the link the API handed back,
        # without re-sending the query params.
        next_page = page.get("paging", {}).get("next", None)
        if not next_page:
            break
        page = requests.get(next_page["link"], headers=auth_headers).json()
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def fetch_data(
    endpoint: str,
    api_key: str,
    params: Optional[Dict[str, Any]] = None,
    resource_name: Optional[str] = None,
) -> Iterator[List[Dict[str, Any]]]:
    """
    Fetch data from a HubSpot endpoint and yield one list of records per page.

    Pagination is followed through `paging.next.link` until a page carries no
    next link.

    Args:
        endpoint (str): The endpoint to fetch data from; resolved with `get_url`.
        api_key (str): The API key used to build the request headers.
        params: Optional dict of query params for the first request. Follow-up
            pages are requested through the link returned by the API, without
            these params.
        resource_name: Optional selector for how each result is shaped:
            "schemas" and "owners" keep a fixed set of top-level fields
            (missing ones default to "" / [] for `teams`); any other value
            yields the result's `properties` dict (or the result itself when
            it has none), with the result's `id` copied in and one list per
            association type attached.

    Yields:
        A list of record dicts for every page whose payload has a `results` key.

    Raises:
        KeyError: If a "schemas" result has no `labels`, or a result with
            `associations` lacks `hs_object_id` among its fields.

    Notes:
        No explicit status check is made on the responses here.
        NOTE(review): error raising / retrying is presumably handled by the
        `requests` helper this module imports - confirm against that helper.
    """
    schema_fields = (
        "objectTypeId",
        "id",
        "fullyQualifiedName",
        "properties",
        "createdAt",
        "updatedAt",
    )
    owner_fields = (
        "id",
        "email",
        "type",
        "firstName",
        "lastName",
        "createdAt",
        "updatedAt",
        "userId",
    )

    url = get_url(endpoint)
    headers = _get_headers(api_key)

    r = requests.get(url, headers=headers, params=params)
    _data = r.json()

    while _data is not None:
        if "results" in _data:
            _objects: List[Dict[str, Any]] = []
            for _result in _data["results"]:
                if resource_name == "schemas":
                    _schema: Dict[str, Any] = {
                        "name": _result["labels"].get("singular", "")
                    }
                    _schema.update(
                        {field: _result.get(field, "") for field in schema_fields}
                    )
                    _objects.append(_schema)
                elif resource_name == "owners":
                    _owner: Dict[str, Any] = {
                        field: _result.get(field, "") for field in owner_fields
                    }
                    _owner["teams"] = _result.get("teams", [])
                    _objects.append(_owner)
                else:
                    _obj = _result.get("properties", _result)
                    if "id" not in _obj and "id" in _result:
                        # Copy the result's top-level id into the record
                        _obj["id"] = _result["id"]

                    if "associations" in _result:
                        for association in _result["associations"]:
                            _links = [
                                {
                                    "value": _obj["hs_object_id"],
                                    f"{association}_id": _assoc["id"],
                                }
                                for _assoc in _result["associations"][association][
                                    "results"
                                ]
                            ]

                            # Remove duplicate links. dict.fromkeys keeps the
                            # first-seen order, so output is stable between
                            # runs (a set would not guarantee any order).
                            _unique = dict.fromkeys(
                                tuple(link.items()) for link in _links
                            )
                            _obj[association] = [dict(pairs) for pairs in _unique]

                    _objects.append(_obj)
            yield _objects

        # Follow pagination links if they exist
        _next = _data.get("paging", {}).get("next", None)
        if _next:
            r = requests.get(_next["link"], headers=headers)
            _data = r.json()
        else:
            _data = None
|
|
220
|
+
|
|
221
|
+
|
|
222
|
+
def _get_property_names(api_key: str, object_type: str) -> List[str]:
    """
    Collect the property names the HubSpot API reports for an object type.

    Args:
        api_key: The API key passed through to `fetch_data`.
        object_type: Key looked up in `OBJECT_TYPE_PLURAL`; the mapped value
            forms the last segment of the properties endpoint.

    Returns:
        The `name` of every property on every fetched page, in fetch order.
    """
    plural_name = OBJECT_TYPE_PLURAL[object_type]
    endpoint = f"/crm/v3/properties/{plural_name}"
    return [prop["name"] for page in fetch_data(endpoint, api_key) for prop in page]
|
|
242
|
+
|
|
243
|
+
|
|
244
|
+
def _fetch_associations_batch(
    from_object_type: str,
    to_object_type: str,
    object_ids: List[str],
    api_key: str,
) -> Dict[str, List[str]]:
    """Fetch associations for a batch of objects via the HubSpot v4 batch associations API.

    Args:
        from_object_type: Object type segment the associations start from.
        to_object_type: Object type segment the associations point to.
        object_ids: IDs of the source objects; sent as the `inputs` of one
            POST request. An empty list short-circuits without a request.
        api_key: The API key used to build the request headers.

    Returns:
        A dict mapping from_id -> list of to_ids (both as strings). Source
        objects without any association are omitted. An empty dict is
        returned when the API answers 400 or 404, which is treated as an
        unsupported association type.

    Raises:
        The HTTP error raised for any other unsuccessful status code.
    """
    if not object_ids:
        return {}

    unsupported_statuses = (400, 404)

    url = get_url(
        f"/crm/v4/associations/{from_object_type}/{to_object_type}/batch/read"
    )
    headers = _get_headers(api_key)
    payload = {"inputs": [{"id": oid} for oid in object_ids]}

    try:
        r = requests.post(url, headers=headers, json=payload)
    except requests.HTTPError as err:
        # NOTE(review): the `requests` helper may raise for error statuses
        # itself instead of returning the response (dlt's helper appears to do
        # so by default - confirm). Honor the 400/404 fallback on that path
        # too, so an unsupported association type does not abort the load.
        response = getattr(err, "response", None)
        if getattr(response, "status_code", None) in unsupported_statuses:
            return {}
        raise

    if r.status_code in unsupported_statuses:
        return {}
    r.raise_for_status()

    result: Dict[str, List[str]] = {}
    for item in r.json().get("results", []):
        from_id = str(item.get("from", {}).get("id", ""))
        to_ids = [
            str(a["toObjectId"]) for a in item.get("to", []) if a.get("toObjectId")
        ]
        if from_id and to_ids:
            result[from_id] = to_ids
    return result
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
def fetch_data_search(
    object_type: str,
    api_key: str,
    properties: str,
    start_date_ms: str,
    end_date_ms: Optional[str] = None,
    association_types: Optional[List[str]] = None,
) -> Iterator[List[Dict[str, Any]]]:
    """Search CRM objects modified in a time window and yield them page by page.

    Results are requested sorted by ascending `hs_object_id`. The search is
    run in "windows" of at most 10,000 results; when a window fills up, a new
    search is started with the same date filters plus
    `hs_object_id > <largest id seen>` to continue where it left off.

    Args:
        object_type: Key into `OBJECT_TYPE_PLURAL`; also used to pick the
            last-modified property from `LAST_MODIFIED_PROPERTY`, falling
            back to `DEFAULT_LAST_MODIFIED_PROPERTY`.
        api_key: The API key used to build the request headers.
        properties: Comma separated property names to return; empty items
            are dropped.
        start_date_ms: Lower bound (GTE) for the last-modified property.
        end_date_ms: Optional upper bound (LTE) for the last-modified property.
        association_types: Optional association types to attach to every
            object through `_fetch_associations_batch`; falsy items are skipped.

    Yields:
        A list of object dicts for every response that has a `results` key.
        Each dict is the result's `properties` (or the result itself when it
        has none) with the result's `id` copied in when absent, plus one list
        of `{"value": <object id>, "<type>_id": <associated id>}` dicts per
        requested association type.

    Raises:
        ValueError: If the response body reports `status == "error"`, or an
            object id is not an integer string.
    """
    import logging

    logger = logging.getLogger("hubspot.search")

    page_size = 100
    # HubSpot's search API has a 10,000 result hard limit per search.
    max_window_results = 10000

    from_type = OBJECT_TYPE_PLURAL[object_type]
    url = get_url(f"/crm/v3/objects/{from_type}/search")
    headers = _get_headers(api_key)
    modified_prop = LAST_MODIFIED_PROPERTY.get(
        object_type, DEFAULT_LAST_MODIFIED_PROPERTY
    )

    props_list = [p for p in properties.split(",") if p]

    # The date filters are identical for every window, so build them once.
    date_filters: List[Dict[str, Any]] = [
        {
            "propertyName": modified_prop,
            "operator": "GTE",
            "value": start_date_ms,
        }
    ]
    if end_date_ms is not None:
        date_filters.append(
            {
                "propertyName": modified_prop,
                "operator": "LTE",
                "value": end_date_ms,
            }
        )

    last_id: Optional[str] = None

    while True:
        filters = list(date_filters)
        if last_id is not None:
            filters.append(
                {
                    "propertyName": "hs_object_id",
                    "operator": "GT",
                    "value": last_id,
                }
            )

        logger.info(
            "[hubspot] search %s: GTE=%s LTE=%s after_id=%s",
            object_type,
            start_date_ms,
            end_date_ms,
            last_id,
        )

        body: Dict[str, Any] = {
            "filterGroups": [{"filters": filters}],
            "properties": props_list,
            "sorts": [{"propertyName": "hs_object_id", "direction": "ASCENDING"}],
            "limit": page_size,
        }

        window_start_id = last_id
        total_yielded = 0

        while True:
            r = requests.post(url, headers=headers, json=body)
            r.raise_for_status()
            _data = r.json()

            if _data.get("status") == "error":
                raise ValueError(
                    f"HubSpot search error: {_data.get('message')} (correlationId: {_data.get('correlationId')})"
                )

            if "results" in _data:
                _objects: List[Dict[str, Any]] = []
                _object_ids: List[str] = []
                for _result in _data["results"]:
                    _obj = _result.get("properties", _result)
                    if "id" not in _obj and "id" in _result:
                        _obj["id"] = _result["id"]
                    _objects.append(_obj)

                    obj_id = str(_obj.get("hs_object_id") or _obj.get("id") or "")
                    _object_ids.append(obj_id)
                    # Track the largest id seen. An object without any id is
                    # skipped here; int("") would raise ValueError.
                    if obj_id and (last_id is None or int(obj_id) > int(last_id)):
                        last_id = obj_id

                if association_types and _objects:
                    for assoc_type in association_types:
                        if not assoc_type:
                            continue
                        assoc_map = _fetch_associations_batch(
                            from_type, assoc_type, _object_ids, api_key
                        )
                        for obj, obj_id in zip(_objects, _object_ids):
                            # dict.fromkeys drops duplicate ids while keeping
                            # first-seen order, so the output is deterministic.
                            obj[assoc_type] = [
                                {"value": obj_id, f"{assoc_type}_id": aid}
                                for aid in dict.fromkeys(assoc_map.get(obj_id, []))
                            ]

                total_yielded += len(_objects)
                yield _objects

            # Break BEFORE trying to fetch beyond the 10k limit - HubSpot's
            # search API hangs when paging past 10,000 results.
            if total_yielded >= max_window_results:
                break

            _next = _data.get("paging", {}).get("next", None)
            if _next:
                body["after"] = _next["after"]
            else:
                break

        logger.info(
            "[hubspot] search %s: window done, yielded=%s last_id=%s",
            object_type,
            total_yielded,
            last_id,
        )

        # A window that stayed below the limit was the last one. Also stop if
        # a full window did not advance last_id: restarting would re-run the
        # exact same search and never finish.
        if total_yielded < max_window_results or last_id == window_start_id:
            break
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def fetch_data_raw(
    endpoint: str, api_key: str, params: Optional[Dict[str, Any]] = None
) -> Any:
    """Issue a single GET against `endpoint` and return the decoded JSON body.

    Unlike the paginated fetchers in this module, this performs exactly one
    request and returns the payload directly instead of yielding pages; it is
    not a generator.

    Args:
        endpoint: The endpoint to fetch from; resolved with `get_url`.
        api_key: The API key used to build the request headers.
        params: Optional dict of query params to include in the request.

    Returns:
        Whatever the response's `json()` decodes to, unmodified.
    """
    url = get_url(endpoint)
    headers = _get_headers(api_key)
    r = requests.get(url, headers=headers, params=params)
    return r.json()
|