ingestr 0.14.0__py3-none-any.whl → 0.14.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/factory.py +2 -0
- ingestr/src/fluxx/__init__.py +1 -4
- ingestr/src/fluxx/helpers.py +0 -7
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +277 -0
- ingestr/src/sources.py +73 -0
- {ingestr-0.14.0.dist-info → ingestr-0.14.2.dist-info}/METADATA +1 -1
- {ingestr-0.14.0.dist-info → ingestr-0.14.2.dist-info}/RECORD +13 -10
- {ingestr-0.14.0.dist-info → ingestr-0.14.2.dist-info}/WHEEL +0 -0
- {ingestr-0.14.0.dist-info → ingestr-0.14.2.dist-info}/entry_points.txt +0 -0
- {ingestr-0.14.0.dist-info → ingestr-0.14.2.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/buildinfo.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
version = "v0.14.
|
|
1
|
+
version = "v0.14.2"
|
ingestr/src/factory.py
CHANGED
|
@@ -54,6 +54,7 @@ from ingestr.src.sources import (
|
|
|
54
54
|
GorgiasSource,
|
|
55
55
|
HubspotSource,
|
|
56
56
|
InfluxDBSource,
|
|
57
|
+
IntercomSource,
|
|
57
58
|
IsocPulseSource,
|
|
58
59
|
KafkaSource,
|
|
59
60
|
KinesisSource,
|
|
@@ -166,6 +167,7 @@ class SourceDestinationFactory:
|
|
|
166
167
|
"fluxx": FluxxSource,
|
|
167
168
|
"slack": SlackSource,
|
|
168
169
|
"hubspot": HubspotSource,
|
|
170
|
+
"intercom": IntercomSource,
|
|
169
171
|
"airtable": AirtableSource,
|
|
170
172
|
"klaviyo": KlaviyoSource,
|
|
171
173
|
"mixpanel": MixpanelSource,
|
ingestr/src/fluxx/__init__.py
CHANGED
|
@@ -9879,10 +9879,7 @@ def fluxx_source(
|
|
|
9879
9879
|
if field_name in fields_to_extract:
|
|
9880
9880
|
filtered_fields[field_name] = fields_to_extract[field_name]
|
|
9881
9881
|
else:
|
|
9882
|
-
|
|
9883
|
-
print(
|
|
9884
|
-
f"Warning: Field '{field_name}' not found in {resource_name} configuration"
|
|
9885
|
-
)
|
|
9882
|
+
filtered_fields[field_name] = {}
|
|
9886
9883
|
|
|
9887
9884
|
fields_to_extract = filtered_fields
|
|
9888
9885
|
|
ingestr/src/fluxx/helpers.py
CHANGED
|
@@ -43,12 +43,6 @@ def fluxx_api_request(
|
|
|
43
43
|
"Authorization": f"Bearer {access_token}",
|
|
44
44
|
"Content-Type": "application/json",
|
|
45
45
|
}
|
|
46
|
-
# print(f"Making request to Fluxx API:")
|
|
47
|
-
# print(f" Method: {method}")
|
|
48
|
-
# print(f" URL: {url}")
|
|
49
|
-
# print(f" Headers: {headers}")
|
|
50
|
-
# print(f" Params: {params}")
|
|
51
|
-
# print(f" Data: {data}")
|
|
52
46
|
|
|
53
47
|
response = requests.request(
|
|
54
48
|
method=method,
|
|
@@ -88,7 +82,6 @@ def paginate_fluxx_resource(
|
|
|
88
82
|
params=params,
|
|
89
83
|
)
|
|
90
84
|
|
|
91
|
-
print("resssponse", response)
|
|
92
85
|
if not response:
|
|
93
86
|
break
|
|
94
87
|
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Intercom source implementation for data ingestion.
|
|
3
|
+
|
|
4
|
+
This module provides DLT sources for retrieving data from Intercom API endpoints
|
|
5
|
+
including contacts, companies, conversations, tickets, and more.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from typing import Optional, Sequence
|
|
9
|
+
|
|
10
|
+
import dlt
|
|
11
|
+
from dlt.common.time import ensure_pendulum_datetime
|
|
12
|
+
from dlt.common.typing import TAnyDateTime
|
|
13
|
+
from dlt.sources import DltResource, DltSource
|
|
14
|
+
|
|
15
|
+
from .helpers import (
|
|
16
|
+
IntercomAPIClient,
|
|
17
|
+
IntercomCredentialsAccessToken,
|
|
18
|
+
TIntercomCredentials,
|
|
19
|
+
convert_datetime_to_timestamp,
|
|
20
|
+
create_resource_from_config,
|
|
21
|
+
transform_company,
|
|
22
|
+
transform_contact,
|
|
23
|
+
transform_conversation,
|
|
24
|
+
)
|
|
25
|
+
from .helpers import (
|
|
26
|
+
IntercomCredentialsOAuth as IntercomCredentialsOAuth,
|
|
27
|
+
)
|
|
28
|
+
from .settings import (
|
|
29
|
+
DEFAULT_START_DATE,
|
|
30
|
+
RESOURCE_CONFIGS,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dlt.source(name="intercom", max_table_nesting=0)
def intercom_source(
    credentials: TIntercomCredentials = dlt.secrets.value,
    start_date: Optional[TAnyDateTime] = DEFAULT_START_DATE,
    end_date: Optional[TAnyDateTime] = None,
) -> Sequence[DltResource]:
    """
    A DLT source that retrieves data from Intercom API.

    This source provides access to various Intercom resources including contacts,
    companies, conversations, tickets, and more. It supports incremental loading
    for resources that track updated timestamps.

    Args:
        credentials: Intercom API credentials (AccessToken or OAuth).
            Defaults to dlt.secrets.value.
        start_date: The start date for incremental loading.
            Defaults to January 1, 2020 (DEFAULT_START_DATE).
        end_date: Optional end date for incremental loading.
            If not provided, loads all data from start_date to present.

    Returns:
        Sequence of DLT resources for different Intercom endpoints.

    Example:
        >>> source = intercom_source(
        ...     credentials=IntercomCredentialsAccessToken(
        ...         access_token="your_token",
        ...         region="us"
        ...     ),
        ...     start_date=datetime(2024, 1, 1)
        ... )
    """
    # Initialize API client
    api_client = IntercomAPIClient(credentials)

    # Normalize dates to pendulum, falling back to the module-level default
    # start date when none is supplied. (Previously this re-imported
    # DEFAULT_START_DATE inside the function, shadowing the top-level import.)
    start_date_obj = ensure_pendulum_datetime(start_date or DEFAULT_START_DATE)
    end_date_obj = ensure_pendulum_datetime(end_date) if end_date else None

    # Intercom filters on unix timestamps, so convert once up front.
    start_timestamp = convert_datetime_to_timestamp(start_date_obj)
    end_timestamp = (
        convert_datetime_to_timestamp(end_date_obj) if end_date_obj else None
    )

    # Transform function mapping used by RESOURCE_CONFIGS entries.
    transform_functions = {
        "transform_contact": transform_contact,
        "transform_company": transform_company,
        "transform_conversation": transform_conversation,
    }

    # Generate all resources from configuration. Each factory returned by
    # create_resource_from_config must be called to obtain the DLT resource.
    return [
        create_resource_from_config(
            resource_name,
            config,
            api_client,
            start_timestamp,
            end_timestamp,
            transform_functions,
        )()
        for resource_name, config in RESOURCE_CONFIGS.items()
    ]
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def intercom(
    api_key: str,
    region: str = "us",
    start_date: Optional[TAnyDateTime] = DEFAULT_START_DATE,
    end_date: Optional[TAnyDateTime] = None,
) -> DltSource:
    """Build an Intercom source from a plain access token.

    Convenience wrapper around :func:`intercom_source` that constructs the
    access-token credentials object for the caller.

    Args:
        api_key: Intercom API access token.
        region: Data region ("us", "eu", or "au"). Defaults to "us".
        start_date: Start date for incremental loading.
        end_date: Optional end date for incremental loading.

    Returns:
        The configured DLT source.

    Example:
        >>> source = intercom(
        ...     api_key="your_access_token",
        ...     region="us",
        ...     start_date=datetime(2024, 1, 1)
        ... )
    """
    token_credentials = IntercomCredentialsAccessToken(
        access_token=api_key, region=region
    )
    return intercom_source(
        credentials=token_credentials,
        start_date=start_date,
        end_date=end_date,
    )
|
|
@@ -0,0 +1,674 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Helper functions and API client for Intercom integration.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
from enum import Enum
|
|
7
|
+
from typing import Any, Callable, Dict, Iterator, Optional, Union
|
|
8
|
+
|
|
9
|
+
from dlt.common.typing import TDataItem, TDataItems, TSecretValue
|
|
10
|
+
|
|
11
|
+
from ingestr.src.http_client import create_client
|
|
12
|
+
|
|
13
|
+
from .settings import (
|
|
14
|
+
API_VERSION,
|
|
15
|
+
DEFAULT_PAGE_SIZE,
|
|
16
|
+
REGIONAL_ENDPOINTS,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class PaginationType(Enum):
    """Types of pagination supported by Intercom API.

    Used by ``IntercomAPIClient.get_pages`` to select the paging strategy
    for a given endpoint.
    """

    CURSOR = "cursor"  # cursor pages via the "starting_after" parameter
    SCROLL = "scroll"  # scroll API, used for large exports
    SIMPLE = "simple"  # No pagination, single page
    SEARCH = "search"  # Search API pagination
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class IntercomCredentials:
    """Base class for Intercom credentials.

    Validates the data-residency region and exposes the matching API host.
    """

    def __init__(self, region: str = "us"):
        self.region = region
        # Reject regions that have no known API endpoint.
        if region not in REGIONAL_ENDPOINTS:
            raise ValueError(
                f"Invalid region: {region}. Must be one of {list(REGIONAL_ENDPOINTS.keys())}"
            )

    @property
    def base_url(self) -> str:
        """Base API URL for this credential's region."""
        return REGIONAL_ENDPOINTS[self.region]
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
@dataclass
class IntercomCredentialsAccessToken(IntercomCredentials):
    """Credentials for Intercom API using Access Token authentication."""

    # Personal/long-lived access token sent as a Bearer header by the client.
    access_token: TSecretValue
    # Data-residency region: us, eu, or au.
    region: str = "us"

    def __post_init__(self):
        # Run the base-class region validation after dataclass init.
        super().__init__(self.region)
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass
class IntercomCredentialsOAuth(IntercomCredentials):
    """Credentials for Intercom API using OAuth authentication."""

    # OAuth access token sent as a Bearer header by the client.
    oauth_token: TSecretValue
    # Data-residency region: us, eu, or au.
    region: str = "us"

    def __post_init__(self):
        # Run the base-class region validation after dataclass init.
        super().__init__(self.region)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
TIntercomCredentials = Union[IntercomCredentialsAccessToken, IntercomCredentialsOAuth]
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class IntercomAPIClient:
    """
    API client for making requests to Intercom API.
    Handles authentication, pagination, and rate limiting.
    """

    def __init__(self, credentials: TIntercomCredentials):
        """
        Initialize the Intercom API client.

        Args:
            credentials: Intercom API credentials (access-token or OAuth variant)

        Raises:
            TypeError: if credentials is neither supported variant.
        """
        self.credentials = credentials
        self.base_url = credentials.base_url

        # Set up authentication headers
        self.headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Intercom-Version": API_VERSION,  # REQUIRED header
        }

        # Both credential variants authenticate via a Bearer token; only the
        # attribute holding the token differs.
        if isinstance(credentials, IntercomCredentialsAccessToken):
            self.headers["Authorization"] = f"Bearer {credentials.access_token}"
        elif isinstance(credentials, IntercomCredentialsOAuth):
            self.headers["Authorization"] = f"Bearer {credentials.oauth_token}"
        else:
            raise TypeError(
                "Invalid credentials type. Must be IntercomCredentialsAccessToken or IntercomCredentialsOAuth"
            )

        # Create HTTP client with rate limit retry for 429 status codes
        # (502/503 are retried as transient gateway errors as well).
        self.client = create_client(retry_status_codes=[429, 502, 503])

    def _make_request(
        self,
        method: str,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        json_data: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Make a request to the Intercom API.

        Args:
            method: HTTP method (GET, POST, etc.)
            endpoint: API endpoint path (appended verbatim to base_url)
            params: Query parameters
            json_data: JSON body data

        Returns:
            Response JSON data

        Raises:
            Exception: on any HTTP status >= 400 (message includes status
                code and response body).
        """
        url = f"{self.base_url}{endpoint}"

        if method.upper() == "GET":
            response = self.client.get(url, headers=self.headers, params=params)
        elif method.upper() == "POST":
            response = self.client.post(
                url, headers=self.headers, json=json_data, params=params
            )
        else:
            # Fallback for any other verb (DELETE, PUT, ...).
            response = self.client.request(
                method, url, headers=self.headers, json=json_data, params=params
            )

        # The create_client already handles rate limiting (429) with retries
        # Just check for other errors
        if response.status_code >= 400:
            error_msg = f"Intercom API error {response.status_code}: {response.text}"
            raise Exception(error_msg)

        return response.json()

    def get_pages(
        self,
        endpoint: str,
        data_key: str,
        pagination_type: PaginationType,
        params: Optional[Dict[str, Any]] = None,
        search_query: Optional[Dict[str, Any]] = None,
    ) -> Iterator[TDataItems]:
        """
        Get paginated data from an Intercom endpoint.

        Args:
            endpoint: API endpoint path
            data_key: Key in response containing the data items
            pagination_type: Type of pagination to use
            params: Query parameters
            search_query: Search query for search endpoints (required when
                pagination_type is SEARCH)

        Yields:
            Lists of data items from each page

        Raises:
            ValueError: if SEARCH pagination is requested without search_query.
        """
        params = params or {}

        if pagination_type == PaginationType.SIMPLE:
            # Single page, no pagination
            response = self._make_request("GET", endpoint, params)
            if data_key in response:
                yield response[data_key]
            return

        elif pagination_type == PaginationType.CURSOR:
            # Cursor-based pagination: follow pages.next.starting_after until
            # the API stops returning a cursor.
            params["per_page"] = params.get("per_page", DEFAULT_PAGE_SIZE)
            next_cursor = None

            while True:
                if next_cursor:
                    params["starting_after"] = next_cursor

                response = self._make_request("GET", endpoint, params)

                # Yield the data
                if data_key in response and response[data_key]:
                    yield response[data_key]

                # Check for next page
                pages_info = response.get("pages", {})
                if not pages_info.get("next"):
                    break

                next_cursor = pages_info.get("next", {}).get("starting_after")
                if not next_cursor:
                    break

        elif pagination_type == PaginationType.SCROLL:
            # Scroll API pagination (for large exports). First request hits the
            # plain endpoint; follow-ups hit "<endpoint>/scroll" with only the
            # scroll_param. NOTE(review): scroll sessions expire quickly
            # (see SCROLL_EXPIRY_SECONDS in settings) — slow consumers may
            # lose the session; verify against the Intercom scroll docs.
            scroll_param = None

            while True:
                scroll_endpoint = endpoint
                if scroll_param:
                    scroll_endpoint = f"{endpoint}/scroll"
                    params = {"scroll_param": scroll_param}

                response = self._make_request("GET", scroll_endpoint, params)

                # Yield the data
                if data_key in response and response[data_key]:
                    yield response[data_key]

                # Get next scroll parameter
                scroll_param = response.get("scroll_param")
                if not scroll_param:
                    break

        elif pagination_type == PaginationType.SEARCH:
            # Search API pagination: POST the query with a pagination object
            # and follow pages.next.starting_after.
            if not search_query:
                raise ValueError("Search query required for search pagination")

            pagination_info = search_query.get("pagination", {})
            pagination_info["per_page"] = pagination_info.get(
                "per_page", DEFAULT_PAGE_SIZE
            )

            while True:
                # Build search request
                request_data = {
                    "query": search_query.get("query", {}),
                    "pagination": pagination_info,
                }

                if "sort" in search_query:
                    request_data["sort"] = search_query["sort"]

                response = self._make_request("POST", endpoint, json_data=request_data)

                # Yield the data
                if data_key in response and response[data_key]:
                    yield response[data_key]

                # Check for next page
                pages_info = response.get("pages", {})
                if not pages_info.get("next"):
                    break

                next_cursor = pages_info.get("next", {}).get("starting_after")
                if not next_cursor:
                    break

                # Cursor is carried in the pagination object for the next POST.
                pagination_info["starting_after"] = next_cursor

    def get_single_resource(self, endpoint: str, resource_id: str) -> TDataItem:
        """
        Get a single resource by ID.

        Args:
            endpoint: Base endpoint path
            resource_id: Resource ID

        Returns:
            Resource data
        """
        return self._make_request("GET", f"{endpoint}/{resource_id}")

    def search(
        self,
        resource_type: str,
        query: Dict[str, Any],
        sort: Optional[Dict[str, str]] = None,
    ) -> Iterator[TDataItems]:
        """
        Search for resources using the Search API.

        Args:
            resource_type: Type of resource to search (contacts, companies, conversations)
            query: Search query following Intercom's query format
            sort: Optional sort configuration

        Yields:
            Lists of matching resources
        """
        endpoint = f"/{resource_type}/search"
        search_query = {"query": query}

        if sort:
            search_query["sort"] = sort

        # Search responses put matches under the "data" key.
        yield from self.get_pages(
            endpoint=endpoint,
            data_key="data",
            pagination_type=PaginationType.SEARCH,
            search_query=search_query,
        )
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def transform_contact(contact: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize a raw contact payload into a flat, consistent shape.

    Flattens the nested ``location`` object into ``location_*`` columns,
    summarizes the ``companies`` relationship into ``company_ids`` and
    ``companies_count``, and guarantees ``custom_attributes`` is present.
    The input dict is not mutated.

    Args:
        contact: Raw contact data from the API.

    Returns:
        Transformed contact data.
    """
    result = dict(contact)

    # Flatten nested location into scalar columns (drop the original object).
    if isinstance(result.get("location"), dict):
        location = result.pop("location")
        result["location_country"] = location.get("country")
        result["location_region"] = location.get("region")
        result["location_city"] = location.get("city")

    # Summarize the companies relationship; the raw object is kept as-is.
    companies = result.get("companies")
    if isinstance(companies, dict):
        linked = companies.get("data", [])
        result["company_ids"] = [entry.get("id") for entry in linked if entry.get("id")]
        result["companies_count"] = len(linked)

    # Guarantee custom_attributes exists so downstream schemas are stable.
    result.setdefault("custom_attributes", {})

    return result
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def transform_company(company: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize a raw company payload into a flat, consistent shape.

    Guarantees ``custom_attributes`` is present and flattens a nested
    ``plan`` object into ``plan_id`` / ``plan_name``. The input dict is
    not mutated.

    Args:
        company: Raw company data from the API.

    Returns:
        Transformed company data.
    """
    result = dict(company)

    # Guarantee custom_attributes exists so downstream schemas are stable.
    result.setdefault("custom_attributes", {})

    # Flatten the plan object into scalar columns (drop the original object).
    if isinstance(result.get("plan"), dict):
        plan = result.pop("plan")
        result["plan_id"] = plan.get("id")
        result["plan_name"] = plan.get("name")

    return result
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def transform_conversation(conversation: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize a raw conversation payload into a flat, consistent shape.

    Flattens the nested ``statistics`` object into top-level reply-time
    columns and derives ``conversation_parts_count`` from the
    ``conversation_parts`` object (which is kept as-is). The input dict
    is not mutated.

    Args:
        conversation: Raw conversation data from the API.

    Returns:
        Transformed conversation data.
    """
    result = dict(conversation)

    # Promote the statistics sub-object to scalar columns and drop it.
    stats = result.get("statistics")
    if isinstance(stats, dict):
        del result["statistics"]
        for stat_key in (
            "first_contact_reply_at",
            "first_admin_reply_at",
            "last_contact_reply_at",
            "last_admin_reply_at",
            "median_admin_reply_time",
            "mean_admin_reply_time",
        ):
            result[stat_key] = stats.get(stat_key)

    # Surface the parts count; the nested object itself is preserved.
    parts = result.get("conversation_parts")
    if isinstance(parts, dict):
        result["conversation_parts_count"] = parts.get("total_count", 0)

    return result
|
|
391
|
+
|
|
392
|
+
|
|
393
|
+
def convert_datetime_to_timestamp(dt_obj: Any) -> int:
    """Convert a datetime-like object to a Unix timestamp (seconds).

    Pendulum datetimes expose ``int_timestamp`` directly; stdlib datetimes
    are handled via ``timestamp()``.

    Args:
        dt_obj: DateTime object (pendulum or stdlib datetime).

    Returns:
        Unix timestamp as an integer.

    Raises:
        ValueError: when the object exposes neither timestamp accessor.
    """
    if hasattr(dt_obj, "int_timestamp"):
        # pendulum fast path: already an int
        return dt_obj.int_timestamp
    if hasattr(dt_obj, "timestamp"):
        # stdlib datetime: truncate the float seconds
        return int(dt_obj.timestamp())
    raise ValueError(f"Cannot convert {type(dt_obj)} to timestamp")
|
|
409
|
+
|
|
410
|
+
|
|
411
|
+
def create_search_resource(
    api_client: "IntercomAPIClient",
    resource_name: str,
    updated_at_incremental: Any,
    transform_func: Optional[Callable] = None,
) -> Iterator[TDataItems]:
    """Stream pages of a search-based incremental resource.

    Builds an ``updated_at`` range query from the DLT incremental state,
    runs it through the Search API, and yields each page (transformed when
    a transform function is supplied). Stops early once the incremental
    reports the end value is out of range.

    Args:
        api_client: Intercom API client.
        resource_name: Name of the resource (contacts, conversations).
        updated_at_incremental: DLT incremental object.
        transform_func: Optional per-item transformation function.

    Yields:
        Lists of (possibly transformed) resource records.
    """
    window_query = build_incremental_query(
        "updated_at",
        updated_at_incremental.last_value,
        updated_at_incremental.end_value,
    )

    for page in api_client.search(resource_name, window_query):
        if transform_func:
            yield [transform_func(record) for record in page]
        else:
            yield page

        # DLT flags when we have passed end_value; stop paging then.
        if updated_at_incremental.end_out_of_range:
            return
|
|
444
|
+
|
|
445
|
+
|
|
446
|
+
def create_tickets_resource(
    api_client: "IntercomAPIClient",
    updated_at_incremental: Any,
) -> Iterator[TDataItems]:
    """Stream tickets incrementally using the ``updated_since`` parameter.

    The tickets endpoint supports server-side filtering only on the lower
    bound; when an end value is configured, records beyond it are filtered
    out client-side and iteration stops once a page crosses the boundary.

    Args:
        api_client: Intercom API client.
        updated_at_incremental: DLT incremental object.

    Yields:
        Lists of ticket records within the incremental window.
    """
    params = {"updated_since": updated_at_incremental.last_value}
    end_timestamp = updated_at_incremental.end_value or None

    pages = api_client.get_pages(
        "/tickets", "tickets", PaginationType.CURSOR, params=params
    )
    for page in pages:
        if not end_timestamp:
            yield page
            continue

        # Split the page into tickets inside the window vs. past its end.
        within_window = []
        past_window = False
        for ticket in page:
            if ticket.get("updated_at", 0) <= end_timestamp:
                within_window.append(ticket)
            else:
                past_window = True

        if within_window:
            yield within_window

        # Once any ticket exceeds the end bound, later pages are out of range.
        if past_window:
            return
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
def create_pagination_resource(
    api_client: "IntercomAPIClient",
    endpoint: str,
    data_key: str,
    pagination_type: PaginationType,
    updated_at_incremental: Any,
    transform_func: Optional[Callable] = None,
    params: Optional[Dict[str, Any]] = None,
) -> Iterator[TDataItems]:
    """Stream a cursor/simple-paginated resource with client-side filtering.

    The endpoint itself cannot filter by ``updated_at``, so every page is
    fetched and records outside the incremental window are dropped here.

    Args:
        api_client: Intercom API client.
        endpoint: API endpoint path.
        data_key: Key in the response containing data.
        pagination_type: Type of pagination.
        updated_at_incremental: DLT incremental object.
        transform_func: Optional per-item transformation function.
        params: Additional query parameters.

    Yields:
        Lists of filtered (and possibly transformed) records.
    """
    lower_bound = updated_at_incremental.last_value
    upper_bound = updated_at_incremental.end_value

    for page in api_client.get_pages(
        endpoint, data_key, pagination_type, params=params
    ):
        kept = []
        for record in page:
            record_updated = record.get("updated_at", 0)
            # Drop records older than the incremental cursor.
            if record_updated < lower_bound:
                continue
            # Drop records beyond the configured end of the window.
            if upper_bound and record_updated > upper_bound:
                continue
            kept.append(transform_func(record) if transform_func else record)

        if kept:
            yield kept

        # DLT flags when we have passed end_value; stop paging then.
        if updated_at_incremental.end_out_of_range:
            return
|
|
529
|
+
|
|
530
|
+
|
|
531
|
+
def create_resource_from_config(
    resource_name: str,
    config: Dict[str, Any],
    api_client: "IntercomAPIClient",
    start_timestamp: int,
    end_timestamp: Optional[int],
    transform_functions: Dict[str, Callable],
) -> Any:
    """
    Create a DLT resource from configuration.

    Args:
        resource_name: Name of the resource
        config: Resource configuration dict (keys used here: "incremental",
            "type", "endpoint", "data_key", "pagination_type", and optionally
            "transform_func", "params", "columns")
        api_client: Intercom API client
        start_timestamp: Start timestamp for incremental loading
        end_timestamp: End timestamp for incremental loading
        transform_functions: Dict mapping transform function names to actual functions

    Returns:
        DLT resource function

    Raises:
        ValueError: (from the generated function, when iterated) if
            config["type"] is not one of search/pagination/tickets/simple.
    """
    import dlt

    # Determine write disposition: incremental resources merge, full
    # refreshes replace.
    write_disposition = "merge" if config["incremental"] else "replace"

    # Get transform function if specified. NOTE(review): an unknown name
    # silently resolves to None (no transform) — verify this is intended.
    transform_func = None
    if config.get("transform_func"):
        transform_func = transform_functions.get(config["transform_func"])

    def resource_function(
        # The incremental is bound as a DEFAULT argument on purpose: dlt
        # inspects the signature default to wire up incremental state. It is
        # evaluated once, here, at function-definition time.
        updated_at: Optional[dlt.sources.incremental[int]] = dlt.sources.incremental(
            "updated_at",
            initial_value=start_timestamp,
            end_value=end_timestamp,
            allow_external_schedulers=True,
        )
        if config["incremental"]
        else None,
    ) -> Iterator[TDataItems]:
        """
        Auto-generated resource function.

        Dispatches on config["type"] to the matching loader helper.
        """
        resource_type = config["type"]

        if resource_type == "search":
            yield from create_search_resource(
                api_client, resource_name, updated_at, transform_func
            )
        elif resource_type == "pagination":
            yield from create_pagination_resource(
                api_client,
                config["endpoint"],
                config["data_key"],
                getattr(PaginationType, config["pagination_type"].upper()),
                updated_at,
                transform_func,
                config.get("params"),
            )
        elif resource_type == "tickets":
            yield from create_tickets_resource(api_client, updated_at)
        elif resource_type == "simple":
            # Non-incremental resources
            yield from api_client.get_pages(
                config["endpoint"],
                config["data_key"],
                getattr(PaginationType, config["pagination_type"].upper()),
            )
        else:
            raise ValueError(f"Unknown resource type: {resource_type}")

    # For non-incremental resources, we need to return a function without parameters
    # (the updated_at default above would otherwise still appear in the signature).
    if not config["incremental"]:

        @dlt.resource(
            name=resource_name,
            primary_key="id",
            write_disposition="replace",
            columns=config.get("columns", {}),
        )
        def simple_resource_function() -> Iterator[TDataItems]:
            """
            Auto-generated simple resource function.
            """
            yield from api_client.get_pages(
                config["endpoint"],
                config["data_key"],
                getattr(PaginationType, config["pagination_type"].upper()),
            )

        return simple_resource_function

    # Apply the decorator to the function
    return dlt.resource(  # type: ignore[call-overload]
        resource_function,
        name=resource_name,
        primary_key="id",
        write_disposition=write_disposition,
        columns=config.get("columns", {}),
    )
|
|
633
|
+
|
|
634
|
+
|
|
635
|
+
def build_incremental_query(
    field: str,
    start_value: Any,
    end_value: Optional[Any] = None,
) -> Dict[str, Any]:
    """
    Build a search query for incremental loading via the Intercom Search API.

    Both bounds are EXCLUSIVE: records are matched with ``field > start_value``
    and, when an upper bound is given, ``field < end_value``. (The previous
    docstring said "inclusive", which contradicted the ``>`` / ``<`` operators
    actually emitted.)

    Args:
        field: Field to filter on (e.g. ``updated_at``).
        start_value: Lower bound (exclusive).
        end_value: Optional upper bound (exclusive).

    Returns:
        Query dict for the Intercom Search API: a single condition when only
        the lower bound applies, otherwise an ``AND`` of both conditions.
    """
    conditions = [
        {
            "field": field,
            "operator": ">",
            "value": start_value,
        }
    ]

    if end_value is not None:
        conditions.append(
            {
                "field": field,
                "operator": "<",
                "value": end_value,
            }
        )

    # A lone condition is returned as-is; two conditions are AND-combined
    # in the nested shape the Search API expects.
    if len(conditions) == 1:
        return conditions[0]
    return {
        "operator": "AND",
        "value": conditions,
    }
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Configuration settings and constants for Intercom API integration.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import Dict, List, Tuple
|
|
7
|
+
|
|
8
|
+
# API Version - REQUIRED for all requests
|
|
9
|
+
API_VERSION = "2.14"
|
|
10
|
+
|
|
11
|
+
# Default start date for incremental loading
|
|
12
|
+
DEFAULT_START_DATE = datetime(2020, 1, 1)
|
|
13
|
+
|
|
14
|
+
# Pagination settings
|
|
15
|
+
DEFAULT_PAGE_SIZE = 150
|
|
16
|
+
MAX_PAGE_SIZE = 150 # Intercom's maximum
|
|
17
|
+
SCROLL_EXPIRY_SECONDS = 60 # Scroll sessions expire after 1 minute
|
|
18
|
+
|
|
19
|
+
# Rate limiting settings
|
|
20
|
+
RATE_LIMIT_PER_10_SECONDS = 166
|
|
21
|
+
RATE_LIMIT_RETRY_AFTER_DEFAULT = 10
|
|
22
|
+
|
|
23
|
+
# Regional API endpoints
|
|
24
|
+
REGIONAL_ENDPOINTS = {
|
|
25
|
+
"us": "https://api.intercom.io",
|
|
26
|
+
"eu": "https://api.eu.intercom.io",
|
|
27
|
+
"au": "https://api.au.intercom.io",
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
# Resource configuration for automatic generation
# Format: resource_name -> config dict
#
# Keys used by the resource dispatcher:
#   type            - dispatch kind: "search", "pagination", "tickets" or "simple"
#   incremental     - False means the resource is loaded with
#                     write_disposition="replace" (full refresh)
#   transform_func  - name of a per-item transform helper, or None
#   endpoint        - REST path (pagination/simple types only)
#   data_key        - key in the response payload holding the records
#   pagination_type - name of a PaginationType member (upper-cased at lookup)
#   params          - extra query params for the endpoint, or None
#   columns         - dlt column hints (e.g. force nested fields to JSON)
RESOURCE_CONFIGS = {
    # Search-based incremental resources
    "contacts": {
        "type": "search",
        "incremental": True,
        "transform_func": "transform_contact",
        "columns": {
            "custom_attributes": {"data_type": "json"},
            "tags": {"data_type": "json"},
        },
    },
    "conversations": {
        "type": "search",
        "incremental": True,
        "transform_func": "transform_conversation",
        "columns": {
            "custom_attributes": {"data_type": "json"},
            "tags": {"data_type": "json"},
        },
    },
    # Pagination-based incremental resources
    "companies": {
        "type": "pagination",
        "endpoint": "/companies",
        "data_key": "data",
        "pagination_type": "cursor",
        "incremental": True,
        "transform_func": "transform_company",
        "params": {"per_page": 50},
        "columns": {
            "custom_attributes": {"data_type": "json"},
            "tags": {"data_type": "json"},
        },
    },
    "articles": {
        "type": "pagination",
        "endpoint": "/articles",
        "data_key": "data",
        "pagination_type": "cursor",
        "incremental": True,
        "transform_func": None,
        "params": None,
        "columns": {},
    },
    # Special case - tickets
    "tickets": {
        "type": "tickets",
        "incremental": True,
        "transform_func": None,
        "columns": {
            "ticket_attributes": {"data_type": "json"},
        },
    },
    # Simple replace resources (non-incremental)
    "tags": {
        "type": "simple",
        "endpoint": "/tags",
        "data_key": "data",
        "pagination_type": "simple",
        "incremental": False,
        "transform_func": None,
        "columns": {},
    },
    "segments": {
        "type": "simple",
        "endpoint": "/segments",
        "data_key": "segments",
        "pagination_type": "cursor",
        "incremental": False,
        "transform_func": None,
        "columns": {},
    },
    "teams": {
        "type": "simple",
        "endpoint": "/teams",
        "data_key": "teams",
        "pagination_type": "simple",
        "incremental": False,
        "transform_func": None,
        "columns": {},
    },
    "admins": {
        "type": "simple",
        "endpoint": "/admins",
        "data_key": "admins",
        "pagination_type": "simple",
        "incremental": False,
        "transform_func": None,
        "columns": {},
    },
    "data_attributes": {
        "type": "simple",
        "endpoint": "/data_attributes",
        "data_key": "data",
        "pagination_type": "cursor",
        "incremental": False,
        "transform_func": None,
        "columns": {},
    },
}
|
|
132
|
+
|
|
133
|
+
# Core endpoints with their configuration (kept for backwards compatibility)
# Format: (endpoint_path, data_key, supports_incremental, pagination_type)
# NOTE(review): superseded by RESOURCE_CONFIGS above for resource generation.
CORE_ENDPOINTS: Dict[str, Tuple[str, str, bool, str]] = {
    "contacts": ("/contacts", "data", True, "cursor"),
    "companies": ("/companies", "data", True, "cursor"),
    "conversations": ("/conversations", "conversations", True, "cursor"),
    "tickets": ("/tickets", "tickets", True, "cursor"),
    "admins": ("/admins", "admins", False, "simple"),
    "teams": ("/teams", "teams", False, "simple"),
    "tags": ("/tags", "data", False, "simple"),
    "segments": ("/segments", "segments", False, "cursor"),
    "articles": ("/articles", "data", True, "cursor"),
    "collections": ("/help_center/collections", "data", False, "cursor"),
    "data_attributes": ("/data_attributes", "data", False, "cursor"),
}

# Incremental endpoints using search API
SEARCH_ENDPOINTS: Dict[str, str] = {
    "contacts_search": "/contacts/search",
    "companies_search": "/companies/search",
    "conversations_search": "/conversations/search",
}

# Special endpoints requiring different handling
SCROLL_ENDPOINTS: List[str] = [
    "companies",  # Can use scroll for large exports
]

# Event tracking endpoint
EVENTS_ENDPOINT = "/events"

# Ticket fields endpoint for custom field mapping
# The {ticket_type_id} placeholder is filled in per ticket type.
TICKET_FIELDS_ENDPOINT = "/ticket_types/{ticket_type_id}/attributes"

# Default fields to retrieve for each resource type
DEFAULT_CONTACT_FIELDS = [
    "id",
    "type",
    "external_id",
    "email",
    "phone",
    "name",
    "created_at",
    "updated_at",
    "signed_up_at",
    "last_seen_at",
    "last_contacted_at",
    "last_email_opened_at",
    "last_email_clicked_at",
    "browser",
    "browser_language",
    "browser_version",
    "location",
    "os",
    "role",
    "custom_attributes",
    "tags",
    "companies",
]

DEFAULT_COMPANY_FIELDS = [
    "id",
    "type",
    "company_id",
    "name",
    "plan",
    "size",
    "website",
    "industry",
    "created_at",
    "updated_at",
    "monthly_spend",
    "session_count",
    "user_count",
    "custom_attributes",
    "tags",
]

DEFAULT_CONVERSATION_FIELDS = [
    "id",
    "type",
    "created_at",
    "updated_at",
    "waiting_since",
    "snoozed_until",
    "state",
    "open",
    "read",
    "priority",
    "admin_assignee_id",
    "team_assignee_id",
    "tags",
    "conversation_rating",
    "source",
    "contacts",
    "teammates",
    "custom_attributes",
    "first_contact_reply",
    "sla_applied",
    "statistics",
    "conversation_parts",
]

DEFAULT_TICKET_FIELDS = [
    "id",
    "type",
    "ticket_id",
    "category",
    "ticket_attributes",
    "ticket_state",
    "ticket_type",
    "created_at",
    "updated_at",
    "ticket_parts",
    "contacts",
    "admin_assignee_id",
    "team_assignee_id",
    "open",
    "snoozed_until",
]

# Resources that support custom attributes
SUPPORTS_CUSTOM_ATTRIBUTES = [
    "contacts",
    "companies",
    "conversations",
]

# Maximum limits
MAX_CUSTOM_ATTRIBUTES_PER_RESOURCE = 100
MAX_EVENT_TYPES_PER_WORKSPACE = 120
MAX_CONVERSATION_PARTS = 500
MAX_SEARCH_RESULTS = 10000

# Field type mapping for custom attributes
# Maps Intercom attribute types to dlt column data types.
INTERCOM_TO_DLT_TYPE_MAPPING = {
    "string": "text",
    "integer": "bigint",
    "float": "double",
    "boolean": "bool",
    "date": "timestamp",
    "datetime": "timestamp",
    "object": "json",
    "list": "json",
}
|
ingestr/src/sources.py
CHANGED
|
@@ -3703,3 +3703,76 @@ class AnthropicSource:
|
|
|
3703
3703
|
initial_start_date=start_date,
|
|
3704
3704
|
end_date=end_date,
|
|
3705
3705
|
).with_resources(table)
|
|
3706
|
+
|
|
3707
|
+
|
|
3708
|
+
class IntercomSource:
    """Factory that turns an ``intercom://`` URI into a dlt source.

    Accepted URI forms:
        intercom://?access_token=<token>&region=<us|eu|au>
        intercom://?oauth_token=<token>&region=<us|eu|au>
    """

    def handles_incrementality(self) -> bool:
        # Incrementality is handled inside the source itself (see the
        # start/end dates passed to intercom_source below).
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        query = parse_qs(urlparse(uri).query)

        # Authentication: either a long-lived access token or an OAuth token.
        access_token = query.get("access_token")
        oauth_token = query.get("oauth_token")
        region = query.get("region", ["us"])[0]

        if not (access_token or oauth_token):
            raise MissingValueError("access_token or oauth_token", "Intercom")

        # Only these resources are exposed as tables.
        if table not in {
            "contacts",
            "companies",
            "conversations",
            "tickets",
            "tags",
            "segments",
            "teams",
            "admins",
            "articles",
            "data_attributes",
        }:
            raise UnsupportedResourceError(table, "Intercom")

        # Interval bounds: the start defaults to 2020-01-01; the end is only
        # normalized when the caller supplied one.
        raw_start = kwargs.get("interval_start")
        start_date = (
            ensure_pendulum_datetime(raw_start)
            if raw_start
            else pendulum.datetime(2020, 1, 1)
        )

        end_date = kwargs.get("interval_end")
        if end_date:
            end_date = ensure_pendulum_datetime(end_date)

        # Import and construct the underlying dlt source.
        from ingestr.src.intercom import (
            IntercomCredentialsAccessToken,
            IntercomCredentialsOAuth,
            TIntercomCredentials,
            intercom_source,
        )

        credentials: TIntercomCredentials
        if access_token:
            credentials = IntercomCredentialsAccessToken(
                access_token=access_token[0], region=region
            )
        elif oauth_token:
            credentials = IntercomCredentialsOAuth(
                oauth_token=oauth_token[0], region=region
            )
        else:
            raise MissingValueError("oauth_token", "Intercom")

        return intercom_source(
            credentials=credentials,
            start_date=start_date,
            end_date=end_date,
        ).with_resources(table)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.14.0
|
|
3
|
+
Version: 0.14.2
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -2,17 +2,17 @@ ingestr/conftest.py,sha256=OE2yxeTCosS9CUFVuqNypm-2ftYvVBeeq7egm3878cI,1981
|
|
|
2
2
|
ingestr/main.py,sha256=qo0g3wCFl8a_1jUwXagX8L1Q8PKKQlTF7md9pfnzW0Y,27155
|
|
3
3
|
ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
|
|
4
4
|
ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
|
|
5
|
-
ingestr/src/buildinfo.py,sha256=
|
|
5
|
+
ingestr/src/buildinfo.py,sha256=etAF7mJbjXgR22CNwj2pF9fzWwF_eMqGhIYdwvHnftw,20
|
|
6
6
|
ingestr/src/destinations.py,sha256=BE8tK69Aq7FLBV_j6ijMhZCRIUYpSTnk7FTZMZW5Q5M,28557
|
|
7
7
|
ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
|
|
8
|
-
ingestr/src/factory.py,sha256=
|
|
8
|
+
ingestr/src/factory.py,sha256=u6IxjM9Rt6klr0Sa44OiDO-zFTJMvp84hFX3vZKAP4M,7432
|
|
9
9
|
ingestr/src/filters.py,sha256=0n0sNAVG_f-B_1r7lW5iNtw9z_G1bxWzPaiL1i6tnbU,1665
|
|
10
10
|
ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
|
|
11
11
|
ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
|
|
12
12
|
ingestr/src/masking.py,sha256=VN0LdfvExhQ1bZMRylGtaBUIoH-vjuIUmRnYKwo3yiY,11358
|
|
13
13
|
ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
|
|
14
14
|
ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
|
|
15
|
-
ingestr/src/sources.py,sha256=
|
|
15
|
+
ingestr/src/sources.py,sha256=gcweUJVZ4ScQzSeu_pCm4kgsLUymNuF6K0qWy6OE6OM,132655
|
|
16
16
|
ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
|
|
17
17
|
ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
|
|
18
18
|
ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
|
|
@@ -56,8 +56,8 @@ ingestr/src/facebook_ads/utils.py,sha256=ES2ylPoW3j3fjp6OMUgp21n1cG1OktXsmWWMk5v
|
|
|
56
56
|
ingestr/src/filesystem/__init__.py,sha256=42YAOHQxZ7TkTXC1eeaLUJpjqJ3l7DH7C8j927pV4pc,4353
|
|
57
57
|
ingestr/src/filesystem/helpers.py,sha256=bg0muSHZr3hMa8H4jN2-LGWzI-SUoKlQNiWJ74-YYms,3211
|
|
58
58
|
ingestr/src/filesystem/readers.py,sha256=a0fKkaRpnAOGsXI3EBNYZa7x6tlmAOsgRzb883StY30,3987
|
|
59
|
-
ingestr/src/fluxx/__init__.py,sha256=
|
|
60
|
-
ingestr/src/fluxx/helpers.py,sha256=
|
|
59
|
+
ingestr/src/fluxx/__init__.py,sha256=xDtkqt4_f9LgVNt-APCQ7tsJ8gM6msEroyKAUToNC8A,566053
|
|
60
|
+
ingestr/src/fluxx/helpers.py,sha256=zJmlQWwiv9snnLqTygiWVZy7-0rGi_K427hRUuZeHEM,6352
|
|
61
61
|
ingestr/src/frankfurter/__init__.py,sha256=gOdL8ZqgHHYZByjtfE3WX3BTRHdYqyn9FpQwzDHSAx0,5089
|
|
62
62
|
ingestr/src/frankfurter/helpers.py,sha256=SpRr992OcSf7IDI5y-ToUdO6m6sGpqFz59LTY0ojchI,1502
|
|
63
63
|
ingestr/src/freshdesk/__init__.py,sha256=ukyorgCNsW_snzsYBDsr3Q0WB8f-to9Fk0enqHHFQlk,3087
|
|
@@ -88,6 +88,9 @@ ingestr/src/hubspot/helpers.py,sha256=k2b-lhxqBNKHoOSHoHegFSsk8xxjjGA0I04V0XyX2b
|
|
|
88
88
|
ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
|
|
89
89
|
ingestr/src/influxdb/__init__.py,sha256=cYsGnDPNHRTe9pp14ogDQgPTCI9TOdyJm1MaNuQLHdk,1290
|
|
90
90
|
ingestr/src/influxdb/client.py,sha256=hCxSNREAWWEvvAV3RQbKaWp2-e_7EE8xmVRjTwLFEFo,1230
|
|
91
|
+
ingestr/src/intercom/__init__.py,sha256=rqorWFwcfcTYrCrpSsPPM2sGOc7qq5XbYZRCDVJXjyI,4451
|
|
92
|
+
ingestr/src/intercom/helpers.py,sha256=IljM0x4K70nuahidZaP7mtIlsHkPIcZq56j9mmuSck4,21074
|
|
93
|
+
ingestr/src/intercom/settings.py,sha256=BU-jmlIOA2BrR-mWyjL6QXhiNqdY3WrNjzaWd5MCKLk,6983
|
|
91
94
|
ingestr/src/isoc_pulse/__init__.py,sha256=9b4eN4faatpiwTuRNPuYcEt1hEFDEjua9XhfakUigBk,4648
|
|
92
95
|
ingestr/src/kafka/__init__.py,sha256=QUHsGmdv5_E-3z0GDHXvbk39puwuGDBsyYSDhvbA89E,3595
|
|
93
96
|
ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
|
|
@@ -166,8 +169,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
|
|
|
166
169
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
167
170
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
168
171
|
ingestr/tests/unit/test_smartsheets.py,sha256=zf3DXT29Y4TH2lNPBFphdjlaelUUyPJcsW2UO68RzDs,4862
|
|
169
|
-
ingestr-0.14.
|
|
170
|
-
ingestr-0.14.
|
|
171
|
-
ingestr-0.14.
|
|
172
|
-
ingestr-0.14.
|
|
173
|
-
ingestr-0.14.
|
|
172
|
+
ingestr-0.14.2.dist-info/METADATA,sha256=-UyaGpKgZSYETEBSfKLzRXn5vG3VA9qmofpmoD-gGa0,15265
|
|
173
|
+
ingestr-0.14.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
174
|
+
ingestr-0.14.2.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
175
|
+
ingestr-0.14.2.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
176
|
+
ingestr-0.14.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|