ingestr 0.14.1__py3-none-any.whl → 0.14.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.14.1"
1
+ version = "v0.14.2"
ingestr/src/factory.py CHANGED
@@ -54,6 +54,7 @@ from ingestr.src.sources import (
54
54
  GorgiasSource,
55
55
  HubspotSource,
56
56
  InfluxDBSource,
57
+ IntercomSource,
57
58
  IsocPulseSource,
58
59
  KafkaSource,
59
60
  KinesisSource,
@@ -166,6 +167,7 @@ class SourceDestinationFactory:
166
167
  "fluxx": FluxxSource,
167
168
  "slack": SlackSource,
168
169
  "hubspot": HubspotSource,
170
+ "intercom": IntercomSource,
169
171
  "airtable": AirtableSource,
170
172
  "klaviyo": KlaviyoSource,
171
173
  "mixpanel": MixpanelSource,
@@ -0,0 +1,142 @@
1
+ """
2
+ Intercom source implementation for data ingestion.
3
+
4
+ This module provides DLT sources for retrieving data from Intercom API endpoints
5
+ including contacts, companies, conversations, tickets, and more.
6
+ """
7
+
8
+ from typing import Optional, Sequence
9
+
10
+ import dlt
11
+ from dlt.common.time import ensure_pendulum_datetime
12
+ from dlt.common.typing import TAnyDateTime
13
+ from dlt.sources import DltResource, DltSource
14
+
15
+ from .helpers import (
16
+ IntercomAPIClient,
17
+ IntercomCredentialsAccessToken,
18
+ TIntercomCredentials,
19
+ convert_datetime_to_timestamp,
20
+ create_resource_from_config,
21
+ transform_company,
22
+ transform_contact,
23
+ transform_conversation,
24
+ )
25
+ from .helpers import (
26
+ IntercomCredentialsOAuth as IntercomCredentialsOAuth,
27
+ )
28
+ from .settings import (
29
+ DEFAULT_START_DATE,
30
+ RESOURCE_CONFIGS,
31
+ )
32
+
33
+
34
@dlt.source(name="intercom", max_table_nesting=0)
def intercom_source(
    credentials: TIntercomCredentials = dlt.secrets.value,
    start_date: Optional[TAnyDateTime] = DEFAULT_START_DATE,
    end_date: Optional[TAnyDateTime] = None,
) -> Sequence[DltResource]:
    """
    Build one DLT resource per entry in ``RESOURCE_CONFIGS``.

    The date window is converted to Unix timestamps and handed, together with
    a single shared API client, to ``create_resource_from_config`` for every
    configured resource.

    Args:
        credentials: Intercom API credentials (AccessToken or OAuth).
            Defaults to dlt.secrets.value.
        start_date: Lower bound for incremental loading. A falsy value
            (e.g. ``None``) falls back to ``DEFAULT_START_DATE``.
        end_date: Optional upper bound for incremental loading. When omitted,
            no end timestamp is passed to the resources.

    Returns:
        List of DLT resources, one per configured Intercom endpoint.

    Example:
        >>> from datetime import datetime
        >>> source = intercom_source(
        ...     credentials=IntercomCredentialsAccessToken(
        ...         access_token="your_token",
        ...         region="us"
        ...     ),
        ...     start_date=datetime(2024, 1, 1)
        ... )
    """
    api_client = IntercomAPIClient(credentials)

    # Resolve the window once; the module-level DEFAULT_START_DATE import is
    # reused instead of re-importing it inside the function.
    start_date_obj = ensure_pendulum_datetime(start_date or DEFAULT_START_DATE)
    end_date_obj = ensure_pendulum_datetime(end_date) if end_date else None

    # The resources filter on integer Unix timestamps.
    start_timestamp = convert_datetime_to_timestamp(start_date_obj)
    end_timestamp = (
        convert_datetime_to_timestamp(end_date_obj) if end_date_obj else None
    )

    # Names used by RESOURCE_CONFIGS["..."]["transform_func"] -> callables.
    transform_functions = {
        "transform_contact": transform_contact,
        "transform_company": transform_company,
        "transform_conversation": transform_conversation,
    }

    resources = []
    for resource_name, config in RESOURCE_CONFIGS.items():
        resource_func = create_resource_from_config(
            resource_name,
            config,
            api_client,
            start_timestamp,
            end_timestamp,
            transform_functions,
        )
        # The factory returns the decorated function; calling it produces the
        # resource object that is returned to dlt.
        resources.append(resource_func())

    return resources
109
+
110
+
111
def intercom(
    api_key: str,
    region: str = "us",
    start_date: Optional[TAnyDateTime] = DEFAULT_START_DATE,
    end_date: Optional[TAnyDateTime] = None,
) -> DltSource:
    """
    Convenience wrapper that builds the Intercom source from an access token.

    Args:
        api_key: Intercom API access token.
        region: Data region (us, eu, or au). Defaults to "us".
        start_date: Start date for incremental loading.
        end_date: Optional end date for incremental loading.

    Returns:
        The DLT source produced by ``intercom_source`` for these credentials.

    Example:
        >>> from datetime import datetime
        >>> source = intercom(
        ...     api_key="your_access_token",
        ...     region="us",
        ...     start_date=datetime(2024, 1, 1)
        ... )
    """
    credentials = IntercomCredentialsAccessToken(access_token=api_key, region=region)

    return intercom_source(
        credentials=credentials,
        start_date=start_date,
        end_date=end_date,
    )
@@ -0,0 +1,674 @@
1
+ """
2
+ Helper functions and API client for Intercom integration.
3
+ """
4
+
5
+ from dataclasses import dataclass
6
+ from enum import Enum
7
+ from typing import Any, Callable, Dict, Iterator, Optional, Union
8
+
9
+ from dlt.common.typing import TDataItem, TDataItems, TSecretValue
10
+
11
+ from ingestr.src.http_client import create_client
12
+
13
+ from .settings import (
14
+ API_VERSION,
15
+ DEFAULT_PAGE_SIZE,
16
+ REGIONAL_ENDPOINTS,
17
+ )
18
+
19
+
20
class PaginationType(Enum):
    """Pagination strategies understood by ``IntercomAPIClient.get_pages``."""

    # Follow pages.next.starting_after on GET requests
    CURSOR = "cursor"
    # Follow scroll_param via the "/scroll" sub-endpoint
    SCROLL = "scroll"
    # No pagination, single page
    SIMPLE = "simple"
    # Search API pagination (POST body carries the cursor)
    SEARCH = "search"
27
+
28
+
29
class IntercomCredentials:
    """Common base for Intercom credentials: validates and stores the region."""

    def __init__(self, region: str = "us"):
        """
        Store ``region`` and reject values missing from ``REGIONAL_ENDPOINTS``.

        Raises:
            ValueError: If ``region`` is not a configured region key.
        """
        self.region = region
        if self.region in REGIONAL_ENDPOINTS:
            return

        allowed_regions = list(REGIONAL_ENDPOINTS.keys())
        raise ValueError(
            f"Invalid region: {self.region}. Must be one of {allowed_regions}"
        )

    @property
    def base_url(self) -> str:
        """Return the API base URL configured for this region."""
        endpoint = REGIONAL_ENDPOINTS[self.region]
        return endpoint
43
+
44
+
45
@dataclass
class IntercomCredentialsAccessToken(IntercomCredentials):
    """Credentials for Intercom API using Access Token authentication."""

    access_token: TSecretValue
    region: str = "us"  # us, eu, or au

    def __post_init__(self):
        # The dataclass-generated __init__ does not call the base initializer,
        # so run it here to validate the region.
        super().__init__(self.region)

    def __repr__(self) -> str:
        # Explicit repr so the secret never appears in logs or tracebacks;
        # @dataclass keeps a __repr__ that the class defines itself.
        return f"{type(self).__name__}(access_token='***', region={self.region!r})"
54
+
55
+
56
@dataclass
class IntercomCredentialsOAuth(IntercomCredentials):
    """Credentials for Intercom API using OAuth authentication."""

    oauth_token: TSecretValue
    region: str = "us"  # us, eu, or au

    def __post_init__(self):
        # The dataclass-generated __init__ does not call the base initializer,
        # so run it here to validate the region.
        super().__init__(self.region)

    def __repr__(self) -> str:
        # Explicit repr so the secret never appears in logs or tracebacks;
        # @dataclass keeps a __repr__ that the class defines itself.
        return f"{type(self).__name__}(oauth_token='***', region={self.region!r})"
65
+
66
+
67
+ TIntercomCredentials = Union[IntercomCredentialsAccessToken, IntercomCredentialsOAuth]
68
+
69
+
70
class IntercomAPIClient:
    """
    API client for making requests to Intercom API.

    Builds the authentication/version headers once, sends requests through
    the shared ingestr HTTP client and exposes generators for the pagination
    schemes enumerated in ``PaginationType``.
    """

    def __init__(self, credentials: TIntercomCredentials):
        """
        Initialize the Intercom API client.

        Args:
            credentials: Intercom API credentials; their ``base_url`` selects
                the regional API host.

        Raises:
            TypeError: If ``credentials`` is not one of the supported
                credential classes.
        """
        # Validate the credential type first so an unsupported object fails
        # with the documented TypeError rather than an attribute lookup error.
        if isinstance(credentials, IntercomCredentialsAccessToken):
            token = credentials.access_token
        elif isinstance(credentials, IntercomCredentialsOAuth):
            token = credentials.oauth_token
        else:
            raise TypeError(
                "Invalid credentials type. Must be IntercomCredentialsAccessToken or IntercomCredentialsOAuth"
            )

        self.credentials = credentials
        self.base_url = credentials.base_url

        self.headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Intercom-Version": API_VERSION,  # REQUIRED header
            "Authorization": f"Bearer {token}",
        }

        # Retries cover rate limiting (429) as well as 502/503 responses.
        self.client = create_client(retry_status_codes=[429, 502, 503])

    def _make_request(
        self,
        method: str,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        json_data: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """
        Make a request to the Intercom API.

        Args:
            method: HTTP method (GET, POST, etc.)
            endpoint: API endpoint path, appended to the regional base URL
            params: Query parameters
            json_data: JSON body data (not sent for GET requests)

        Returns:
            Response JSON data

        Raises:
            Exception: If the response status code is 400 or above.
        """
        url = f"{self.base_url}{endpoint}"
        verb = method.upper()

        if verb == "GET":
            response = self.client.get(url, headers=self.headers, params=params)
        elif verb == "POST":
            response = self.client.post(
                url, headers=self.headers, json=json_data, params=params
            )
        else:
            response = self.client.request(
                method, url, headers=self.headers, json=json_data, params=params
            )

        # Retryable statuses are handled by the HTTP client; anything that
        # still comes back as an error is surfaced to the caller.
        if response.status_code >= 400:
            error_msg = f"Intercom API error {response.status_code}: {response.text}"
            raise Exception(error_msg)

        return response.json()

    def get_pages(
        self,
        endpoint: str,
        data_key: str,
        pagination_type: PaginationType,
        params: Optional[Dict[str, Any]] = None,
        search_query: Optional[Dict[str, Any]] = None,
    ) -> Iterator[TDataItems]:
        """
        Get paginated data from an Intercom endpoint.

        Neither ``params`` nor ``search_query`` is modified: pagination state
        is kept in private copies, so callers may pass shared dictionaries.

        Args:
            endpoint: API endpoint path
            data_key: Key in response containing the data items
            pagination_type: Type of pagination to use
            params: Query parameters
            search_query: Search query for search endpoints

        Yields:
            Lists of data items from each page

        Raises:
            ValueError: If search pagination is requested without a query.
        """
        # Copy before adding per_page/starting_after; writing into the
        # caller's dict would carry a stale cursor into the next run.
        request_params = dict(params) if params else {}

        if pagination_type == PaginationType.SIMPLE:
            # Single page, no pagination
            response = self._make_request("GET", endpoint, request_params)
            if data_key in response:
                yield response[data_key]
        elif pagination_type == PaginationType.CURSOR:
            yield from self._iter_cursor_pages(endpoint, data_key, request_params)
        elif pagination_type == PaginationType.SCROLL:
            yield from self._iter_scroll_pages(endpoint, data_key, request_params)
        elif pagination_type == PaginationType.SEARCH:
            if not search_query:
                raise ValueError("Search query required for search pagination")
            yield from self._iter_search_pages(endpoint, data_key, search_query)

    @staticmethod
    def _next_cursor(response: Dict[str, Any]) -> Optional[str]:
        """Return ``pages.next.starting_after`` from a response, or None."""
        next_page = (response.get("pages") or {}).get("next")
        if not next_page:
            return None
        return next_page.get("starting_after") or None

    def _iter_cursor_pages(
        self, endpoint: str, data_key: str, params: Dict[str, Any]
    ) -> Iterator[TDataItems]:
        """Walk GET pages by following the ``starting_after`` cursor."""
        params.setdefault("per_page", DEFAULT_PAGE_SIZE)

        while True:
            response = self._make_request("GET", endpoint, params)

            items = response.get(data_key)
            if items:
                yield items

            cursor = self._next_cursor(response)
            if not cursor:
                return
            params["starting_after"] = cursor

    def _iter_scroll_pages(
        self, endpoint: str, data_key: str, params: Dict[str, Any]
    ) -> Iterator[TDataItems]:
        """Walk scroll pages by following ``scroll_param``."""
        # NOTE(review): the first request targets `endpoint`, only follow-ups
        # use "<endpoint>/scroll" — confirm against the Intercom scroll docs.
        scroll_param = None

        while True:
            if scroll_param:
                response = self._make_request(
                    "GET", f"{endpoint}/scroll", {"scroll_param": scroll_param}
                )
            else:
                response = self._make_request("GET", endpoint, params)

            items = response.get(data_key)
            if not items:
                # Stop on an empty page: a response that carries a
                # scroll_param but no items would otherwise trigger yet
                # another request.
                return
            yield items

            scroll_param = response.get("scroll_param")
            if not scroll_param:
                return

    def _iter_search_pages(
        self, endpoint: str, data_key: str, search_query: Dict[str, Any]
    ) -> Iterator[TDataItems]:
        """Walk Search API pages; the cursor travels in the POST body."""
        # Private copy so the caller's pagination settings stay untouched.
        pagination_info = dict(search_query.get("pagination") or {})
        pagination_info.setdefault("per_page", DEFAULT_PAGE_SIZE)

        while True:
            request_data = {
                "query": search_query.get("query", {}),
                "pagination": pagination_info,
            }
            if "sort" in search_query:
                request_data["sort"] = search_query["sort"]

            response = self._make_request("POST", endpoint, json_data=request_data)

            items = response.get(data_key)
            if items:
                yield items

            cursor = self._next_cursor(response)
            if not cursor:
                return
            pagination_info["starting_after"] = cursor

    def get_single_resource(self, endpoint: str, resource_id: str) -> TDataItem:
        """
        Get a single resource by ID.

        Args:
            endpoint: Base endpoint path
            resource_id: Resource ID

        Returns:
            Resource data
        """
        return self._make_request("GET", f"{endpoint}/{resource_id}")

    def search(
        self,
        resource_type: str,
        query: Dict[str, Any],
        sort: Optional[Dict[str, str]] = None,
    ) -> Iterator[TDataItems]:
        """
        Search for resources using the Search API.

        Args:
            resource_type: Type of resource to search (contacts, companies, conversations)
            query: Search query following Intercom's query format
            sort: Optional sort configuration

        Yields:
            Lists of matching resources
        """
        endpoint = f"/{resource_type}/search"
        search_query: Dict[str, Any] = {"query": query}

        if sort:
            search_query["sort"] = sort

        yield from self.get_pages(
            endpoint=endpoint,
            data_key="data",
            pagination_type=PaginationType.SEARCH,
            search_query=search_query,
        )
299
+
300
+
301
def transform_contact(contact: Dict[str, Any]) -> Dict[str, Any]:
    """
    Transform a contact record to ensure consistent format.

    Works on a shallow copy, so the input mapping keeps its keys.

    Args:
        contact: Raw contact data from API

    Returns:
        Copy of the contact in which:
        - a dict ``location`` is replaced by ``location_country``,
          ``location_region`` and ``location_city``;
        - a dict ``companies`` gains ``company_ids`` (truthy ids only) and
          ``companies_count`` (number of entries under ``data``);
        - ``custom_attributes`` is ``{}`` when missing or ``None``.
    """
    transformed = contact.copy()

    # Flatten location data if present
    location = transformed.get("location")
    if isinstance(location, dict):
        del transformed["location"]
        transformed["location_country"] = location.get("country")
        transformed["location_region"] = location.get("region")
        transformed["location_city"] = location.get("city")

    # Flatten companies relationship
    companies = transformed.get("companies")
    if isinstance(companies, dict):
        # `or []` also covers an explicit `"data": None`, which would
        # otherwise fail when iterated.
        companies_data = companies.get("data") or []
        transformed["company_ids"] = [
            c.get("id") for c in companies_data if c.get("id")
        ]
        transformed["companies_count"] = len(companies_data)

    # Ensure custom_attributes is always a dict, including when the API
    # sends the key with a null value.
    if transformed.get("custom_attributes") is None:
        transformed["custom_attributes"] = {}

    return transformed
334
+
335
+
336
def transform_company(company: Dict[str, Any]) -> Dict[str, Any]:
    """
    Transform a company record to ensure consistent format.

    Works on a shallow copy, so the input mapping keeps its keys.

    Args:
        company: Raw company data from API

    Returns:
        Copy of the company in which ``custom_attributes`` is ``{}`` when
        missing or ``None``, and a dict ``plan`` is replaced by ``plan_id``
        and ``plan_name``.
    """
    transformed = company.copy()

    # Ensure custom_attributes is always a dict, including when the API
    # sends the key with a null value.
    if transformed.get("custom_attributes") is None:
        transformed["custom_attributes"] = {}

    # Flatten plan information if it's an object
    plan = transformed.get("plan")
    if isinstance(plan, dict):
        del transformed["plan"]
        transformed["plan_id"] = plan.get("id")
        transformed["plan_name"] = plan.get("name")

    return transformed
359
+
360
+
361
def transform_conversation(conversation: Dict[str, Any]) -> Dict[str, Any]:
    """
    Return a flattened shallow copy of a conversation record.

    Args:
        conversation: Raw conversation data from API

    Returns:
        Copy of the conversation where a dict ``statistics`` is replaced by
        six top-level reply-time fields, and a dict ``conversation_parts``
        adds ``conversation_parts_count`` (its ``total_count``, default 0).
    """
    result = conversation.copy()

    # Lift the selected statistics to top-level keys; absent ones become None.
    statistics = result.get("statistics")
    if isinstance(statistics, dict):
        del result["statistics"]
        lifted_keys = (
            "first_contact_reply_at",
            "first_admin_reply_at",
            "last_contact_reply_at",
            "last_admin_reply_at",
            "median_admin_reply_time",
            "mean_admin_reply_time",
        )
        for stat_key in lifted_keys:
            result[stat_key] = statistics.get(stat_key)

    # Keep conversation_parts itself and add its total as a scalar column.
    conversation_parts = result.get("conversation_parts")
    if isinstance(conversation_parts, dict):
        result["conversation_parts_count"] = conversation_parts.get("total_count", 0)

    return result
391
+
392
+
393
def convert_datetime_to_timestamp(dt_obj: Any) -> int:
    """
    Turn a datetime-like object into a Unix timestamp.

    Args:
        dt_obj: Object exposing ``int_timestamp`` or a ``timestamp()`` method

    Returns:
        ``dt_obj.int_timestamp`` when that attribute exists, otherwise
        ``int(dt_obj.timestamp())``

    Raises:
        ValueError: If the object offers neither attribute.
    """
    absent = object()

    # Prefer the ready-made integer attribute when the object has one.
    ready_value = getattr(dt_obj, "int_timestamp", absent)
    if ready_value is not absent:
        return ready_value

    # Otherwise truncate the float returned by timestamp().
    to_epoch = getattr(dt_obj, "timestamp", absent)
    if to_epoch is not absent:
        return int(to_epoch())

    raise ValueError(f"Cannot convert {type(dt_obj)} to timestamp")
409
+
410
+
411
def create_search_resource(
    api_client: "IntercomAPIClient",
    resource_name: str,
    updated_at_incremental: Any,
    transform_func: Optional[Callable] = None,
) -> Iterator[TDataItems]:
    """
    Yield pages of a search-backed resource filtered on ``updated_at``.

    The filter is built once from the incremental's ``last_value`` and
    ``end_value`` and sent through ``api_client.search``.

    Args:
        api_client: Intercom API client
        resource_name: Name of the resource (contacts, conversations)
        updated_at_incremental: DLT incremental object
        transform_func: Optional per-record transformation

    Yields:
        One list per page, transformed when ``transform_func`` is given
    """
    search_filter = build_incremental_query(
        "updated_at",
        updated_at_incremental.last_value,
        updated_at_incremental.end_value,
    )

    for batch in api_client.search(resource_name, search_filter):
        yield [transform_func(record) for record in batch] if transform_func else batch

        # Checked after each page is handed over: stop requesting further
        # pages once the incremental reports the end of its range.
        if updated_at_incremental.end_out_of_range:
            return
444
+
445
+
446
def create_tickets_resource(
    api_client: "IntercomAPIClient",
    updated_at_incremental: Any,
) -> Iterator[TDataItems]:
    """
    Yield ticket pages fetched with the ``updated_since`` query parameter.

    Args:
        api_client: Intercom API client
        updated_at_incremental: DLT incremental object; ``last_value`` is
            sent as ``updated_since`` and ``end_value`` caps ``updated_at``

    Yields:
        Ticket lists, limited to ``updated_at <= end_value`` when an end
        value is set
    """
    query_params = {"updated_since": updated_at_incremental.last_value}
    # A falsy end value (None or 0) means "no upper bound".
    cutoff = updated_at_incremental.end_value or None

    ticket_pages = api_client.get_pages(
        "/tickets", "tickets", PaginationType.CURSOR, params=query_params
    )
    for page in ticket_pages:
        if not cutoff:
            yield page
            continue

        within_range = [
            ticket for ticket in page if ticket.get("updated_at", 0) <= cutoff
        ]
        if within_range:
            yield within_range

        # Some ticket on this page is newer than the cutoff: stop paging.
        # NOTE(review): this early exit presumably relies on tickets arriving
        # in ascending updated_at order — confirm against the API.
        if len(within_range) != len(page):
            return
480
+
481
+
482
def create_pagination_resource(
    api_client: "IntercomAPIClient",
    endpoint: str,
    data_key: str,
    pagination_type: PaginationType,
    updated_at_incremental: Any,
    transform_func: Optional[Callable] = None,
    params: Optional[Dict[str, Any]] = None,
) -> Iterator[TDataItems]:
    """
    Generic function for cursor/simple pagination with client-side filtering.

    Items are kept when ``lower <= updated_at`` and, if an end value is set,
    ``updated_at <= end``; a missing ``updated_at`` counts as 0.

    Args:
        api_client: Intercom API client
        endpoint: API endpoint path
        data_key: Key in response containing data
        pagination_type: Type of pagination
        updated_at_incremental: DLT incremental object
        transform_func: Optional transformation function
        params: Additional query parameters (never modified)

    Yields:
        Filtered and transformed resource records, one list per non-empty page
    """
    # Read the window once, before any page is yielded. Re-reading
    # `last_value` per item lets the lower bound move while the generator is
    # being consumed, which would drop older items from later pages.
    lower_bound = updated_at_incremental.last_value
    upper_bound = updated_at_incremental.end_value

    # Hand the client its own copy: `params` may be a shared configuration
    # dict and the client adds paging keys to what it receives.
    request_params = dict(params) if params else None

    for page in api_client.get_pages(
        endpoint, data_key, pagination_type, params=request_params
    ):
        filtered_items = []
        for item in page:
            item_updated = item.get("updated_at", 0)
            if item_updated < lower_bound:
                continue
            if upper_bound and item_updated > upper_bound:
                continue
            filtered_items.append(transform_func(item) if transform_func else item)

        if filtered_items:
            yield filtered_items

        # NOTE(review): stopping here presumably assumes the endpoint returns
        # items ordered by updated_at — confirm for each configured endpoint.
        if updated_at_incremental.end_out_of_range:
            return
529
+
530
+
531
def create_resource_from_config(
    resource_name: str,
    config: Dict[str, Any],
    api_client: "IntercomAPIClient",
    start_timestamp: int,
    end_timestamp: Optional[int],
    transform_functions: Dict[str, Callable],
) -> Any:
    """
    Create a DLT resource from configuration.

    Non-incremental configs produce a parameterless "replace" resource that
    reads every page of the endpoint. Incremental configs produce a "merge"
    resource whose ``updated_at`` argument is a ``dlt.sources.incremental``
    bounded by the given timestamps; ``config["type"]`` selects the loader.

    Args:
        resource_name: Name of the resource
        config: Resource configuration dict
        api_client: Intercom API client
        start_timestamp: Start timestamp for incremental loading
        end_timestamp: End timestamp for incremental loading
        transform_functions: Dict mapping transform function names to actual functions

    Returns:
        DLT resource function

    Raises:
        ValueError: If ``config["transform_func"]`` names a function missing
            from ``transform_functions``; when the resource is iterated, if
            ``config["type"]`` is not a known resource type.
    """
    import dlt

    columns = config.get("columns", {})

    # Resolve the transform eagerly: a misspelled name should fail here
    # instead of silently loading untransformed records.
    transform_func = None
    transform_name = config.get("transform_func")
    if transform_name:
        if transform_name not in transform_functions:
            raise ValueError(
                f"Unknown transform function '{transform_name}' "
                f"for resource '{resource_name}'"
            )
        transform_func = transform_functions[transform_name]

    def configured_pagination() -> PaginationType:
        """Map the config's pagination_type string to the enum member."""
        return getattr(PaginationType, config["pagination_type"].upper())

    def iter_all_pages() -> Iterator[TDataItems]:
        """Read every page of the configured endpoint without filtering."""
        yield from api_client.get_pages(
            config["endpoint"],
            config["data_key"],
            configured_pagination(),
        )

    # Non-incremental resources get a function without parameters.
    if not config["incremental"]:

        @dlt.resource(
            name=resource_name,
            primary_key="id",
            write_disposition="replace",
            columns=columns,
        )
        def simple_resource_function() -> Iterator[TDataItems]:
            """
            Auto-generated simple resource function.
            """
            yield from iter_all_pages()

        return simple_resource_function

    def resource_function(
        updated_at: Optional[dlt.sources.incremental[int]] = dlt.sources.incremental(
            "updated_at",
            initial_value=start_timestamp,
            end_value=end_timestamp,
            allow_external_schedulers=True,
        ),
    ) -> Iterator[TDataItems]:
        """
        Auto-generated resource function.
        """
        resource_type = config["type"]

        if resource_type == "search":
            yield from create_search_resource(
                api_client, resource_name, updated_at, transform_func
            )
        elif resource_type == "pagination":
            yield from create_pagination_resource(
                api_client,
                config["endpoint"],
                config["data_key"],
                configured_pagination(),
                updated_at,
                transform_func,
                config.get("params"),
            )
        elif resource_type == "tickets":
            yield from create_tickets_resource(api_client, updated_at)
        elif resource_type == "simple":
            yield from iter_all_pages()
        else:
            raise ValueError(f"Unknown resource type: {resource_type}")

    # Incremental resources are merged on their primary key.
    return dlt.resource(  # type: ignore[call-overload]
        resource_function,
        name=resource_name,
        primary_key="id",
        write_disposition="merge",
        columns=columns,
    )
633
+
634
+
635
def build_incremental_query(
    field: str,
    start_value: Any,
    end_value: Optional[Any] = None,
) -> Dict[str, Any]:
    """
    Build a search query for incremental loading.

    The query uses the strict ``>`` and ``<`` operators. To honour the
    inclusive bounds documented below, integer bounds are widened by one
    before being sent; non-integer bounds are passed through unchanged and
    are therefore exclusive.

    Args:
        field: Field to filter on
        start_value: Start value (inclusive for integers)
        end_value: Optional end value (inclusive for integers)

    Returns:
        Query dict for Intercom Search API: a single condition when only a
        start is given, otherwise an ``AND`` of the two conditions
    """
    # NOTE(review): Intercom's contact search documentation describes
    # timestamp filters as day-granular — confirm that a one-second widening
    # is enough for the contacts resource.

    def widen(bound: Any, step: int) -> Any:
        # bool is an int subclass but is not a timestamp; leave it untouched.
        if isinstance(bound, int) and not isinstance(bound, bool):
            return bound + step
        return bound

    conditions = [
        {
            "field": field,
            "operator": ">",
            "value": widen(start_value, -1),
        }
    ]

    if end_value is not None:
        conditions.append(
            {
                "field": field,
                "operator": "<",
                "value": widen(end_value, 1),
            }
        )

    if len(conditions) == 1:
        return conditions[0]

    return {
        "operator": "AND",
        "value": conditions,
    }
@@ -0,0 +1,277 @@
1
+ """
2
+ Configuration settings and constants for Intercom API integration.
3
+ """
4
+
5
+ from datetime import datetime
6
+ from typing import Dict, List, Tuple
7
+
8
+ # API Version - REQUIRED for all requests
9
+ API_VERSION = "2.14"
10
+
11
+ # Default start date for incremental loading
12
+ DEFAULT_START_DATE = datetime(2020, 1, 1)
13
+
14
+ # Pagination settings
15
+ DEFAULT_PAGE_SIZE = 150
16
+ MAX_PAGE_SIZE = 150 # Intercom's maximum
17
+ SCROLL_EXPIRY_SECONDS = 60 # Scroll sessions expire after 1 minute
18
+
19
+ # Rate limiting settings
20
+ RATE_LIMIT_PER_10_SECONDS = 166
21
+ RATE_LIMIT_RETRY_AFTER_DEFAULT = 10
22
+
23
+ # Regional API endpoints
24
+ REGIONAL_ENDPOINTS = {
25
+ "us": "https://api.intercom.io",
26
+ "eu": "https://api.eu.intercom.io",
27
+ "au": "https://api.au.intercom.io",
28
+ }
29
+
30
+ # Resource configuration for automatic generation
31
+ # Format: resource_name -> config dict
32
+ RESOURCE_CONFIGS = {
33
+ # Search-based incremental resources
34
+ "contacts": {
35
+ "type": "search",
36
+ "incremental": True,
37
+ "transform_func": "transform_contact",
38
+ "columns": {
39
+ "custom_attributes": {"data_type": "json"},
40
+ "tags": {"data_type": "json"},
41
+ },
42
+ },
43
+ "conversations": {
44
+ "type": "search",
45
+ "incremental": True,
46
+ "transform_func": "transform_conversation",
47
+ "columns": {
48
+ "custom_attributes": {"data_type": "json"},
49
+ "tags": {"data_type": "json"},
50
+ },
51
+ },
52
+ # Pagination-based incremental resources
53
+ "companies": {
54
+ "type": "pagination",
55
+ "endpoint": "/companies",
56
+ "data_key": "data",
57
+ "pagination_type": "cursor",
58
+ "incremental": True,
59
+ "transform_func": "transform_company",
60
+ "params": {"per_page": 50},
61
+ "columns": {
62
+ "custom_attributes": {"data_type": "json"},
63
+ "tags": {"data_type": "json"},
64
+ },
65
+ },
66
+ "articles": {
67
+ "type": "pagination",
68
+ "endpoint": "/articles",
69
+ "data_key": "data",
70
+ "pagination_type": "cursor",
71
+ "incremental": True,
72
+ "transform_func": None,
73
+ "params": None,
74
+ "columns": {},
75
+ },
76
+ # Special case - tickets
77
+ "tickets": {
78
+ "type": "tickets",
79
+ "incremental": True,
80
+ "transform_func": None,
81
+ "columns": {
82
+ "ticket_attributes": {"data_type": "json"},
83
+ },
84
+ },
85
+ # Simple replace resources (non-incremental)
86
+ "tags": {
87
+ "type": "simple",
88
+ "endpoint": "/tags",
89
+ "data_key": "data",
90
+ "pagination_type": "simple",
91
+ "incremental": False,
92
+ "transform_func": None,
93
+ "columns": {},
94
+ },
95
+ "segments": {
96
+ "type": "simple",
97
+ "endpoint": "/segments",
98
+ "data_key": "segments",
99
+ "pagination_type": "cursor",
100
+ "incremental": False,
101
+ "transform_func": None,
102
+ "columns": {},
103
+ },
104
+ "teams": {
105
+ "type": "simple",
106
+ "endpoint": "/teams",
107
+ "data_key": "teams",
108
+ "pagination_type": "simple",
109
+ "incremental": False,
110
+ "transform_func": None,
111
+ "columns": {},
112
+ },
113
+ "admins": {
114
+ "type": "simple",
115
+ "endpoint": "/admins",
116
+ "data_key": "admins",
117
+ "pagination_type": "simple",
118
+ "incremental": False,
119
+ "transform_func": None,
120
+ "columns": {},
121
+ },
122
+ "data_attributes": {
123
+ "type": "simple",
124
+ "endpoint": "/data_attributes",
125
+ "data_key": "data",
126
+ "pagination_type": "cursor",
127
+ "incremental": False,
128
+ "transform_func": None,
129
+ "columns": {},
130
+ },
131
+ }
132
+
133
+ # Core endpoints with their configuration (kept for backwards compatibility)
134
+ # Format: (endpoint_path, data_key, supports_incremental, pagination_type)
135
+ CORE_ENDPOINTS: Dict[str, Tuple[str, str, bool, str]] = {
136
+ "contacts": ("/contacts", "data", True, "cursor"),
137
+ "companies": ("/companies", "data", True, "cursor"),
138
+ "conversations": ("/conversations", "conversations", True, "cursor"),
139
+ "tickets": ("/tickets", "tickets", True, "cursor"),
140
+ "admins": ("/admins", "admins", False, "simple"),
141
+ "teams": ("/teams", "teams", False, "simple"),
142
+ "tags": ("/tags", "data", False, "simple"),
143
+ "segments": ("/segments", "segments", False, "cursor"),
144
+ "articles": ("/articles", "data", True, "cursor"),
145
+ "collections": ("/help_center/collections", "data", False, "cursor"),
146
+ "data_attributes": ("/data_attributes", "data", False, "cursor"),
147
+ }
148
+
149
+ # Incremental endpoints using search API
150
+ SEARCH_ENDPOINTS: Dict[str, str] = {
151
+ "contacts_search": "/contacts/search",
152
+ "companies_search": "/companies/search",
153
+ "conversations_search": "/conversations/search",
154
+ }
155
+
156
+ # Special endpoints requiring different handling
157
+ SCROLL_ENDPOINTS: List[str] = [
158
+ "companies", # Can use scroll for large exports
159
+ ]
160
+
161
+ # Event tracking endpoint
162
+ EVENTS_ENDPOINT = "/events"
163
+
164
+ # Ticket fields endpoint for custom field mapping
165
+ TICKET_FIELDS_ENDPOINT = "/ticket_types/{ticket_type_id}/attributes"
166
+
167
+ # Default fields to retrieve for each resource type
168
+ DEFAULT_CONTACT_FIELDS = [
169
+ "id",
170
+ "type",
171
+ "external_id",
172
+ "email",
173
+ "phone",
174
+ "name",
175
+ "created_at",
176
+ "updated_at",
177
+ "signed_up_at",
178
+ "last_seen_at",
179
+ "last_contacted_at",
180
+ "last_email_opened_at",
181
+ "last_email_clicked_at",
182
+ "browser",
183
+ "browser_language",
184
+ "browser_version",
185
+ "location",
186
+ "os",
187
+ "role",
188
+ "custom_attributes",
189
+ "tags",
190
+ "companies",
191
+ ]
192
+
193
+ DEFAULT_COMPANY_FIELDS = [
194
+ "id",
195
+ "type",
196
+ "company_id",
197
+ "name",
198
+ "plan",
199
+ "size",
200
+ "website",
201
+ "industry",
202
+ "created_at",
203
+ "updated_at",
204
+ "monthly_spend",
205
+ "session_count",
206
+ "user_count",
207
+ "custom_attributes",
208
+ "tags",
209
+ ]
210
+
211
+ DEFAULT_CONVERSATION_FIELDS = [
212
+ "id",
213
+ "type",
214
+ "created_at",
215
+ "updated_at",
216
+ "waiting_since",
217
+ "snoozed_until",
218
+ "state",
219
+ "open",
220
+ "read",
221
+ "priority",
222
+ "admin_assignee_id",
223
+ "team_assignee_id",
224
+ "tags",
225
+ "conversation_rating",
226
+ "source",
227
+ "contacts",
228
+ "teammates",
229
+ "custom_attributes",
230
+ "first_contact_reply",
231
+ "sla_applied",
232
+ "statistics",
233
+ "conversation_parts",
234
+ ]
235
+
236
+ DEFAULT_TICKET_FIELDS = [
237
+ "id",
238
+ "type",
239
+ "ticket_id",
240
+ "category",
241
+ "ticket_attributes",
242
+ "ticket_state",
243
+ "ticket_type",
244
+ "created_at",
245
+ "updated_at",
246
+ "ticket_parts",
247
+ "contacts",
248
+ "admin_assignee_id",
249
+ "team_assignee_id",
250
+ "open",
251
+ "snoozed_until",
252
+ ]
253
+
254
+ # Resources that support custom attributes
255
+ SUPPORTS_CUSTOM_ATTRIBUTES = [
256
+ "contacts",
257
+ "companies",
258
+ "conversations",
259
+ ]
260
+
261
+ # Maximum limits
262
+ MAX_CUSTOM_ATTRIBUTES_PER_RESOURCE = 100
263
+ MAX_EVENT_TYPES_PER_WORKSPACE = 120
264
+ MAX_CONVERSATION_PARTS = 500
265
+ MAX_SEARCH_RESULTS = 10000
266
+
267
+ # Field type mapping for custom attributes
268
+ INTERCOM_TO_DLT_TYPE_MAPPING = {
269
+ "string": "text",
270
+ "integer": "bigint",
271
+ "float": "double",
272
+ "boolean": "bool",
273
+ "date": "timestamp",
274
+ "datetime": "timestamp",
275
+ "object": "json",
276
+ "list": "json",
277
+ }
ingestr/src/sources.py CHANGED
@@ -3703,3 +3703,76 @@ class AnthropicSource:
3703
3703
  initial_start_date=start_date,
3704
3704
  end_date=end_date,
3705
3705
  ).with_resources(table)
3706
+
3707
+
3708
class IntercomSource:
    """Source adapter that turns an ``intercom://`` URI into a dlt source."""

    def handles_incrementality(self) -> bool:
        """Always return True.

        NOTE(review): presumably signals to the caller that this source manages
        incremental loading on its own — confirm against the call site.
        """
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Build the Intercom dlt source restricted to ``table``.

        Accepted URI forms::

            intercom://?access_token=<token>&region=<us|eu|au>
            intercom://?oauth_token=<token>&region=<us|eu|au>

        ``region`` falls back to ``"us"`` when it is not in the query string.
        When both tokens are supplied, ``access_token`` takes precedence.

        Keyword arguments read from ``kwargs``:
            interval_start: start of the load window; 2020-01-01 is used when
                it is missing or falsy.
            interval_end: end of the load window; passed through unchanged
                when it is missing or falsy.

        Raises:
            MissingValueError: neither ``access_token`` nor ``oauth_token`` is
                present in the URI.
            UnsupportedResourceError: ``table`` is not a supported resource.
        """
        query = parse_qs(urlparse(uri).query)

        access_token = query.get("access_token")
        oauth_token = query.get("oauth_token")
        region = query.get("region", ["us"])[0]

        # At least one kind of token is required.
        if not (access_token or oauth_token):
            raise MissingValueError("access_token or oauth_token", "Intercom")

        # Reject anything that is not a known resource name.
        if table not in (
            "contacts",
            "companies",
            "conversations",
            "tickets",
            "tags",
            "segments",
            "teams",
            "admins",
            "articles",
            "data_attributes",
        ):
            raise UnsupportedResourceError(table, "Intercom")

        # Resolve the load window.
        interval_start = kwargs.get("interval_start")
        start_date = (
            ensure_pendulum_datetime(interval_start)
            if interval_start
            else pendulum.datetime(2020, 1, 1)
        )

        end_date = kwargs.get("interval_end")
        if end_date:
            end_date = ensure_pendulum_datetime(end_date)

        # Imported inside the method so the Intercom package is only loaded
        # when this source is actually used.
        from ingestr.src.intercom import (
            IntercomCredentialsAccessToken,
            IntercomCredentialsOAuth,
            TIntercomCredentials,
            intercom_source,
        )

        credentials: TIntercomCredentials
        if access_token:
            credentials = IntercomCredentialsAccessToken(
                access_token=access_token[0], region=region
            )
        elif oauth_token:
            credentials = IntercomCredentialsOAuth(
                oauth_token=oauth_token[0], region=region
            )
        else:
            # Not reachable after the token check above; kept so the token
            # list is narrowed to non-None before it is indexed.
            raise MissingValueError("oauth_token", "Intercom")

        source = intercom_source(
            credentials=credentials,
            start_date=start_date,
            end_date=end_date,
        )
        return source.with_resources(table)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.14.1
3
+ Version: 0.14.2
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -2,17 +2,17 @@ ingestr/conftest.py,sha256=OE2yxeTCosS9CUFVuqNypm-2ftYvVBeeq7egm3878cI,1981
2
2
  ingestr/main.py,sha256=qo0g3wCFl8a_1jUwXagX8L1Q8PKKQlTF7md9pfnzW0Y,27155
3
3
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
4
4
  ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
5
- ingestr/src/buildinfo.py,sha256=vBwHqLcmS8wd269RLIXI-LgqeEmD5o43bmLyp6Zpk6I,20
5
+ ingestr/src/buildinfo.py,sha256=etAF7mJbjXgR22CNwj2pF9fzWwF_eMqGhIYdwvHnftw,20
6
6
  ingestr/src/destinations.py,sha256=BE8tK69Aq7FLBV_j6ijMhZCRIUYpSTnk7FTZMZW5Q5M,28557
7
7
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
8
- ingestr/src/factory.py,sha256=sT5x4R95Qa0QIjkS7a5lFKUFwFpVpdb1nv8HIxOu_RM,7376
8
+ ingestr/src/factory.py,sha256=u6IxjM9Rt6klr0Sa44OiDO-zFTJMvp84hFX3vZKAP4M,7432
9
9
  ingestr/src/filters.py,sha256=0n0sNAVG_f-B_1r7lW5iNtw9z_G1bxWzPaiL1i6tnbU,1665
10
10
  ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
11
11
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
12
12
  ingestr/src/masking.py,sha256=VN0LdfvExhQ1bZMRylGtaBUIoH-vjuIUmRnYKwo3yiY,11358
13
13
  ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
14
14
  ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
15
- ingestr/src/sources.py,sha256=ADnE41x6-9U0fK1CYqPc6XRfGqg_6mZGqP10tlLUxDU,130361
15
+ ingestr/src/sources.py,sha256=gcweUJVZ4ScQzSeu_pCm4kgsLUymNuF6K0qWy6OE6OM,132655
16
16
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
17
17
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
18
18
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -88,6 +88,9 @@ ingestr/src/hubspot/helpers.py,sha256=k2b-lhxqBNKHoOSHoHegFSsk8xxjjGA0I04V0XyX2b
88
88
  ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
89
89
  ingestr/src/influxdb/__init__.py,sha256=cYsGnDPNHRTe9pp14ogDQgPTCI9TOdyJm1MaNuQLHdk,1290
90
90
  ingestr/src/influxdb/client.py,sha256=hCxSNREAWWEvvAV3RQbKaWp2-e_7EE8xmVRjTwLFEFo,1230
91
+ ingestr/src/intercom/__init__.py,sha256=rqorWFwcfcTYrCrpSsPPM2sGOc7qq5XbYZRCDVJXjyI,4451
92
+ ingestr/src/intercom/helpers.py,sha256=IljM0x4K70nuahidZaP7mtIlsHkPIcZq56j9mmuSck4,21074
93
+ ingestr/src/intercom/settings.py,sha256=BU-jmlIOA2BrR-mWyjL6QXhiNqdY3WrNjzaWd5MCKLk,6983
91
94
  ingestr/src/isoc_pulse/__init__.py,sha256=9b4eN4faatpiwTuRNPuYcEt1hEFDEjua9XhfakUigBk,4648
92
95
  ingestr/src/kafka/__init__.py,sha256=QUHsGmdv5_E-3z0GDHXvbk39puwuGDBsyYSDhvbA89E,3595
93
96
  ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
@@ -166,8 +169,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
166
169
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
167
170
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
168
171
  ingestr/tests/unit/test_smartsheets.py,sha256=zf3DXT29Y4TH2lNPBFphdjlaelUUyPJcsW2UO68RzDs,4862
169
- ingestr-0.14.1.dist-info/METADATA,sha256=BdnG8Q5dOYWFc_8R3YWvXB-eq96c6Rton95YLkV1C5Q,15265
170
- ingestr-0.14.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
171
- ingestr-0.14.1.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
172
- ingestr-0.14.1.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
173
- ingestr-0.14.1.dist-info/RECORD,,
172
+ ingestr-0.14.2.dist-info/METADATA,sha256=-UyaGpKgZSYETEBSfKLzRXn5vG3VA9qmofpmoD-gGa0,15265
173
+ ingestr-0.14.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
174
+ ingestr-0.14.2.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
175
+ ingestr-0.14.2.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
176
+ ingestr-0.14.2.dist-info/RECORD,,