ingestr 0.13.13__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (139)
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin_max/__init__.py +6 -4
  10. ingestr/src/appsflyer/__init__.py +325 -0
  11. ingestr/src/appsflyer/client.py +49 -45
  12. ingestr/src/appstore/__init__.py +1 -0
  13. ingestr/src/arrow/__init__.py +9 -1
  14. ingestr/src/asana_source/__init__.py +1 -1
  15. ingestr/src/attio/__init__.py +102 -0
  16. ingestr/src/attio/helpers.py +65 -0
  17. ingestr/src/blob.py +37 -10
  18. ingestr/src/buildinfo.py +1 -1
  19. ingestr/src/chess/__init__.py +1 -1
  20. ingestr/src/clickup/__init__.py +85 -0
  21. ingestr/src/clickup/helpers.py +47 -0
  22. ingestr/src/collector/spinner.py +43 -0
  23. ingestr/src/couchbase_source/__init__.py +118 -0
  24. ingestr/src/couchbase_source/helpers.py +135 -0
  25. ingestr/src/cursor/__init__.py +83 -0
  26. ingestr/src/cursor/helpers.py +188 -0
  27. ingestr/src/destinations.py +508 -27
  28. ingestr/src/docebo/__init__.py +589 -0
  29. ingestr/src/docebo/client.py +435 -0
  30. ingestr/src/docebo/helpers.py +97 -0
  31. ingestr/src/elasticsearch/__init__.py +80 -0
  32. ingestr/src/elasticsearch/helpers.py +138 -0
  33. ingestr/src/errors.py +8 -0
  34. ingestr/src/facebook_ads/__init__.py +47 -28
  35. ingestr/src/facebook_ads/helpers.py +59 -37
  36. ingestr/src/facebook_ads/settings.py +2 -0
  37. ingestr/src/facebook_ads/utils.py +39 -0
  38. ingestr/src/factory.py +107 -2
  39. ingestr/src/filesystem/__init__.py +8 -3
  40. ingestr/src/filters.py +46 -3
  41. ingestr/src/fluxx/__init__.py +9906 -0
  42. ingestr/src/fluxx/helpers.py +209 -0
  43. ingestr/src/frankfurter/__init__.py +157 -0
  44. ingestr/src/frankfurter/helpers.py +48 -0
  45. ingestr/src/freshdesk/__init__.py +89 -0
  46. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  47. ingestr/src/freshdesk/settings.py +9 -0
  48. ingestr/src/fundraiseup/__init__.py +95 -0
  49. ingestr/src/fundraiseup/client.py +81 -0
  50. ingestr/src/github/__init__.py +41 -6
  51. ingestr/src/github/helpers.py +5 -5
  52. ingestr/src/google_analytics/__init__.py +22 -4
  53. ingestr/src/google_analytics/helpers.py +124 -6
  54. ingestr/src/google_sheets/__init__.py +4 -4
  55. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  56. ingestr/src/hostaway/__init__.py +302 -0
  57. ingestr/src/hostaway/client.py +288 -0
  58. ingestr/src/http/__init__.py +35 -0
  59. ingestr/src/http/readers.py +114 -0
  60. ingestr/src/http_client.py +24 -0
  61. ingestr/src/hubspot/__init__.py +66 -23
  62. ingestr/src/hubspot/helpers.py +52 -22
  63. ingestr/src/hubspot/settings.py +14 -7
  64. ingestr/src/influxdb/__init__.py +46 -0
  65. ingestr/src/influxdb/client.py +34 -0
  66. ingestr/src/intercom/__init__.py +142 -0
  67. ingestr/src/intercom/helpers.py +674 -0
  68. ingestr/src/intercom/settings.py +279 -0
  69. ingestr/src/isoc_pulse/__init__.py +159 -0
  70. ingestr/src/jira_source/__init__.py +340 -0
  71. ingestr/src/jira_source/helpers.py +439 -0
  72. ingestr/src/jira_source/settings.py +170 -0
  73. ingestr/src/kafka/__init__.py +4 -1
  74. ingestr/src/kinesis/__init__.py +139 -0
  75. ingestr/src/kinesis/helpers.py +82 -0
  76. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  77. ingestr/src/linear/__init__.py +634 -0
  78. ingestr/src/linear/helpers.py +111 -0
  79. ingestr/src/linkedin_ads/helpers.py +0 -1
  80. ingestr/src/mailchimp/__init__.py +126 -0
  81. ingestr/src/mailchimp/helpers.py +226 -0
  82. ingestr/src/mailchimp/settings.py +164 -0
  83. ingestr/src/masking.py +344 -0
  84. ingestr/src/mixpanel/__init__.py +62 -0
  85. ingestr/src/mixpanel/client.py +99 -0
  86. ingestr/src/monday/__init__.py +246 -0
  87. ingestr/src/monday/helpers.py +392 -0
  88. ingestr/src/monday/settings.py +328 -0
  89. ingestr/src/mongodb/__init__.py +72 -8
  90. ingestr/src/mongodb/helpers.py +915 -38
  91. ingestr/src/partition.py +32 -0
  92. ingestr/src/phantombuster/__init__.py +65 -0
  93. ingestr/src/phantombuster/client.py +87 -0
  94. ingestr/src/pinterest/__init__.py +82 -0
  95. ingestr/src/pipedrive/__init__.py +198 -0
  96. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  97. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  98. ingestr/src/pipedrive/helpers/pages.py +115 -0
  99. ingestr/src/pipedrive/settings.py +27 -0
  100. ingestr/src/pipedrive/typing.py +3 -0
  101. ingestr/src/plusvibeai/__init__.py +335 -0
  102. ingestr/src/plusvibeai/helpers.py +544 -0
  103. ingestr/src/plusvibeai/settings.py +252 -0
  104. ingestr/src/quickbooks/__init__.py +117 -0
  105. ingestr/src/resource.py +40 -0
  106. ingestr/src/revenuecat/__init__.py +83 -0
  107. ingestr/src/revenuecat/helpers.py +237 -0
  108. ingestr/src/salesforce/__init__.py +15 -8
  109. ingestr/src/shopify/__init__.py +1 -17
  110. ingestr/src/smartsheets/__init__.py +82 -0
  111. ingestr/src/snapchat_ads/__init__.py +489 -0
  112. ingestr/src/snapchat_ads/client.py +72 -0
  113. ingestr/src/snapchat_ads/helpers.py +535 -0
  114. ingestr/src/socrata_source/__init__.py +83 -0
  115. ingestr/src/socrata_source/helpers.py +85 -0
  116. ingestr/src/socrata_source/settings.py +8 -0
  117. ingestr/src/solidgate/__init__.py +219 -0
  118. ingestr/src/solidgate/helpers.py +154 -0
  119. ingestr/src/sources.py +2933 -245
  120. ingestr/src/stripe_analytics/__init__.py +49 -21
  121. ingestr/src/stripe_analytics/helpers.py +286 -1
  122. ingestr/src/stripe_analytics/settings.py +62 -10
  123. ingestr/src/telemetry/event.py +10 -9
  124. ingestr/src/tiktok_ads/__init__.py +12 -6
  125. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  126. ingestr/src/trustpilot/__init__.py +48 -0
  127. ingestr/src/trustpilot/client.py +48 -0
  128. ingestr/src/wise/__init__.py +68 -0
  129. ingestr/src/wise/client.py +63 -0
  130. ingestr/src/zoom/__init__.py +99 -0
  131. ingestr/src/zoom/helpers.py +102 -0
  132. ingestr/tests/unit/test_smartsheets.py +133 -0
  133. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/METADATA +229 -19
  134. ingestr-0.14.104.dist-info/RECORD +203 -0
  135. ingestr/src/appsflyer/_init_.py +0 -24
  136. ingestr-0.13.13.dist-info/RECORD +0 -115
  137. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  138. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  139. {ingestr-0.13.13.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,237 @@
1
+ import asyncio
2
+ import time
3
+ from typing import Any, Dict, Iterator, List, Optional
4
+
5
+ import aiohttp
6
+ import pendulum
7
+ import requests
8
+
9
+ REVENUECAT_API_BASE = "https://api.revenuecat.com/v2"
10
+
11
+
12
def _make_request(
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
    max_retries: int = 3,
) -> Dict[str, Any]:
    """Send a GET request to the RevenueCat API v2 and return the JSON body.

    Rate-limited responses (HTTP 429), transport failures and server errors
    are retried after a delay. Other client errors (4xx) are raised straight
    away because repeating the identical request cannot fix them.

    Args:
        api_key: RevenueCat API key, sent as a Bearer token.
        endpoint: Path appended to ``REVENUECAT_API_BASE``.
        params: Optional query-string parameters.
        max_retries: Number of retries after the first attempt. Values below
            one still result in exactly one attempt.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.exceptions.RequestException: If the request fails with a
            non-retryable status or still fails after the last retry.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    url = f"{REVENUECAT_API_BASE}{endpoint}"

    attempt = 0
    while True:
        can_retry = attempt < max_retries
        try:
            response = requests.get(url, headers=headers, params=params or {})

            if response.status_code == 429 and can_retry:
                # Honour an integer Retry-After; the header may also be
                # missing or an HTTP date, in which case back off 5, 10, 20s.
                try:
                    delay = max(int(response.headers.get("Retry-After")), 0)
                except (TypeError, ValueError):
                    delay = (2**attempt) * 5
            else:
                response.raise_for_status()
                return response.json()
        except requests.exceptions.RequestException as err:
            # Compare against None explicitly: a Response object is falsy
            # whenever its status is an error.
            failed_response = getattr(err, "response", None)
            status = (
                failed_response.status_code if failed_response is not None else None
            )
            is_client_error = status is not None and 400 <= status < 500
            if not can_retry or is_client_error:
                raise
            delay = (2**attempt) * 2  # 2, 4, 8 seconds

        time.sleep(delay)
        attempt += 1
55
+
56
+
57
def _paginate(
    api_key: str, endpoint: str, params: Optional[Dict[str, Any]] = None
) -> Iterator[List[Dict[str, Any]]]:
    """Yield successive pages of items from a RevenueCat list endpoint.

    Each yielded value is the ``items`` list of one response page. The next
    page is requested with the ``starting_after`` cursor taken from the
    ``next_page`` URL of the current response; iteration stops when that URL
    is missing, empty, or carries no cursor.

    Args:
        api_key: RevenueCat API key.
        endpoint: Path of the list endpoint, relative to the API base URL.
        params: Optional extra query parameters; the mapping is copied, never
            modified.

    Yields:
        The list of items of each page.
    """
    # Imported locally so the helper does not depend on a module-level import.
    from urllib.parse import parse_qs, urlparse

    current_params = params.copy() if params is not None else {}
    current_params["limit"] = 1000

    while True:
        data = _make_request(api_key, endpoint, current_params)

        items = data.get("items")
        if items is not None:
            yield items

        next_page_url = data.get("next_page")
        if not next_page_url:
            break

        # Parse the query string properly: the cursor arrives percent-encoded
        # and is encoded again when sent, so it must be decoded here.
        cursors = parse_qs(urlparse(next_page_url).query).get("starting_after")
        if not cursors:
            break
        current_params["starting_after"] = cursors[0]
80
+
81
+
82
def convert_timestamps_to_iso(
    record: Dict[str, Any], timestamp_fields: List[str]
) -> Dict[str, Any]:
    """Rewrite millisecond timestamp fields of ``record`` as ISO 8601 strings.

    The record is updated in place and also returned. Fields that are absent
    or hold ``None`` are left untouched.

    Args:
        record: Record whose timestamp fields should be converted.
        timestamp_fields: Names of the fields holding millisecond timestamps.

    Returns:
        The same ``record`` object that was passed in.
    """
    for name in timestamp_fields:
        if name not in record:
            continue
        millis = record[name]
        if millis is None:
            continue
        # pendulum expects seconds, the record stores milliseconds.
        moment = pendulum.from_timestamp(millis / 1000)
        record[name] = moment.to_iso8601_string()

    return record
93
+
94
+
95
async def _make_request_async(
    session: aiohttp.ClientSession,
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
    max_retries: int = 3,
) -> Dict[str, Any]:
    """Send an async GET request to the RevenueCat API v2 and return the JSON body.

    Rate-limited responses (HTTP 429), transport failures and server errors
    are retried after a delay. Other client errors (4xx) are raised straight
    away because repeating the identical request cannot fix them.

    Args:
        session: aiohttp session used to issue the request.
        api_key: RevenueCat API key, sent as a Bearer token.
        endpoint: Path appended to ``REVENUECAT_API_BASE``.
        params: Optional query-string parameters.
        max_retries: Number of retries after the first attempt. Values below
            one still result in exactly one attempt.

    Returns:
        The decoded JSON body of the response.

    Raises:
        aiohttp.ClientError: If the request fails with a non-retryable status
            or still fails after the last retry.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    url = f"{REVENUECAT_API_BASE}{endpoint}"

    attempt = 0
    while True:
        can_retry = attempt < max_retries
        try:
            async with session.get(
                url, headers=headers, params=params or {}
            ) as response:
                if response.status == 429 and can_retry:
                    # Honour an integer Retry-After; the header may also be
                    # missing or an HTTP date, then back off 5, 10, 20s.
                    try:
                        delay = max(int(response.headers.get("Retry-After")), 0)
                    except (TypeError, ValueError):
                        delay = (2**attempt) * 5
                else:
                    response.raise_for_status()
                    return await response.json()
        except aiohttp.ClientResponseError as err:
            if not can_retry or 400 <= err.status < 500:
                raise
            delay = (2**attempt) * 2  # 2, 4, 8 seconds
        except aiohttp.ClientError:
            if not can_retry:
                raise
            delay = (2**attempt) * 2  # 2, 4, 8 seconds

        # Sleep only after the response context has been left, so the
        # connection is not held while waiting.
        await asyncio.sleep(delay)
        attempt += 1
141
+
142
+
143
async def _paginate_async(
    session: aiohttp.ClientSession,
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """Collect every item of a RevenueCat list endpoint asynchronously.

    Pages are requested one after another, each with the ``starting_after``
    cursor taken from the ``next_page`` URL of the previous response, until
    that URL is missing, empty, or carries no cursor.

    Args:
        session: aiohttp session used to issue the requests.
        api_key: RevenueCat API key.
        endpoint: Path of the list endpoint, relative to the API base URL.
        params: Optional extra query parameters; the mapping is copied, never
            modified.

    Returns:
        The items of all pages, concatenated in response order.
    """
    # Imported locally so the helper does not depend on a module-level import.
    from urllib.parse import parse_qs, urlparse

    items: List[Dict[str, Any]] = []
    current_params = params.copy() if params is not None else {}
    current_params["limit"] = 1000

    while True:
        data = await _make_request_async(session, api_key, endpoint, current_params)

        page_items = data.get("items")
        if page_items is not None:
            items.extend(page_items)

        next_page_url = data.get("next_page")
        if not next_page_url:
            break

        # Parse the query string properly: the cursor arrives percent-encoded
        # and is encoded again when sent, so it must be decoded here.
        cursors = parse_qs(urlparse(next_page_url).query).get("starting_after")
        if not cursors:
            break
        current_params["starting_after"] = cursors[0]

    return items
174
+
175
+
176
async def process_customer_with_nested_resources_async(
    session: aiohttp.ClientSession,
    api_key: str,
    project_id: str,
    customer: Dict[str, Any],
) -> Dict[str, Any]:
    """Fill in a customer's subscriptions and purchases and convert timestamps.

    The customer's own ``first_seen_at``/``last_seen_at`` fields are converted
    first. ``subscriptions`` and ``purchases`` are then fetched concurrently
    when they are missing or ``None`` on the record, and the timestamp fields
    of their entries are converted as well. The record is modified in place.

    Args:
        session: aiohttp session used for the nested requests.
        api_key: RevenueCat API key.
        project_id: Project the customer belongs to.
        customer: Customer record; must contain an ``id`` key.

    Returns:
        The same customer record, enriched and converted.
    """
    customer_id = customer["id"]
    customer = convert_timestamps_to_iso(customer, ["first_seen_at", "last_seen_at"])

    async def fetch_and_convert(resource_name, timestamp_fields):
        # Fetch the nested collection only when the record lacks it.
        if customer.get(resource_name) is None:
            customer[resource_name] = await _paginate_async(
                session,
                api_key,
                f"/projects/{project_id}/customers/{customer_id}/{resource_name}",
            )

        entries = customer.get(resource_name)
        if not timestamp_fields or entries is None:
            return
        for entry in entries:
            convert_timestamps_to_iso(entry, timestamp_fields)

    await asyncio.gather(
        fetch_and_convert(
            "subscriptions",
            ["purchased_at", "expires_at", "grace_period_expires_at"],
        ),
        fetch_and_convert("purchases", ["purchased_at", "expires_at"]),
    )

    return customer
209
+
210
+
211
def create_project_resource(
    resource_name: str,
    api_key: str,
    project_id: Optional[str] = None,
    timestamp_fields: Optional[List[str]] = None,
) -> Iterator[Dict[str, Any]]:
    """
    Helper function to create DLT resources for project-dependent endpoints.

    Args:
        resource_name: Name of the resource (e.g., 'products', 'entitlements', 'offerings')
        api_key: RevenueCat API key
        project_id: RevenueCat project ID
        timestamp_fields: List of timestamp fields to convert to ISO format;
            defaults to ``created_at`` and ``updated_at``

    Returns:
        Iterator over the pages of resource data, with the timestamp fields
        of every record converted to ISO format

    Raises:
        ValueError: If ``project_id`` is not provided (raised on first iteration).
    """
    if project_id is None:
        raise ValueError(f"project_id is required for {resource_name} resource")

    endpoint = f"/projects/{project_id}/{resource_name}"
    default_timestamp_fields = timestamp_fields or ["created_at", "updated_at"]

    for page in _paginate(api_key, endpoint):
        # _paginate yields one list of records per page, so the conversion
        # has to be applied to each record rather than to the page itself.
        records = [page] if isinstance(page, dict) else page
        for record in records:
            convert_timestamps_to_iso(record, default_timestamp_fields)
        yield page
@@ -13,6 +13,8 @@ def salesforce_source(
13
13
  username: str,
14
14
  password: str,
15
15
  token: str,
16
+ domain: str,
17
+ custom_object: str = None,
16
18
  ) -> Iterable[DltResource]:
17
19
  """
18
20
  Retrieves data from Salesforce using the Salesforce API.
@@ -26,7 +28,7 @@ def salesforce_source(
26
28
  DltResource: Data resources from Salesforce.
27
29
  """
28
30
 
29
- client = Salesforce(username, password, token)
31
+ client = Salesforce(username, password, token, domain=domain)
30
32
 
31
33
  # define resources
32
34
  @dlt.resource(write_disposition="replace")
@@ -37,7 +39,7 @@ def salesforce_source(
37
39
  def user_role() -> Iterable[TDataItem]:
38
40
  yield get_records(client, "UserRole")
39
41
 
40
- @dlt.resource(write_disposition="merge")
42
+ @dlt.resource(write_disposition="merge", primary_key="id")
41
43
  def opportunity(
42
44
  last_timestamp: incremental[str] = dlt.sources.incremental(
43
45
  "SystemModstamp", initial_value=None
@@ -47,7 +49,7 @@ def salesforce_source(
47
49
  client, "Opportunity", last_timestamp.last_value, "SystemModstamp"
48
50
  )
49
51
 
50
- @dlt.resource(write_disposition="merge")
52
+ @dlt.resource(write_disposition="merge", primary_key="id")
51
53
  def opportunity_line_item(
52
54
  last_timestamp: incremental[str] = dlt.sources.incremental(
53
55
  "SystemModstamp", initial_value=None
@@ -57,7 +59,7 @@ def salesforce_source(
57
59
  client, "OpportunityLineItem", last_timestamp.last_value, "SystemModstamp"
58
60
  )
59
61
 
60
- @dlt.resource(write_disposition="merge")
62
+ @dlt.resource(write_disposition="merge", primary_key="id")
61
63
  def opportunity_contact_role(
62
64
  last_timestamp: incremental[str] = dlt.sources.incremental(
63
65
  "SystemModstamp", initial_value=None
@@ -70,7 +72,7 @@ def salesforce_source(
70
72
  "SystemModstamp",
71
73
  )
72
74
 
73
- @dlt.resource(write_disposition="merge")
75
+ @dlt.resource(write_disposition="merge", primary_key="id")
74
76
  def account(
75
77
  last_timestamp: incremental[str] = dlt.sources.incremental(
76
78
  "LastModifiedDate", initial_value=None
@@ -92,7 +94,7 @@ def salesforce_source(
92
94
  def campaign() -> Iterable[TDataItem]:
93
95
  yield get_records(client, "Campaign")
94
96
 
95
- @dlt.resource(write_disposition="merge")
97
+ @dlt.resource(write_disposition="merge", primary_key="id")
96
98
  def campaign_member(
97
99
  last_timestamp: incremental[str] = dlt.sources.incremental(
98
100
  "SystemModstamp", initial_value=None
@@ -114,7 +116,7 @@ def salesforce_source(
114
116
  def pricebook_entry() -> Iterable[TDataItem]:
115
117
  yield get_records(client, "PricebookEntry")
116
118
 
117
- @dlt.resource(write_disposition="merge")
119
+ @dlt.resource(write_disposition="merge", primary_key="id")
118
120
  def task(
119
121
  last_timestamp: incremental[str] = dlt.sources.incremental(
120
122
  "SystemModstamp", initial_value=None
@@ -122,7 +124,7 @@ def salesforce_source(
122
124
  ) -> Iterable[TDataItem]:
123
125
  yield get_records(client, "Task", last_timestamp.last_value, "SystemModstamp")
124
126
 
125
- @dlt.resource(write_disposition="merge")
127
+ @dlt.resource(write_disposition="merge", primary_key="id")
126
128
  def event(
127
129
  last_timestamp: incremental[str] = dlt.sources.incremental(
128
130
  "SystemModstamp", initial_value=None
@@ -130,6 +132,10 @@ def salesforce_source(
130
132
  ) -> Iterable[TDataItem]:
131
133
  yield get_records(client, "Event", last_timestamp.last_value, "SystemModstamp")
132
134
 
135
+ @dlt.resource(write_disposition="replace")
136
+ def custom() -> Iterable[TDataItem]:
137
+ yield get_records(client, custom_object)
138
+
133
139
  return (
134
140
  user,
135
141
  user_role,
@@ -146,4 +152,5 @@ def salesforce_source(
146
152
  pricebook_entry,
147
153
  task,
148
154
  event,
155
+ custom,
149
156
  )
@@ -669,7 +669,7 @@ def shopify_source(
669
669
  params["updated_at_max"] = updated_at.end_value.isoformat()
670
670
  yield from client.get_pages("customers", params)
671
671
 
672
- @dlt.resource(primary_key="id", write_disposition="append")
672
+ @dlt.resource(primary_key="id", write_disposition="merge")
673
673
  def events(
674
674
  created_at: dlt.sources.incremental[
675
675
  pendulum.DateTime
@@ -1690,16 +1690,6 @@ query discountNodes($after: String, $query: String, $first: Int) {
1690
1690
  "nullable": True,
1691
1691
  "description": "The category of the product from Shopify's Standard Product Taxonomy.",
1692
1692
  },
1693
- "combinedListing": {
1694
- "data_type": "json",
1695
- "nullable": True,
1696
- "description": "A special product type that combines separate products into a single product listing.",
1697
- },
1698
- "combinedListingRole": {
1699
- "data_type": "json",
1700
- "nullable": True,
1701
- "description": "The role of the product in a combined listing.",
1702
- },
1703
1693
  "compareAtPriceRange": {
1704
1694
  "data_type": "json",
1705
1695
  "nullable": True,
@@ -1841,12 +1831,6 @@ query products($after: String, $query: String, $first: Int) {
1841
1831
  category {
1842
1832
  id
1843
1833
  }
1844
- combinedListing {
1845
- parentProduct {
1846
- id
1847
- }
1848
- }
1849
- combinedListingRole
1850
1834
  compareAtPriceRange {
1851
1835
  maxVariantCompareAtPrice {
1852
1836
  amount
@@ -0,0 +1,82 @@
1
+ from typing import Iterable
2
+
3
+ import dlt
4
+ import smartsheet # type: ignore
5
+ from dlt.extract import DltResource
6
+ from smartsheet.models.enums import ColumnType # type: ignore
7
+ from smartsheet.models.sheet import Sheet # type: ignore
8
+
9
# Maps Smartsheet column types to the ``data_type`` strings used in the column
# hints built by ``_generate_type_hints``. Column types missing from this table
# get no hint at all.
TYPE_MAPPING = {
    ColumnType.TEXT_NUMBER: "text",
    ColumnType.DATE: "date",
    ColumnType.DATETIME: "timestamp",
    ColumnType.CONTACT_LIST: "text",
    ColumnType.CHECKBOX: "bool",
    ColumnType.PICKLIST: "text",
    ColumnType.DURATION: "text",
    ColumnType.PREDECESSOR: "text",
    ColumnType.ABSTRACT_DATETIME: "timestamp",
    ColumnType.MULTI_CONTACT_LIST: "text",
    ColumnType.MULTI_PICKLIST: "text",
}
22
+
23
+
24
@dlt.source
def smartsheet_source(
    access_token: str,
    sheet_id: str,
) -> Iterable[DltResource]:
    """
    A DLT source that loads a single Smartsheet sheet.

    The sheet is exposed as one resource named ``sheet_<name>``, where
    ``<name>`` is the sheet name lower-cased with spaces replaced by
    underscores. The resource uses the "replace" write disposition.

    Args:
        access_token: The Smartsheet API access token.
        sheet_id: The ID of the sheet to load; must be convertible to int.

    Returns:
        An iterable of DLT resources.
    """
    client = smartsheet.Smartsheet(access_token)
    # Surface API errors as exceptions instead of error objects.
    client.errors_as_exceptions(True)

    # The SDK expects the sheet id as an int.
    numeric_sheet_id = int(sheet_id)

    # First fetch (with objectValue) is used only for the sheet name.
    named_sheet = client.Sheets.get_sheet(numeric_sheet_id, include=["objectValue"])
    normalized_name = named_sheet.name.replace(" ", "_").lower()

    # Second fetch provides the rows and columns that are loaded.
    sheet = client.Sheets.get_sheet(numeric_sheet_id)

    yield dlt.resource(
        _get_sheet_data(sheet),
        name=f"sheet_{normalized_name}",
        columns=_generate_type_hints(sheet),
        write_disposition="replace",
    )
61
+
62
+
63
def _get_sheet_data(sheet: Sheet):
    """Yield one dict per sheet row, keyed by column title.

    Every dict also carries the row id under ``_row_id``. Cells are matched
    to column titles by their position within the row.
    """
    titles = [column.title for column in sheet.columns]

    for row in sheet.rows:
        record = {"_row_id": row.id}
        record.update(
            (titles[position], cell.value)
            for position, cell in enumerate(row.cells)
        )
        yield record
72
+
73
+
74
def _generate_type_hints(sheet: Sheet):
    """Build column hints for the sheet's columns, keyed by column title.

    Only columns whose type appears in ``TYPE_MAPPING`` receive a hint; each
    hint marks the column as nullable.
    """
    hints = {}
    for column in sheet.columns:
        column_type = column.type.value
        if column_type not in TYPE_MAPPING:
            continue
        hints[column.title] = {
            "data_type": TYPE_MAPPING.get(column_type),
            "nullable": True,
        }
    return hints