ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146)
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,237 @@
1
+ import asyncio
2
+ import time
3
+ from typing import Any, Dict, Iterator, List, Optional
4
+
5
+ import aiohttp
6
+ import pendulum
7
+ import requests
8
+
9
# Base URL for RevenueCat's v2 REST API; endpoint paths are appended to it.
REVENUECAT_API_BASE = "https://api.revenuecat.com/v2"
10
+
11
+
12
def _make_request(
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
    max_retries: int = 3,
) -> Dict[str, Any]:
    """Make a REST API request to RevenueCat API v2 with rate limiting.

    Args:
        api_key: RevenueCat secret API key, sent as a Bearer token.
        endpoint: API path appended to the v2 base URL, e.g. "/projects".
        params: Optional query parameters.
        max_retries: Number of retries after the initial attempt.

    Returns:
        The parsed JSON response body.

    Raises:
        requests.exceptions.RequestException: When all attempts fail.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    url = f"{REVENUECAT_API_BASE}{endpoint}"

    for attempt in range(max_retries + 1):
        try:
            response = requests.get(url, headers=headers, params=params or {})

            # Rate limited (429): honor Retry-After when provided, otherwise
            # back off exponentially (5, 10, 20 seconds). On the last attempt
            # we fall through to raise_for_status() so the error propagates.
            if response.status_code == 429 and attempt < max_retries:
                retry_after = response.headers.get("Retry-After")
                wait_time = int(retry_after) if retry_after else (2**attempt) * 5
                time.sleep(wait_time)
                continue

            response.raise_for_status()
            return response.json()

        except requests.exceptions.RequestException:
            if attempt < max_retries:
                # Transient failure: back off 2, 4, 8 seconds before retrying.
                time.sleep((2**attempt) * 2)
                continue
            raise

    # Every path through the final loop iteration returns or raises, so this
    # point is unreachable. The previous tail re-used the last response here,
    # which could mask errors; fail loudly instead.
    raise RuntimeError("retry loop exited without a response")
55
+
56
+
57
def _paginate(
    api_key: str, endpoint: str, params: Optional[Dict[str, Any]] = None
) -> Iterator[Dict[str, Any]]:
    """Paginate through RevenueCat API results, yielding individual records.

    Follows ``next_page`` links by extracting the ``starting_after`` cursor
    until no further page is available.
    """
    current_params = dict(params) if params is not None else {}
    current_params["limit"] = 1000  # maximum page size accepted by the API

    while True:
        data = _make_request(api_key, endpoint, current_params)

        items = data.get("items")
        if items is not None:
            # Yield records one by one. The previous implementation yielded
            # the whole page list, contradicting the Iterator[Dict] return
            # type and turning per-record transforms downstream (e.g.
            # convert_timestamps_to_iso in create_project_resource) into
            # silent no-ops.
            yield from items

        if "next_page" not in data:
            break

        # Advance the cursor extracted from the next_page URL.
        next_page_url = data["next_page"]
        if next_page_url and "starting_after=" in next_page_url:
            current_params["starting_after"] = (
                next_page_url.split("starting_after=")[1].split("&")[0]
            )
        else:
            break
80
+
81
+
82
def convert_timestamps_to_iso(
    record: Dict[str, Any], timestamp_fields: List[str]
) -> Dict[str, Any]:
    """Convert millisecond-epoch fields on *record* to ISO-8601 strings.

    Mutates *record* in place and returns it; fields that are absent or
    ``None`` are left untouched.
    """
    for name in timestamp_fields:
        value = record.get(name)
        if value is not None:
            record[name] = pendulum.from_timestamp(value / 1000).to_iso8601_string()
    return record
93
+
94
+
95
async def _make_request_async(
    session: aiohttp.ClientSession,
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
    max_retries: int = 3,
) -> Dict[str, Any]:
    """Make an async REST API request to RevenueCat API v2 with rate limiting.

    Args:
        session: Shared aiohttp client session.
        api_key: RevenueCat secret API key, sent as a Bearer token.
        endpoint: API path appended to the v2 base URL.
        params: Optional query parameters.
        max_retries: Number of retries after the initial attempt.

    Returns:
        The parsed JSON response body.

    Raises:
        aiohttp.ClientError: When all attempts fail.
    """
    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json",
    }
    url = f"{REVENUECAT_API_BASE}{endpoint}"

    for attempt in range(max_retries + 1):
        try:
            async with session.get(
                url, headers=headers, params=params or {}
            ) as response:
                # Rate limited (429): honor Retry-After when provided,
                # otherwise back off exponentially (5, 10, 20 seconds). On
                # the final attempt fall through to raise_for_status().
                if response.status == 429 and attempt < max_retries:
                    retry_after = response.headers.get("Retry-After")
                    wait_time = (
                        int(retry_after) if retry_after else (2**attempt) * 5
                    )
                    await asyncio.sleep(wait_time)
                    continue

                response.raise_for_status()
                return await response.json()

        except aiohttp.ClientError:
            if attempt < max_retries:
                # Transient failure: back off 2, 4, 8 seconds before retrying.
                await asyncio.sleep((2**attempt) * 2)
                continue
            raise

    # Every path through the final loop iteration returns or raises, so this
    # point is unreachable. The previous tail issued a brand-new, unretried
    # request here; fail loudly instead.
    raise RuntimeError("retry loop exited without a response")
141
+
142
+
143
async def _paginate_async(
    session: aiohttp.ClientSession,
    api_key: str,
    endpoint: str,
    params: Optional[Dict[str, Any]] = None,
) -> List[Dict[str, Any]]:
    """Collect every page of a RevenueCat API listing into a single list.

    Follows ``next_page`` links via the ``starting_after`` cursor until no
    further page is available.
    """
    collected: List[Dict[str, Any]] = []
    query = dict(params) if params is not None else {}
    query["limit"] = 1000  # maximum page size accepted by the API

    while True:
        data = await _make_request_async(session, api_key, endpoint, query)

        page_items = data.get("items")
        if page_items is not None:
            collected.extend(page_items)

        if "next_page" not in data:
            break

        next_page_url = data["next_page"]
        if not (next_page_url and "starting_after=" in next_page_url):
            break

        # Advance the pagination cursor extracted from the next_page URL.
        cursor = next_page_url.split("starting_after=")[1].split("&")[0]
        query["starting_after"] = cursor

    return collected
174
+
175
+
176
async def process_customer_with_nested_resources_async(
    session: aiohttp.ClientSession,
    api_key: str,
    project_id: str,
    customer: Dict[str, Any],
) -> Dict[str, Any]:
    """Attach subscriptions and purchases to *customer* and normalize timestamps.

    Nested collections that are missing from the customer payload are fetched
    concurrently; timestamp fields on the customer and on each nested record
    are converted to ISO-8601 strings in place.
    """
    customer_id = customer["id"]
    customer = convert_timestamps_to_iso(customer, ["first_seen_at", "last_seen_at"])

    async def _load(name: str, ts_fields: List[str]) -> None:
        # Fetch the nested collection only when the payload does not already
        # carry a non-null value for it.
        if customer.get(name) is None:
            endpoint = f"/projects/{project_id}/customers/{customer_id}/{name}"
            customer[name] = await _paginate_async(session, api_key, endpoint)
        records = customer.get(name)
        if ts_fields and records is not None:
            for record in records:
                convert_timestamps_to_iso(record, ts_fields)

    await asyncio.gather(
        _load(
            "subscriptions",
            ["purchased_at", "expires_at", "grace_period_expires_at"],
        ),
        _load("purchases", ["purchased_at", "expires_at"]),
    )

    return customer
209
+
210
+
211
def create_project_resource(
    resource_name: str,
    api_key: str,
    project_id: Optional[str] = None,
    timestamp_fields: Optional[List[str]] = None,
) -> Iterator[Dict[str, Any]]:
    """
    Helper function to create DLT resources for project-dependent endpoints.

    Args:
        resource_name: Name of the resource (e.g., 'products', 'entitlements', 'offerings')
        api_key: RevenueCat API key
        project_id: RevenueCat project ID (required; the default exists only
            so callers can bind it by keyword)
        timestamp_fields: List of timestamp fields to convert to ISO format

    Returns:
        Iterator of resource data

    Raises:
        ValueError: If ``project_id`` is not provided.
    """
    if project_id is None:
        raise ValueError(f"project_id is required for {resource_name} resource")

    endpoint = f"/projects/{project_id}/{resource_name}"
    # Most project sub-resources expose created_at/updated_at timestamps.
    default_timestamp_fields = timestamp_fields or ["created_at", "updated_at"]

    for item in _paginate(api_key, endpoint):
        yield convert_timestamps_to_iso(item, default_timestamp_fields)
@@ -0,0 +1,156 @@
1
from typing import Iterable, Optional

import dlt
from dlt.common.typing import TDataItem
from dlt.sources import DltResource, incremental
from simple_salesforce import Salesforce

from .helpers import get_records
9
+
10
+
11
@dlt.source(name="salesforce")
def salesforce_source(
    username: str,
    password: str,
    token: str,
    domain: str,
    custom_object: Optional[str] = None,
) -> Iterable[DltResource]:
    """
    Retrieves data from Salesforce using the Salesforce API.

    Args:
        username (str): The username for authentication.
        password (str): The password for authentication.
        token (str): The security token for authentication.
        domain (str): The Salesforce domain to authenticate against.
        custom_object (str, optional): Name of the sObject served by the
            ``custom`` resource. Required only when that resource is used.

    Yields:
        DltResource: Data resources from Salesforce.
    """

    client = Salesforce(username, password, token, domain=domain)

    # Small, mostly-static objects are fully reloaded ("replace"); high-churn
    # objects are merged incrementally on their modification timestamp.
    @dlt.resource(write_disposition="replace")
    def user() -> Iterable[TDataItem]:
        yield get_records(client, "User")

    @dlt.resource(write_disposition="replace")
    def user_role() -> Iterable[TDataItem]:
        yield get_records(client, "UserRole")

    @dlt.resource(write_disposition="merge", primary_key="id")
    def opportunity(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "SystemModstamp", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(
            client, "Opportunity", last_timestamp.last_value, "SystemModstamp"
        )

    @dlt.resource(write_disposition="merge", primary_key="id")
    def opportunity_line_item(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "SystemModstamp", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(
            client, "OpportunityLineItem", last_timestamp.last_value, "SystemModstamp"
        )

    @dlt.resource(write_disposition="merge", primary_key="id")
    def opportunity_contact_role(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "SystemModstamp", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(
            client,
            "OpportunityContactRole",
            last_timestamp.last_value,
            "SystemModstamp",
        )

    # Account uses LastModifiedDate rather than SystemModstamp as its
    # replication key.
    @dlt.resource(write_disposition="merge", primary_key="id")
    def account(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "LastModifiedDate", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(
            client, "Account", last_timestamp.last_value, "LastModifiedDate"
        )

    @dlt.resource(write_disposition="replace")
    def contact() -> Iterable[TDataItem]:
        yield get_records(client, "Contact")

    @dlt.resource(write_disposition="replace")
    def lead() -> Iterable[TDataItem]:
        yield get_records(client, "Lead")

    @dlt.resource(write_disposition="replace")
    def campaign() -> Iterable[TDataItem]:
        yield get_records(client, "Campaign")

    @dlt.resource(write_disposition="merge", primary_key="id")
    def campaign_member(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "SystemModstamp", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(
            client, "CampaignMember", last_timestamp.last_value, "SystemModstamp"
        )

    @dlt.resource(write_disposition="replace")
    def product() -> Iterable[TDataItem]:
        yield get_records(client, "Product2")

    @dlt.resource(write_disposition="replace")
    def pricebook() -> Iterable[TDataItem]:
        yield get_records(client, "Pricebook2")

    @dlt.resource(write_disposition="replace")
    def pricebook_entry() -> Iterable[TDataItem]:
        yield get_records(client, "PricebookEntry")

    @dlt.resource(write_disposition="merge", primary_key="id")
    def task(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "SystemModstamp", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(client, "Task", last_timestamp.last_value, "SystemModstamp")

    @dlt.resource(write_disposition="merge", primary_key="id")
    def event(
        last_timestamp: incremental[str] = dlt.sources.incremental(
            "SystemModstamp", initial_value=None
        ),
    ) -> Iterable[TDataItem]:
        yield get_records(client, "Event", last_timestamp.last_value, "SystemModstamp")

    @dlt.resource(write_disposition="replace")
    def custom() -> Iterable[TDataItem]:
        # Guard early: without this, a missing custom_object surfaces as an
        # opaque getattr/describe failure inside get_records.
        if custom_object is None:
            raise ValueError(
                "custom_object must be provided to load the 'custom' resource"
            )
        yield get_records(client, custom_object)

    return (
        user,
        user_role,
        opportunity,
        opportunity_line_item,
        opportunity_contact_role,
        account,
        contact,
        lead,
        campaign,
        campaign_member,
        product,
        pricebook,
        pricebook_entry,
        task,
        event,
        custom,
    )
@@ -0,0 +1,64 @@
1
+ """Salesforce source helpers"""
2
+
3
+ from typing import Iterable, Optional
4
+
5
+ import pendulum
6
+ from dlt.common.typing import TDataItem
7
+ from simple_salesforce import Salesforce
8
+
9
+
10
def get_records(
    sf: Salesforce,
    sobject: str,
    last_state: Optional[str] = None,
    replication_key: Optional[str] = None,
) -> Iterable[TDataItem]:
    """
    Retrieves records from Salesforce for a specified sObject.

    Args:
        sf (Salesforce): An instance of the Salesforce API client.
        sobject (str): The name of the sObject to retrieve records from.
        last_state (str, optional): The last known state for incremental loading. Defaults to None.
        replication_key (str, optional): The replication key for incremental loading. Defaults to None.

    Yields:
        Dict[TDataItem]: A dictionary representing a record from the Salesforce sObject.
    """

    # Get all fields for the sobject
    desc = getattr(sf, sobject).describe()
    # Salesforce also exposes compound fields (e.g. addresses) split into
    # component fields; query only the components, except "Name".
    compound_fields = {
        f["compoundFieldName"]
        for f in desc["fields"]
        if f["compoundFieldName"] is not None
    } - {"Name"}
    # The Bulk API returns datetime fields as epoch milliseconds; these are
    # converted to ISO 8601 strings below.
    date_fields = {
        f["name"] for f in desc["fields"] if f["type"] == "datetime" and f["name"]
    }
    # Use all fields except compound fields
    fields = [f["name"] for f in desc["fields"] if f["name"] not in compound_fields]

    # Build an incremental predicate when a replication key is given. SOQL
    # datetime literals are unquoted, so the state value is interpolated as-is.
    predicate, order_by = "", ""
    if replication_key:
        if last_state:
            predicate = f"WHERE {replication_key} > {last_state}"
        order_by = f"ORDER BY {replication_key} ASC"
    query = f"SELECT {', '.join(fields)} FROM {sobject} {predicate} {order_by}"

    # Stream results in batches via the Bulk API; each page is mutated in
    # place before being yielded.
    for page in getattr(sf.bulk, sobject).query_all(query, lazy_operation=True):
        for record in page:
            # Strip out the Salesforce metadata field
            record.pop("attributes", None)
            for field in date_fields:
                # Convert epoch milliseconds to an ISO 8601 UTC string
                if record.get(field):
                    record[field] = pendulum.from_timestamp(
                        record[field] / 1000,
                    ).strftime("%Y-%m-%dT%H:%M:%S.%fZ")
        yield from page
@@ -669,7 +669,7 @@ def shopify_source(
669
669
  params["updated_at_max"] = updated_at.end_value.isoformat()
670
670
  yield from client.get_pages("customers", params)
671
671
 
672
- @dlt.resource(primary_key="id", write_disposition="append")
672
+ @dlt.resource(primary_key="id", write_disposition="merge")
673
673
  def events(
674
674
  created_at: dlt.sources.incremental[
675
675
  pendulum.DateTime
@@ -1690,16 +1690,6 @@ query discountNodes($after: String, $query: String, $first: Int) {
1690
1690
  "nullable": True,
1691
1691
  "description": "The category of the product from Shopify's Standard Product Taxonomy.",
1692
1692
  },
1693
- "combinedListing": {
1694
- "data_type": "json",
1695
- "nullable": True,
1696
- "description": "A special product type that combines separate products into a single product listing.",
1697
- },
1698
- "combinedListingRole": {
1699
- "data_type": "json",
1700
- "nullable": True,
1701
- "description": "The role of the product in a combined listing.",
1702
- },
1703
1693
  "compareAtPriceRange": {
1704
1694
  "data_type": "json",
1705
1695
  "nullable": True,
@@ -1841,12 +1831,6 @@ query products($after: String, $query: String, $first: Int) {
1841
1831
  category {
1842
1832
  id
1843
1833
  }
1844
- combinedListing {
1845
- parentProduct {
1846
- id
1847
- }
1848
- }
1849
- combinedListingRole
1850
1834
  compareAtPriceRange {
1851
1835
  maxVariantCompareAtPrice {
1852
1836
  amount
@@ -0,0 +1,82 @@
1
+ from typing import Iterable
2
+
3
+ import dlt
4
+ import smartsheet # type: ignore
5
+ from dlt.extract import DltResource
6
+ from smartsheet.models.enums import ColumnType # type: ignore
7
+ from smartsheet.models.sheet import Sheet # type: ignore
8
+
9
# Map Smartsheet column types to dlt column data types. Column types absent
# from this mapping receive no type hint (see _generate_type_hints).
TYPE_MAPPING = {
    ColumnType.TEXT_NUMBER: "text",
    ColumnType.DATE: "date",
    ColumnType.DATETIME: "timestamp",
    ColumnType.CONTACT_LIST: "text",
    ColumnType.CHECKBOX: "bool",
    ColumnType.PICKLIST: "text",
    ColumnType.DURATION: "text",
    ColumnType.PREDECESSOR: "text",
    ColumnType.ABSTRACT_DATETIME: "timestamp",
    ColumnType.MULTI_CONTACT_LIST: "text",
    ColumnType.MULTI_PICKLIST: "text",
}
22
+
23
+
24
@dlt.source
def smartsheet_source(
    access_token: str,
    sheet_id: str,
) -> Iterable[DltResource]:
    """
    A DLT source that loads a single Smartsheet sheet.

    Args:
        access_token: The Smartsheet API access token.
        sheet_id: The ID of the sheet to load.

    Returns:
        An iterable of DLT resources.
    """

    client = smartsheet.Smartsheet(access_token)
    client.errors_as_exceptions(True)

    # The SDK expects an integer sheet id.
    numeric_sheet_id = int(sheet_id)

    # First fetch (with objectValue) guarantees the sheet's `name` attribute
    # is populated; the name is sanitized into a valid resource name.
    named_sheet = client.Sheets.get_sheet(numeric_sheet_id, include=["objectValue"])
    resource_name = "sheet_" + named_sheet.name.replace(" ", "_").lower()

    # Second fetch retrieves the sheet whose rows are actually loaded.
    sheet = client.Sheets.get_sheet(numeric_sheet_id)

    yield dlt.resource(
        _get_sheet_data(sheet),
        name=resource_name,
        columns=_generate_type_hints(sheet),
        write_disposition="replace",
    )
61
+
62
+
63
def _get_sheet_data(sheet: Sheet):
    """Yield each sheet row as a dict keyed by column title, plus `_row_id`.

    Cells are matched to columns positionally, mirroring the order the
    Smartsheet API returns them in.
    """

    titles = [column.title for column in sheet.columns]
    for row in sheet.rows:
        record = {"_row_id": row.id}
        record.update(
            (titles[index], cell.value) for index, cell in enumerate(row.cells)
        )
        yield record
72
+
73
+
74
def _generate_type_hints(sheet: Sheet):
    """Build dlt column hints for columns whose Smartsheet type is mapped.

    Columns with an unmapped type are omitted so dlt infers them instead.
    """
    hints = {}
    for column in sheet.columns:
        column_type = column.type.value
        if column_type in TYPE_MAPPING:
            hints[column.title] = {
                "data_type": TYPE_MAPPING.get(column_type),
                "nullable": True,
            }
    return hints