omniload 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. omniload/conftest.py +72 -0
  2. omniload/main.py +810 -0
  3. omniload/src/.gitignore +10 -0
  4. omniload/src/adjust/__init__.py +108 -0
  5. omniload/src/adjust/adjust_helpers.py +122 -0
  6. omniload/src/airtable/__init__.py +84 -0
  7. omniload/src/allium/__init__.py +128 -0
  8. omniload/src/anthropic/__init__.py +277 -0
  9. omniload/src/anthropic/helpers.py +525 -0
  10. omniload/src/applovin/__init__.py +316 -0
  11. omniload/src/applovin_max/__init__.py +117 -0
  12. omniload/src/appsflyer/__init__.py +325 -0
  13. omniload/src/appsflyer/client.py +110 -0
  14. omniload/src/appstore/__init__.py +142 -0
  15. omniload/src/appstore/client.py +126 -0
  16. omniload/src/appstore/errors.py +15 -0
  17. omniload/src/appstore/models.py +117 -0
  18. omniload/src/appstore/resources.py +179 -0
  19. omniload/src/arrow/__init__.py +81 -0
  20. omniload/src/asana_source/__init__.py +281 -0
  21. omniload/src/asana_source/helpers.py +30 -0
  22. omniload/src/asana_source/settings.py +158 -0
  23. omniload/src/attio/__init__.py +102 -0
  24. omniload/src/attio/helpers.py +65 -0
  25. omniload/src/blob.py +95 -0
  26. omniload/src/bruin/__init__.py +76 -0
  27. omniload/src/chess/__init__.py +180 -0
  28. omniload/src/chess/helpers.py +35 -0
  29. omniload/src/chess/settings.py +18 -0
  30. omniload/src/clickup/__init__.py +85 -0
  31. omniload/src/clickup/helpers.py +47 -0
  32. omniload/src/collector/spinner.py +43 -0
  33. omniload/src/couchbase_source/__init__.py +118 -0
  34. omniload/src/couchbase_source/helpers.py +135 -0
  35. omniload/src/cursor/__init__.py +83 -0
  36. omniload/src/cursor/helpers.py +188 -0
  37. omniload/src/customer_io/__init__.py +486 -0
  38. omniload/src/customer_io/helpers.py +530 -0
  39. omniload/src/destinations.py +982 -0
  40. omniload/src/docebo/__init__.py +589 -0
  41. omniload/src/docebo/client.py +435 -0
  42. omniload/src/docebo/helpers.py +97 -0
  43. omniload/src/dune/__init__.py +104 -0
  44. omniload/src/dune/helpers.py +108 -0
  45. omniload/src/dynamodb/__init__.py +86 -0
  46. omniload/src/elasticsearch/__init__.py +80 -0
  47. omniload/src/elasticsearch/helpers.py +141 -0
  48. omniload/src/errors.py +26 -0
  49. omniload/src/facebook_ads/__init__.py +403 -0
  50. omniload/src/facebook_ads/exceptions.py +19 -0
  51. omniload/src/facebook_ads/helpers.py +296 -0
  52. omniload/src/facebook_ads/settings.py +224 -0
  53. omniload/src/facebook_ads/utils.py +53 -0
  54. omniload/src/factory.py +305 -0
  55. omniload/src/filesystem/__init__.py +133 -0
  56. omniload/src/filesystem/helpers.py +114 -0
  57. omniload/src/filesystem/readers.py +187 -0
  58. omniload/src/filters.py +62 -0
  59. omniload/src/fireflies/__init__.py +151 -0
  60. omniload/src/fireflies/helpers.py +753 -0
  61. omniload/src/fluxx/__init__.py +10013 -0
  62. omniload/src/fluxx/helpers.py +233 -0
  63. omniload/src/frankfurter/__init__.py +157 -0
  64. omniload/src/frankfurter/helpers.py +48 -0
  65. omniload/src/freshdesk/__init__.py +103 -0
  66. omniload/src/freshdesk/freshdesk_client.py +151 -0
  67. omniload/src/freshdesk/settings.py +23 -0
  68. omniload/src/fundraiseup/__init__.py +95 -0
  69. omniload/src/fundraiseup/client.py +81 -0
  70. omniload/src/github/__init__.py +202 -0
  71. omniload/src/github/helpers.py +207 -0
  72. omniload/src/github/queries.py +129 -0
  73. omniload/src/github/settings.py +24 -0
  74. omniload/src/google_ads/__init__.py +198 -0
  75. omniload/src/google_ads/field.py +17 -0
  76. omniload/src/google_ads/metrics.py +254 -0
  77. omniload/src/google_ads/predicates.py +37 -0
  78. omniload/src/google_ads/reports.py +411 -0
  79. omniload/src/google_ads/test_google_ads.py +184 -0
  80. omniload/src/google_analytics/__init__.py +144 -0
  81. omniload/src/google_analytics/helpers.py +312 -0
  82. omniload/src/google_sheets/README.md +95 -0
  83. omniload/src/google_sheets/__init__.py +166 -0
  84. omniload/src/google_sheets/helpers/__init__.py +15 -0
  85. omniload/src/google_sheets/helpers/api_calls.py +160 -0
  86. omniload/src/google_sheets/helpers/data_processing.py +316 -0
  87. omniload/src/gorgias/__init__.py +595 -0
  88. omniload/src/gorgias/helpers.py +166 -0
  89. omniload/src/hostaway/__init__.py +302 -0
  90. omniload/src/hostaway/client.py +288 -0
  91. omniload/src/http/__init__.py +38 -0
  92. omniload/src/http/readers.py +146 -0
  93. omniload/src/http_client.py +24 -0
  94. omniload/src/hubspot/__init__.py +800 -0
  95. omniload/src/hubspot/helpers.py +417 -0
  96. omniload/src/hubspot/settings.py +329 -0
  97. omniload/src/indeed/__init__.py +153 -0
  98. omniload/src/indeed/helpers.py +228 -0
  99. omniload/src/influxdb/__init__.py +46 -0
  100. omniload/src/influxdb/client.py +34 -0
  101. omniload/src/intercom/__init__.py +142 -0
  102. omniload/src/intercom/helpers.py +674 -0
  103. omniload/src/intercom/settings.py +279 -0
  104. omniload/src/isoc_pulse/__init__.py +159 -0
  105. omniload/src/jira_source/__init__.py +377 -0
  106. omniload/src/jira_source/helpers.py +510 -0
  107. omniload/src/jira_source/settings.py +184 -0
  108. omniload/src/kafka/__init__.py +120 -0
  109. omniload/src/kafka/helpers.py +241 -0
  110. omniload/src/kinesis/__init__.py +153 -0
  111. omniload/src/kinesis/helpers.py +96 -0
  112. omniload/src/klaviyo/__init__.py +237 -0
  113. omniload/src/klaviyo/client.py +212 -0
  114. omniload/src/klaviyo/helpers.py +19 -0
  115. omniload/src/linear/__init__.py +634 -0
  116. omniload/src/linear/helpers.py +111 -0
  117. omniload/src/linkedin_ads/__init__.py +266 -0
  118. omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
  119. omniload/src/linkedin_ads/helpers.py +246 -0
  120. omniload/src/loader.py +69 -0
  121. omniload/src/mailchimp/__init__.py +126 -0
  122. omniload/src/mailchimp/helpers.py +226 -0
  123. omniload/src/mailchimp/settings.py +164 -0
  124. omniload/src/masking.py +344 -0
  125. omniload/src/mixpanel/__init__.py +62 -0
  126. omniload/src/mixpanel/client.py +104 -0
  127. omniload/src/monday/__init__.py +246 -0
  128. omniload/src/monday/helpers.py +392 -0
  129. omniload/src/monday/settings.py +325 -0
  130. omniload/src/mongodb/__init__.py +281 -0
  131. omniload/src/mongodb/helpers.py +975 -0
  132. omniload/src/notion/__init__.py +69 -0
  133. omniload/src/notion/helpers/__init__.py +14 -0
  134. omniload/src/notion/helpers/client.py +178 -0
  135. omniload/src/notion/helpers/database.py +92 -0
  136. omniload/src/notion/settings.py +17 -0
  137. omniload/src/partition.py +32 -0
  138. omniload/src/personio/__init__.py +345 -0
  139. omniload/src/personio/helpers.py +100 -0
  140. omniload/src/phantombuster/__init__.py +65 -0
  141. omniload/src/phantombuster/client.py +87 -0
  142. omniload/src/pinterest/__init__.py +82 -0
  143. omniload/src/pipedrive/__init__.py +212 -0
  144. omniload/src/pipedrive/helpers/__init__.py +37 -0
  145. omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
  146. omniload/src/pipedrive/helpers/pages.py +129 -0
  147. omniload/src/pipedrive/settings.py +41 -0
  148. omniload/src/pipedrive/typing.py +17 -0
  149. omniload/src/plusvibeai/__init__.py +335 -0
  150. omniload/src/plusvibeai/helpers.py +544 -0
  151. omniload/src/plusvibeai/settings.py +252 -0
  152. omniload/src/primer/__init__.py +45 -0
  153. omniload/src/primer/helpers.py +79 -0
  154. omniload/src/quickbooks/__init__.py +117 -0
  155. omniload/src/reddit_ads/__init__.py +183 -0
  156. omniload/src/reddit_ads/helpers.py +232 -0
  157. omniload/src/resource.py +40 -0
  158. omniload/src/revenuecat/__init__.py +83 -0
  159. omniload/src/revenuecat/helpers.py +237 -0
  160. omniload/src/salesforce/__init__.py +170 -0
  161. omniload/src/salesforce/helpers.py +78 -0
  162. omniload/src/shopify/__init__.py +1953 -0
  163. omniload/src/shopify/exceptions.py +17 -0
  164. omniload/src/shopify/helpers.py +202 -0
  165. omniload/src/shopify/settings.py +19 -0
  166. omniload/src/slack/__init__.py +290 -0
  167. omniload/src/slack/helpers.py +218 -0
  168. omniload/src/slack/settings.py +36 -0
  169. omniload/src/smartsheets/__init__.py +82 -0
  170. omniload/src/snapchat_ads/__init__.py +455 -0
  171. omniload/src/snapchat_ads/client.py +72 -0
  172. omniload/src/snapchat_ads/helpers.py +630 -0
  173. omniload/src/snapchat_ads/settings.py +130 -0
  174. omniload/src/socrata_source/__init__.py +83 -0
  175. omniload/src/socrata_source/helpers.py +85 -0
  176. omniload/src/socrata_source/settings.py +8 -0
  177. omniload/src/solidgate/__init__.py +219 -0
  178. omniload/src/solidgate/helpers.py +154 -0
  179. omniload/src/sources.py +5408 -0
  180. omniload/src/sql_database/__init__.py +0 -0
  181. omniload/src/sql_database/callbacks.py +66 -0
  182. omniload/src/stripe_analytics/__init__.py +183 -0
  183. omniload/src/stripe_analytics/helpers.py +386 -0
  184. omniload/src/stripe_analytics/settings.py +80 -0
  185. omniload/src/table_definition.py +15 -0
  186. omniload/src/testdata/fakebqcredentials.json +14 -0
  187. omniload/src/tiktok_ads/__init__.py +150 -0
  188. omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
  189. omniload/src/time.py +11 -0
  190. omniload/src/trustpilot/__init__.py +48 -0
  191. omniload/src/trustpilot/client.py +48 -0
  192. omniload/src/version.py +6 -0
  193. omniload/src/wise/__init__.py +68 -0
  194. omniload/src/wise/client.py +63 -0
  195. omniload/src/zendesk/__init__.py +480 -0
  196. omniload/src/zendesk/helpers/__init__.py +39 -0
  197. omniload/src/zendesk/helpers/api_helpers.py +119 -0
  198. omniload/src/zendesk/helpers/credentials.py +68 -0
  199. omniload/src/zendesk/helpers/talk_api.py +132 -0
  200. omniload/src/zendesk/settings.py +71 -0
  201. omniload/src/zoom/__init__.py +99 -0
  202. omniload/src/zoom/helpers.py +102 -0
  203. omniload/testdata/.gitignore +2 -0
  204. omniload/testdata/create_replace.csv +21 -0
  205. omniload/testdata/delete_insert_expected.csv +6 -0
  206. omniload/testdata/delete_insert_part1.csv +5 -0
  207. omniload/testdata/delete_insert_part2.csv +6 -0
  208. omniload/testdata/merge_expected.csv +5 -0
  209. omniload/testdata/merge_part1.csv +4 -0
  210. omniload/testdata/merge_part2.csv +5 -0
  211. omniload/tests/unit/test_smartsheets.py +133 -0
  212. omniload-0.0.0.dev0.dist-info/METADATA +439 -0
  213. omniload-0.0.0.dev0.dist-info/RECORD +218 -0
  214. omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
  215. omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
  216. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
  217. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
  218. omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
@@ -0,0 +1,288 @@
1
+ from typing import Callable, Iterable, Optional
2
+
3
+ import pendulum
4
+ from dlt.sources.helpers.requests import Client
5
+
6
+
7
class HostawayClient:
    """Client for the Hostaway ``/v1`` REST endpoints.

    Every ``fetch_*`` method is a generator of plain ``dict`` records.
    A response body is accepted either as a bare JSON list or as an object
    whose ``"result"`` key holds a list (many records) or an object (one
    record); any other shape produces no records.
    """

    BASE_URL = "https://api.hostaway.com"

    def __init__(self, api_key: str) -> None:
        """Create the HTTP session and attach ``api_key`` as a bearer token."""
        # The session is built with raise_for_status=False; every request
        # below calls response.raise_for_status() explicitly instead.
        self.session = Client(raise_for_status=False).session
        self.session.headers.update({"Authorization": f"Bearer {api_key}"})

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _extract_items(response_data):
        """Return the record payload of a decoded response body.

        ``{"result": X}`` -> ``X``; a bare list -> that list; anything else
        -> ``[]``.
        """
        if isinstance(response_data, dict) and "result" in response_data:
            return response_data["result"]
        if isinstance(response_data, list):
            return response_data
        return []

    @staticmethod
    def _coerce_timestamp(record: dict, field: str) -> dict:
        """Replace ``record[field]`` in place with a parsed pendulum value.

        A missing, empty or unparseable value becomes 1970-01-01 UTC so the
        record can still be compared against a time window.
        """
        raw_value = record.get(field)
        if raw_value:
            try:
                record[field] = pendulum.parse(raw_value)
            except Exception:
                record[field] = pendulum.datetime(1970, 1, 1, tz="UTC")
        else:
            record[field] = pendulum.datetime(1970, 1, 1, tz="UTC")
        return record

    @staticmethod
    def _warn_skipped(what: str, resource_id, exc: BaseException) -> None:
        """Log a best-effort fetch that failed and is being skipped."""
        # Local import: keeps this block self-contained without touching the
        # module's import section.
        import logging

        logging.getLogger(__name__).warning(
            "Hostaway: skipping %s for id %s: %s", what, resource_id, exc
        )

    def _fetch_for_each(
        self,
        parents: Iterable[dict],
        fetch_child: Callable[[object], Iterable[dict]],
        what: str,
    ) -> Iterable[dict]:
        """Yield ``fetch_child(parent["id"])`` records for every parent.

        Parents without a truthy ``"id"`` are skipped. A failure while
        fetching one parent's children is logged and that parent is skipped;
        records already yielded for it are kept. Errors raised while
        iterating ``parents`` itself are not caught.
        """
        for parent in parents:
            parent_id = parent.get("id")
            if not parent_id:
                continue
            try:
                yield from fetch_child(parent_id)
            except Exception as exc:
                self._warn_skipped(what, parent_id, exc)
                continue

    def _fetch_single(self, url: str, params: Optional[dict] = None) -> Iterable[dict]:
        """Issue one GET request and yield the record(s) it returns.

        Raises whatever ``response.raise_for_status()`` raises on an HTTP
        error status.
        """
        response = self.session.get(url, params=params, timeout=30)
        response.raise_for_status()
        items = self._extract_items(response.json())

        if isinstance(items, list):
            yield from items
        elif isinstance(items, dict):
            yield items

    def _paginate(
        self,
        url: str,
        params: Optional[dict] = None,
        limit: int = 100,
        process_item: Optional[Callable[[dict], dict]] = None,
    ) -> Iterable[dict]:
        """Yield records from ``url`` using ``limit``/``offset`` paging.

        Args:
            url: Endpoint to request.
            params: Extra query parameters sent with every page.
            limit: Page size; a page shorter than this ends the iteration.
            process_item: Optional transform applied to each record before
                it is yielded.

        Iteration stops on an empty payload, on a single-object payload
        (yielded once), on a short page, or on a payload that is neither a
        list nor an object.
        """
        base_params = {} if params is None else params
        offset = 0

        while True:
            page_params = {**base_params, "limit": limit, "offset": offset}
            response = self.session.get(url, params=page_params, timeout=30)
            response.raise_for_status()
            items = self._extract_items(response.json())

            # A truthy scalar payload (e.g. {"result": "ok"}) used to fall
            # through every branch and loop forever; treat it as "no records".
            if not items or not isinstance(items, (list, dict)):
                return

            if isinstance(items, dict):
                yield process_item(items) if process_item else items
                return

            for item in items:
                yield process_item(item) if process_item else item

            if len(items) < limit:
                return
            offset += limit

    # ------------------------------------------------------------------
    # Listings
    # ------------------------------------------------------------------

    def fetch_listings(
        self,
        start_time: pendulum.DateTime,
        end_time: pendulum.DateTime,
    ) -> Iterable[dict]:
        """Yield listings whose ``latestActivityOn`` lies in the window.

        ``latestActivityOn`` is parsed in place; a missing or unparseable
        value is stamped 1970-01-01 UTC before the inclusive comparison
        ``start_time <= latestActivityOn <= end_time``.
        """

        def process_listing(listing: dict) -> dict:
            return self._coerce_timestamp(listing, "latestActivityOn")

        url = f"{self.BASE_URL}/v1/listings"
        for listing in self._paginate(url, process_item=process_listing):
            if start_time <= listing["latestActivityOn"] <= end_time:
                yield listing

    def fetch_listing_fee_settings(
        self,
        listing_id,
        start_time: pendulum.DateTime,
        end_time: pendulum.DateTime,
    ) -> Iterable[dict]:
        """Yield fee settings of one listing whose ``updatedOn`` is in the window.

        ``updatedOn`` is parsed in place with the same epoch fallback as
        ``fetch_listings`` uses for ``latestActivityOn``.
        """

        def process_fee(fee: dict) -> dict:
            return self._coerce_timestamp(fee, "updatedOn")

        url = f"{self.BASE_URL}/v1/listingFeeSettings/{listing_id}"
        for fee in self._paginate(url, process_item=process_fee):
            if start_time <= fee["updatedOn"] <= end_time:
                yield fee

    def fetch_all_listing_fee_settings(
        self,
        start_time: pendulum.DateTime,
        end_time: pendulum.DateTime,
    ) -> Iterable[dict]:
        """Yield fee settings for every listing returned by ``fetch_listings``."""
        yield from self._fetch_for_each(
            self.fetch_listings(start_time, end_time),
            lambda listing_id: self.fetch_listing_fee_settings(
                listing_id, start_time, end_time
            ),
            "listing fee settings",
        )

    def fetch_listing_agreement(
        self,
        listing_id,
    ) -> Iterable[dict]:
        """Yield the agreement record(s) of one listing."""
        url = f"{self.BASE_URL}/v1/listingAgreement/{listing_id}"
        yield from self._paginate(url)

    def fetch_listing_pricing_settings(
        self,
        listing_id,
    ) -> Iterable[dict]:
        """Yield the pricing-settings record(s) of one listing."""
        url = f"{self.BASE_URL}/v1/listing/pricingSettings/{listing_id}"
        yield from self._paginate(url)

    def fetch_all_listing_pricing_settings(
        self,
        start_time: pendulum.DateTime,
        end_time: pendulum.DateTime,
    ) -> Iterable[dict]:
        """Yield pricing settings for every listing returned by ``fetch_listings``."""
        yield from self._fetch_for_each(
            self.fetch_listings(start_time, end_time),
            self.fetch_listing_pricing_settings,
            "listing pricing settings",
        )

    def fetch_all_listing_agreements(
        self,
        start_time: pendulum.DateTime,
        end_time: pendulum.DateTime,
    ) -> Iterable[dict]:
        """Yield agreements for every listing returned by ``fetch_listings``."""
        yield from self._fetch_for_each(
            self.fetch_listings(start_time, end_time),
            self.fetch_listing_agreement,
            "listing agreement",
        )

    def fetch_listing_calendar(self, listing_id) -> Iterable[dict]:
        """Yield the calendar record(s) of one listing."""
        url = f"{self.BASE_URL}/v1/listings/{listing_id}/calendar"
        yield from self._fetch_single(url)

    def fetch_all_listing_calendars(
        self,
        start_time: pendulum.DateTime,
        end_time: pendulum.DateTime,
    ) -> Iterable[dict]:
        """Yield calendars for every listing returned by ``fetch_listings``."""
        yield from self._fetch_for_each(
            self.fetch_listings(start_time, end_time),
            self.fetch_listing_calendar,
            "listing calendar",
        )

    # ------------------------------------------------------------------
    # Cancellation policies
    # ------------------------------------------------------------------

    def fetch_cancellation_policies(self) -> Iterable[dict]:
        """Yield records from ``/v1/cancellationPolicies``."""
        url = f"{self.BASE_URL}/v1/cancellationPolicies"
        yield from self._fetch_single(url)

    def fetch_cancellation_policies_airbnb(self) -> Iterable[dict]:
        """Yield records from ``/v1/cancellationPolicies/airbnb``."""
        url = f"{self.BASE_URL}/v1/cancellationPolicies/airbnb"
        yield from self._fetch_single(url)

    def fetch_cancellation_policies_marriott(self) -> Iterable[dict]:
        """Yield records from ``/v1/cancellationPolicies/marriott``."""
        url = f"{self.BASE_URL}/v1/cancellationPolicies/marriott"
        yield from self._fetch_single(url)

    def fetch_cancellation_policies_vrbo(self) -> Iterable[dict]:
        """Yield records from ``/v1/cancellationPolicies/vrbo``."""
        url = f"{self.BASE_URL}/v1/cancellationPolicies/vrbo"
        yield from self._fetch_single(url)

    # ------------------------------------------------------------------
    # Reservations
    # ------------------------------------------------------------------

    def fetch_reservations(self) -> Iterable[dict]:
        """Yield every reservation, page by page."""
        url = f"{self.BASE_URL}/v1/reservations"
        yield from self._paginate(url)

    def fetch_finance_field(self, reservation_id) -> Iterable[dict]:
        """Yield the finance-field record(s) of one reservation."""
        url = f"{self.BASE_URL}/v1/financeField/{reservation_id}"
        yield from self._fetch_single(url)

    def fetch_all_finance_fields(self) -> Iterable[dict]:
        """Yield finance fields for every reservation."""
        yield from self._fetch_for_each(
            self.fetch_reservations(),
            self.fetch_finance_field,
            "finance field",
        )

    def fetch_reservation_payment_methods(self) -> Iterable[dict]:
        """Yield records from ``/v1/reservations/paymentMethods``."""
        url = f"{self.BASE_URL}/v1/reservations/paymentMethods"
        yield from self._fetch_single(url)

    def fetch_reservation_rental_agreement(self, reservation_id) -> Iterable[dict]:
        """Yield the rental agreement of one reservation, if it can be fetched.

        Any failure is logged and ends the iteration instead of propagating.
        """
        url = f"{self.BASE_URL}/v1/reservations/{reservation_id}/rentalAgreement"
        try:
            yield from self._fetch_single(url)
        except Exception as exc:
            self._warn_skipped("rental agreement", reservation_id, exc)
            return

    def fetch_all_reservation_rental_agreements(self) -> Iterable[dict]:
        """Yield rental agreements for every reservation."""
        yield from self._fetch_for_each(
            self.fetch_reservations(),
            self.fetch_reservation_rental_agreement,
            "rental agreement",
        )

    # ------------------------------------------------------------------
    # Other endpoints
    # ------------------------------------------------------------------

    def fetch_conversations(self) -> Iterable[dict]:
        """Yield every conversation, page by page."""
        url = f"{self.BASE_URL}/v1/conversations"
        yield from self._paginate(url)

    def fetch_message_templates(self) -> Iterable[dict]:
        """Yield records from ``/v1/messageTemplates``."""
        url = f"{self.BASE_URL}/v1/messageTemplates"
        yield from self._fetch_single(url)

    def fetch_bed_types(self) -> Iterable[dict]:
        """Yield records from ``/v1/bedTypes``."""
        url = f"{self.BASE_URL}/v1/bedTypes"
        yield from self._fetch_single(url)

    def fetch_property_types(self) -> Iterable[dict]:
        """Yield records from ``/v1/propertyTypes``."""
        url = f"{self.BASE_URL}/v1/propertyTypes"
        yield from self._fetch_single(url)

    def fetch_countries(self) -> Iterable[dict]:
        """Yield records from ``/v1/countries``."""
        url = f"{self.BASE_URL}/v1/countries"
        yield from self._fetch_single(url)

    def fetch_account_tax_settings(self) -> Iterable[dict]:
        """Yield records from ``/v1/accountTaxSettings``."""
        url = f"{self.BASE_URL}/v1/accountTaxSettings"
        yield from self._fetch_single(url)

    def fetch_user_groups(self) -> Iterable[dict]:
        """Yield records from ``/v1/userGroups``."""
        url = f"{self.BASE_URL}/v1/userGroups"
        yield from self._fetch_single(url)

    def fetch_guest_payment_charges(self) -> Iterable[dict]:
        """Yield every guest payment charge, page by page."""
        url = f"{self.BASE_URL}/v1/guestPayments/charges"
        yield from self._paginate(url)

    def fetch_coupons(self) -> Iterable[dict]:
        """Yield records from ``/v1/coupons``."""
        url = f"{self.BASE_URL}/v1/coupons"
        yield from self._fetch_single(url)

    def fetch_webhook_reservations(self) -> Iterable[dict]:
        """Yield records from ``/v1/webhooks/reservations``."""
        url = f"{self.BASE_URL}/v1/webhooks/reservations"
        yield from self._fetch_single(url)

    def fetch_tasks(self) -> Iterable[dict]:
        """Yield records from ``/v1/tasks``."""
        url = f"{self.BASE_URL}/v1/tasks"
        yield from self._fetch_single(url)
@@ -0,0 +1,38 @@
1
+ """HTTP source for reading CSV, JSON, and Parquet files from public URLs"""
2
+
3
+ from typing import Any, Optional
4
+
5
+ import dlt
6
+ from dlt.sources import DltResource
7
+
8
+ from .readers import HttpReader
9
+
10
+
11
@dlt.source
def http_source(
    url: str,
    file_format: Optional[str] = None,
    column_names: Optional[list[str]] = None,
    **kwargs: Any,
) -> DltResource:
    """Expose a file served over HTTP(S) as a dlt resource named ``http_data``.

    Args:
        url (str): Location of the file.
        file_format (str, optional): One of 'csv', 'csv_headless', 'json' or
            'parquet'. When omitted, the reader infers it from the URL's
            file extension.
        column_names (list[str], optional): Header names to use with the
            'csv_headless' format. When omitted for that format, the reader
            generates unknown_col_0, unknown_col_1, ...
        **kwargs: Forwarded unchanged to the reader's ``read_file``.

    Returns:
        DltResource: Resource wrapping the reader's chunk generator.
    """
    # read_file() is a generator, so nothing is downloaded until the
    # resource is actually iterated.
    chunks = HttpReader(url, file_format, column_names).read_file(**kwargs)
    return dlt.resource(chunks, name="http_data")
@@ -0,0 +1,146 @@
1
+ """Readers for HTTP file sources"""
2
+
3
+ import io
4
+ from typing import Any, Iterator, Optional
5
+ from urllib.parse import urlparse
6
+
7
+ import requests
8
+ from dlt.sources import TDataItems
9
+
10
+
11
class HttpReader:
    """Download one file over HTTP(S) and yield its rows in chunks.

    Supported formats are ``csv``, ``csv_headless``, ``json`` (a single JSON
    document or JSON Lines) and ``parquet``. The whole file is downloaded
    into memory before parsing.
    """

    # Formats accepted by the constructor and dispatched by read_file().
    SUPPORTED_FORMATS = ("csv", "csv_headless", "json", "parquet")

    def __init__(
        self,
        url: str,
        file_format: Optional[str] = None,
        column_names: Optional[list[str]] = None,
    ):
        """Store the target and validate the format.

        Args:
            url: Location of the file.
            file_format: Explicit format; inferred from the URL path's
                extension when falsy.
            column_names: Column names used by the ``csv_headless`` reader.

        Raises:
            ValueError: If the format is unsupported or cannot be inferred.
        """
        self.url = url
        self.file_format = file_format or self._infer_format(url)
        self.column_names = column_names

        if self.file_format not in self.SUPPORTED_FORMATS:
            raise ValueError(
                f"Unsupported file format: {self.file_format}. "
                "Supported formats: csv, csv_headless, json, parquet"
            )

    def _infer_format(self, url: str) -> str:
        """Infer the file format from the extension of the URL's path.

        The query string and fragment are ignored and the match is
        case-insensitive. Raises ``ValueError`` for an unknown extension.
        """
        path = urlparse(url).path.lower()

        if path.endswith(".csv"):
            return "csv"
        if path.endswith((".json", ".jsonl")):
            return "json"
        if path.endswith(".parquet"):
            return "parquet"
        raise ValueError(
            f"Cannot infer file format from URL: {url}. "
            "Please specify file_format parameter."
        )

    def _download_file(self) -> bytes:
        """Download the file and return its raw bytes.

        Raises whatever ``response.raise_for_status()`` raises on an HTTP
        error status.
        """
        # stream=True lets the status be checked before the body is read;
        # the context manager closes the response even when that check fails.
        with requests.get(self.url, stream=True, timeout=30) as response:
            response.raise_for_status()
            return response.content

    def read_file(self, **kwargs: Any) -> Iterator[TDataItems]:
        """Download the file and yield its records chunk by chunk.

        ``kwargs`` are passed to the format-specific reader. Being a
        generator, the download only starts on the first iteration.
        """
        content = self._download_file()

        readers = {
            "csv": self._read_csv,
            "csv_headless": self._read_csv_headless,
            "json": self._read_json,
            "parquet": self._read_parquet,
        }
        reader = readers.get(self.file_format)
        if reader is not None:
            yield from reader(content, **kwargs)

    def _read_csv(
        self, content: bytes, chunksize: int = 10000, **pandas_kwargs: Any
    ) -> Iterator[TDataItems]:
        """Read a CSV file with pandas, yielding lists of row dicts.

        ``pandas_kwargs`` are passed to ``pd.read_csv`` and override the
        defaults ``header="infer"`` and ``chunksize``.
        """
        import pandas as pd  # type: ignore

        read_options = {"header": "infer", "chunksize": chunksize, **pandas_kwargs}

        for frame in pd.read_csv(io.BytesIO(content), **read_options):
            yield frame.to_dict(orient="records")

    def _read_csv_headless(
        self, content: bytes, chunksize: int = 10000, **pandas_kwargs: Any
    ) -> Iterator[TDataItems]:
        """Read a CSV file that has no header row.

        Uses ``self.column_names`` when set; otherwise names the columns
        ``unknown_col_0``, ``unknown_col_1``, ... based on the first row.
        """
        import pandas as pd  # type: ignore

        if self.column_names:
            names = self.column_names
        else:
            # Let pandas count the columns so quoted commas are handled.
            first_row = pd.read_csv(io.BytesIO(content), header=None, nrows=1)
            names = [f"unknown_col_{i}" for i in range(len(first_row.columns))]

        read_options = {
            "header": None,
            "names": names,
            "chunksize": chunksize,
            **pandas_kwargs,
        }

        for frame in pd.read_csv(io.BytesIO(content), **read_options):
            yield frame.to_dict(orient="records")

    def _read_json(
        self, content: bytes, chunksize: int = 1000, **kwargs: Any
    ) -> Iterator[TDataItems]:
        """Read a JSON document or a JSON Lines file.

        The payload is first parsed as one JSON document, so pretty-printed
        multi-line JSON works. Only if that fails is it read as JSON Lines,
        one value per non-blank line. A top-level list (or the JSONL rows)
        is yielded in chunks of ``chunksize``; any other single value is
        yielded as a one-element list. Empty content yields nothing.
        """
        from dlt.common import json

        # utf-8-sig drops a leading byte-order mark, which json would reject.
        text = content.decode("utf-8-sig").strip()

        try:
            data = json.loads(text)
        except ValueError:
            # NOTE(review): assumes decode errors raised by dlt's json
            # wrapper derive from ValueError, as JSONDecodeError does in the
            # stdlib - confirm for the configured backend.
            rows = []
            for line in text.split("\n"):
                if not line.strip():
                    continue
                rows.append(json.loads(line))
                if len(rows) >= chunksize:
                    yield rows
                    rows = []
            if rows:
                yield rows
            return

        if isinstance(data, list):
            for start in range(0, len(data), chunksize):
                yield data[start : start + chunksize]
        else:
            yield [data]

    def _read_parquet(
        self, content: bytes, chunksize: int = 10000, **kwargs: Any
    ) -> Iterator[TDataItems]:
        """Read a Parquet file, yielding record batches as lists of dicts."""
        from pyarrow import parquet as pq  # type: ignore

        parquet_file = pq.ParquetFile(io.BytesIO(content))

        for batch in parquet_file.iter_batches(batch_size=chunksize):
            yield batch.to_pylist()
@@ -0,0 +1,24 @@
1
+ import requests
2
+ from dlt.sources.helpers.requests import Client
3
+
4
+
5
def create_client(retry_status_codes: list[int] | None = None) -> requests.Session:
    """Build a session that retries on the given HTTP status codes.

    Args:
        retry_status_codes: Status codes to retry on; ``[502]`` when ``None``.

    Returns:
        The ``session`` of a ``Client`` configured with
        ``raise_for_status=False``, 12 maximum attempts and a backoff
        factor of 10.
    """
    codes = [502] if retry_status_codes is None else retry_status_codes
    client = Client(
        raise_for_status=False,
        retry_condition=retry_on_status_code(codes),
        request_max_attempts=12,
        request_backoff_factor=10,
    )
    return client.session
14
+
15
+
16
def retry_on_status_code(retry_status_codes: list[int]):
    """Return a predicate that tells whether a response should be retried.

    The predicate takes ``(response, exception)`` and returns ``True`` only
    when a response is present and its ``status_code`` is in
    ``retry_status_codes``. The exception argument is never inspected.
    """

    def retry_on_limit(
        response: requests.Response | None, exception: BaseException | None
    ) -> bool:
        # No response means there is no status code to match against.
        return response is not None and response.status_code in retry_status_codes

    return retry_on_limit