omniload 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. omniload/conftest.py +72 -0
  2. omniload/main.py +810 -0
  3. omniload/src/.gitignore +10 -0
  4. omniload/src/adjust/__init__.py +108 -0
  5. omniload/src/adjust/adjust_helpers.py +122 -0
  6. omniload/src/airtable/__init__.py +84 -0
  7. omniload/src/allium/__init__.py +128 -0
  8. omniload/src/anthropic/__init__.py +277 -0
  9. omniload/src/anthropic/helpers.py +525 -0
  10. omniload/src/applovin/__init__.py +316 -0
  11. omniload/src/applovin_max/__init__.py +117 -0
  12. omniload/src/appsflyer/__init__.py +325 -0
  13. omniload/src/appsflyer/client.py +110 -0
  14. omniload/src/appstore/__init__.py +142 -0
  15. omniload/src/appstore/client.py +126 -0
  16. omniload/src/appstore/errors.py +15 -0
  17. omniload/src/appstore/models.py +117 -0
  18. omniload/src/appstore/resources.py +179 -0
  19. omniload/src/arrow/__init__.py +81 -0
  20. omniload/src/asana_source/__init__.py +281 -0
  21. omniload/src/asana_source/helpers.py +30 -0
  22. omniload/src/asana_source/settings.py +158 -0
  23. omniload/src/attio/__init__.py +102 -0
  24. omniload/src/attio/helpers.py +65 -0
  25. omniload/src/blob.py +95 -0
  26. omniload/src/bruin/__init__.py +76 -0
  27. omniload/src/chess/__init__.py +180 -0
  28. omniload/src/chess/helpers.py +35 -0
  29. omniload/src/chess/settings.py +18 -0
  30. omniload/src/clickup/__init__.py +85 -0
  31. omniload/src/clickup/helpers.py +47 -0
  32. omniload/src/collector/spinner.py +43 -0
  33. omniload/src/couchbase_source/__init__.py +118 -0
  34. omniload/src/couchbase_source/helpers.py +135 -0
  35. omniload/src/cursor/__init__.py +83 -0
  36. omniload/src/cursor/helpers.py +188 -0
  37. omniload/src/customer_io/__init__.py +486 -0
  38. omniload/src/customer_io/helpers.py +530 -0
  39. omniload/src/destinations.py +982 -0
  40. omniload/src/docebo/__init__.py +589 -0
  41. omniload/src/docebo/client.py +435 -0
  42. omniload/src/docebo/helpers.py +97 -0
  43. omniload/src/dune/__init__.py +104 -0
  44. omniload/src/dune/helpers.py +108 -0
  45. omniload/src/dynamodb/__init__.py +86 -0
  46. omniload/src/elasticsearch/__init__.py +80 -0
  47. omniload/src/elasticsearch/helpers.py +141 -0
  48. omniload/src/errors.py +26 -0
  49. omniload/src/facebook_ads/__init__.py +403 -0
  50. omniload/src/facebook_ads/exceptions.py +19 -0
  51. omniload/src/facebook_ads/helpers.py +296 -0
  52. omniload/src/facebook_ads/settings.py +224 -0
  53. omniload/src/facebook_ads/utils.py +53 -0
  54. omniload/src/factory.py +305 -0
  55. omniload/src/filesystem/__init__.py +133 -0
  56. omniload/src/filesystem/helpers.py +114 -0
  57. omniload/src/filesystem/readers.py +187 -0
  58. omniload/src/filters.py +62 -0
  59. omniload/src/fireflies/__init__.py +151 -0
  60. omniload/src/fireflies/helpers.py +753 -0
  61. omniload/src/fluxx/__init__.py +10013 -0
  62. omniload/src/fluxx/helpers.py +233 -0
  63. omniload/src/frankfurter/__init__.py +157 -0
  64. omniload/src/frankfurter/helpers.py +48 -0
  65. omniload/src/freshdesk/__init__.py +103 -0
  66. omniload/src/freshdesk/freshdesk_client.py +151 -0
  67. omniload/src/freshdesk/settings.py +23 -0
  68. omniload/src/fundraiseup/__init__.py +95 -0
  69. omniload/src/fundraiseup/client.py +81 -0
  70. omniload/src/github/__init__.py +202 -0
  71. omniload/src/github/helpers.py +207 -0
  72. omniload/src/github/queries.py +129 -0
  73. omniload/src/github/settings.py +24 -0
  74. omniload/src/google_ads/__init__.py +198 -0
  75. omniload/src/google_ads/field.py +17 -0
  76. omniload/src/google_ads/metrics.py +254 -0
  77. omniload/src/google_ads/predicates.py +37 -0
  78. omniload/src/google_ads/reports.py +411 -0
  79. omniload/src/google_ads/test_google_ads.py +184 -0
  80. omniload/src/google_analytics/__init__.py +144 -0
  81. omniload/src/google_analytics/helpers.py +312 -0
  82. omniload/src/google_sheets/README.md +95 -0
  83. omniload/src/google_sheets/__init__.py +166 -0
  84. omniload/src/google_sheets/helpers/__init__.py +15 -0
  85. omniload/src/google_sheets/helpers/api_calls.py +160 -0
  86. omniload/src/google_sheets/helpers/data_processing.py +316 -0
  87. omniload/src/gorgias/__init__.py +595 -0
  88. omniload/src/gorgias/helpers.py +166 -0
  89. omniload/src/hostaway/__init__.py +302 -0
  90. omniload/src/hostaway/client.py +288 -0
  91. omniload/src/http/__init__.py +38 -0
  92. omniload/src/http/readers.py +146 -0
  93. omniload/src/http_client.py +24 -0
  94. omniload/src/hubspot/__init__.py +800 -0
  95. omniload/src/hubspot/helpers.py +417 -0
  96. omniload/src/hubspot/settings.py +329 -0
  97. omniload/src/indeed/__init__.py +153 -0
  98. omniload/src/indeed/helpers.py +228 -0
  99. omniload/src/influxdb/__init__.py +46 -0
  100. omniload/src/influxdb/client.py +34 -0
  101. omniload/src/intercom/__init__.py +142 -0
  102. omniload/src/intercom/helpers.py +674 -0
  103. omniload/src/intercom/settings.py +279 -0
  104. omniload/src/isoc_pulse/__init__.py +159 -0
  105. omniload/src/jira_source/__init__.py +377 -0
  106. omniload/src/jira_source/helpers.py +510 -0
  107. omniload/src/jira_source/settings.py +184 -0
  108. omniload/src/kafka/__init__.py +120 -0
  109. omniload/src/kafka/helpers.py +241 -0
  110. omniload/src/kinesis/__init__.py +153 -0
  111. omniload/src/kinesis/helpers.py +96 -0
  112. omniload/src/klaviyo/__init__.py +237 -0
  113. omniload/src/klaviyo/client.py +212 -0
  114. omniload/src/klaviyo/helpers.py +19 -0
  115. omniload/src/linear/__init__.py +634 -0
  116. omniload/src/linear/helpers.py +111 -0
  117. omniload/src/linkedin_ads/__init__.py +266 -0
  118. omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
  119. omniload/src/linkedin_ads/helpers.py +246 -0
  120. omniload/src/loader.py +69 -0
  121. omniload/src/mailchimp/__init__.py +126 -0
  122. omniload/src/mailchimp/helpers.py +226 -0
  123. omniload/src/mailchimp/settings.py +164 -0
  124. omniload/src/masking.py +344 -0
  125. omniload/src/mixpanel/__init__.py +62 -0
  126. omniload/src/mixpanel/client.py +104 -0
  127. omniload/src/monday/__init__.py +246 -0
  128. omniload/src/monday/helpers.py +392 -0
  129. omniload/src/monday/settings.py +325 -0
  130. omniload/src/mongodb/__init__.py +281 -0
  131. omniload/src/mongodb/helpers.py +975 -0
  132. omniload/src/notion/__init__.py +69 -0
  133. omniload/src/notion/helpers/__init__.py +14 -0
  134. omniload/src/notion/helpers/client.py +178 -0
  135. omniload/src/notion/helpers/database.py +92 -0
  136. omniload/src/notion/settings.py +17 -0
  137. omniload/src/partition.py +32 -0
  138. omniload/src/personio/__init__.py +345 -0
  139. omniload/src/personio/helpers.py +100 -0
  140. omniload/src/phantombuster/__init__.py +65 -0
  141. omniload/src/phantombuster/client.py +87 -0
  142. omniload/src/pinterest/__init__.py +82 -0
  143. omniload/src/pipedrive/__init__.py +212 -0
  144. omniload/src/pipedrive/helpers/__init__.py +37 -0
  145. omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
  146. omniload/src/pipedrive/helpers/pages.py +129 -0
  147. omniload/src/pipedrive/settings.py +41 -0
  148. omniload/src/pipedrive/typing.py +17 -0
  149. omniload/src/plusvibeai/__init__.py +335 -0
  150. omniload/src/plusvibeai/helpers.py +544 -0
  151. omniload/src/plusvibeai/settings.py +252 -0
  152. omniload/src/primer/__init__.py +45 -0
  153. omniload/src/primer/helpers.py +79 -0
  154. omniload/src/quickbooks/__init__.py +117 -0
  155. omniload/src/reddit_ads/__init__.py +183 -0
  156. omniload/src/reddit_ads/helpers.py +232 -0
  157. omniload/src/resource.py +40 -0
  158. omniload/src/revenuecat/__init__.py +83 -0
  159. omniload/src/revenuecat/helpers.py +237 -0
  160. omniload/src/salesforce/__init__.py +170 -0
  161. omniload/src/salesforce/helpers.py +78 -0
  162. omniload/src/shopify/__init__.py +1953 -0
  163. omniload/src/shopify/exceptions.py +17 -0
  164. omniload/src/shopify/helpers.py +202 -0
  165. omniload/src/shopify/settings.py +19 -0
  166. omniload/src/slack/__init__.py +290 -0
  167. omniload/src/slack/helpers.py +218 -0
  168. omniload/src/slack/settings.py +36 -0
  169. omniload/src/smartsheets/__init__.py +82 -0
  170. omniload/src/snapchat_ads/__init__.py +455 -0
  171. omniload/src/snapchat_ads/client.py +72 -0
  172. omniload/src/snapchat_ads/helpers.py +630 -0
  173. omniload/src/snapchat_ads/settings.py +130 -0
  174. omniload/src/socrata_source/__init__.py +83 -0
  175. omniload/src/socrata_source/helpers.py +85 -0
  176. omniload/src/socrata_source/settings.py +8 -0
  177. omniload/src/solidgate/__init__.py +219 -0
  178. omniload/src/solidgate/helpers.py +154 -0
  179. omniload/src/sources.py +5408 -0
  180. omniload/src/sql_database/__init__.py +0 -0
  181. omniload/src/sql_database/callbacks.py +66 -0
  182. omniload/src/stripe_analytics/__init__.py +183 -0
  183. omniload/src/stripe_analytics/helpers.py +386 -0
  184. omniload/src/stripe_analytics/settings.py +80 -0
  185. omniload/src/table_definition.py +15 -0
  186. omniload/src/testdata/fakebqcredentials.json +14 -0
  187. omniload/src/tiktok_ads/__init__.py +150 -0
  188. omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
  189. omniload/src/time.py +11 -0
  190. omniload/src/trustpilot/__init__.py +48 -0
  191. omniload/src/trustpilot/client.py +48 -0
  192. omniload/src/version.py +6 -0
  193. omniload/src/wise/__init__.py +68 -0
  194. omniload/src/wise/client.py +63 -0
  195. omniload/src/zendesk/__init__.py +480 -0
  196. omniload/src/zendesk/helpers/__init__.py +39 -0
  197. omniload/src/zendesk/helpers/api_helpers.py +119 -0
  198. omniload/src/zendesk/helpers/credentials.py +68 -0
  199. omniload/src/zendesk/helpers/talk_api.py +132 -0
  200. omniload/src/zendesk/settings.py +71 -0
  201. omniload/src/zoom/__init__.py +99 -0
  202. omniload/src/zoom/helpers.py +102 -0
  203. omniload/testdata/.gitignore +2 -0
  204. omniload/testdata/create_replace.csv +21 -0
  205. omniload/testdata/delete_insert_expected.csv +6 -0
  206. omniload/testdata/delete_insert_part1.csv +5 -0
  207. omniload/testdata/delete_insert_part2.csv +6 -0
  208. omniload/testdata/merge_expected.csv +5 -0
  209. omniload/testdata/merge_part1.csv +4 -0
  210. omniload/testdata/merge_part2.csv +5 -0
  211. omniload/tests/unit/test_smartsheets.py +133 -0
  212. omniload-0.0.0.dev0.dist-info/METADATA +439 -0
  213. omniload-0.0.0.dev0.dist-info/RECORD +218 -0
  214. omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
  215. omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
  216. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
  217. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
  218. omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
@@ -0,0 +1,325 @@
1
+ """Monday.com source settings and constants"""
2
+
3
# GraphQL query for fetching app installs.
# Variables: $app_id (ID!, required), $account_id (ID, optional filter),
# $limit and $page (Int!, both required) which are forwarded to the
# app_installs field for page-based pagination.
# Selects the install timestamp, the ids of the installing account and user,
# the app version parts and the approved/required permission scopes.
APP_INSTALLS_QUERY = """
query ($app_id: ID!, $account_id: ID, $limit: Int!, $page: Int!) {
app_installs(
app_id: $app_id
account_id: $account_id
limit: $limit
page: $page
) {
app_id
timestamp
app_install_account {
id
}
app_install_user {
id
}
app_version {
major
minor
patch
type
text
}
permissions {
approved_scopes
required_scopes
}
}
}
"""
34
+
35
# GraphQL query for fetching account information.
# Takes no variables. Selects scalar account attributes plus the nested
# `plan` object (max_users, period, tier, version).
ACCOUNT_QUERY = """
query {
account {
id
name
slug
tier
country_code
first_day_of_the_week
show_timeline_weekends
sign_up_product_kind
active_members_count
logo
plan {
max_users
period
tier
version
}
}
}
"""
58
+
59
# GraphQL query for fetching account roles.
# Takes no variables. NOTE: `roleType` is camelCase here, unlike the
# snake_case field names used by the other queries in this module.
ACCOUNT_ROLES_QUERY = """
query {
account_roles {
id
name
roleType
}
}
"""
69
+
70
# GraphQL query for fetching users.
# Variables: $limit and $page (Int!, both required), forwarded to the `users`
# field for page-based pagination.
# Selects user profile attributes and the id of the account each user
# belongs to.
USERS_QUERY = """
query ($limit: Int!, $page: Int!) {
users(limit: $limit, page: $page) {
id
name
email
enabled
is_admin
is_guest
is_pending
is_view_only
created_at
birthday
country_code
join_date
location
mobile_phone
phone
photo_original
photo_thumb
photo_tiny
time_zone_identifier
title
url
utc_hours_diff
current_language
account {
id
}
}
}
"""
103
+
104
# GraphQL query for fetching boards.
# Variables: $limit and $page (Int!, both required), forwarded to the
# `boards` field for page-based pagination.
# Selects board metadata; related entities (creator, owners, subscribers,
# team owners/subscribers, tags) are fetched as ids only.
BOARDS_QUERY = """
query ($limit: Int!, $page: Int!) {
boards(limit: $limit, page: $page) {
id
name
description
state
board_kind
board_folder_id
workspace_id
permissions
item_terminology
items_count
updated_at
url
communication
object_type_unique_key
type
creator {
id
}
owners {
id
}
subscribers {
id
}
team_owners {
id
}
team_subscribers {
id
}
tags {
id

}
}
}
"""
145
+
146
# GraphQL query for fetching custom activities.
# Takes no variables. Note that the queried root field is the singular
# `custom_activity`.
CUSTOM_ACTIVITIES_QUERY = """
query {
custom_activity {
id
name
type
color
icon_id
}
}
"""
158
+
159
# GraphQL query for fetching board columns.
# Variables: $board_ids ([ID!], optional list) passed as the `ids` argument
# of `boards`.
# Each returned board carries its own id next to the column list, so columns
# can be related back to their board.
BOARD_COLUMNS_QUERY = """
query ($board_ids: [ID!]) {
boards(ids: $board_ids) {
id
columns {
id
title
type
archived
description
settings_str
width
}
}
}
"""
176
+
177
# GraphQL query for fetching board views.
# Variables: $board_ids ([ID!], optional list) passed as the `ids` argument
# of `boards`.
# Each returned board carries its own id next to the view list, so views can
# be related back to their board.
BOARD_VIEWS_QUERY = """
query ($board_ids: [ID!]) {
boards(ids: $board_ids) {
id
views {
id
name
type
settings_str
view_specific_data_str
source_view_id
access_level
}
}
}
"""
194
+
195
# GraphQL query for fetching workspaces by IDs.
# Variables: $ids ([ID!], optional list) passed as the `ids` argument of
# `workspaces`.
# Selects workspace metadata; the account product and the various
# owner/subscriber relations are fetched as ids only.
WORKSPACES_QUERY = """
query ($ids: [ID!]) {
workspaces(ids: $ids) {
id
name
kind
description
created_at
is_default_workspace
state
account_product {
id
}
owners_subscribers {
id
}
team_owners_subscribers {
id
}
teams_subscribers {
id
}
users_subscribers {
id
}
}
}
"""
224
+
225
# GraphQL query for fetching webhooks by board ID.
# Variables: $board_id (ID!, required) - the query targets exactly one board
# per request.
WEBHOOKS_QUERY = """
query ($board_id: ID!) {
webhooks(board_id: $board_id) {
id
event
board_id
config
}
}
"""
236
+
237
# GraphQL query for fetching updates.
# Variables: $limit (Int!, required), $from_date and $to_date (String, both
# optional) which bound the returned updates. Unlike the other paginated
# queries in this module, no $page variable is declared.
# Selects the update body and timestamps together with nested assets,
# replies, likes, pin information and viewers; creator, item and user
# relations are fetched as ids only.
UPDATES_QUERY = """
query ($limit: Int!, $from_date: String, $to_date: String) {
updates(limit: $limit, from_date: $from_date, to_date: $to_date) {
id
body
text_body
created_at
updated_at
edited_at
creator_id
item_id
creator {
id
}
item {
id
}
assets {
id
name
file_extension
file_size
public_url
url
url_thumbnail
created_at
original_geometry
uploaded_by {
id
}
}
replies {
id
body
text_body
created_at
updated_at
creator_id
creator {
id
}
}
likes {
id
}
pinned_to_top {
item_id
}
viewers {
medium
user_id
user {
id
}
}
}
}
"""
296
+
297
# GraphQL query for fetching teams.
# Takes no variables. Selects each team's id, name and picture URL plus a
# reduced view of its users (id, created_at, phone).
TEAMS_QUERY = """
query {
teams {
id
name
picture_url
users {
id
created_at
phone
}
}
}
"""
312
+
313
# GraphQL query for fetching tags.
# Takes no variables. Selects id, name and color of every tag.
TAGS_QUERY = """
query {
tags {
id
name
color
}
}
"""
323
+
324
# Maximum number of results per page.
# NOTE(review): presumably supplied as the $limit variable of the paginated
# queries in this module - confirm against the callers in the helpers.
MAX_PAGE_SIZE = 100
@@ -0,0 +1,281 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Source that loads collections from any MongoDB database; supports incremental loads."""
16
+
17
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
18
+
19
+ import dlt
20
+ from dlt.common.data_writers import TDataItemFormat
21
+ from dlt.sources import DltResource
22
+
23
+ from .helpers import (
24
+ MongoDbCollectionConfiguration,
25
+ MongoDbCollectionResourceConfiguration,
26
+ client_from_credentials,
27
+ collection_documents,
28
+ process_file_items,
29
+ )
30
+
31
+
32
@dlt.source(max_table_nesting=0)
def mongodb(
    connection_url: str = dlt.secrets.value,
    database: Optional[str] = dlt.config.value,
    collection_names: Optional[List[str]] = dlt.config.value,
    incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
    write_disposition: Optional[str] = dlt.config.value,
    parallel: Optional[bool] = dlt.config.value,
    limit: Optional[int] = None,
    filter_: Optional[Dict[str, Any]] = None,
    projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
    pymongoarrow_schema: Optional[Any] = None,
) -> Iterable[DltResource]:
    """
    DLT source that exposes the collections of a MongoDB database as resources.

    One resource is yielded per collection. When `collection_names` is empty or
    not given, every collection reported by the database is loaded.

    Args:
        connection_url (str): Database connection_url.
        database (Optional[str]): Name of the database to read from; the default
            database of the client is used when this is empty.
        collection_names (Optional[List[str]]): Names of the collections to load.
        incremental (Optional[dlt.sources.incremental]): Incremental loading
            configuration forwarded to every collection resource.
            E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
        write_disposition (str): Write disposition of the resources.
        parallel (Optional[bool]): Whether to load the collections in parallel;
            forwarded unchanged to every collection resource.
        limit (Optional[int]):
            The maximum number of documents to load. The limit is
            applied to each requested collection separately.
        filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection.
            An empty filter is used when this is not given.
        projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The
            projection to select fields of a collection when loading it.
            Supported inputs:
                include (list) - ["year", "title"]
                include (dict) - {"year": True, "title": True}
                exclude (dict) - {"released": False, "runtime": False}
            Note: include and exclude statements can't be mixed,
            e.g. `{"title": True, "released": False}`.
        pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected
            field types of a collection to convert BSON to Arrow.

    Returns:
        Iterable[DltResource]: One DLT resource per collection to be loaded.
    """
    mongo_client = client_from_credentials(connection_url)
    db = mongo_client[database] if database else mongo_client.get_default_database()

    # Fall back to every collection of the database when none were requested.
    names = collection_names or db.list_collection_names()

    # Resolve all collection handles before the first resource is yielded.
    handles = [db[name] for name in names]

    for handle in handles:
        make_resource = dlt.resource(  # type: ignore
            collection_documents,
            name=handle.name,
            primary_key="_id",
            write_disposition=write_disposition,
            spec=MongoDbCollectionConfiguration,
            max_table_nesting=0,
        )
        yield make_resource(
            mongo_client,
            handle,
            incremental=incremental,
            parallel=parallel,
            limit=limit,
            filter_=filter_ or {},
            projection=projection,
            pymongoarrow_schema=pymongoarrow_schema,
        )
104
+
105
+
106
@dlt.resource(
    name=lambda args: args["collection"],
    standalone=True,
    spec=MongoDbCollectionResourceConfiguration,
    max_table_nesting=0,
)
def mongodb_collection(
    connection_url: str = dlt.secrets.value,
    database: Optional[str] = dlt.config.value,
    collection: str = dlt.config.value,
    incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
    write_disposition: Optional[str] = dlt.config.value,
    parallel: Optional[bool] = False,
    limit: Optional[int] = None,
    chunk_size: Optional[int] = 1000,
    data_item_format: Optional[TDataItemFormat] = "object",
    filter_: Optional[Dict[str, Any]] = None,
    projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,
    pymongoarrow_schema: Optional[Any] = None,
    custom_query: Optional[List[Dict[str, Any]]] = None,
) -> DltResource:
    """
    Standalone DLT resource that loads a single MongoDB collection.

    Args:
        connection_url (str): Database connection_url.
        database (Optional[str]): Name of the database to read from; the default
            database of the client is used when this is empty.
        collection (str): The collection name to load.
        incremental (Optional[dlt.sources.incremental]): Option to enable
            incremental loading for the collection.
            E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
        write_disposition (str): Write disposition of the resource.
        parallel (Optional[bool]): Option to enable parallel loading for the
            collection. Default is False.
        limit (Optional[int]): The maximum number of documents to load.
        chunk_size (Optional[int]): The number of documents to load in each
            batch. Default is 1000.
        data_item_format (Optional[TDataItemFormat]): The data format to use
            for loading. Default is "object".
            Supported formats:
                object - Python objects (dicts, lists).
                arrow - Apache Arrow tables.
        filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection.
            An empty filter is used when this is not given.
        projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The
            projection to select fields when loading the collection.
            Supported inputs:
                include (list) - ["year", "title"]
                include (dict) - {"year": True, "title": True}
                exclude (dict) - {"released": False, "runtime": False}
            Note: include and exclude statements can't be mixed,
            e.g. `{"title": True, "released": False}`.
        pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected
            field types to convert BSON to Arrow.
        custom_query (Optional[List[Dict[str, Any]]]): Custom MongoDB
            aggregation pipeline to execute instead of find().

    Returns:
        DltResource: The DLT resource for the requested collection.
    """
    mongo_client = client_from_credentials(connection_url)
    db = mongo_client[database] if database else mongo_client.get_default_database()
    target = db[collection]

    make_resource = dlt.resource(  # type: ignore
        collection_documents,
        name=target.name,
        primary_key="_id",
        write_disposition=write_disposition,
    )
    return make_resource(
        mongo_client,
        target,
        incremental=incremental,
        parallel=parallel,
        limit=limit,
        chunk_size=chunk_size,
        data_item_format=data_item_format,
        filter_=filter_ or {},
        projection=projection,
        pymongoarrow_schema=pymongoarrow_schema,
        custom_query=custom_query,
    )
184
+
185
+
186
def mongodb_insert(uri: str):
    """Creates a dlt.destination for inserting data into a MongoDB collection.

    The target database is the path component of ``uri``; when the URI names
    no database, ``"omniload_db"`` is used. Every dlt table is written to the
    collection named after the table.

    Supported write dispositions (read from the table schema on each call):
        merge   - upsert each document with ``ReplaceOne`` keyed on the
                  table's primary-key columns. Documents that lack one of the
                  primary-key fields cannot be matched and are skipped; the
                  number of skipped documents is logged as a warning.
        replace - empty the collection before the first non-empty batch of a
                  table, then insert the documents.

    Args:
        uri (str): MongoDB connection URI including database.

    Returns:
        dlt.destination: A DLT destination object configured for MongoDB.

    Raises:
        ValueError: Raised by the destination callable at load time when a
            merge is requested for a table without primary keys, or when the
            write disposition is neither ``merge`` nor ``replace``.
    """
    import logging
    from urllib.parse import urlparse

    logger = logging.getLogger(__name__)

    # The database name is the URI path without its leading slash.
    database = urlparse(uri).path.lstrip("/") or "omniload_db"
    # Tracks, per collection, whether the "replace" wipe is still pending.
    first_batch_per_table: dict[str, bool] = {}
    BATCH_SIZE = 10000

    def destination(items, table) -> None:
        import pyarrow
        from pymongo import MongoClient

        collection_name = table["name"]
        first_batch_per_table.setdefault(collection_name, True)

        with MongoClient(uri) as client:
            collection = client[database][collection_name]

            # Normalise the incoming payload into a list of dict documents.
            if isinstance(items, str):
                # A string payload is handed to the file reader helper.
                documents = process_file_items(items)
            elif isinstance(items, pyarrow.RecordBatch):
                documents = items.to_pylist()
            else:
                documents = [item for item in items if isinstance(item, dict)]

            write_disposition = table.get("write_disposition")

            batches = [
                documents[i : i + BATCH_SIZE]
                for i in range(0, len(documents), BATCH_SIZE)
            ]

            if write_disposition == "merge":
                from pymongo import ReplaceOne

                primary_keys = [
                    col_name
                    for col_name, col_def in table.get("columns", {}).items()
                    if isinstance(col_def, dict) and col_def.get("primary_key")
                ]

                if not primary_keys:
                    raise ValueError(
                        f"Merge operation requires primary keys for table '{collection_name}'. "
                        "Please define primary keys in the table schema or use 'replace' write disposition."
                    )

                for batch in batches:
                    operations = []
                    skipped = 0
                    for doc in batch:
                        if all(key in doc for key in primary_keys):
                            operations.append(
                                ReplaceOne(
                                    {key: doc[key] for key in primary_keys},
                                    doc,
                                    upsert=True,
                                )
                            )
                        else:
                            skipped += 1

                    # Surface dropped documents instead of losing them silently.
                    if skipped:
                        logger.warning(
                            "Skipped %d document(s) missing primary key field(s) %s "
                            "while merging into collection '%s'",
                            skipped,
                            primary_keys,
                            collection_name,
                        )
                    if operations:
                        collection.bulk_write(operations, ordered=False)

            elif write_disposition == "replace":
                # Wipe only once per table, and only when there is data to
                # put in place of the old content.
                if first_batch_per_table[collection_name] and documents:
                    collection.delete_many({})
                    first_batch_per_table[collection_name] = False

                for batch in batches:
                    if batch:
                        collection.insert_many(batch)

            else:
                raise ValueError(
                    f"Unsupported write disposition '{write_disposition}' for MongoDB destination. "
                )

    return dlt.destination(
        destination,
        name="mongodb",
        loader_file_format="typed-jsonl",
        batch_size=1000,
        naming_convention="snake_case",
        loader_parallelism_strategy="sequential",
    )