ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/conftest.py +72 -0
- ingestr/main.py +134 -87
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/adjust/adjust_helpers.py +7 -3
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin/__init__.py +262 -0
- ingestr/src/applovin_max/__init__.py +117 -0
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -45
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/arrow/__init__.py +9 -1
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/attio/__init__.py +102 -0
- ingestr/src/attio/helpers.py +65 -0
- ingestr/src/blob.py +38 -11
- ingestr/src/buildinfo.py +1 -0
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/clickup/__init__.py +85 -0
- ingestr/src/clickup/helpers.py +47 -0
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +520 -33
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/__init__.py +80 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +47 -28
- ingestr/src/facebook_ads/helpers.py +59 -37
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +116 -2
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +46 -3
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -0
- ingestr/src/frankfurter/helpers.py +48 -0
- ingestr/src/freshdesk/__init__.py +89 -0
- ingestr/src/freshdesk/freshdesk_client.py +137 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/fundraiseup/__init__.py +95 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +41 -6
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_analytics/__init__.py +22 -4
- ingestr/src/google_analytics/helpers.py +124 -6
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/http_client.py +24 -0
- ingestr/src/hubspot/__init__.py +66 -23
- ingestr/src/hubspot/helpers.py +52 -22
- ingestr/src/hubspot/settings.py +14 -7
- ingestr/src/influxdb/__init__.py +46 -0
- ingestr/src/influxdb/client.py +34 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/isoc_pulse/__init__.py +159 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/kafka/__init__.py +4 -1
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +82 -0
- ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
- ingestr/src/linear/__init__.py +634 -0
- ingestr/src/linear/helpers.py +111 -0
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/loader.py +69 -0
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/mixpanel/__init__.py +62 -0
- ingestr/src/mixpanel/client.py +99 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +72 -8
- ingestr/src/mongodb/helpers.py +915 -38
- ingestr/src/partition.py +32 -0
- ingestr/src/personio/__init__.py +331 -0
- ingestr/src/personio/helpers.py +86 -0
- ingestr/src/phantombuster/__init__.py +65 -0
- ingestr/src/phantombuster/client.py +87 -0
- ingestr/src/pinterest/__init__.py +82 -0
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/quickbooks/__init__.py +117 -0
- ingestr/src/resource.py +40 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +156 -0
- ingestr/src/salesforce/helpers.py +64 -0
- ingestr/src/shopify/__init__.py +1 -17
- ingestr/src/smartsheets/__init__.py +82 -0
- ingestr/src/snapchat_ads/__init__.py +489 -0
- ingestr/src/snapchat_ads/client.py +72 -0
- ingestr/src/snapchat_ads/helpers.py +535 -0
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/solidgate/__init__.py +219 -0
- ingestr/src/solidgate/helpers.py +154 -0
- ingestr/src/sources.py +3132 -212
- ingestr/src/stripe_analytics/__init__.py +49 -21
- ingestr/src/stripe_analytics/helpers.py +286 -1
- ingestr/src/stripe_analytics/settings.py +62 -10
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/__init__.py +12 -6
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- ingestr/src/version.py +6 -1
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/src/zoom/__init__.py +99 -0
- ingestr/src/zoom/helpers.py +102 -0
- ingestr/tests/unit/test_smartsheets.py +133 -0
- ingestr-0.14.104.dist-info/METADATA +563 -0
- ingestr-0.14.104.dist-info/RECORD +203 -0
- ingestr/src/appsflyer/_init_.py +0 -24
- ingestr-0.13.2.dist-info/METADATA +0 -302
- ingestr-0.13.2.dist-info/RECORD +0 -107
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,328 @@
|
|
|
1
|
+
"""Monday.com source settings and constants"""
|
|
2
|
+
|
|
3
|
+
# GraphQL query for fetching app installs
|
|
4
|
+
APP_INSTALLS_QUERY = """
|
|
5
|
+
query ($app_id: ID!, $account_id: ID, $limit: Int!, $page: Int!) {
|
|
6
|
+
app_installs(
|
|
7
|
+
app_id: $app_id
|
|
8
|
+
account_id: $account_id
|
|
9
|
+
limit: $limit
|
|
10
|
+
page: $page
|
|
11
|
+
) {
|
|
12
|
+
app_id
|
|
13
|
+
timestamp
|
|
14
|
+
app_install_account {
|
|
15
|
+
id
|
|
16
|
+
}
|
|
17
|
+
app_install_user {
|
|
18
|
+
id
|
|
19
|
+
}
|
|
20
|
+
app_version {
|
|
21
|
+
major
|
|
22
|
+
minor
|
|
23
|
+
patch
|
|
24
|
+
type
|
|
25
|
+
text
|
|
26
|
+
}
|
|
27
|
+
permissions {
|
|
28
|
+
approved_scopes
|
|
29
|
+
required_scopes
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
# GraphQL query for fetching account information
|
|
36
|
+
ACCOUNT_QUERY = """
|
|
37
|
+
query {
|
|
38
|
+
account {
|
|
39
|
+
id
|
|
40
|
+
name
|
|
41
|
+
slug
|
|
42
|
+
tier
|
|
43
|
+
country_code
|
|
44
|
+
first_day_of_the_week
|
|
45
|
+
show_timeline_weekends
|
|
46
|
+
sign_up_product_kind
|
|
47
|
+
active_members_count
|
|
48
|
+
logo
|
|
49
|
+
plan {
|
|
50
|
+
max_users
|
|
51
|
+
period
|
|
52
|
+
tier
|
|
53
|
+
version
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
# GraphQL query for fetching account roles
|
|
60
|
+
ACCOUNT_ROLES_QUERY = """
|
|
61
|
+
query {
|
|
62
|
+
account_roles {
|
|
63
|
+
id
|
|
64
|
+
name
|
|
65
|
+
roleType
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
# GraphQL query for fetching users
|
|
71
|
+
USERS_QUERY = """
|
|
72
|
+
query ($limit: Int!, $page: Int!) {
|
|
73
|
+
users(limit: $limit, page: $page) {
|
|
74
|
+
id
|
|
75
|
+
name
|
|
76
|
+
email
|
|
77
|
+
enabled
|
|
78
|
+
is_admin
|
|
79
|
+
is_guest
|
|
80
|
+
is_pending
|
|
81
|
+
is_view_only
|
|
82
|
+
created_at
|
|
83
|
+
birthday
|
|
84
|
+
country_code
|
|
85
|
+
join_date
|
|
86
|
+
location
|
|
87
|
+
mobile_phone
|
|
88
|
+
phone
|
|
89
|
+
photo_original
|
|
90
|
+
photo_thumb
|
|
91
|
+
photo_tiny
|
|
92
|
+
time_zone_identifier
|
|
93
|
+
title
|
|
94
|
+
url
|
|
95
|
+
utc_hours_diff
|
|
96
|
+
current_language
|
|
97
|
+
account {
|
|
98
|
+
id
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
# GraphQL query for fetching boards
|
|
105
|
+
BOARDS_QUERY = """
|
|
106
|
+
query ($limit: Int!, $page: Int!) {
|
|
107
|
+
boards(limit: $limit, page: $page) {
|
|
108
|
+
id
|
|
109
|
+
name
|
|
110
|
+
description
|
|
111
|
+
state
|
|
112
|
+
board_kind
|
|
113
|
+
board_folder_id
|
|
114
|
+
workspace_id
|
|
115
|
+
permissions
|
|
116
|
+
item_terminology
|
|
117
|
+
items_count
|
|
118
|
+
updated_at
|
|
119
|
+
url
|
|
120
|
+
communication
|
|
121
|
+
object_type_unique_key
|
|
122
|
+
type
|
|
123
|
+
creator {
|
|
124
|
+
id
|
|
125
|
+
}
|
|
126
|
+
owners {
|
|
127
|
+
id
|
|
128
|
+
}
|
|
129
|
+
subscribers {
|
|
130
|
+
id
|
|
131
|
+
}
|
|
132
|
+
team_owners {
|
|
133
|
+
id
|
|
134
|
+
}
|
|
135
|
+
team_subscribers {
|
|
136
|
+
id
|
|
137
|
+
}
|
|
138
|
+
tags {
|
|
139
|
+
id
|
|
140
|
+
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
"""
|
|
145
|
+
|
|
146
|
+
# GraphQL query for fetching custom activities
|
|
147
|
+
CUSTOM_ACTIVITIES_QUERY = """
|
|
148
|
+
query {
|
|
149
|
+
custom_activity {
|
|
150
|
+
id
|
|
151
|
+
name
|
|
152
|
+
type
|
|
153
|
+
color
|
|
154
|
+
icon_id
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
"""
|
|
158
|
+
|
|
159
|
+
# GraphQL query for fetching board columns
|
|
160
|
+
BOARD_COLUMNS_QUERY = """
|
|
161
|
+
query ($board_ids: [ID!]) {
|
|
162
|
+
boards(ids: $board_ids) {
|
|
163
|
+
id
|
|
164
|
+
columns {
|
|
165
|
+
id
|
|
166
|
+
title
|
|
167
|
+
type
|
|
168
|
+
archived
|
|
169
|
+
description
|
|
170
|
+
settings_str
|
|
171
|
+
width
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
"""
|
|
176
|
+
|
|
177
|
+
# GraphQL query for fetching board views
|
|
178
|
+
BOARD_VIEWS_QUERY = """
|
|
179
|
+
query ($board_ids: [ID!]) {
|
|
180
|
+
boards(ids: $board_ids) {
|
|
181
|
+
id
|
|
182
|
+
views {
|
|
183
|
+
id
|
|
184
|
+
name
|
|
185
|
+
type
|
|
186
|
+
settings_str
|
|
187
|
+
view_specific_data_str
|
|
188
|
+
source_view_id
|
|
189
|
+
access_level
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
"""
|
|
194
|
+
|
|
195
|
+
# GraphQL query for fetching workspaces by IDs
|
|
196
|
+
WORKSPACES_QUERY = """
|
|
197
|
+
query ($ids: [ID!]) {
|
|
198
|
+
workspaces(ids: $ids) {
|
|
199
|
+
id
|
|
200
|
+
name
|
|
201
|
+
kind
|
|
202
|
+
description
|
|
203
|
+
created_at
|
|
204
|
+
is_default_workspace
|
|
205
|
+
state
|
|
206
|
+
account_product {
|
|
207
|
+
id
|
|
208
|
+
}
|
|
209
|
+
owners_subscribers {
|
|
210
|
+
id
|
|
211
|
+
}
|
|
212
|
+
team_owners_subscribers {
|
|
213
|
+
id
|
|
214
|
+
}
|
|
215
|
+
teams_subscribers {
|
|
216
|
+
id
|
|
217
|
+
}
|
|
218
|
+
users_subscribers {
|
|
219
|
+
id
|
|
220
|
+
}
|
|
221
|
+
settings {
|
|
222
|
+
icon
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
}
|
|
226
|
+
"""
|
|
227
|
+
|
|
228
|
+
# GraphQL query for fetching webhooks by board ID
|
|
229
|
+
WEBHOOKS_QUERY = """
|
|
230
|
+
query ($board_id: ID!) {
|
|
231
|
+
webhooks(board_id: $board_id) {
|
|
232
|
+
id
|
|
233
|
+
event
|
|
234
|
+
board_id
|
|
235
|
+
config
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
"""
|
|
239
|
+
|
|
240
|
+
# GraphQL query for fetching updates
|
|
241
|
+
UPDATES_QUERY = """
|
|
242
|
+
query ($limit: Int!, $from_date: String, $to_date: String) {
|
|
243
|
+
updates(limit: $limit, from_date: $from_date, to_date: $to_date) {
|
|
244
|
+
id
|
|
245
|
+
body
|
|
246
|
+
text_body
|
|
247
|
+
created_at
|
|
248
|
+
updated_at
|
|
249
|
+
edited_at
|
|
250
|
+
creator_id
|
|
251
|
+
item_id
|
|
252
|
+
creator {
|
|
253
|
+
id
|
|
254
|
+
}
|
|
255
|
+
item {
|
|
256
|
+
id
|
|
257
|
+
}
|
|
258
|
+
assets {
|
|
259
|
+
id
|
|
260
|
+
name
|
|
261
|
+
file_extension
|
|
262
|
+
file_size
|
|
263
|
+
public_url
|
|
264
|
+
url
|
|
265
|
+
url_thumbnail
|
|
266
|
+
created_at
|
|
267
|
+
original_geometry
|
|
268
|
+
uploaded_by {
|
|
269
|
+
id
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
replies {
|
|
273
|
+
id
|
|
274
|
+
body
|
|
275
|
+
text_body
|
|
276
|
+
created_at
|
|
277
|
+
updated_at
|
|
278
|
+
creator_id
|
|
279
|
+
creator {
|
|
280
|
+
id
|
|
281
|
+
}
|
|
282
|
+
}
|
|
283
|
+
likes {
|
|
284
|
+
id
|
|
285
|
+
}
|
|
286
|
+
pinned_to_top {
|
|
287
|
+
item_id
|
|
288
|
+
}
|
|
289
|
+
viewers {
|
|
290
|
+
medium
|
|
291
|
+
user_id
|
|
292
|
+
user {
|
|
293
|
+
id
|
|
294
|
+
}
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
"""
|
|
299
|
+
|
|
300
|
+
# GraphQL query for fetching teams
|
|
301
|
+
TEAMS_QUERY = """
|
|
302
|
+
query {
|
|
303
|
+
teams {
|
|
304
|
+
id
|
|
305
|
+
name
|
|
306
|
+
picture_url
|
|
307
|
+
users {
|
|
308
|
+
id
|
|
309
|
+
created_at
|
|
310
|
+
phone
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
}
|
|
314
|
+
"""
|
|
315
|
+
|
|
316
|
+
# GraphQL query for fetching tags
|
|
317
|
+
TAGS_QUERY = """
|
|
318
|
+
query {
|
|
319
|
+
tags {
|
|
320
|
+
id
|
|
321
|
+
name
|
|
322
|
+
color
|
|
323
|
+
}
|
|
324
|
+
}
|
|
325
|
+
"""
|
|
326
|
+
|
|
327
|
+
# Maximum number of results per page
|
|
328
|
+
MAX_PAGE_SIZE = 100
|
ingestr/src/mongodb/__init__.py
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
"""Source that loads collections form any a mongo database, supports incremental loads."""
|
|
2
2
|
|
|
3
|
-
from typing import Any, Iterable, List, Optional
|
|
3
|
+
from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
|
|
4
4
|
|
|
5
5
|
import dlt
|
|
6
|
+
from dlt.common.data_writers import TDataItemFormat
|
|
6
7
|
from dlt.sources import DltResource
|
|
7
8
|
|
|
8
9
|
from .helpers import (
|
|
@@ -13,7 +14,7 @@ from .helpers import (
|
|
|
13
14
|
)
|
|
14
15
|
|
|
15
16
|
|
|
16
|
-
@dlt.source
|
|
17
|
+
@dlt.source(max_table_nesting=0)
|
|
17
18
|
def mongodb(
|
|
18
19
|
connection_url: str = dlt.secrets.value,
|
|
19
20
|
database: Optional[str] = dlt.config.value,
|
|
@@ -21,6 +22,10 @@ def mongodb(
|
|
|
21
22
|
incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg]
|
|
22
23
|
write_disposition: Optional[str] = dlt.config.value,
|
|
23
24
|
parallel: Optional[bool] = dlt.config.value,
|
|
25
|
+
limit: Optional[int] = None,
|
|
26
|
+
filter_: Optional[Dict[str, Any]] = None,
|
|
27
|
+
projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
|
|
28
|
+
pymongoarrow_schema: Optional[Any] = None,
|
|
24
29
|
) -> Iterable[DltResource]:
|
|
25
30
|
"""
|
|
26
31
|
A DLT source which loads data from a mongo database using PyMongo.
|
|
@@ -34,6 +39,18 @@ def mongodb(
|
|
|
34
39
|
E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
|
|
35
40
|
write_disposition (str): Write disposition of the resource.
|
|
36
41
|
parallel (Optional[bool]): Option to enable parallel loading for the collection. Default is False.
|
|
42
|
+
limit (Optional[int]):
|
|
43
|
+
The maximum number of documents to load. The limit is
|
|
44
|
+
applied to each requested collection separately.
|
|
45
|
+
filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection.
|
|
46
|
+
projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields of a collection
|
|
47
|
+
when loading the collection. Supported inputs:
|
|
48
|
+
include (list) - ["year", "title"]
|
|
49
|
+
include (dict) - {"year": True, "title": True}
|
|
50
|
+
exclude (dict) - {"released": False, "runtime": False}
|
|
51
|
+
Note: Can't mix include and exclude statements, e.g. `{"title": True, "released": False}`
|
|
52
|
+
pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types of a collection to convert BSON to Arrow
|
|
53
|
+
|
|
37
54
|
Returns:
|
|
38
55
|
Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
|
|
39
56
|
"""
|
|
@@ -58,20 +75,39 @@ def mongodb(
|
|
|
58
75
|
primary_key="_id",
|
|
59
76
|
write_disposition=write_disposition,
|
|
60
77
|
spec=MongoDbCollectionConfiguration,
|
|
61
|
-
|
|
78
|
+
max_table_nesting=0,
|
|
79
|
+
)(
|
|
80
|
+
client,
|
|
81
|
+
collection,
|
|
82
|
+
incremental=incremental,
|
|
83
|
+
parallel=parallel,
|
|
84
|
+
limit=limit,
|
|
85
|
+
filter_=filter_ or {},
|
|
86
|
+
projection=projection,
|
|
87
|
+
pymongoarrow_schema=pymongoarrow_schema,
|
|
88
|
+
)
|
|
62
89
|
|
|
63
90
|
|
|
64
|
-
@dlt.
|
|
65
|
-
|
|
91
|
+
@dlt.resource(
|
|
92
|
+
name=lambda args: args["collection"],
|
|
93
|
+
standalone=True,
|
|
94
|
+
spec=MongoDbCollectionResourceConfiguration,
|
|
66
95
|
)
|
|
67
96
|
def mongodb_collection(
|
|
68
|
-
connection_url: str = dlt.
|
|
97
|
+
connection_url: str = dlt.secrets.value,
|
|
69
98
|
database: Optional[str] = dlt.config.value,
|
|
70
99
|
collection: str = dlt.config.value,
|
|
71
100
|
incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg]
|
|
72
101
|
write_disposition: Optional[str] = dlt.config.value,
|
|
73
102
|
parallel: Optional[bool] = False,
|
|
74
|
-
|
|
103
|
+
limit: Optional[int] = None,
|
|
104
|
+
chunk_size: Optional[int] = 1000,
|
|
105
|
+
data_item_format: Optional[TDataItemFormat] = "object",
|
|
106
|
+
filter_: Optional[Dict[str, Any]] = None,
|
|
107
|
+
projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,
|
|
108
|
+
pymongoarrow_schema: Optional[Any] = None,
|
|
109
|
+
custom_query: Optional[List[Dict[str, Any]]] = None,
|
|
110
|
+
) -> DltResource:
|
|
75
111
|
"""
|
|
76
112
|
A DLT source which loads a collection from a mongo database using PyMongo.
|
|
77
113
|
|
|
@@ -83,6 +119,22 @@ def mongodb_collection(
|
|
|
83
119
|
E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
|
|
84
120
|
write_disposition (str): Write disposition of the resource.
|
|
85
121
|
parallel (Optional[bool]): Option to enable parallel loading for the collection. Default is False.
|
|
122
|
+
limit (Optional[int]): The maximum number of documents to load.
|
|
123
|
+
chunk_size (Optional[int]): The number of documents to load in each batch.
|
|
124
|
+
data_item_format (Optional[TDataItemFormat]): The data format to use for loading.
|
|
125
|
+
Supported formats:
|
|
126
|
+
object - Python objects (dicts, lists).
|
|
127
|
+
arrow - Apache Arrow tables.
|
|
128
|
+
filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection.
|
|
129
|
+
projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields
|
|
130
|
+
when loading the collection. Supported inputs:
|
|
131
|
+
include (list) - ["year", "title"]
|
|
132
|
+
include (dict) - {"year": True, "title": True}
|
|
133
|
+
exclude (dict) - {"released": False, "runtime": False}
|
|
134
|
+
Note: Can't mix include and exclude statements, e.g. `{"title": True, "released": False}`
|
|
135
|
+
pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types to convert BSON to Arrow
|
|
136
|
+
custom_query (Optional[List[Dict[str, Any]]]): Custom MongoDB aggregation pipeline to execute instead of find()
|
|
137
|
+
|
|
86
138
|
Returns:
|
|
87
139
|
DltResource: A DLT resource for the collection to be loaded.
|
|
88
140
|
"""
|
|
@@ -100,4 +152,16 @@ def mongodb_collection(
|
|
|
100
152
|
name=collection_obj.name,
|
|
101
153
|
primary_key="_id",
|
|
102
154
|
write_disposition=write_disposition,
|
|
103
|
-
)(
|
|
155
|
+
)(
|
|
156
|
+
client,
|
|
157
|
+
collection_obj,
|
|
158
|
+
incremental=incremental,
|
|
159
|
+
parallel=parallel,
|
|
160
|
+
limit=limit,
|
|
161
|
+
chunk_size=chunk_size,
|
|
162
|
+
data_item_format=data_item_format,
|
|
163
|
+
filter_=filter_ or {},
|
|
164
|
+
projection=projection,
|
|
165
|
+
pymongoarrow_schema=pymongoarrow_schema,
|
|
166
|
+
custom_query=custom_query,
|
|
167
|
+
)
|