omniload 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omniload/conftest.py +72 -0
- omniload/main.py +810 -0
- omniload/src/.gitignore +10 -0
- omniload/src/adjust/__init__.py +108 -0
- omniload/src/adjust/adjust_helpers.py +122 -0
- omniload/src/airtable/__init__.py +84 -0
- omniload/src/allium/__init__.py +128 -0
- omniload/src/anthropic/__init__.py +277 -0
- omniload/src/anthropic/helpers.py +525 -0
- omniload/src/applovin/__init__.py +316 -0
- omniload/src/applovin_max/__init__.py +117 -0
- omniload/src/appsflyer/__init__.py +325 -0
- omniload/src/appsflyer/client.py +110 -0
- omniload/src/appstore/__init__.py +142 -0
- omniload/src/appstore/client.py +126 -0
- omniload/src/appstore/errors.py +15 -0
- omniload/src/appstore/models.py +117 -0
- omniload/src/appstore/resources.py +179 -0
- omniload/src/arrow/__init__.py +81 -0
- omniload/src/asana_source/__init__.py +281 -0
- omniload/src/asana_source/helpers.py +30 -0
- omniload/src/asana_source/settings.py +158 -0
- omniload/src/attio/__init__.py +102 -0
- omniload/src/attio/helpers.py +65 -0
- omniload/src/blob.py +95 -0
- omniload/src/bruin/__init__.py +76 -0
- omniload/src/chess/__init__.py +180 -0
- omniload/src/chess/helpers.py +35 -0
- omniload/src/chess/settings.py +18 -0
- omniload/src/clickup/__init__.py +85 -0
- omniload/src/clickup/helpers.py +47 -0
- omniload/src/collector/spinner.py +43 -0
- omniload/src/couchbase_source/__init__.py +118 -0
- omniload/src/couchbase_source/helpers.py +135 -0
- omniload/src/cursor/__init__.py +83 -0
- omniload/src/cursor/helpers.py +188 -0
- omniload/src/customer_io/__init__.py +486 -0
- omniload/src/customer_io/helpers.py +530 -0
- omniload/src/destinations.py +982 -0
- omniload/src/docebo/__init__.py +589 -0
- omniload/src/docebo/client.py +435 -0
- omniload/src/docebo/helpers.py +97 -0
- omniload/src/dune/__init__.py +104 -0
- omniload/src/dune/helpers.py +108 -0
- omniload/src/dynamodb/__init__.py +86 -0
- omniload/src/elasticsearch/__init__.py +80 -0
- omniload/src/elasticsearch/helpers.py +141 -0
- omniload/src/errors.py +26 -0
- omniload/src/facebook_ads/__init__.py +403 -0
- omniload/src/facebook_ads/exceptions.py +19 -0
- omniload/src/facebook_ads/helpers.py +296 -0
- omniload/src/facebook_ads/settings.py +224 -0
- omniload/src/facebook_ads/utils.py +53 -0
- omniload/src/factory.py +305 -0
- omniload/src/filesystem/__init__.py +133 -0
- omniload/src/filesystem/helpers.py +114 -0
- omniload/src/filesystem/readers.py +187 -0
- omniload/src/filters.py +62 -0
- omniload/src/fireflies/__init__.py +151 -0
- omniload/src/fireflies/helpers.py +753 -0
- omniload/src/fluxx/__init__.py +10013 -0
- omniload/src/fluxx/helpers.py +233 -0
- omniload/src/frankfurter/__init__.py +157 -0
- omniload/src/frankfurter/helpers.py +48 -0
- omniload/src/freshdesk/__init__.py +103 -0
- omniload/src/freshdesk/freshdesk_client.py +151 -0
- omniload/src/freshdesk/settings.py +23 -0
- omniload/src/fundraiseup/__init__.py +95 -0
- omniload/src/fundraiseup/client.py +81 -0
- omniload/src/github/__init__.py +202 -0
- omniload/src/github/helpers.py +207 -0
- omniload/src/github/queries.py +129 -0
- omniload/src/github/settings.py +24 -0
- omniload/src/google_ads/__init__.py +198 -0
- omniload/src/google_ads/field.py +17 -0
- omniload/src/google_ads/metrics.py +254 -0
- omniload/src/google_ads/predicates.py +37 -0
- omniload/src/google_ads/reports.py +411 -0
- omniload/src/google_ads/test_google_ads.py +184 -0
- omniload/src/google_analytics/__init__.py +144 -0
- omniload/src/google_analytics/helpers.py +312 -0
- omniload/src/google_sheets/README.md +95 -0
- omniload/src/google_sheets/__init__.py +166 -0
- omniload/src/google_sheets/helpers/__init__.py +15 -0
- omniload/src/google_sheets/helpers/api_calls.py +160 -0
- omniload/src/google_sheets/helpers/data_processing.py +316 -0
- omniload/src/gorgias/__init__.py +595 -0
- omniload/src/gorgias/helpers.py +166 -0
- omniload/src/hostaway/__init__.py +302 -0
- omniload/src/hostaway/client.py +288 -0
- omniload/src/http/__init__.py +38 -0
- omniload/src/http/readers.py +146 -0
- omniload/src/http_client.py +24 -0
- omniload/src/hubspot/__init__.py +800 -0
- omniload/src/hubspot/helpers.py +417 -0
- omniload/src/hubspot/settings.py +329 -0
- omniload/src/indeed/__init__.py +153 -0
- omniload/src/indeed/helpers.py +228 -0
- omniload/src/influxdb/__init__.py +46 -0
- omniload/src/influxdb/client.py +34 -0
- omniload/src/intercom/__init__.py +142 -0
- omniload/src/intercom/helpers.py +674 -0
- omniload/src/intercom/settings.py +279 -0
- omniload/src/isoc_pulse/__init__.py +159 -0
- omniload/src/jira_source/__init__.py +377 -0
- omniload/src/jira_source/helpers.py +510 -0
- omniload/src/jira_source/settings.py +184 -0
- omniload/src/kafka/__init__.py +120 -0
- omniload/src/kafka/helpers.py +241 -0
- omniload/src/kinesis/__init__.py +153 -0
- omniload/src/kinesis/helpers.py +96 -0
- omniload/src/klaviyo/__init__.py +237 -0
- omniload/src/klaviyo/client.py +212 -0
- omniload/src/klaviyo/helpers.py +19 -0
- omniload/src/linear/__init__.py +634 -0
- omniload/src/linear/helpers.py +111 -0
- omniload/src/linkedin_ads/__init__.py +266 -0
- omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
- omniload/src/linkedin_ads/helpers.py +246 -0
- omniload/src/loader.py +69 -0
- omniload/src/mailchimp/__init__.py +126 -0
- omniload/src/mailchimp/helpers.py +226 -0
- omniload/src/mailchimp/settings.py +164 -0
- omniload/src/masking.py +344 -0
- omniload/src/mixpanel/__init__.py +62 -0
- omniload/src/mixpanel/client.py +104 -0
- omniload/src/monday/__init__.py +246 -0
- omniload/src/monday/helpers.py +392 -0
- omniload/src/monday/settings.py +325 -0
- omniload/src/mongodb/__init__.py +281 -0
- omniload/src/mongodb/helpers.py +975 -0
- omniload/src/notion/__init__.py +69 -0
- omniload/src/notion/helpers/__init__.py +14 -0
- omniload/src/notion/helpers/client.py +178 -0
- omniload/src/notion/helpers/database.py +92 -0
- omniload/src/notion/settings.py +17 -0
- omniload/src/partition.py +32 -0
- omniload/src/personio/__init__.py +345 -0
- omniload/src/personio/helpers.py +100 -0
- omniload/src/phantombuster/__init__.py +65 -0
- omniload/src/phantombuster/client.py +87 -0
- omniload/src/pinterest/__init__.py +82 -0
- omniload/src/pipedrive/__init__.py +212 -0
- omniload/src/pipedrive/helpers/__init__.py +37 -0
- omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
- omniload/src/pipedrive/helpers/pages.py +129 -0
- omniload/src/pipedrive/settings.py +41 -0
- omniload/src/pipedrive/typing.py +17 -0
- omniload/src/plusvibeai/__init__.py +335 -0
- omniload/src/plusvibeai/helpers.py +544 -0
- omniload/src/plusvibeai/settings.py +252 -0
- omniload/src/primer/__init__.py +45 -0
- omniload/src/primer/helpers.py +79 -0
- omniload/src/quickbooks/__init__.py +117 -0
- omniload/src/reddit_ads/__init__.py +183 -0
- omniload/src/reddit_ads/helpers.py +232 -0
- omniload/src/resource.py +40 -0
- omniload/src/revenuecat/__init__.py +83 -0
- omniload/src/revenuecat/helpers.py +237 -0
- omniload/src/salesforce/__init__.py +170 -0
- omniload/src/salesforce/helpers.py +78 -0
- omniload/src/shopify/__init__.py +1953 -0
- omniload/src/shopify/exceptions.py +17 -0
- omniload/src/shopify/helpers.py +202 -0
- omniload/src/shopify/settings.py +19 -0
- omniload/src/slack/__init__.py +290 -0
- omniload/src/slack/helpers.py +218 -0
- omniload/src/slack/settings.py +36 -0
- omniload/src/smartsheets/__init__.py +82 -0
- omniload/src/snapchat_ads/__init__.py +455 -0
- omniload/src/snapchat_ads/client.py +72 -0
- omniload/src/snapchat_ads/helpers.py +630 -0
- omniload/src/snapchat_ads/settings.py +130 -0
- omniload/src/socrata_source/__init__.py +83 -0
- omniload/src/socrata_source/helpers.py +85 -0
- omniload/src/socrata_source/settings.py +8 -0
- omniload/src/solidgate/__init__.py +219 -0
- omniload/src/solidgate/helpers.py +154 -0
- omniload/src/sources.py +5408 -0
- omniload/src/sql_database/__init__.py +0 -0
- omniload/src/sql_database/callbacks.py +66 -0
- omniload/src/stripe_analytics/__init__.py +183 -0
- omniload/src/stripe_analytics/helpers.py +386 -0
- omniload/src/stripe_analytics/settings.py +80 -0
- omniload/src/table_definition.py +15 -0
- omniload/src/testdata/fakebqcredentials.json +14 -0
- omniload/src/tiktok_ads/__init__.py +150 -0
- omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
- omniload/src/time.py +11 -0
- omniload/src/trustpilot/__init__.py +48 -0
- omniload/src/trustpilot/client.py +48 -0
- omniload/src/version.py +6 -0
- omniload/src/wise/__init__.py +68 -0
- omniload/src/wise/client.py +63 -0
- omniload/src/zendesk/__init__.py +480 -0
- omniload/src/zendesk/helpers/__init__.py +39 -0
- omniload/src/zendesk/helpers/api_helpers.py +119 -0
- omniload/src/zendesk/helpers/credentials.py +68 -0
- omniload/src/zendesk/helpers/talk_api.py +132 -0
- omniload/src/zendesk/settings.py +71 -0
- omniload/src/zoom/__init__.py +99 -0
- omniload/src/zoom/helpers.py +102 -0
- omniload/testdata/.gitignore +2 -0
- omniload/testdata/create_replace.csv +21 -0
- omniload/testdata/delete_insert_expected.csv +6 -0
- omniload/testdata/delete_insert_part1.csv +5 -0
- omniload/testdata/delete_insert_part2.csv +6 -0
- omniload/testdata/merge_expected.csv +5 -0
- omniload/testdata/merge_part1.csv +4 -0
- omniload/testdata/merge_part2.csv +5 -0
- omniload/tests/unit/test_smartsheets.py +133 -0
- omniload-0.0.0.dev0.dist-info/METADATA +439 -0
- omniload-0.0.0.dev0.dist-info/RECORD +218 -0
- omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
- omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
- omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
"""Monday.com source settings and constants"""
|
|
2
|
+
|
|
3
|
+
# GraphQL query for fetching app installs
|
|
4
|
+
APP_INSTALLS_QUERY = """
|
|
5
|
+
query ($app_id: ID!, $account_id: ID, $limit: Int!, $page: Int!) {
|
|
6
|
+
app_installs(
|
|
7
|
+
app_id: $app_id
|
|
8
|
+
account_id: $account_id
|
|
9
|
+
limit: $limit
|
|
10
|
+
page: $page
|
|
11
|
+
) {
|
|
12
|
+
app_id
|
|
13
|
+
timestamp
|
|
14
|
+
app_install_account {
|
|
15
|
+
id
|
|
16
|
+
}
|
|
17
|
+
app_install_user {
|
|
18
|
+
id
|
|
19
|
+
}
|
|
20
|
+
app_version {
|
|
21
|
+
major
|
|
22
|
+
minor
|
|
23
|
+
patch
|
|
24
|
+
type
|
|
25
|
+
text
|
|
26
|
+
}
|
|
27
|
+
permissions {
|
|
28
|
+
approved_scopes
|
|
29
|
+
required_scopes
|
|
30
|
+
}
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
"""
|
|
34
|
+
|
|
35
|
+
# GraphQL query for fetching account information
|
|
36
|
+
ACCOUNT_QUERY = """
|
|
37
|
+
query {
|
|
38
|
+
account {
|
|
39
|
+
id
|
|
40
|
+
name
|
|
41
|
+
slug
|
|
42
|
+
tier
|
|
43
|
+
country_code
|
|
44
|
+
first_day_of_the_week
|
|
45
|
+
show_timeline_weekends
|
|
46
|
+
sign_up_product_kind
|
|
47
|
+
active_members_count
|
|
48
|
+
logo
|
|
49
|
+
plan {
|
|
50
|
+
max_users
|
|
51
|
+
period
|
|
52
|
+
tier
|
|
53
|
+
version
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
"""
|
|
58
|
+
|
|
59
|
+
# GraphQL query for fetching account roles
|
|
60
|
+
ACCOUNT_ROLES_QUERY = """
|
|
61
|
+
query {
|
|
62
|
+
account_roles {
|
|
63
|
+
id
|
|
64
|
+
name
|
|
65
|
+
roleType
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
# GraphQL query for fetching users
|
|
71
|
+
USERS_QUERY = """
|
|
72
|
+
query ($limit: Int!, $page: Int!) {
|
|
73
|
+
users(limit: $limit, page: $page) {
|
|
74
|
+
id
|
|
75
|
+
name
|
|
76
|
+
email
|
|
77
|
+
enabled
|
|
78
|
+
is_admin
|
|
79
|
+
is_guest
|
|
80
|
+
is_pending
|
|
81
|
+
is_view_only
|
|
82
|
+
created_at
|
|
83
|
+
birthday
|
|
84
|
+
country_code
|
|
85
|
+
join_date
|
|
86
|
+
location
|
|
87
|
+
mobile_phone
|
|
88
|
+
phone
|
|
89
|
+
photo_original
|
|
90
|
+
photo_thumb
|
|
91
|
+
photo_tiny
|
|
92
|
+
time_zone_identifier
|
|
93
|
+
title
|
|
94
|
+
url
|
|
95
|
+
utc_hours_diff
|
|
96
|
+
current_language
|
|
97
|
+
account {
|
|
98
|
+
id
|
|
99
|
+
}
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
# GraphQL query for fetching boards
|
|
105
|
+
BOARDS_QUERY = """
|
|
106
|
+
query ($limit: Int!, $page: Int!) {
|
|
107
|
+
boards(limit: $limit, page: $page) {
|
|
108
|
+
id
|
|
109
|
+
name
|
|
110
|
+
description
|
|
111
|
+
state
|
|
112
|
+
board_kind
|
|
113
|
+
board_folder_id
|
|
114
|
+
workspace_id
|
|
115
|
+
permissions
|
|
116
|
+
item_terminology
|
|
117
|
+
items_count
|
|
118
|
+
updated_at
|
|
119
|
+
url
|
|
120
|
+
communication
|
|
121
|
+
object_type_unique_key
|
|
122
|
+
type
|
|
123
|
+
creator {
|
|
124
|
+
id
|
|
125
|
+
}
|
|
126
|
+
owners {
|
|
127
|
+
id
|
|
128
|
+
}
|
|
129
|
+
subscribers {
|
|
130
|
+
id
|
|
131
|
+
}
|
|
132
|
+
team_owners {
|
|
133
|
+
id
|
|
134
|
+
}
|
|
135
|
+
team_subscribers {
|
|
136
|
+
id
|
|
137
|
+
}
|
|
138
|
+
tags {
|
|
139
|
+
id
|
|
140
|
+
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
"""
|
|
145
|
+
|
|
146
|
+
# GraphQL query for fetching custom activities
|
|
147
|
+
CUSTOM_ACTIVITIES_QUERY = """
|
|
148
|
+
query {
|
|
149
|
+
custom_activity {
|
|
150
|
+
id
|
|
151
|
+
name
|
|
152
|
+
type
|
|
153
|
+
color
|
|
154
|
+
icon_id
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
"""
|
|
158
|
+
|
|
159
|
+
# GraphQL query for fetching board columns
|
|
160
|
+
BOARD_COLUMNS_QUERY = """
|
|
161
|
+
query ($board_ids: [ID!]) {
|
|
162
|
+
boards(ids: $board_ids) {
|
|
163
|
+
id
|
|
164
|
+
columns {
|
|
165
|
+
id
|
|
166
|
+
title
|
|
167
|
+
type
|
|
168
|
+
archived
|
|
169
|
+
description
|
|
170
|
+
settings_str
|
|
171
|
+
width
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
}
|
|
175
|
+
"""
|
|
176
|
+
|
|
177
|
+
# GraphQL query for fetching board views
|
|
178
|
+
BOARD_VIEWS_QUERY = """
|
|
179
|
+
query ($board_ids: [ID!]) {
|
|
180
|
+
boards(ids: $board_ids) {
|
|
181
|
+
id
|
|
182
|
+
views {
|
|
183
|
+
id
|
|
184
|
+
name
|
|
185
|
+
type
|
|
186
|
+
settings_str
|
|
187
|
+
view_specific_data_str
|
|
188
|
+
source_view_id
|
|
189
|
+
access_level
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
}
|
|
193
|
+
"""
|
|
194
|
+
|
|
195
|
+
# GraphQL query for fetching workspaces by IDs
|
|
196
|
+
WORKSPACES_QUERY = """
|
|
197
|
+
query ($ids: [ID!]) {
|
|
198
|
+
workspaces(ids: $ids) {
|
|
199
|
+
id
|
|
200
|
+
name
|
|
201
|
+
kind
|
|
202
|
+
description
|
|
203
|
+
created_at
|
|
204
|
+
is_default_workspace
|
|
205
|
+
state
|
|
206
|
+
account_product {
|
|
207
|
+
id
|
|
208
|
+
}
|
|
209
|
+
owners_subscribers {
|
|
210
|
+
id
|
|
211
|
+
}
|
|
212
|
+
team_owners_subscribers {
|
|
213
|
+
id
|
|
214
|
+
}
|
|
215
|
+
teams_subscribers {
|
|
216
|
+
id
|
|
217
|
+
}
|
|
218
|
+
users_subscribers {
|
|
219
|
+
id
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
"""
|
|
224
|
+
|
|
225
|
+
# GraphQL query for fetching webhooks by board ID
|
|
226
|
+
WEBHOOKS_QUERY = """
|
|
227
|
+
query ($board_id: ID!) {
|
|
228
|
+
webhooks(board_id: $board_id) {
|
|
229
|
+
id
|
|
230
|
+
event
|
|
231
|
+
board_id
|
|
232
|
+
config
|
|
233
|
+
}
|
|
234
|
+
}
|
|
235
|
+
"""
|
|
236
|
+
|
|
237
|
+
# GraphQL query for fetching updates
|
|
238
|
+
UPDATES_QUERY = """
|
|
239
|
+
query ($limit: Int!, $from_date: String, $to_date: String) {
|
|
240
|
+
updates(limit: $limit, from_date: $from_date, to_date: $to_date) {
|
|
241
|
+
id
|
|
242
|
+
body
|
|
243
|
+
text_body
|
|
244
|
+
created_at
|
|
245
|
+
updated_at
|
|
246
|
+
edited_at
|
|
247
|
+
creator_id
|
|
248
|
+
item_id
|
|
249
|
+
creator {
|
|
250
|
+
id
|
|
251
|
+
}
|
|
252
|
+
item {
|
|
253
|
+
id
|
|
254
|
+
}
|
|
255
|
+
assets {
|
|
256
|
+
id
|
|
257
|
+
name
|
|
258
|
+
file_extension
|
|
259
|
+
file_size
|
|
260
|
+
public_url
|
|
261
|
+
url
|
|
262
|
+
url_thumbnail
|
|
263
|
+
created_at
|
|
264
|
+
original_geometry
|
|
265
|
+
uploaded_by {
|
|
266
|
+
id
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
replies {
|
|
270
|
+
id
|
|
271
|
+
body
|
|
272
|
+
text_body
|
|
273
|
+
created_at
|
|
274
|
+
updated_at
|
|
275
|
+
creator_id
|
|
276
|
+
creator {
|
|
277
|
+
id
|
|
278
|
+
}
|
|
279
|
+
}
|
|
280
|
+
likes {
|
|
281
|
+
id
|
|
282
|
+
}
|
|
283
|
+
pinned_to_top {
|
|
284
|
+
item_id
|
|
285
|
+
}
|
|
286
|
+
viewers {
|
|
287
|
+
medium
|
|
288
|
+
user_id
|
|
289
|
+
user {
|
|
290
|
+
id
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
"""
|
|
296
|
+
|
|
297
|
+
# GraphQL query for fetching teams
|
|
298
|
+
TEAMS_QUERY = """
|
|
299
|
+
query {
|
|
300
|
+
teams {
|
|
301
|
+
id
|
|
302
|
+
name
|
|
303
|
+
picture_url
|
|
304
|
+
users {
|
|
305
|
+
id
|
|
306
|
+
created_at
|
|
307
|
+
phone
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
}
|
|
311
|
+
"""
|
|
312
|
+
|
|
313
|
+
# GraphQL query for fetching tags
|
|
314
|
+
TAGS_QUERY = """
|
|
315
|
+
query {
|
|
316
|
+
tags {
|
|
317
|
+
id
|
|
318
|
+
name
|
|
319
|
+
color
|
|
320
|
+
}
|
|
321
|
+
}
|
|
322
|
+
"""
|
|
323
|
+
|
|
324
|
+
# Maximum number of results per page
|
|
325
|
+
MAX_PAGE_SIZE = 100
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
# Copyright 2022-2025 ScaleVector
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Source that loads collections from any mongo database, supports incremental loads."""
|
|
16
|
+
|
|
17
|
+
from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
|
|
18
|
+
|
|
19
|
+
import dlt
|
|
20
|
+
from dlt.common.data_writers import TDataItemFormat
|
|
21
|
+
from dlt.sources import DltResource
|
|
22
|
+
|
|
23
|
+
from .helpers import (
|
|
24
|
+
MongoDbCollectionConfiguration,
|
|
25
|
+
MongoDbCollectionResourceConfiguration,
|
|
26
|
+
client_from_credentials,
|
|
27
|
+
collection_documents,
|
|
28
|
+
process_file_items,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dlt.source(max_table_nesting=0)
def mongodb(
    connection_url: str = dlt.secrets.value,
    database: Optional[str] = dlt.config.value,
    collection_names: Optional[List[str]] = dlt.config.value,
    incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
    write_disposition: Optional[str] = dlt.config.value,
    parallel: Optional[bool] = dlt.config.value,
    limit: Optional[int] = None,
    filter_: Optional[Dict[str, Any]] = None,
    projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
    pymongoarrow_schema: Optional[Any] = None,
) -> Iterable[DltResource]:
    """
    Yield one DLT resource per collection of a mongo database.

    When no collection names are given, every collection reported by the
    database is loaded. Each resource wraps `collection_documents` and uses
    `_id` as its primary key.

    Args:
        connection_url (str): Database connection_url.
        database (Optional[str]): Name of the database to read. When empty,
            the client's default database is used.
        collection_names (Optional[List[str]]): Names of the collections to
            load. When empty, all collections of the database are loaded.
        incremental (Optional[dlt.sources.incremental]): Incremental loading
            configuration, forwarded unchanged to every collection resource.
            E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
        write_disposition (Optional[str]): Write disposition applied to every
            resource.
        parallel (Optional[bool]): Option to enable parallel loading,
            forwarded to every collection resource.
        limit (Optional[int]): The maximum number of documents to load. The
            same value is forwarded to each requested collection separately.
        filter_ (Optional[Dict[str, Any]]): The filter to apply to the
            collection. A missing/empty filter is forwarded as `{}`.
        projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The
            projection to select fields of a collection. Supported inputs:
                include (list) - ["year", "title"]
                include (dict) - {"year": True, "title": True}
                exclude (dict) - {"released": False, "runtime": False}
            Note: include and exclude statements can't be mixed, e.g.
            `{"title": True, "released": False}`.
        pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected
            field types of a collection to convert BSON to Arrow.

    Returns:
        Iterable[DltResource]: One DLT resource for each collection to be loaded.
    """
    # Connect and pick the requested database (or the client's default one).
    client = client_from_credentials(connection_url)
    mongo_database = client[database] if database else client.get_default_database()

    # Fall back to every collection in the database when none were requested.
    selected_names = collection_names or mongo_database.list_collection_names()

    # Resolve all collection handles up front, before the first resource is yielded.
    targets = [mongo_database[name] for name in selected_names]

    for target in targets:
        build_resource = dlt.resource(  # type: ignore
            collection_documents,
            name=target.name,
            primary_key="_id",
            write_disposition=write_disposition,
            spec=MongoDbCollectionConfiguration,
            max_table_nesting=0,
        )
        yield build_resource(
            client,
            target,
            incremental=incremental,
            parallel=parallel,
            limit=limit,
            filter_=filter_ or {},
            projection=projection,
            pymongoarrow_schema=pymongoarrow_schema,
        )
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
@dlt.resource(
    name=lambda args: args["collection"],
    standalone=True,
    spec=MongoDbCollectionResourceConfiguration,
    max_table_nesting=0,
)
def mongodb_collection(
    connection_url: str = dlt.secrets.value,
    database: Optional[str] = dlt.config.value,
    collection: str = dlt.config.value,
    incremental: Optional[dlt.sources.incremental] = None,  # type: ignore[type-arg]
    write_disposition: Optional[str] = dlt.config.value,
    parallel: Optional[bool] = False,
    limit: Optional[int] = None,
    chunk_size: Optional[int] = 1000,
    data_item_format: Optional[TDataItemFormat] = "object",
    filter_: Optional[Dict[str, Any]] = None,
    projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,
    pymongoarrow_schema: Optional[Any] = None,
    custom_query: Optional[List[Dict[str, Any]]] = None,
) -> DltResource:
    """
    Build a DLT resource that loads a single collection from a mongo database.

    The resource wraps `collection_documents` and uses `_id` as its primary key.

    Args:
        connection_url (str): Database connection_url.
        database (Optional[str]): Name of the database to read. When empty,
            the client's default database is used.
        collection (str): The collection name to load.
        incremental (Optional[dlt.sources.incremental]): Incremental loading
            configuration, forwarded unchanged to the collection resource.
            E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
        write_disposition (Optional[str]): Write disposition of the resource.
        parallel (Optional[bool]): Option to enable parallel loading for the
            collection. Default is False.
        limit (Optional[int]): The number of documents to load.
        chunk_size (Optional[int]): The number of documents to load in each
            batch. Default is 1000.
        data_item_format (Optional[TDataItemFormat]): The data format to use
            for loading. Default is "object". Supported formats:
                object - Python objects (dicts, lists).
                arrow - Apache Arrow tables.
        filter_ (Optional[Dict[str, Any]]): The filter to apply to the
            collection. A missing/empty filter is forwarded as `{}`.
        projection (Optional[Union[Mapping[str, Any], Iterable[str]]]): The
            projection to select fields when loading the collection.
            Supported inputs:
                include (list) - ["year", "title"]
                include (dict) - {"year": True, "title": True}
                exclude (dict) - {"released": False, "runtime": False}
            Note: include and exclude statements can't be mixed, e.g.
            `{"title": True, "released": False}`.
        pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected
            field types to convert BSON to Arrow.
        custom_query (Optional[List[Dict[str, Any]]]): Custom MongoDB
            aggregation pipeline to execute instead of find().

    Returns:
        DltResource: The DLT resource for the selected collection.
    """
    # Connect and pick the requested database (or the client's default one).
    client = client_from_credentials(connection_url)
    mongo_database = client[database] if database else client.get_default_database()

    collection_obj = mongo_database[collection]

    build_resource = dlt.resource(  # type: ignore
        collection_documents,
        name=collection_obj.name,
        primary_key="_id",
        write_disposition=write_disposition,
    )
    return build_resource(
        client,
        collection_obj,
        incremental=incremental,
        parallel=parallel,
        limit=limit,
        chunk_size=chunk_size,
        data_item_format=data_item_format,
        filter_=filter_ or {},
        projection=projection,
        pymongoarrow_schema=pymongoarrow_schema,
        custom_query=custom_query,
    )
|
|
184
|
+
|
|
185
|
+
|
|
186
|
+
def mongodb_insert(uri: str):
    """Create a dlt.destination that writes incoming data into MongoDB collections.

    The target database is taken from the path component of `uri`; when the
    URI has no path, the database name "omniload_db" is used. Each table is
    written to the collection named after the table.

    Supported write dispositions (read from the table schema):
        merge   - upsert every document with `ReplaceOne`, matched on the
                  table's primary-key columns. Documents that lack any
                  primary-key field are skipped. Raises ValueError when the
                  table defines no primary key.
        replace - empty the collection once per table (on the first call that
                  carries documents), then insert the documents.
    Any other write disposition raises ValueError.

    Args:
        uri (str): MongoDB connection URI including database.

    Returns:
        dlt.destination: A DLT destination object configured for MongoDB.
    """
    from urllib.parse import urlparse

    # Database name = URI path without leading slashes, with a fixed fallback.
    database = urlparse(uri).path.lstrip("/") or "omniload_db"

    # Per-table flag: True until the collection has been cleared for "replace".
    first_batch_per_table: dict[str, bool] = {}
    BATCH_SIZE = 10000

    def destination(items, table) -> None:
        import pyarrow
        from pymongo import MongoClient

        collection_name = table["name"]
        first_batch_per_table.setdefault(collection_name, True)

        with MongoClient(uri) as client:
            collection = client[database][collection_name]

            # Normalise the incoming payload into a list of documents.
            if isinstance(items, str):
                # A string payload is handed to the file-based helper.
                documents = process_file_items(items)
            elif isinstance(items, pyarrow.RecordBatch):
                documents = items.to_pylist()
            else:
                # Keep only dict items; anything else is dropped.
                documents = list(filter(lambda item: isinstance(item, dict), items))

            write_disposition = table.get("write_disposition")

            # Slice the documents into write batches of at most BATCH_SIZE.
            batches = []
            for start in range(0, len(documents), BATCH_SIZE):
                batches.append(documents[start : start + BATCH_SIZE])

            if write_disposition not in ("merge", "replace"):
                raise ValueError(
                    f"Unsupported write disposition '{write_disposition}' for MongoDB destination. "
                )

            if write_disposition == "merge":
                from pymongo import ReplaceOne

                # Primary keys are the columns flagged with "primary_key" in the schema.
                primary_keys = []
                for col_name, col_def in table.get("columns", {}).items():
                    if isinstance(col_def, dict) and col_def.get("primary_key"):
                        primary_keys.append(col_name)

                if not primary_keys:
                    raise ValueError(
                        f"Merge operation requires primary keys for table '{collection_name}'. "
                        f"Please define primary keys in the table schema or use 'replace' write disposition."
                    )

                for batch in batches:
                    operations = []
                    for doc in batch:
                        # Documents without a complete primary key cannot be matched.
                        if any(key not in doc for key in primary_keys):
                            continue
                        match_on = {key: doc[key] for key in primary_keys}
                        operations.append(ReplaceOne(match_on, doc, upsert=True))
                    if operations:
                        collection.bulk_write(operations, ordered=False)
                return

            # write_disposition == "replace"
            if first_batch_per_table[collection_name] and documents:
                # Clear existing data only once, right before the first real insert.
                collection.delete_many({})
                first_batch_per_table[collection_name] = False

            for batch in batches:
                if batch:
                    collection.insert_many(batch)

    return dlt.destination(
        destination,
        name="mongodb",
        loader_file_format="typed-jsonl",
        batch_size=1000,
        naming_convention="snake_case",
        loader_parallelism_strategy="sequential",
    )
|