ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (146):
  1. ingestr/conftest.py +72 -0
  2. ingestr/main.py +134 -87
  3. ingestr/src/adjust/__init__.py +4 -4
  4. ingestr/src/adjust/adjust_helpers.py +7 -3
  5. ingestr/src/airtable/__init__.py +3 -2
  6. ingestr/src/allium/__init__.py +128 -0
  7. ingestr/src/anthropic/__init__.py +277 -0
  8. ingestr/src/anthropic/helpers.py +525 -0
  9. ingestr/src/applovin/__init__.py +262 -0
  10. ingestr/src/applovin_max/__init__.py +117 -0
  11. ingestr/src/appsflyer/__init__.py +325 -0
  12. ingestr/src/appsflyer/client.py +49 -45
  13. ingestr/src/appstore/__init__.py +1 -0
  14. ingestr/src/arrow/__init__.py +9 -1
  15. ingestr/src/asana_source/__init__.py +1 -1
  16. ingestr/src/attio/__init__.py +102 -0
  17. ingestr/src/attio/helpers.py +65 -0
  18. ingestr/src/blob.py +38 -11
  19. ingestr/src/buildinfo.py +1 -0
  20. ingestr/src/chess/__init__.py +1 -1
  21. ingestr/src/clickup/__init__.py +85 -0
  22. ingestr/src/clickup/helpers.py +47 -0
  23. ingestr/src/collector/spinner.py +43 -0
  24. ingestr/src/couchbase_source/__init__.py +118 -0
  25. ingestr/src/couchbase_source/helpers.py +135 -0
  26. ingestr/src/cursor/__init__.py +83 -0
  27. ingestr/src/cursor/helpers.py +188 -0
  28. ingestr/src/destinations.py +520 -33
  29. ingestr/src/docebo/__init__.py +589 -0
  30. ingestr/src/docebo/client.py +435 -0
  31. ingestr/src/docebo/helpers.py +97 -0
  32. ingestr/src/elasticsearch/__init__.py +80 -0
  33. ingestr/src/elasticsearch/helpers.py +138 -0
  34. ingestr/src/errors.py +8 -0
  35. ingestr/src/facebook_ads/__init__.py +47 -28
  36. ingestr/src/facebook_ads/helpers.py +59 -37
  37. ingestr/src/facebook_ads/settings.py +2 -0
  38. ingestr/src/facebook_ads/utils.py +39 -0
  39. ingestr/src/factory.py +116 -2
  40. ingestr/src/filesystem/__init__.py +8 -3
  41. ingestr/src/filters.py +46 -3
  42. ingestr/src/fluxx/__init__.py +9906 -0
  43. ingestr/src/fluxx/helpers.py +209 -0
  44. ingestr/src/frankfurter/__init__.py +157 -0
  45. ingestr/src/frankfurter/helpers.py +48 -0
  46. ingestr/src/freshdesk/__init__.py +89 -0
  47. ingestr/src/freshdesk/freshdesk_client.py +137 -0
  48. ingestr/src/freshdesk/settings.py +9 -0
  49. ingestr/src/fundraiseup/__init__.py +95 -0
  50. ingestr/src/fundraiseup/client.py +81 -0
  51. ingestr/src/github/__init__.py +41 -6
  52. ingestr/src/github/helpers.py +5 -5
  53. ingestr/src/google_analytics/__init__.py +22 -4
  54. ingestr/src/google_analytics/helpers.py +124 -6
  55. ingestr/src/google_sheets/__init__.py +4 -4
  56. ingestr/src/google_sheets/helpers/data_processing.py +2 -2
  57. ingestr/src/hostaway/__init__.py +302 -0
  58. ingestr/src/hostaway/client.py +288 -0
  59. ingestr/src/http/__init__.py +35 -0
  60. ingestr/src/http/readers.py +114 -0
  61. ingestr/src/http_client.py +24 -0
  62. ingestr/src/hubspot/__init__.py +66 -23
  63. ingestr/src/hubspot/helpers.py +52 -22
  64. ingestr/src/hubspot/settings.py +14 -7
  65. ingestr/src/influxdb/__init__.py +46 -0
  66. ingestr/src/influxdb/client.py +34 -0
  67. ingestr/src/intercom/__init__.py +142 -0
  68. ingestr/src/intercom/helpers.py +674 -0
  69. ingestr/src/intercom/settings.py +279 -0
  70. ingestr/src/isoc_pulse/__init__.py +159 -0
  71. ingestr/src/jira_source/__init__.py +340 -0
  72. ingestr/src/jira_source/helpers.py +439 -0
  73. ingestr/src/jira_source/settings.py +170 -0
  74. ingestr/src/kafka/__init__.py +4 -1
  75. ingestr/src/kinesis/__init__.py +139 -0
  76. ingestr/src/kinesis/helpers.py +82 -0
  77. ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
  78. ingestr/src/linear/__init__.py +634 -0
  79. ingestr/src/linear/helpers.py +111 -0
  80. ingestr/src/linkedin_ads/helpers.py +0 -1
  81. ingestr/src/loader.py +69 -0
  82. ingestr/src/mailchimp/__init__.py +126 -0
  83. ingestr/src/mailchimp/helpers.py +226 -0
  84. ingestr/src/mailchimp/settings.py +164 -0
  85. ingestr/src/masking.py +344 -0
  86. ingestr/src/mixpanel/__init__.py +62 -0
  87. ingestr/src/mixpanel/client.py +99 -0
  88. ingestr/src/monday/__init__.py +246 -0
  89. ingestr/src/monday/helpers.py +392 -0
  90. ingestr/src/monday/settings.py +328 -0
  91. ingestr/src/mongodb/__init__.py +72 -8
  92. ingestr/src/mongodb/helpers.py +915 -38
  93. ingestr/src/partition.py +32 -0
  94. ingestr/src/personio/__init__.py +331 -0
  95. ingestr/src/personio/helpers.py +86 -0
  96. ingestr/src/phantombuster/__init__.py +65 -0
  97. ingestr/src/phantombuster/client.py +87 -0
  98. ingestr/src/pinterest/__init__.py +82 -0
  99. ingestr/src/pipedrive/__init__.py +198 -0
  100. ingestr/src/pipedrive/helpers/__init__.py +23 -0
  101. ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
  102. ingestr/src/pipedrive/helpers/pages.py +115 -0
  103. ingestr/src/pipedrive/settings.py +27 -0
  104. ingestr/src/pipedrive/typing.py +3 -0
  105. ingestr/src/plusvibeai/__init__.py +335 -0
  106. ingestr/src/plusvibeai/helpers.py +544 -0
  107. ingestr/src/plusvibeai/settings.py +252 -0
  108. ingestr/src/quickbooks/__init__.py +117 -0
  109. ingestr/src/resource.py +40 -0
  110. ingestr/src/revenuecat/__init__.py +83 -0
  111. ingestr/src/revenuecat/helpers.py +237 -0
  112. ingestr/src/salesforce/__init__.py +156 -0
  113. ingestr/src/salesforce/helpers.py +64 -0
  114. ingestr/src/shopify/__init__.py +1 -17
  115. ingestr/src/smartsheets/__init__.py +82 -0
  116. ingestr/src/snapchat_ads/__init__.py +489 -0
  117. ingestr/src/snapchat_ads/client.py +72 -0
  118. ingestr/src/snapchat_ads/helpers.py +535 -0
  119. ingestr/src/socrata_source/__init__.py +83 -0
  120. ingestr/src/socrata_source/helpers.py +85 -0
  121. ingestr/src/socrata_source/settings.py +8 -0
  122. ingestr/src/solidgate/__init__.py +219 -0
  123. ingestr/src/solidgate/helpers.py +154 -0
  124. ingestr/src/sources.py +3132 -212
  125. ingestr/src/stripe_analytics/__init__.py +49 -21
  126. ingestr/src/stripe_analytics/helpers.py +286 -1
  127. ingestr/src/stripe_analytics/settings.py +62 -10
  128. ingestr/src/telemetry/event.py +10 -9
  129. ingestr/src/tiktok_ads/__init__.py +12 -6
  130. ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
  131. ingestr/src/trustpilot/__init__.py +48 -0
  132. ingestr/src/trustpilot/client.py +48 -0
  133. ingestr/src/version.py +6 -1
  134. ingestr/src/wise/__init__.py +68 -0
  135. ingestr/src/wise/client.py +63 -0
  136. ingestr/src/zoom/__init__.py +99 -0
  137. ingestr/src/zoom/helpers.py +102 -0
  138. ingestr/tests/unit/test_smartsheets.py +133 -0
  139. ingestr-0.14.104.dist-info/METADATA +563 -0
  140. ingestr-0.14.104.dist-info/RECORD +203 -0
  141. ingestr/src/appsflyer/_init_.py +0 -24
  142. ingestr-0.13.2.dist-info/METADATA +0 -302
  143. ingestr-0.13.2.dist-info/RECORD +0 -107
  144. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
  145. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
  146. {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
@@ -0,0 +1,328 @@
1
+ """Monday.com source settings and constants"""
2
+
3
+ # GraphQL query for fetching app installs
4
+ APP_INSTALLS_QUERY = """
5
+ query ($app_id: ID!, $account_id: ID, $limit: Int!, $page: Int!) {
6
+ app_installs(
7
+ app_id: $app_id
8
+ account_id: $account_id
9
+ limit: $limit
10
+ page: $page
11
+ ) {
12
+ app_id
13
+ timestamp
14
+ app_install_account {
15
+ id
16
+ }
17
+ app_install_user {
18
+ id
19
+ }
20
+ app_version {
21
+ major
22
+ minor
23
+ patch
24
+ type
25
+ text
26
+ }
27
+ permissions {
28
+ approved_scopes
29
+ required_scopes
30
+ }
31
+ }
32
+ }
33
+ """
34
+
35
+ # GraphQL query for fetching account information
36
+ ACCOUNT_QUERY = """
37
+ query {
38
+ account {
39
+ id
40
+ name
41
+ slug
42
+ tier
43
+ country_code
44
+ first_day_of_the_week
45
+ show_timeline_weekends
46
+ sign_up_product_kind
47
+ active_members_count
48
+ logo
49
+ plan {
50
+ max_users
51
+ period
52
+ tier
53
+ version
54
+ }
55
+ }
56
+ }
57
+ """
58
+
59
+ # GraphQL query for fetching account roles
60
+ ACCOUNT_ROLES_QUERY = """
61
+ query {
62
+ account_roles {
63
+ id
64
+ name
65
+ roleType
66
+ }
67
+ }
68
+ """
69
+
70
+ # GraphQL query for fetching users
71
+ USERS_QUERY = """
72
+ query ($limit: Int!, $page: Int!) {
73
+ users(limit: $limit, page: $page) {
74
+ id
75
+ name
76
+ email
77
+ enabled
78
+ is_admin
79
+ is_guest
80
+ is_pending
81
+ is_view_only
82
+ created_at
83
+ birthday
84
+ country_code
85
+ join_date
86
+ location
87
+ mobile_phone
88
+ phone
89
+ photo_original
90
+ photo_thumb
91
+ photo_tiny
92
+ time_zone_identifier
93
+ title
94
+ url
95
+ utc_hours_diff
96
+ current_language
97
+ account {
98
+ id
99
+ }
100
+ }
101
+ }
102
+ """
103
+
104
+ # GraphQL query for fetching boards
105
+ BOARDS_QUERY = """
106
+ query ($limit: Int!, $page: Int!) {
107
+ boards(limit: $limit, page: $page) {
108
+ id
109
+ name
110
+ description
111
+ state
112
+ board_kind
113
+ board_folder_id
114
+ workspace_id
115
+ permissions
116
+ item_terminology
117
+ items_count
118
+ updated_at
119
+ url
120
+ communication
121
+ object_type_unique_key
122
+ type
123
+ creator {
124
+ id
125
+ }
126
+ owners {
127
+ id
128
+ }
129
+ subscribers {
130
+ id
131
+ }
132
+ team_owners {
133
+ id
134
+ }
135
+ team_subscribers {
136
+ id
137
+ }
138
+ tags {
139
+ id
140
+
141
+ }
142
+ }
143
+ }
144
+ """
145
+
146
+ # GraphQL query for fetching custom activities
147
+ CUSTOM_ACTIVITIES_QUERY = """
148
+ query {
149
+ custom_activity {
150
+ id
151
+ name
152
+ type
153
+ color
154
+ icon_id
155
+ }
156
+ }
157
+ """
158
+
159
+ # GraphQL query for fetching board columns
160
+ BOARD_COLUMNS_QUERY = """
161
+ query ($board_ids: [ID!]) {
162
+ boards(ids: $board_ids) {
163
+ id
164
+ columns {
165
+ id
166
+ title
167
+ type
168
+ archived
169
+ description
170
+ settings_str
171
+ width
172
+ }
173
+ }
174
+ }
175
+ """
176
+
177
+ # GraphQL query for fetching board views
178
+ BOARD_VIEWS_QUERY = """
179
+ query ($board_ids: [ID!]) {
180
+ boards(ids: $board_ids) {
181
+ id
182
+ views {
183
+ id
184
+ name
185
+ type
186
+ settings_str
187
+ view_specific_data_str
188
+ source_view_id
189
+ access_level
190
+ }
191
+ }
192
+ }
193
+ """
194
+
195
+ # GraphQL query for fetching workspaces by IDs
196
+ WORKSPACES_QUERY = """
197
+ query ($ids: [ID!]) {
198
+ workspaces(ids: $ids) {
199
+ id
200
+ name
201
+ kind
202
+ description
203
+ created_at
204
+ is_default_workspace
205
+ state
206
+ account_product {
207
+ id
208
+ }
209
+ owners_subscribers {
210
+ id
211
+ }
212
+ team_owners_subscribers {
213
+ id
214
+ }
215
+ teams_subscribers {
216
+ id
217
+ }
218
+ users_subscribers {
219
+ id
220
+ }
221
+ settings {
222
+ icon
223
+ }
224
+ }
225
+ }
226
+ """
227
+
228
+ # GraphQL query for fetching webhooks by board ID
229
+ WEBHOOKS_QUERY = """
230
+ query ($board_id: ID!) {
231
+ webhooks(board_id: $board_id) {
232
+ id
233
+ event
234
+ board_id
235
+ config
236
+ }
237
+ }
238
+ """
239
+
240
+ # GraphQL query for fetching updates
241
+ UPDATES_QUERY = """
242
+ query ($limit: Int!, $from_date: String, $to_date: String) {
243
+ updates(limit: $limit, from_date: $from_date, to_date: $to_date) {
244
+ id
245
+ body
246
+ text_body
247
+ created_at
248
+ updated_at
249
+ edited_at
250
+ creator_id
251
+ item_id
252
+ creator {
253
+ id
254
+ }
255
+ item {
256
+ id
257
+ }
258
+ assets {
259
+ id
260
+ name
261
+ file_extension
262
+ file_size
263
+ public_url
264
+ url
265
+ url_thumbnail
266
+ created_at
267
+ original_geometry
268
+ uploaded_by {
269
+ id
270
+ }
271
+ }
272
+ replies {
273
+ id
274
+ body
275
+ text_body
276
+ created_at
277
+ updated_at
278
+ creator_id
279
+ creator {
280
+ id
281
+ }
282
+ }
283
+ likes {
284
+ id
285
+ }
286
+ pinned_to_top {
287
+ item_id
288
+ }
289
+ viewers {
290
+ medium
291
+ user_id
292
+ user {
293
+ id
294
+ }
295
+ }
296
+ }
297
+ }
298
+ """
299
+
300
+ # GraphQL query for fetching teams
301
+ TEAMS_QUERY = """
302
+ query {
303
+ teams {
304
+ id
305
+ name
306
+ picture_url
307
+ users {
308
+ id
309
+ created_at
310
+ phone
311
+ }
312
+ }
313
+ }
314
+ """
315
+
316
+ # GraphQL query for fetching tags
317
+ TAGS_QUERY = """
318
+ query {
319
+ tags {
320
+ id
321
+ name
322
+ color
323
+ }
324
+ }
325
+ """
326
+
327
+ # Maximum number of results per page
328
+ MAX_PAGE_SIZE = 100
@@ -1,8 +1,9 @@
1
1
  """Source that loads collections form any a mongo database, supports incremental loads."""
2
2
 
3
- from typing import Any, Iterable, List, Optional
3
+ from typing import Any, Dict, Iterable, List, Mapping, Optional, Union
4
4
 
5
5
  import dlt
6
+ from dlt.common.data_writers import TDataItemFormat
6
7
  from dlt.sources import DltResource
7
8
 
8
9
  from .helpers import (
@@ -13,7 +14,7 @@ from .helpers import (
13
14
  )
14
15
 
15
16
 
16
- @dlt.source
17
+ @dlt.source(max_table_nesting=0)
17
18
  def mongodb(
18
19
  connection_url: str = dlt.secrets.value,
19
20
  database: Optional[str] = dlt.config.value,
@@ -21,6 +22,10 @@ def mongodb(
21
22
  incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg]
22
23
  write_disposition: Optional[str] = dlt.config.value,
23
24
  parallel: Optional[bool] = dlt.config.value,
25
+ limit: Optional[int] = None,
26
+ filter_: Optional[Dict[str, Any]] = None,
27
+ projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = None,
28
+ pymongoarrow_schema: Optional[Any] = None,
24
29
  ) -> Iterable[DltResource]:
25
30
  """
26
31
  A DLT source which loads data from a mongo database using PyMongo.
@@ -34,6 +39,18 @@ def mongodb(
34
39
  E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
35
40
  write_disposition (str): Write disposition of the resource.
36
41
  parallel (Optional[bool]): Option to enable parallel loading for the collection. Default is False.
42
+ limit (Optional[int]):
43
+ The maximum number of documents to load. The limit is
44
+ applied to each requested collection separately.
45
+ filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection.
46
+ projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields of a collection
47
+ when loading the collection. Supported inputs:
48
+ include (list) - ["year", "title"]
49
+ include (dict) - {"year": True, "title": True}
50
+ exclude (dict) - {"released": False, "runtime": False}
51
+ Note: Can't mix include and exclude statements '{"title": True, "released": False}`
52
+ pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types of a collection to convert BSON to Arrow
53
+
37
54
  Returns:
38
55
  Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
39
56
  """
@@ -58,20 +75,39 @@ def mongodb(
58
75
  primary_key="_id",
59
76
  write_disposition=write_disposition,
60
77
  spec=MongoDbCollectionConfiguration,
61
- )(client, collection, incremental=incremental, parallel=parallel)
78
+ max_table_nesting=0,
79
+ )(
80
+ client,
81
+ collection,
82
+ incremental=incremental,
83
+ parallel=parallel,
84
+ limit=limit,
85
+ filter_=filter_ or {},
86
+ projection=projection,
87
+ pymongoarrow_schema=pymongoarrow_schema,
88
+ )
62
89
 
63
90
 
64
- @dlt.common.configuration.with_config(
65
- sections=("sources", "mongodb"), spec=MongoDbCollectionResourceConfiguration
91
+ @dlt.resource(
92
+ name=lambda args: args["collection"],
93
+ standalone=True,
94
+ spec=MongoDbCollectionResourceConfiguration,
66
95
  )
67
96
  def mongodb_collection(
68
- connection_url: str = dlt.config.value,
97
+ connection_url: str = dlt.secrets.value,
69
98
  database: Optional[str] = dlt.config.value,
70
99
  collection: str = dlt.config.value,
71
100
  incremental: Optional[dlt.sources.incremental] = None, # type: ignore[type-arg]
72
101
  write_disposition: Optional[str] = dlt.config.value,
73
102
  parallel: Optional[bool] = False,
74
- ) -> Any:
103
+ limit: Optional[int] = None,
104
+ chunk_size: Optional[int] = 1000,
105
+ data_item_format: Optional[TDataItemFormat] = "object",
106
+ filter_: Optional[Dict[str, Any]] = None,
107
+ projection: Optional[Union[Mapping[str, Any], Iterable[str]]] = dlt.config.value,
108
+ pymongoarrow_schema: Optional[Any] = None,
109
+ custom_query: Optional[List[Dict[str, Any]]] = None,
110
+ ) -> DltResource:
75
111
  """
76
112
  A DLT source which loads a collection from a mongo database using PyMongo.
77
113
 
@@ -83,6 +119,22 @@ def mongodb_collection(
83
119
  E.g., `incremental=dlt.sources.incremental('updated_at', pendulum.parse('2022-01-01T00:00:00Z'))`
84
120
  write_disposition (str): Write disposition of the resource.
85
121
  parallel (Optional[bool]): Option to enable parallel loading for the collection. Default is False.
122
+ limit (Optional[int]): The number of documents load.
123
+ chunk_size (Optional[int]): The number of documents load in each batch.
124
+ data_item_format (Optional[TDataItemFormat]): The data format to use for loading.
125
+ Supported formats:
126
+ object - Python objects (dicts, lists).
127
+ arrow - Apache Arrow tables.
128
+ filter_ (Optional[Dict[str, Any]]): The filter to apply to the collection.
129
+ projection: (Optional[Union[Mapping[str, Any], Iterable[str]]]): The projection to select fields
130
+ when loading the collection. Supported inputs:
131
+ include (list) - ["year", "title"]
132
+ include (dict) - {"year": True, "title": True}
133
+ exclude (dict) - {"released": False, "runtime": False}
134
+ Note: Can't mix include and exclude statements '{"title": True, "released": False}`
135
+ pymongoarrow_schema (pymongoarrow.schema.Schema): Mapping of expected field types to convert BSON to Arrow
136
+ custom_query (Optional[List[Dict[str, Any]]]): Custom MongoDB aggregation pipeline to execute instead of find()
137
+
86
138
  Returns:
87
139
  Iterable[DltResource]: A list of DLT resources for each collection to be loaded.
88
140
  """
@@ -100,4 +152,16 @@ def mongodb_collection(
100
152
  name=collection_obj.name,
101
153
  primary_key="_id",
102
154
  write_disposition=write_disposition,
103
- )(client, collection_obj, incremental=incremental, parallel=parallel)
155
+ )(
156
+ client,
157
+ collection_obj,
158
+ incremental=incremental,
159
+ parallel=parallel,
160
+ limit=limit,
161
+ chunk_size=chunk_size,
162
+ data_item_format=data_item_format,
163
+ filter_=filter_ or {},
164
+ projection=projection,
165
+ pymongoarrow_schema=pymongoarrow_schema,
166
+ custom_query=custom_query,
167
+ )