omniload 0.0.0.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (218) hide show
  1. omniload/conftest.py +72 -0
  2. omniload/main.py +810 -0
  3. omniload/src/.gitignore +10 -0
  4. omniload/src/adjust/__init__.py +108 -0
  5. omniload/src/adjust/adjust_helpers.py +122 -0
  6. omniload/src/airtable/__init__.py +84 -0
  7. omniload/src/allium/__init__.py +128 -0
  8. omniload/src/anthropic/__init__.py +277 -0
  9. omniload/src/anthropic/helpers.py +525 -0
  10. omniload/src/applovin/__init__.py +316 -0
  11. omniload/src/applovin_max/__init__.py +117 -0
  12. omniload/src/appsflyer/__init__.py +325 -0
  13. omniload/src/appsflyer/client.py +110 -0
  14. omniload/src/appstore/__init__.py +142 -0
  15. omniload/src/appstore/client.py +126 -0
  16. omniload/src/appstore/errors.py +15 -0
  17. omniload/src/appstore/models.py +117 -0
  18. omniload/src/appstore/resources.py +179 -0
  19. omniload/src/arrow/__init__.py +81 -0
  20. omniload/src/asana_source/__init__.py +281 -0
  21. omniload/src/asana_source/helpers.py +30 -0
  22. omniload/src/asana_source/settings.py +158 -0
  23. omniload/src/attio/__init__.py +102 -0
  24. omniload/src/attio/helpers.py +65 -0
  25. omniload/src/blob.py +95 -0
  26. omniload/src/bruin/__init__.py +76 -0
  27. omniload/src/chess/__init__.py +180 -0
  28. omniload/src/chess/helpers.py +35 -0
  29. omniload/src/chess/settings.py +18 -0
  30. omniload/src/clickup/__init__.py +85 -0
  31. omniload/src/clickup/helpers.py +47 -0
  32. omniload/src/collector/spinner.py +43 -0
  33. omniload/src/couchbase_source/__init__.py +118 -0
  34. omniload/src/couchbase_source/helpers.py +135 -0
  35. omniload/src/cursor/__init__.py +83 -0
  36. omniload/src/cursor/helpers.py +188 -0
  37. omniload/src/customer_io/__init__.py +486 -0
  38. omniload/src/customer_io/helpers.py +530 -0
  39. omniload/src/destinations.py +982 -0
  40. omniload/src/docebo/__init__.py +589 -0
  41. omniload/src/docebo/client.py +435 -0
  42. omniload/src/docebo/helpers.py +97 -0
  43. omniload/src/dune/__init__.py +104 -0
  44. omniload/src/dune/helpers.py +108 -0
  45. omniload/src/dynamodb/__init__.py +86 -0
  46. omniload/src/elasticsearch/__init__.py +80 -0
  47. omniload/src/elasticsearch/helpers.py +141 -0
  48. omniload/src/errors.py +26 -0
  49. omniload/src/facebook_ads/__init__.py +403 -0
  50. omniload/src/facebook_ads/exceptions.py +19 -0
  51. omniload/src/facebook_ads/helpers.py +296 -0
  52. omniload/src/facebook_ads/settings.py +224 -0
  53. omniload/src/facebook_ads/utils.py +53 -0
  54. omniload/src/factory.py +305 -0
  55. omniload/src/filesystem/__init__.py +133 -0
  56. omniload/src/filesystem/helpers.py +114 -0
  57. omniload/src/filesystem/readers.py +187 -0
  58. omniload/src/filters.py +62 -0
  59. omniload/src/fireflies/__init__.py +151 -0
  60. omniload/src/fireflies/helpers.py +753 -0
  61. omniload/src/fluxx/__init__.py +10013 -0
  62. omniload/src/fluxx/helpers.py +233 -0
  63. omniload/src/frankfurter/__init__.py +157 -0
  64. omniload/src/frankfurter/helpers.py +48 -0
  65. omniload/src/freshdesk/__init__.py +103 -0
  66. omniload/src/freshdesk/freshdesk_client.py +151 -0
  67. omniload/src/freshdesk/settings.py +23 -0
  68. omniload/src/fundraiseup/__init__.py +95 -0
  69. omniload/src/fundraiseup/client.py +81 -0
  70. omniload/src/github/__init__.py +202 -0
  71. omniload/src/github/helpers.py +207 -0
  72. omniload/src/github/queries.py +129 -0
  73. omniload/src/github/settings.py +24 -0
  74. omniload/src/google_ads/__init__.py +198 -0
  75. omniload/src/google_ads/field.py +17 -0
  76. omniload/src/google_ads/metrics.py +254 -0
  77. omniload/src/google_ads/predicates.py +37 -0
  78. omniload/src/google_ads/reports.py +411 -0
  79. omniload/src/google_ads/test_google_ads.py +184 -0
  80. omniload/src/google_analytics/__init__.py +144 -0
  81. omniload/src/google_analytics/helpers.py +312 -0
  82. omniload/src/google_sheets/README.md +95 -0
  83. omniload/src/google_sheets/__init__.py +166 -0
  84. omniload/src/google_sheets/helpers/__init__.py +15 -0
  85. omniload/src/google_sheets/helpers/api_calls.py +160 -0
  86. omniload/src/google_sheets/helpers/data_processing.py +316 -0
  87. omniload/src/gorgias/__init__.py +595 -0
  88. omniload/src/gorgias/helpers.py +166 -0
  89. omniload/src/hostaway/__init__.py +302 -0
  90. omniload/src/hostaway/client.py +288 -0
  91. omniload/src/http/__init__.py +38 -0
  92. omniload/src/http/readers.py +146 -0
  93. omniload/src/http_client.py +24 -0
  94. omniload/src/hubspot/__init__.py +800 -0
  95. omniload/src/hubspot/helpers.py +417 -0
  96. omniload/src/hubspot/settings.py +329 -0
  97. omniload/src/indeed/__init__.py +153 -0
  98. omniload/src/indeed/helpers.py +228 -0
  99. omniload/src/influxdb/__init__.py +46 -0
  100. omniload/src/influxdb/client.py +34 -0
  101. omniload/src/intercom/__init__.py +142 -0
  102. omniload/src/intercom/helpers.py +674 -0
  103. omniload/src/intercom/settings.py +279 -0
  104. omniload/src/isoc_pulse/__init__.py +159 -0
  105. omniload/src/jira_source/__init__.py +377 -0
  106. omniload/src/jira_source/helpers.py +510 -0
  107. omniload/src/jira_source/settings.py +184 -0
  108. omniload/src/kafka/__init__.py +120 -0
  109. omniload/src/kafka/helpers.py +241 -0
  110. omniload/src/kinesis/__init__.py +153 -0
  111. omniload/src/kinesis/helpers.py +96 -0
  112. omniload/src/klaviyo/__init__.py +237 -0
  113. omniload/src/klaviyo/client.py +212 -0
  114. omniload/src/klaviyo/helpers.py +19 -0
  115. omniload/src/linear/__init__.py +634 -0
  116. omniload/src/linear/helpers.py +111 -0
  117. omniload/src/linkedin_ads/__init__.py +266 -0
  118. omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
  119. omniload/src/linkedin_ads/helpers.py +246 -0
  120. omniload/src/loader.py +69 -0
  121. omniload/src/mailchimp/__init__.py +126 -0
  122. omniload/src/mailchimp/helpers.py +226 -0
  123. omniload/src/mailchimp/settings.py +164 -0
  124. omniload/src/masking.py +344 -0
  125. omniload/src/mixpanel/__init__.py +62 -0
  126. omniload/src/mixpanel/client.py +104 -0
  127. omniload/src/monday/__init__.py +246 -0
  128. omniload/src/monday/helpers.py +392 -0
  129. omniload/src/monday/settings.py +325 -0
  130. omniload/src/mongodb/__init__.py +281 -0
  131. omniload/src/mongodb/helpers.py +975 -0
  132. omniload/src/notion/__init__.py +69 -0
  133. omniload/src/notion/helpers/__init__.py +14 -0
  134. omniload/src/notion/helpers/client.py +178 -0
  135. omniload/src/notion/helpers/database.py +92 -0
  136. omniload/src/notion/settings.py +17 -0
  137. omniload/src/partition.py +32 -0
  138. omniload/src/personio/__init__.py +345 -0
  139. omniload/src/personio/helpers.py +100 -0
  140. omniload/src/phantombuster/__init__.py +65 -0
  141. omniload/src/phantombuster/client.py +87 -0
  142. omniload/src/pinterest/__init__.py +82 -0
  143. omniload/src/pipedrive/__init__.py +212 -0
  144. omniload/src/pipedrive/helpers/__init__.py +37 -0
  145. omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
  146. omniload/src/pipedrive/helpers/pages.py +129 -0
  147. omniload/src/pipedrive/settings.py +41 -0
  148. omniload/src/pipedrive/typing.py +17 -0
  149. omniload/src/plusvibeai/__init__.py +335 -0
  150. omniload/src/plusvibeai/helpers.py +544 -0
  151. omniload/src/plusvibeai/settings.py +252 -0
  152. omniload/src/primer/__init__.py +45 -0
  153. omniload/src/primer/helpers.py +79 -0
  154. omniload/src/quickbooks/__init__.py +117 -0
  155. omniload/src/reddit_ads/__init__.py +183 -0
  156. omniload/src/reddit_ads/helpers.py +232 -0
  157. omniload/src/resource.py +40 -0
  158. omniload/src/revenuecat/__init__.py +83 -0
  159. omniload/src/revenuecat/helpers.py +237 -0
  160. omniload/src/salesforce/__init__.py +170 -0
  161. omniload/src/salesforce/helpers.py +78 -0
  162. omniload/src/shopify/__init__.py +1953 -0
  163. omniload/src/shopify/exceptions.py +17 -0
  164. omniload/src/shopify/helpers.py +202 -0
  165. omniload/src/shopify/settings.py +19 -0
  166. omniload/src/slack/__init__.py +290 -0
  167. omniload/src/slack/helpers.py +218 -0
  168. omniload/src/slack/settings.py +36 -0
  169. omniload/src/smartsheets/__init__.py +82 -0
  170. omniload/src/snapchat_ads/__init__.py +455 -0
  171. omniload/src/snapchat_ads/client.py +72 -0
  172. omniload/src/snapchat_ads/helpers.py +630 -0
  173. omniload/src/snapchat_ads/settings.py +130 -0
  174. omniload/src/socrata_source/__init__.py +83 -0
  175. omniload/src/socrata_source/helpers.py +85 -0
  176. omniload/src/socrata_source/settings.py +8 -0
  177. omniload/src/solidgate/__init__.py +219 -0
  178. omniload/src/solidgate/helpers.py +154 -0
  179. omniload/src/sources.py +5408 -0
  180. omniload/src/sql_database/__init__.py +0 -0
  181. omniload/src/sql_database/callbacks.py +66 -0
  182. omniload/src/stripe_analytics/__init__.py +183 -0
  183. omniload/src/stripe_analytics/helpers.py +386 -0
  184. omniload/src/stripe_analytics/settings.py +80 -0
  185. omniload/src/table_definition.py +15 -0
  186. omniload/src/testdata/fakebqcredentials.json +14 -0
  187. omniload/src/tiktok_ads/__init__.py +150 -0
  188. omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
  189. omniload/src/time.py +11 -0
  190. omniload/src/trustpilot/__init__.py +48 -0
  191. omniload/src/trustpilot/client.py +48 -0
  192. omniload/src/version.py +6 -0
  193. omniload/src/wise/__init__.py +68 -0
  194. omniload/src/wise/client.py +63 -0
  195. omniload/src/zendesk/__init__.py +480 -0
  196. omniload/src/zendesk/helpers/__init__.py +39 -0
  197. omniload/src/zendesk/helpers/api_helpers.py +119 -0
  198. omniload/src/zendesk/helpers/credentials.py +68 -0
  199. omniload/src/zendesk/helpers/talk_api.py +132 -0
  200. omniload/src/zendesk/settings.py +71 -0
  201. omniload/src/zoom/__init__.py +99 -0
  202. omniload/src/zoom/helpers.py +102 -0
  203. omniload/testdata/.gitignore +2 -0
  204. omniload/testdata/create_replace.csv +21 -0
  205. omniload/testdata/delete_insert_expected.csv +6 -0
  206. omniload/testdata/delete_insert_part1.csv +5 -0
  207. omniload/testdata/delete_insert_part2.csv +6 -0
  208. omniload/testdata/merge_expected.csv +5 -0
  209. omniload/testdata/merge_part1.csv +4 -0
  210. omniload/testdata/merge_part2.csv +5 -0
  211. omniload/tests/unit/test_smartsheets.py +133 -0
  212. omniload-0.0.0.dev0.dist-info/METADATA +439 -0
  213. omniload-0.0.0.dev0.dist-info/RECORD +218 -0
  214. omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
  215. omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
  216. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
  217. omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
  218. omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
@@ -0,0 +1,17 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
16
class ShopifyPartnerApiError(Exception):
    """Error raised when a Shopify GraphQL response carries an ``errors`` entry."""
@@ -0,0 +1,202 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Shopify source helpers"""
16
+
17
+ from typing import Any, Iterable, Literal, Optional
18
+ from urllib.parse import urljoin
19
+
20
+ from dlt.common import jsonpath
21
+ from dlt.common.time import ensure_pendulum_datetime
22
+ from dlt.common.typing import Dict, DictStrAny, TDataItems
23
+ from dlt.sources.helpers import requests
24
+
25
+ from .exceptions import ShopifyPartnerApiError
26
+ from .settings import DEFAULT_API_VERSION, DEFAULT_PARTNER_API_VERSION
27
+
28
# Literal type listing the order status strings "open", "closed", "cancelled" and "any".
# NOTE(review): not referenced in the visible part of this module; presumably
# imported by callers to type an order-status filter - confirm.
TOrderStatus = Literal["open", "closed", "cancelled", "any"]
29
+
30
+
31
def convert_datetime_fields(item: Dict[str, Any]) -> Dict[str, Any]:
    """Turn string timestamps under well-known keys into pendulum datetimes.

    Dictionaries are updated in place at every nesting level. Lists are
    rebuilt as new lists whose elements have been processed the same way.

    Args:
        item: The item to convert

    Returns:
        The same data item (for convenience)
    """
    timestamp_keys = ("created_at", "updated_at", "createdAt", "updatedAt")

    def _walk(node: Any) -> Any:
        # Lists are replaced by a freshly built list of processed elements.
        if isinstance(node, list):
            return [_walk(child) for child in node]
        # Scalars (and anything that is not a dict) pass through untouched.
        if not isinstance(node, dict):
            return node
        # Only values of existing keys are reassigned, so the dict size never
        # changes while it is being iterated.
        for name, current in node.items():
            is_timestamp = isinstance(current, str) and name in timestamp_keys
            node[name] = (
                ensure_pendulum_datetime(current) if is_timestamp else _walk(current)
            )
        return node

    return _walk(item)
56
+
57
+
58
def remove_nodes_key(item: Any) -> Any:
    """
    Collapse ``{"nodes": [...]}`` wrappers into the bare list, recursively.

    A dictionary is unwrapped only when 'nodes' is its single key and the
    value is a list; every other dictionary is rebuilt with processed values.

    Args:
        item: The item to process (can be a dict, list, or any other type)

    Returns:
        The processed item
    """
    if isinstance(item, list):
        return [remove_nodes_key(entry) for entry in item]
    if not isinstance(item, dict):
        return item

    # Look at "nodes" only when it is the sole key of the dictionary.
    wrapped = item.get("nodes") if len(item) == 1 else None
    if isinstance(wrapped, list):
        return [remove_nodes_key(node) for node in wrapped]

    return {key: remove_nodes_key(value) for key, value in item.items()}
76
+
77
+
78
class ShopifyApi:
    """
    REST client that walks the paginated ``.json`` listings of a Shopify shop.
    """

    def __init__(
        self,
        shop_url: str,
        private_app_password: str,
        api_version: str = DEFAULT_API_VERSION,
    ) -> None:
        """
        Args:
            shop_url: The URL of your shop (e.g. https://my-shop.myshopify.com).
            private_app_password: The private app password to the app on your shop.
            api_version: The API version to use (e.g. 2023-01)
        """
        self.api_version = api_version
        self.private_app_password = private_app_password
        self.shop_url = shop_url

    def get_pages(
        self, resource: str, params: Optional[Dict[str, Any]] = None
    ) -> Iterable[TDataItems]:
        """Request a resource page by page and yield the items of each page.

        The ``next`` link of each response is followed until there is none.

        Args:
            resource: The resource to get pages for (e.g. products, orders, customers).
            params: Query params to include in the request.

        Yields:
            List of data items from the page
        """
        # The response body keys its items by the last path segment of the resource.
        items_key = resource.split("/")[-1]
        auth_headers = {"X-Shopify-Access-Token": self.private_app_password}

        next_url = urljoin(
            self.shop_url, f"/admin/api/{self.api_version}/{resource}.json"
        )
        query = params
        while next_url:
            response = requests.get(next_url, params=query, headers=auth_headers)
            response.raise_for_status()
            payload = response.json()
            yield [convert_datetime_fields(record) for record in payload[items_key]]
            next_url = response.links.get("next", {}).get("url")
            # Query params are included in subsequent page URLs
            query = None
126
+
127
class ShopifyGraphQLApi:
    """Client for Shopify GraphQL API

    Queries are POSTed to ``<base_url>/admin/api/<api_version>/graphql.json``
    with the access token in the ``X-Shopify-Access-Token`` header.
    """

    def __init__(
        self,
        access_token: str,
        api_version: str = DEFAULT_PARTNER_API_VERSION,
        base_url: str = "partners.shopify.com",
    ) -> None:
        """
        Args:
            access_token: Token sent in the ``X-Shopify-Access-Token`` header.
            api_version: API version segment used in the endpoint URL.
            base_url: Host name or full URL of the API. The scheme is optional;
                ``https://`` is assumed when none is given.
        """
        self.access_token = access_token
        self.api_version = api_version
        self.base_url = base_url

    @property
    def graphql_url(self) -> str:
        """Absolute URL of the GraphQL endpoint built from ``base_url``.

        A ``base_url`` that already carries an ``http://`` or ``https://``
        scheme is used as given; otherwise ``https://`` is prepended. Trailing
        slashes are dropped so the path is never joined with a double slash.
        """
        base = self.base_url.rstrip("/")
        # Previously only "https://" was recognised, so "http://host" became
        # the malformed "https://http://host/...".
        if not base.lower().startswith(("http://", "https://")):
            base = f"https://{base}"
        return f"{base}/admin/api/{self.api_version}/graphql.json"

    def run_graphql_query(
        self, query: str, variables: Optional[DictStrAny] = None
    ) -> DictStrAny:
        """Run a graphql query against the endpoint at ``graphql_url``

        Args:
            query: The query to run
            variables: The variables to include in the query

        Returns:
            The response JSON

        Raises:
            ShopifyPartnerApiError: If the response JSON has a non-empty
                ``errors`` entry; the raw response text is the message.
        """
        headers = {"X-Shopify-Access-Token": self.access_token}
        response = requests.post(
            self.graphql_url,
            json={"query": query, "variables": variables},
            headers=headers,
        )
        data = response.json()
        if data.get("errors"):
            raise ShopifyPartnerApiError(response.text)
        return data  # type: ignore[no-any-return]

    def get_graphql_pages(
        self,
        query: str,
        data_items_path: jsonpath.TJsonPath,
        pagination_cursor_path: jsonpath.TJsonPath,
        pagination_variable_name: str,
        pagination_cursor_has_next_page_path: Optional[jsonpath.TJsonPath] = None,
        variables: Optional[DictStrAny] = None,
    ) -> Iterable[TDataItems]:
        """Run a cursor-paginated query and yield the items of every page.

        Iteration stops when a response has no data items, no cursors, or
        (when ``pagination_cursor_has_next_page_path`` is given) when the
        has-next-page value is missing or falsy.

        Args:
            query: The query to run
            data_items_path: JSON path of the data items in the response
            pagination_cursor_path: JSON path of the pagination cursor(s)
            pagination_variable_name: Name of the query variable that receives
                the last cursor of the previous page
            pagination_cursor_has_next_page_path: Optional JSON path of the
                flag telling whether another page exists
            variables: Initial query variables

        Yields:
            List of data items from the page, with datetime fields converted
            and ``nodes`` wrappers removed
        """
        # Work on a copy so the caller's mapping is not mutated by the cursor updates.
        variables = dict(variables or {})
        while True:
            data = self.run_graphql_query(query, variables)
            data_items = jsonpath.find_values(data_items_path, data)

            if not data_items:
                break

            yield [
                remove_nodes_key(convert_datetime_fields(item)) for item in data_items
            ]

            cursors = jsonpath.find_values(pagination_cursor_path, data)
            if not cursors:
                break

            if pagination_cursor_has_next_page_path:
                has_next_page = jsonpath.find_values(
                    pagination_cursor_has_next_page_path, data
                )
                if not has_next_page or not has_next_page[0]:
                    break

            # Continue from the last cursor found on this page.
            variables[pagination_variable_name] = cursors[-1]
@@ -0,0 +1,19 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
# API versions used as the `api_version` defaults of the Shopify clients:
# DEFAULT_API_VERSION for ShopifyApi, DEFAULT_PARTNER_API_VERSION for ShopifyGraphQLApi.
DEFAULT_API_VERSION = "2023-10"
DEFAULT_PARTNER_API_VERSION = "2024-01"

# NOTE(review): presumably the page size for list requests - confirm against callers.
DEFAULT_ITEMS_PER_PAGE = 250

# NOTE(review): presumably the default start date for loads - confirm against callers.
FIRST_DAY_OF_MILLENNIUM = "2000-01-01"
@@ -0,0 +1,290 @@
1
+ # Copyright 2022-2025 ScaleVector
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+ """Fetches Slack Conversations, History and logs."""
16
+
17
+ from functools import partial
18
+ from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple
19
+
20
+ import dlt
21
+ from dlt.common.typing import TAnyDateTime, TDataItem
22
+ from dlt.sources import DltResource
23
+ from pendulum import DateTime
24
+
25
+ from .helpers import SlackAPI, ensure_dt_type
26
+ from .settings import (
27
+ DEFAULT_DATETIME_FIELDS,
28
+ DEFAULT_START_DATE,
29
+ MAX_PAGE_SIZE,
30
+ MSG_DATETIME_FIELDS,
31
+ )
32
+
33
+
34
@dlt.source(name="slack", max_table_nesting=0)
def slack_source(
    page_size: int = MAX_PAGE_SIZE,
    access_token: str = dlt.secrets.value,
    start_date: Optional[TAnyDateTime] = DEFAULT_START_DATE,
    end_date: Optional[TAnyDateTime] = None,
    selected_channels: Optional[List[str]] = dlt.config.value,
    table_per_channel: bool = True,
    replies: bool = False,
) -> Iterable[DltResource]:
    """
    The source for the Slack pipeline. Yields the channels, users and access_logs
    resources, followed by the message resources (and optionally their replies).

    The channel list is fetched from ``conversations.list`` before any resource
    is yielded, because the per-channel resources are built from it.
    Message resources use write disposition "append" when ``end_date`` is None
    and "merge" otherwise.

    Args:
        page_size: The max number of items to fetch per page. Defaults to MAX_PAGE_SIZE.
        access_token: the oauth access_token used to authenticate.
        start_date: The start time of the range for which to load. Defaults to DEFAULT_START_DATE.
        end_date: The end time of the range for which to load data.
        selected_channels: The list of channels (names or ids) to load. If empty or None, all channels will be loaded.
        table_per_channel: Boolean flag, True by default. If True - for each channel separate table with messages is created.
            Otherwise, all messages are put in one table.
        replies: Boolean flag indicating if you want a replies table to be present as well. False by default.

    Yields:
        DltResource: The data resources of the source.
    """

    end_dt: Optional[DateTime] = ensure_dt_type(end_date)
    start_dt: Optional[DateTime] = ensure_dt_type(start_date)
    write_disposition: Literal["append", "merge"] = (
        "append" if end_date is None else "merge"
    )

    api = SlackAPI(
        access_token=access_token,
        page_size=page_size,
    )

    def get_channels(
        slack_api: SlackAPI, selected_channels: Optional[List[str]]
    ) -> Tuple[List[TDataItem], List[TDataItem]]:
        """
        Returns channels fetched from slack and the subset of selected channels.

        Args:
            slack_api: Slack API instance.
            selected_channels: List of selected channel names or ids, or None.

        Returns:
            Tuple[List[TDataItem], List[TDataItem]]: all fetched channels and the
            fetched channels whose name or id is in ``selected_channels`` (all
            channels when ``selected_channels`` is empty or None).
        """
        channels: List[TDataItem] = []
        for page_data in slack_api.get_pages(
            resource="conversations.list",
            response_path="$.channels[*]",
            datetime_fields=DEFAULT_DATETIME_FIELDS,
        ):
            channels.extend(page_data)

        if selected_channels:
            fetch_channels = [
                c
                for c in channels
                if c["name"] in selected_channels or c["id"] in selected_channels
            ]
        else:
            fetch_channels = channels
        return channels, fetch_channels

    channels, fetched_selected_channels = get_channels(api, selected_channels)

    @dlt.resource(name="channels", primary_key="id", write_disposition="replace")
    def channels_resource() -> Iterable[TDataItem]:
        """Yield all channels (already fetched above) as a DLT resource."""
        yield from channels

    @dlt.resource(name="users", primary_key="id", write_disposition="replace")
    def users_resource() -> Iterable[TDataItem]:
        """
        Yield all users as a DLT resource.

        Yields:
            Iterable[TDataItem]: One page of users per iteration.
        """

        for page_data in api.get_pages(
            resource="users.list",
            response_path="$.members[*]",
            params=dict(include_locale=True),
            datetime_fields=DEFAULT_DATETIME_FIELDS,
        ):
            yield page_data

    def get_messages(
        channel_data: Dict[str, Any], start_date_ts: float, end_date_ts: float
    ) -> Iterable[TDataItem]:
        """
        Generator, which gets channel messages for specific dates.

        Args:
            channel_data: dict with channel data; its "id" is used for the request.
            start_date_ts: start timestamp, sent as "oldest".
            end_date_ts: end timestamp, sent as "latest".

        Yields:
            List[TDataItem]: One page of messages per iteration.
        """
        params = {
            "channel": channel_data["id"],
            "oldest": start_date_ts,
            "latest": end_date_ts,
        }

        for page_data in api.get_pages(
            resource="conversations.history",
            response_path="$.messages[*]",
            params=params,
            datetime_fields=MSG_DATETIME_FIELDS,
            context={"channel": channel_data["id"]},
        ):
            yield page_data

    def get_thread_replies(messages: List[Dict[str, Any]]) -> Iterable[TDataItem]:
        """
        Generator, which gets replies for each message that has a "thread_ts".

        Args:
            messages: messages data.

        Yields:
            List[TDataItem]: One page of thread messages per iteration, with the
            first element of every page dropped.
        """
        for message in messages:
            if message.get("thread_ts", None):
                params = {
                    "channel": message["channel"],
                    "ts": ensure_dt_type(message["thread_ts"], to_ts=True),
                }

                for page_data in api.get_pages(
                    resource="conversations.replies",
                    response_path="$.messages[*]",
                    params=params,
                    context={"channel": message["channel"]},
                ):
                    # NOTE(review): presumably the first element is the thread's
                    # parent message, already loaded as a regular message - confirm.
                    yield page_data[1:]

    @dlt.resource(
        name="messages",
        primary_key=("channel", "ts"),
        columns={"blocks": {"data_type": "json"}},
        write_disposition=write_disposition,
    )
    def messages_resource(
        created_at: dlt.sources.incremental[DateTime] = dlt.sources.incremental(
            "ts",
            initial_value=start_dt,
            end_value=end_dt,
            allow_external_schedulers=True,
            range_end="closed",
            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        """
        Yield all messages for a set of selected channels as a DLT resource. Keep blocks column without normalization.

        Args:
            created_at (dlt.sources.incremental[DateTime]): The incremental on the "ts" field;
                its last_value and end_value bound the requested time range.

        Yields:
            Iterable[TDataItem]: One page of messages per iteration.
        """
        start_date_ts = ensure_dt_type(created_at.last_value, to_ts=True)
        end_date_ts = ensure_dt_type(created_at.end_value, to_ts=True)
        for channel_data in fetched_selected_channels:
            yield from get_messages(channel_data, start_date_ts, end_date_ts)

    def per_table_messages_resource(
        channel_data: Dict[str, Any],
        created_at: dlt.sources.incremental[DateTime] = dlt.sources.incremental(
            "ts",
            initial_value=start_dt,
            end_value=end_dt,
            allow_external_schedulers=True,
            range_end="closed",
            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        """Yield all messages for a given channel as a DLT resource. Keep blocks column without normalization.

        Args:
            channel_data (Dict[str, Any]): The channel data.
            created_at (dlt.sources.incremental[DateTime]): The incremental on the "ts" field;
                its last_value and end_value bound the requested time range.

        Yields:
            Iterable[TDataItem]: One page of messages per iteration.
        """
        start_date_ts = ensure_dt_type(created_at.last_value, to_ts=True)
        end_date_ts = ensure_dt_type(created_at.end_value, to_ts=True)
        yield from get_messages(channel_data, start_date_ts, end_date_ts)

    def table_name_func(channel_name: str, payload: TDataItem) -> str:
        """Return the table name for a given channel and payload.

        The name is "<channel_name>_<subtype>", falling back to the payload's
        "type" and then to an empty suffix when "subtype" is absent.
        """
        table_type = payload.get("subtype", payload.get("type", ""))
        return f"{channel_name}_{table_type}"

    # It will not work in the pipeline or tests because it is a paid feature,
    # raise an error when it is not a paying account.
    @dlt.resource(
        name="access_logs",
        selected=False,
        primary_key="user_id",
        write_disposition="append",
    )
    # it is not an incremental resource it just has an end_date filter
    def logs_resource() -> Iterable[TDataItem]:
        """The access logs resource.

        Sends "before" as the integer timestamp of the end date, or None when
        no end date was given.
        """
        for page_data in api.get_pages(
            resource="team.accessLogs",
            response_path="$.logins[*]",
            datetime_fields=["date_first", "date_last"],
            params={"before": end_dt if end_dt is None else end_dt.int_timestamp},
        ):
            yield page_data

    yield from (channels_resource, users_resource, logs_resource)

    if table_per_channel:
        # One messages resource per selected channel, named after the channel.
        for channel in fetched_selected_channels:
            channel_name = channel["name"]
            table_name = partial(table_name_func, channel_name)
            messages_channel = dlt.resource(
                per_table_messages_resource,
                name=channel_name,
                table_name=table_name,
                primary_key=("channel", "ts"),
                write_disposition=write_disposition,
                columns={"blocks": {"data_type": "json"}},
            )(channel)

            yield messages_channel
            if replies:
                # Replies are derived from the channel's messages via a transformer.
                yield messages_channel | dlt.transformer(
                    get_thread_replies,
                    name=channel_name + "_replies",
                    table_name=partial(table_name_func, channel_name + "_replies"),
                    primary_key=("thread_ts", "ts"),
                    write_disposition=write_disposition,
                )
    else:
        # A single "messages" resource covering all selected channels.
        yield messages_resource
        if replies:
            yield messages_resource | dlt.transformer(
                get_thread_replies,
                name="replies",
                primary_key=("thread_ts", "ts"),
                write_disposition=write_disposition,
            )