omniload 0.0.0.dev0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- omniload/conftest.py +72 -0
- omniload/main.py +810 -0
- omniload/src/.gitignore +10 -0
- omniload/src/adjust/__init__.py +108 -0
- omniload/src/adjust/adjust_helpers.py +122 -0
- omniload/src/airtable/__init__.py +84 -0
- omniload/src/allium/__init__.py +128 -0
- omniload/src/anthropic/__init__.py +277 -0
- omniload/src/anthropic/helpers.py +525 -0
- omniload/src/applovin/__init__.py +316 -0
- omniload/src/applovin_max/__init__.py +117 -0
- omniload/src/appsflyer/__init__.py +325 -0
- omniload/src/appsflyer/client.py +110 -0
- omniload/src/appstore/__init__.py +142 -0
- omniload/src/appstore/client.py +126 -0
- omniload/src/appstore/errors.py +15 -0
- omniload/src/appstore/models.py +117 -0
- omniload/src/appstore/resources.py +179 -0
- omniload/src/arrow/__init__.py +81 -0
- omniload/src/asana_source/__init__.py +281 -0
- omniload/src/asana_source/helpers.py +30 -0
- omniload/src/asana_source/settings.py +158 -0
- omniload/src/attio/__init__.py +102 -0
- omniload/src/attio/helpers.py +65 -0
- omniload/src/blob.py +95 -0
- omniload/src/bruin/__init__.py +76 -0
- omniload/src/chess/__init__.py +180 -0
- omniload/src/chess/helpers.py +35 -0
- omniload/src/chess/settings.py +18 -0
- omniload/src/clickup/__init__.py +85 -0
- omniload/src/clickup/helpers.py +47 -0
- omniload/src/collector/spinner.py +43 -0
- omniload/src/couchbase_source/__init__.py +118 -0
- omniload/src/couchbase_source/helpers.py +135 -0
- omniload/src/cursor/__init__.py +83 -0
- omniload/src/cursor/helpers.py +188 -0
- omniload/src/customer_io/__init__.py +486 -0
- omniload/src/customer_io/helpers.py +530 -0
- omniload/src/destinations.py +982 -0
- omniload/src/docebo/__init__.py +589 -0
- omniload/src/docebo/client.py +435 -0
- omniload/src/docebo/helpers.py +97 -0
- omniload/src/dune/__init__.py +104 -0
- omniload/src/dune/helpers.py +108 -0
- omniload/src/dynamodb/__init__.py +86 -0
- omniload/src/elasticsearch/__init__.py +80 -0
- omniload/src/elasticsearch/helpers.py +141 -0
- omniload/src/errors.py +26 -0
- omniload/src/facebook_ads/__init__.py +403 -0
- omniload/src/facebook_ads/exceptions.py +19 -0
- omniload/src/facebook_ads/helpers.py +296 -0
- omniload/src/facebook_ads/settings.py +224 -0
- omniload/src/facebook_ads/utils.py +53 -0
- omniload/src/factory.py +305 -0
- omniload/src/filesystem/__init__.py +133 -0
- omniload/src/filesystem/helpers.py +114 -0
- omniload/src/filesystem/readers.py +187 -0
- omniload/src/filters.py +62 -0
- omniload/src/fireflies/__init__.py +151 -0
- omniload/src/fireflies/helpers.py +753 -0
- omniload/src/fluxx/__init__.py +10013 -0
- omniload/src/fluxx/helpers.py +233 -0
- omniload/src/frankfurter/__init__.py +157 -0
- omniload/src/frankfurter/helpers.py +48 -0
- omniload/src/freshdesk/__init__.py +103 -0
- omniload/src/freshdesk/freshdesk_client.py +151 -0
- omniload/src/freshdesk/settings.py +23 -0
- omniload/src/fundraiseup/__init__.py +95 -0
- omniload/src/fundraiseup/client.py +81 -0
- omniload/src/github/__init__.py +202 -0
- omniload/src/github/helpers.py +207 -0
- omniload/src/github/queries.py +129 -0
- omniload/src/github/settings.py +24 -0
- omniload/src/google_ads/__init__.py +198 -0
- omniload/src/google_ads/field.py +17 -0
- omniload/src/google_ads/metrics.py +254 -0
- omniload/src/google_ads/predicates.py +37 -0
- omniload/src/google_ads/reports.py +411 -0
- omniload/src/google_ads/test_google_ads.py +184 -0
- omniload/src/google_analytics/__init__.py +144 -0
- omniload/src/google_analytics/helpers.py +312 -0
- omniload/src/google_sheets/README.md +95 -0
- omniload/src/google_sheets/__init__.py +166 -0
- omniload/src/google_sheets/helpers/__init__.py +15 -0
- omniload/src/google_sheets/helpers/api_calls.py +160 -0
- omniload/src/google_sheets/helpers/data_processing.py +316 -0
- omniload/src/gorgias/__init__.py +595 -0
- omniload/src/gorgias/helpers.py +166 -0
- omniload/src/hostaway/__init__.py +302 -0
- omniload/src/hostaway/client.py +288 -0
- omniload/src/http/__init__.py +38 -0
- omniload/src/http/readers.py +146 -0
- omniload/src/http_client.py +24 -0
- omniload/src/hubspot/__init__.py +800 -0
- omniload/src/hubspot/helpers.py +417 -0
- omniload/src/hubspot/settings.py +329 -0
- omniload/src/indeed/__init__.py +153 -0
- omniload/src/indeed/helpers.py +228 -0
- omniload/src/influxdb/__init__.py +46 -0
- omniload/src/influxdb/client.py +34 -0
- omniload/src/intercom/__init__.py +142 -0
- omniload/src/intercom/helpers.py +674 -0
- omniload/src/intercom/settings.py +279 -0
- omniload/src/isoc_pulse/__init__.py +159 -0
- omniload/src/jira_source/__init__.py +377 -0
- omniload/src/jira_source/helpers.py +510 -0
- omniload/src/jira_source/settings.py +184 -0
- omniload/src/kafka/__init__.py +120 -0
- omniload/src/kafka/helpers.py +241 -0
- omniload/src/kinesis/__init__.py +153 -0
- omniload/src/kinesis/helpers.py +96 -0
- omniload/src/klaviyo/__init__.py +237 -0
- omniload/src/klaviyo/client.py +212 -0
- omniload/src/klaviyo/helpers.py +19 -0
- omniload/src/linear/__init__.py +634 -0
- omniload/src/linear/helpers.py +111 -0
- omniload/src/linkedin_ads/__init__.py +266 -0
- omniload/src/linkedin_ads/dimension_time_enum.py +17 -0
- omniload/src/linkedin_ads/helpers.py +246 -0
- omniload/src/loader.py +69 -0
- omniload/src/mailchimp/__init__.py +126 -0
- omniload/src/mailchimp/helpers.py +226 -0
- omniload/src/mailchimp/settings.py +164 -0
- omniload/src/masking.py +344 -0
- omniload/src/mixpanel/__init__.py +62 -0
- omniload/src/mixpanel/client.py +104 -0
- omniload/src/monday/__init__.py +246 -0
- omniload/src/monday/helpers.py +392 -0
- omniload/src/monday/settings.py +325 -0
- omniload/src/mongodb/__init__.py +281 -0
- omniload/src/mongodb/helpers.py +975 -0
- omniload/src/notion/__init__.py +69 -0
- omniload/src/notion/helpers/__init__.py +14 -0
- omniload/src/notion/helpers/client.py +178 -0
- omniload/src/notion/helpers/database.py +92 -0
- omniload/src/notion/settings.py +17 -0
- omniload/src/partition.py +32 -0
- omniload/src/personio/__init__.py +345 -0
- omniload/src/personio/helpers.py +100 -0
- omniload/src/phantombuster/__init__.py +65 -0
- omniload/src/phantombuster/client.py +87 -0
- omniload/src/pinterest/__init__.py +82 -0
- omniload/src/pipedrive/__init__.py +212 -0
- omniload/src/pipedrive/helpers/__init__.py +37 -0
- omniload/src/pipedrive/helpers/custom_fields_munger.py +116 -0
- omniload/src/pipedrive/helpers/pages.py +129 -0
- omniload/src/pipedrive/settings.py +41 -0
- omniload/src/pipedrive/typing.py +17 -0
- omniload/src/plusvibeai/__init__.py +335 -0
- omniload/src/plusvibeai/helpers.py +544 -0
- omniload/src/plusvibeai/settings.py +252 -0
- omniload/src/primer/__init__.py +45 -0
- omniload/src/primer/helpers.py +79 -0
- omniload/src/quickbooks/__init__.py +117 -0
- omniload/src/reddit_ads/__init__.py +183 -0
- omniload/src/reddit_ads/helpers.py +232 -0
- omniload/src/resource.py +40 -0
- omniload/src/revenuecat/__init__.py +83 -0
- omniload/src/revenuecat/helpers.py +237 -0
- omniload/src/salesforce/__init__.py +170 -0
- omniload/src/salesforce/helpers.py +78 -0
- omniload/src/shopify/__init__.py +1953 -0
- omniload/src/shopify/exceptions.py +17 -0
- omniload/src/shopify/helpers.py +202 -0
- omniload/src/shopify/settings.py +19 -0
- omniload/src/slack/__init__.py +290 -0
- omniload/src/slack/helpers.py +218 -0
- omniload/src/slack/settings.py +36 -0
- omniload/src/smartsheets/__init__.py +82 -0
- omniload/src/snapchat_ads/__init__.py +455 -0
- omniload/src/snapchat_ads/client.py +72 -0
- omniload/src/snapchat_ads/helpers.py +630 -0
- omniload/src/snapchat_ads/settings.py +130 -0
- omniload/src/socrata_source/__init__.py +83 -0
- omniload/src/socrata_source/helpers.py +85 -0
- omniload/src/socrata_source/settings.py +8 -0
- omniload/src/solidgate/__init__.py +219 -0
- omniload/src/solidgate/helpers.py +154 -0
- omniload/src/sources.py +5408 -0
- omniload/src/sql_database/__init__.py +0 -0
- omniload/src/sql_database/callbacks.py +66 -0
- omniload/src/stripe_analytics/__init__.py +183 -0
- omniload/src/stripe_analytics/helpers.py +386 -0
- omniload/src/stripe_analytics/settings.py +80 -0
- omniload/src/table_definition.py +15 -0
- omniload/src/testdata/fakebqcredentials.json +14 -0
- omniload/src/tiktok_ads/__init__.py +150 -0
- omniload/src/tiktok_ads/tiktok_helpers.py +130 -0
- omniload/src/time.py +11 -0
- omniload/src/trustpilot/__init__.py +48 -0
- omniload/src/trustpilot/client.py +48 -0
- omniload/src/version.py +6 -0
- omniload/src/wise/__init__.py +68 -0
- omniload/src/wise/client.py +63 -0
- omniload/src/zendesk/__init__.py +480 -0
- omniload/src/zendesk/helpers/__init__.py +39 -0
- omniload/src/zendesk/helpers/api_helpers.py +119 -0
- omniload/src/zendesk/helpers/credentials.py +68 -0
- omniload/src/zendesk/helpers/talk_api.py +132 -0
- omniload/src/zendesk/settings.py +71 -0
- omniload/src/zoom/__init__.py +99 -0
- omniload/src/zoom/helpers.py +102 -0
- omniload/testdata/.gitignore +2 -0
- omniload/testdata/create_replace.csv +21 -0
- omniload/testdata/delete_insert_expected.csv +6 -0
- omniload/testdata/delete_insert_part1.csv +5 -0
- omniload/testdata/delete_insert_part2.csv +6 -0
- omniload/testdata/merge_expected.csv +5 -0
- omniload/testdata/merge_part1.csv +4 -0
- omniload/testdata/merge_part2.csv +5 -0
- omniload/tests/unit/test_smartsheets.py +133 -0
- omniload-0.0.0.dev0.dist-info/METADATA +439 -0
- omniload-0.0.0.dev0.dist-info/RECORD +218 -0
- omniload-0.0.0.dev0.dist-info/WHEEL +4 -0
- omniload-0.0.0.dev0.dist-info/entry_points.txt +2 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.Apache-2.0 +201 -0
- omniload-0.0.0.dev0.dist-info/licenses/LICENSE.md +21 -0
- omniload-0.0.0.dev0.dist-info/licenses/NOTICE +35 -0
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# Copyright 2022-2025 ScaleVector
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ShopifyPartnerApiError(Exception):
    """Error type for failures reported by the Shopify Partner API."""
|
|
@@ -0,0 +1,202 @@
|
|
|
1
|
+
# Copyright 2022-2025 ScaleVector
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Shopify source helpers"""
|
|
16
|
+
|
|
17
|
+
from typing import Any, Iterable, Literal, Optional
|
|
18
|
+
from urllib.parse import urljoin
|
|
19
|
+
|
|
20
|
+
from dlt.common import jsonpath
|
|
21
|
+
from dlt.common.time import ensure_pendulum_datetime
|
|
22
|
+
from dlt.common.typing import Dict, DictStrAny, TDataItems
|
|
23
|
+
from dlt.sources.helpers import requests
|
|
24
|
+
|
|
25
|
+
from .exceptions import ShopifyPartnerApiError
|
|
26
|
+
from .settings import DEFAULT_API_VERSION, DEFAULT_PARTNER_API_VERSION
|
|
27
|
+
|
|
28
|
+
# Literal type listing the accepted order status values.
TOrderStatus = Literal["open", "closed", "cancelled", "any"]
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def convert_datetime_fields(item: Dict[str, Any]) -> Dict[str, Any]:
    """Parse known timestamp fields of ``item`` into pendulum datetimes.

    Every string stored under ``created_at``, ``updated_at``, ``createdAt``
    or ``updatedAt`` is passed through ``ensure_pendulum_datetime``, at any
    nesting depth. Dictionaries are updated in place; lists are replaced by
    new lists holding the (converted) elements.

    Args:
        item: The item to convert

    Returns:
        The same data item (for convenience)
    """
    timestamp_keys = ("created_at", "updated_at", "createdAt", "updatedAt")

    def _walk(node: Any) -> Any:
        # Lists are rebuilt element by element.
        if isinstance(node, list):
            return [_walk(child) for child in node]
        # Scalars (and anything that is not a dict) pass through untouched.
        if not isinstance(node, dict):
            return node
        # Only values of existing keys are reassigned, so iterating the
        # dict while writing to it is safe.
        for name in node:
            current = node[name]
            if isinstance(current, str) and name in timestamp_keys:
                node[name] = ensure_pendulum_datetime(current)
            else:
                node[name] = _walk(current)
        return node

    return _walk(item)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def remove_nodes_key(item: Any) -> Any:
    """
    Collapse ``{"nodes": [...]}`` wrappers into plain lists, recursively.

    A dictionary is replaced by the (processed) content of its ``nodes``
    list only when ``nodes`` is its single key and the value is a list.
    All other dictionaries and lists are rebuilt with their children
    processed the same way; any other value is returned unchanged.

    Args:
        item: The item to process (can be a dict, list, or any other type)

    Returns:
        The processed item
    """
    if isinstance(item, list):
        return list(map(remove_nodes_key, item))
    if not isinstance(item, dict):
        return item

    nodes = item.get("nodes")
    # A list result from .get() implies the "nodes" key is present.
    if len(item) == 1 and isinstance(nodes, list):
        return list(map(remove_nodes_key, nodes))
    return {key: remove_nodes_key(value) for key, value in item.items()}
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class ShopifyApi:
    """
    Minimal Shopify client that walks the paginated ``.json`` endpoints
    under ``/admin/api/<version>/`` of a shop.
    """

    def __init__(
        self,
        shop_url: str,
        private_app_password: str,
        api_version: str = DEFAULT_API_VERSION,
    ) -> None:
        """
        Args:
            shop_url: The URL of your shop (e.g. https://my-shop.myshopify.com).
            private_app_password: The private app password to the app on your shop;
                sent as the ``X-Shopify-Access-Token`` header.
            api_version: The API version to use (e.g. 2023-01)
        """
        self.shop_url = shop_url
        self.private_app_password = private_app_password
        self.api_version = api_version

    def get_pages(
        self, resource: str, params: Optional[Dict[str, Any]] = None
    ) -> Iterable[TDataItems]:
        """Fetch a resource page by page, yielding the items of each page.

        The next page is taken from the ``next`` entry of the response's
        links; iteration stops when no such link is present.

        Args:
            resource: The resource to get pages for (e.g. products, orders, customers).
            params: Query params to include in the request.

        Yields:
            List of data items from the page
        """
        endpoint = f"/admin/api/{self.api_version}/{resource}.json"
        next_url = urljoin(self.shop_url, endpoint)

        # The response payload is keyed by the last path segment of the resource.
        items_key = resource.rsplit("/", 1)[-1]

        auth_headers = {"X-Shopify-Access-Token": self.private_app_password}
        query = params
        while next_url:
            page = requests.get(next_url, params=query, headers=auth_headers)
            page.raise_for_status()
            payload = page.json()
            yield [convert_datetime_fields(record) for record in payload[items_key]]
            next_url = page.links.get("next", {}).get("url")
            # Query params are included in subsequent page URLs
            query = None
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
class ShopifyGraphQLApi:
    """Client for Shopify GraphQL API"""

    def __init__(
        self,
        access_token: str,
        api_version: str = DEFAULT_PARTNER_API_VERSION,
        base_url: str = "partners.shopify.com",
    ) -> None:
        """
        Args:
            access_token: Token sent as the ``X-Shopify-Access-Token`` header.
            api_version: API version segment used when building the GraphQL URL.
            base_url: Host of the API, with or without an ``http://`` /
                ``https://`` scheme. ``https://`` is assumed when no scheme is given.
        """
        self.access_token = access_token
        self.api_version = api_version
        self.base_url = base_url

    @property
    def graphql_url(self) -> str:
        """Full URL of the GraphQL endpoint for the configured base URL and version."""
        # Drop trailing slashes so the joined path does not contain "//admin".
        base = self.base_url.rstrip("/")
        # Respect an explicit scheme (http or https); otherwise default to https.
        # Previously "http://host" was turned into "https://http://host/...".
        if not base.startswith(("http://", "https://")):
            base = f"https://{base}"
        return f"{base}/admin/api/{self.api_version}/graphql.json"

    def run_graphql_query(
        self, query: str, variables: Optional[DictStrAny] = None
    ) -> DictStrAny:
        """Run a graphql query against the Shopify Partner API

        Args:
            query: The query to run
            variables: The variables to include in the query

        Returns:
            The response JSON

        Raises:
            ShopifyPartnerApiError: If the response JSON has a non-empty
                ``errors`` entry; the raw response text is the message.
        """
        headers = {"X-Shopify-Access-Token": self.access_token}
        response = requests.post(
            self.graphql_url,
            json={"query": query, "variables": variables},
            headers=headers,
        )
        data = response.json()
        if data.get("errors"):
            raise ShopifyPartnerApiError(response.text)
        return data  # type: ignore[no-any-return]

    def get_graphql_pages(
        self,
        query: str,
        data_items_path: jsonpath.TJsonPath,
        pagination_cursor_path: jsonpath.TJsonPath,
        pagination_variable_name: str,
        pagination_cursor_has_next_page_path: Optional[jsonpath.TJsonPath] = None,
        variables: Optional[DictStrAny] = None,
    ) -> Iterable[TDataItems]:
        """Run a paginated GraphQL query and yield the items of each page.

        Args:
            query: The query to run
            data_items_path: JSON path selecting the data items in the response.
            pagination_cursor_path: JSON path selecting the pagination cursor(s);
                the last match is used as the cursor for the next request.
            pagination_variable_name: Name of the query variable that receives
                the cursor.
            pagination_cursor_has_next_page_path: Optional JSON path to a flag
                telling whether another page exists; when given and the first
                match is missing or falsy, pagination stops.
            variables: Initial query variables. The mapping is copied, so the
                caller's dict is not modified.

        Yields:
            List of data items from the page, with timestamp fields converted
            and ``nodes`` wrappers removed.
        """
        variables = dict(variables or {})
        while True:
            data = self.run_graphql_query(query, variables)
            data_items = jsonpath.find_values(data_items_path, data)

            if not data_items:
                break

            yield [
                remove_nodes_key(convert_datetime_fields(item)) for item in data_items
            ]

            cursors = jsonpath.find_values(pagination_cursor_path, data)
            if not cursors:
                break

            if pagination_cursor_has_next_page_path:
                has_next_page = jsonpath.find_values(
                    pagination_cursor_has_next_page_path, data
                )
                if not has_next_page or not has_next_page[0]:
                    break

            next_cursor = cursors[-1]
            # A null cursor would reset the pagination variable and request
            # the first page again, so stop instead of looping.
            if next_cursor is None:
                break

            variables[pagination_variable_name] = next_cursor
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
# Copyright 2022-2025 ScaleVector
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
# ISO date string for January 1st 2000.
# NOTE(review): presumably the default start date for loads — confirm against callers.
FIRST_DAY_OF_MILLENNIUM = "2000-01-01"
# Default API version for ShopifyApi (the helpers module uses it as the
# default `api_version` argument).
DEFAULT_API_VERSION = "2023-10"
# NOTE(review): presumably the page size requested per API call — confirm against callers.
DEFAULT_ITEMS_PER_PAGE = 250

# Default API version for ShopifyGraphQLApi (the helpers module uses it as the
# default `api_version` argument).
DEFAULT_PARTNER_API_VERSION = "2024-01"
|
|
@@ -0,0 +1,290 @@
|
|
|
1
|
+
# Copyright 2022-2025 ScaleVector
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
"""Fetches Slack Conversations, History and logs."""
|
|
16
|
+
|
|
17
|
+
from functools import partial
|
|
18
|
+
from typing import Any, Dict, Iterable, List, Literal, Optional, Tuple
|
|
19
|
+
|
|
20
|
+
import dlt
|
|
21
|
+
from dlt.common.typing import TAnyDateTime, TDataItem
|
|
22
|
+
from dlt.sources import DltResource
|
|
23
|
+
from pendulum import DateTime
|
|
24
|
+
|
|
25
|
+
from .helpers import SlackAPI, ensure_dt_type
|
|
26
|
+
from .settings import (
|
|
27
|
+
DEFAULT_DATETIME_FIELDS,
|
|
28
|
+
DEFAULT_START_DATE,
|
|
29
|
+
MAX_PAGE_SIZE,
|
|
30
|
+
MSG_DATETIME_FIELDS,
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
@dlt.source(name="slack", max_table_nesting=0)
def slack_source(
    page_size: int = MAX_PAGE_SIZE,
    access_token: str = dlt.secrets.value,
    start_date: Optional[TAnyDateTime] = DEFAULT_START_DATE,
    end_date: Optional[TAnyDateTime] = None,
    selected_channels: Optional[List[str]] = dlt.config.value,
    table_per_channel: bool = True,
    replies: bool = False,
) -> Iterable[DltResource]:
    """
    The source for the Slack pipeline.

    Always yields the ``channels``, ``users`` and ``access_logs`` resources.
    Messages are yielded either as one resource per selected channel
    (``table_per_channel=True``) or as a single ``messages`` resource, each
    optionally followed by a replies transformer.

    Args:
        page_size: The max number of items to fetch per page. Defaults to MAX_PAGE_SIZE.
        access_token: the oauth access_token used to authenticate.
        start_date: The start time of the range for which to load. Defaults to DEFAULT_START_DATE.
        end_date: The end time of the range for which to load data.
        selected_channels: The list of channels (names or ids) to load. If empty or None,
            all channels will be loaded.
        table_per_channel: Boolean flag, True by default. If True - for each channel separate table with messages is created.
            Otherwise, all messages are put in one table.
        replies: Boolean flag indicating if you want a replies table to be present as well. False by default.

    Returns:
        Iterable[DltResource]: A list of DltResource objects representing the data resources.
    """

    end_dt: Optional[DateTime] = ensure_dt_type(end_date)
    start_dt: Optional[DateTime] = ensure_dt_type(start_date)
    # Without an end date the message resources append; with one they merge.
    write_disposition: Literal["append", "merge"] = (
        "append" if end_date is None else "merge"
    )

    api = SlackAPI(
        access_token=access_token,
        page_size=page_size,
    )

    def get_channels(
        slack_api: SlackAPI, selected_channels: Optional[List[str]]
    ) -> Tuple[List[TDataItem], List[TDataItem]]:
        """
        Fetch all channels from Slack and pick the selected ones.

        Args:
            slack_api: Slack API instance.
            selected_channels: List of selected channel names or ids, or None.

        Returns:
            Tuple[List[TDataItem], List[TDataItem]]: all fetched channels, and the
            subset whose name or id is in ``selected_channels`` (all channels when
            no selection is given).
        """
        channels: List[TDataItem] = []
        for page_data in slack_api.get_pages(
            resource="conversations.list",
            response_path="$.channels[*]",
            datetime_fields=DEFAULT_DATETIME_FIELDS,
        ):
            channels.extend(page_data)

        if selected_channels:
            fetch_channels = [
                c
                for c in channels
                if c["name"] in selected_channels or c["id"] in selected_channels
            ]
        else:
            fetch_channels = channels
        return channels, fetch_channels

    # Channels are fetched right away, while the source function body runs:
    # the per-channel resources created further down are built from this list.
    channels, fetched_selected_channels = get_channels(api, selected_channels)

    @dlt.resource(name="channels", primary_key="id", write_disposition="replace")
    def channels_resource() -> Iterable[TDataItem]:
        """Yield all channels as a DLT resource."""
        yield from channels

    @dlt.resource(name="users", primary_key="id", write_disposition="replace")
    def users_resource() -> Iterable[TDataItem]:
        """
        Yield all users as a DLT resource.

        Yields:
            Iterable[TDataItem]: A list of users.
        """

        for page_data in api.get_pages(
            resource="users.list",
            response_path="$.members[*]",
            params=dict(include_locale=True),
            datetime_fields=DEFAULT_DATETIME_FIELDS,
        ):
            yield page_data

    def get_messages(
        channel_data: Dict[str, Any], start_date_ts: float, end_date_ts: float
    ) -> Iterable[TDataItem]:
        """
        Generator, which gets channel messages for specific dates.

        Args:
            channel_data: dict with channel data; its "id" selects the channel.
            start_date_ts: start timestamp, sent as the "oldest" request parameter.
            end_date_ts: end timestamp, sent as the "latest" request parameter.

        Yields:
            List[TDataItem]: messages.
        """
        params = {
            "channel": channel_data["id"],
            "oldest": start_date_ts,
            "latest": end_date_ts,
        }

        for page_data in api.get_pages(
            resource="conversations.history",
            response_path="$.messages[*]",
            params=params,
            datetime_fields=MSG_DATETIME_FIELDS,
            context={"channel": channel_data["id"]},
        ):
            yield page_data

    def get_thread_replies(messages: List[Dict[str, Any]]) -> Iterable[TDataItem]:
        """
        Generator, which gets replies for each message.

        Only messages with a truthy "thread_ts" are queried.

        Args:
            messages: messages data.

        Yields:
            List[TDataItem]: the replies of one page, without the page's first item.
        """
        for message in messages:
            if message.get("thread_ts", None):
                params = {
                    "channel": message["channel"],
                    "ts": ensure_dt_type(message["thread_ts"], to_ts=True),
                }

                for page_data in api.get_pages(
                    resource="conversations.replies",
                    response_path="$.messages[*]",
                    params=params,
                    context={"channel": message["channel"]},
                ):
                    # NOTE(review): the first item of every page is dropped —
                    # presumably the thread's parent message; confirm this also
                    # holds for pages after the first.
                    yield page_data[1:]

    @dlt.resource(
        name="messages",
        primary_key=("channel", "ts"),
        columns={"blocks": {"data_type": "json"}},
        write_disposition=write_disposition,
    )
    def messages_resource(
        created_at: dlt.sources.incremental[DateTime] = dlt.sources.incremental(
            "ts",
            initial_value=start_dt,
            end_value=end_dt,
            allow_external_schedulers=True,
            range_end="closed",
            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        """
        Yield all messages for a set of selected channels as a DLT resource. Keep blocks column without normalization.

        Args:
            created_at (dlt.sources.incremental[DateTime]): The incremental cursor on the "ts" field.

        Yields:
            Iterable[TDataItem]: A list of messages.
        """
        start_date_ts = ensure_dt_type(created_at.last_value, to_ts=True)
        end_date_ts = ensure_dt_type(created_at.end_value, to_ts=True)
        for channel_data in fetched_selected_channels:
            yield from get_messages(channel_data, start_date_ts, end_date_ts)

    def per_table_messages_resource(
        channel_data: Dict[str, Any],
        created_at: dlt.sources.incremental[DateTime] = dlt.sources.incremental(
            "ts",
            initial_value=start_dt,
            end_value=end_dt,
            allow_external_schedulers=True,
            range_end="closed",
            range_start="closed",
        ),
    ) -> Iterable[TDataItem]:
        """Yield all messages for a given channel as a DLT resource. Keep blocks column without normalization.

        Not decorated here: it is wrapped with ``dlt.resource`` once per
        channel further down.

        Args:
            channel_data (Dict[str, Any]): The channel data.
            created_at (dlt.sources.incremental[DateTime]): The incremental cursor on the "ts" field.

        Yields:
            Iterable[TDataItem]: A list of messages.
        """
        start_date_ts = ensure_dt_type(created_at.last_value, to_ts=True)
        end_date_ts = ensure_dt_type(created_at.end_value, to_ts=True)
        yield from get_messages(channel_data, start_date_ts, end_date_ts)

    def table_name_func(channel_name: str, payload: TDataItem) -> str:
        """Return the table name for a given channel and payload.

        The name is ``<channel_name>_<subtype>``, falling back to the
        payload's "type" (or an empty string) when it has no "subtype".
        """
        table_type = payload.get("subtype", payload.get("type", ""))
        return f"{channel_name}_{table_type}"

    # It will not work in the pipeline or tests because it is a paid feature,
    # raise an error when it is not a paying account.
    @dlt.resource(
        name="access_logs",
        selected=False,
        primary_key="user_id",
        write_disposition="append",
    )
    # it is not an incremental resource it just has an end_date filter
    def logs_resource() -> Iterable[TDataItem]:
        """The access logs resource.

        Sends ``end_dt`` as the "before" parameter (integer timestamp, or
        None when no end date was given).
        """
        for page_data in api.get_pages(
            resource="team.accessLogs",
            response_path="$.logins[*]",
            datetime_fields=["date_first", "date_last"],
            params={"before": end_dt if end_dt is None else end_dt.int_timestamp},
        ):
            yield page_data

    yield from (channels_resource, users_resource, logs_resource)

    if table_per_channel:
        # One messages resource (and optionally one replies transformer) per
        # selected channel, named after the channel.
        for channel in fetched_selected_channels:
            channel_name = channel["name"]
            table_name = partial(table_name_func, channel_name)
            messages_channel = dlt.resource(
                per_table_messages_resource,
                name=channel_name,
                table_name=table_name,
                primary_key=("channel", "ts"),
                write_disposition=write_disposition,
                columns={"blocks": {"data_type": "json"}},
            )(channel)

            yield messages_channel
            if replies:
                yield messages_channel | dlt.transformer(
                    get_thread_replies,
                    name=channel_name + "_replies",
                    table_name=partial(table_name_func, channel_name + "_replies"),
                    primary_key=("thread_ts", "ts"),
                    write_disposition=write_disposition,
                )
    else:
        # Single "messages" resource covering all selected channels.
        yield messages_resource
        if replies:
            yield messages_resource | dlt.transformer(
                get_thread_replies,
                name="replies",
                primary_key=("thread_ts", "ts"),
                write_disposition=write_disposition,
            )
|