ingestr 0.13.63__py3-none-any.whl → 0.13.65__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of ingestr might be problematic.
- ingestr/src/attio/__init__.py +10 -7
- ingestr/src/attio/helpers.py +19 -8
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/destinations.py +93 -1
- ingestr/src/factory.py +1 -0
- ingestr/src/linear/__init__.py +46 -34
- ingestr/src/linear/helpers.py +60 -0
- ingestr/src/sources.py +47 -2
- ingestr/src/zoom/__init__.py +45 -1
- ingestr/src/zoom/helpers.py +26 -0
- {ingestr-0.13.63.dist-info → ingestr-0.13.65.dist-info}/METADATA +1 -1
- {ingestr-0.13.63.dist-info → ingestr-0.13.65.dist-info}/RECORD +15 -14
- {ingestr-0.13.63.dist-info → ingestr-0.13.65.dist-info}/WHEEL +0 -0
- {ingestr-0.13.63.dist-info → ingestr-0.13.65.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.63.dist-info → ingestr-0.13.65.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/attio/__init__.py
CHANGED
@@ -20,13 +20,15 @@ def attio_source(
             "created_at": {"data_type": "timestamp", "partition": True},
         },
     )
+    # https://docs.attio.com/rest-api/endpoint-reference/objects/list-objects - does not support pagination
     def fetch_objects() -> Iterator[dict]:
         if len(params) != 0:
             raise ValueError("Objects table must be in the format `objects`")

         path = "objects"
-        yield attio_client.
+        yield attio_client.fetch_all(path, "get")

+    # https://docs.attio.com/rest-api/endpoint-reference/records/list-records
     @dlt.resource(
         name="records",
         write_disposition="replace",
@@ -39,12 +41,12 @@ def attio_source(
             raise ValueError(
                 "Records table must be in the format `records:{object_api_slug}`"
             )
-
         object_id = params[0]
         path = f"objects/{object_id}/records/query"

-        yield attio_client.
+        yield attio_client.fetch_paginated(path, "post")

+    # https://docs.attio.com/rest-api/endpoint-reference/lists/list-all-lists -- does not support pagination
     @dlt.resource(
         name="lists",
         write_disposition="replace",
@@ -54,8 +56,9 @@ def attio_source(
     )
     def fetch_lists() -> Iterator[dict]:
         path = "lists"
-        yield attio_client.
+        yield attio_client.fetch_all(path, "get")

+    # https://docs.attio.com/rest-api/endpoint-reference/entries/list-entries
     @dlt.resource(
         name="list_entries",
         write_disposition="replace",
@@ -70,7 +73,7 @@ def attio_source(
     )
         path = f"lists/{params[0]}/entries/query"

-        yield attio_client.
+        yield attio_client.fetch_paginated(path, "post")

     @dlt.resource(
         name="all_list_entries",
@@ -85,10 +88,10 @@ def attio_source(
                 "All list entries table must be in the format `all_list_entries:{object_api_slug}`"
             )
         path = "lists"
-        for lst in attio_client.
+        for lst in attio_client.fetch_all(path, "get"):
             if params[0] in lst["parent_object"]:
                 path = f"lists/{lst['id']['list_id']}/entries/query"
-                yield from attio_client.
+                yield from attio_client.fetch_paginated(path, "post")

     return (
         fetch_objects,
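The table-name formats enforced above map directly onto Attio endpoint paths. A minimal sketch of that routing outside the source (the `records:{object_api_slug}` split and the path templates come from the diff; the rest is illustrative):

# How `records:{object_api_slug}` becomes an Attio records-query path; the
# plain `objects` table hits the unpaginated list endpoint via fetch_all.
table = "records:people"
name, _, slug = table.partition(":")
if name == "records" and slug:
    path = f"objects/{slug}/records/query"  # paginated, fetched via POST
else:
    path = "objects"                        # unpaginated, fetched via GET
print(path)  # objects/people/records/query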
ingestr/src/attio/helpers.py
CHANGED
@@ -10,42 +10,53 @@ class AttioClient:
         }
         self.client = create_client()

-    def
+    def fetch_paginated(self, path: str, method: str, limit: int = 1000, params=None):
         url = f"{self.base_url}/{path}"
         if params is None:
             params = {}
         offset = 0
         while True:
-            query_params = {
+            query_params = {"limit": limit, "offset": offset, **params}
             if method == "get":
                 response = self.client.get(
                     url, headers=self.headers, params=query_params
                 )
             else:
-
-
-                )
+                json_body = {**params, "limit": limit, "offset": offset}
+                response = self.client.post(url, headers=self.headers, json=json_body)

             if response.status_code != 200:
                 raise Exception(f"HTTP {response.status_code} error: {response.text}")

             response_data = response.json()
             if "data" not in response_data:
-                print(f"API Response: {response_data}")
                 raise Exception(
                     "Attio API returned a response without the expected data"
                 )

             data = response_data["data"]
-
             for item in data:
                 flat_item = flatten_item(item)
                 yield flat_item
-
             if len(data) < limit:
                 break
+
             offset += limit

+    def fetch_all(self, path: str, method: str = "get", params=None):
+        url = f"{self.base_url}/{path}"
+        params = params or {}
+
+        if method == "get":
+            response = self.client.get(url, headers=self.headers, params=params)
+        else:
+            response = self.client.post(url, headers=self.headers, json=params)
+
+        response.raise_for_status()
+        data = response.json().get("data", [])
+        for item in data:
+            yield flatten_item(item)
+

 def flatten_item(item: dict) -> dict:
     if "id" in item:
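Taken together, the two methods split Attio endpoints by pagination support. A hedged usage sketch (the `AttioClient` constructor arguments are assumed, since they are not shown in this diff):

from ingestr.src.attio.helpers import AttioClient

client = AttioClient(api_key="...")  # constructor shape assumed

# Unpaginated endpoints (objects, lists): a single GET, flattened items.
for obj in client.fetch_all("objects", "get"):
    print(obj)

# Paginated endpoints: fetch_paginated walks limit/offset pages until a
# page comes back shorter than `limit`, POSTing the cursor in the body.
for entry in client.fetch_paginated("objects/people/records/query", "post"):
    print(entry)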
ingestr/src/buildinfo.py
CHANGED
@@ -1 +1 @@
-version = "v0.13.63"
+version = "v0.13.65"
ingestr/src/destinations.py
CHANGED
@@ -1,19 +1,31 @@
 import abc
 import base64
 import csv
+import datetime
 import json
 import os
 import shutil
+import struct
 import tempfile
 from urllib.parse import parse_qs, quote, urlparse

 import dlt
 import dlt.destinations.impl.filesystem.filesystem
 from dlt.common.configuration.specs import AwsCredentials
+from dlt.common.destination.capabilities import DestinationCapabilitiesContext
+from dlt.common.schema import Schema
 from dlt.common.storages.configuration import FileSystemCredentials
 from dlt.destinations.impl.clickhouse.configuration import (
     ClickHouseCredentials,
 )
+from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration
+from dlt.destinations.impl.mssql.mssql import (
+    HINT_TO_MSSQL_ATTR,
+    MsSqlJobClient,
+)
+from dlt.destinations.impl.mssql.sql_client import (
+    PyOdbcMsSqlClient,
+)

 from ingestr.src.errors import MissingValueError
 from ingestr.src.loader import load_dlt_file
@@ -143,9 +155,89 @@ class DuckDBDestination(GenericSqlDestination):
         return dlt.destinations.duckdb(uri, **kwargs)


+def handle_datetimeoffset(dto_value: bytes) -> datetime.datetime:
+    # ref: https://github.com/mkleehammer/pyodbc/issues/134#issuecomment-281739794
+    tup = struct.unpack(
+        "<6hI2h", dto_value
+    )  # e.g., (2017, 3, 16, 10, 35, 18, 500000000, -6, 0)
+    return datetime.datetime(
+        tup[0],
+        tup[1],
+        tup[2],
+        tup[3],
+        tup[4],
+        tup[5],
+        tup[6] // 1000,
+        datetime.timezone(datetime.timedelta(hours=tup[7], minutes=tup[8])),
+    )
+
+
+class OdbcMsSqlClient(PyOdbcMsSqlClient):
+    SQL_COPT_SS_ACCESS_TOKEN = 1256
+    SKIP_CREDENTIALS = {"PWD", "AUTHENTICATION", "UID"}
+
+    def open_connection(self):
+        cfg = self.credentials._get_odbc_dsn_dict()
+        if (
+            cfg.get("AUTHENTICATION", "").strip().lower()
+            != "activedirectoryaccesstoken"
+        ):
+            return super().open_connection()
+
+        import pyodbc  # type: ignore
+
+        dsn = ";".join(
+            [f"{k}={v}" for k, v in cfg.items() if k not in self.SKIP_CREDENTIALS]
+        )
+
+        self._conn = pyodbc.connect(
+            dsn,
+            timeout=self.credentials.connect_timeout,
+            attrs_before={
+                self.SQL_COPT_SS_ACCESS_TOKEN: self.serialize_token(cfg["PWD"]),
+            },
+        )
+
+        # https://github.com/mkleehammer/pyodbc/wiki/Using-an-Output-Converter-function
+        self._conn.add_output_converter(-155, handle_datetimeoffset)
+        self._conn.autocommit = True
+        return self._conn
+
+    def serialize_token(self, token):
+        # https://github.com/mkleehammer/pyodbc/issues/228#issuecomment-494773723
+        encoded = token.encode("utf_16_le")
+        return struct.pack("<i", len(encoded)) + encoded
+
+
+class MsSqlClient(MsSqlJobClient):
+    def __init__(
+        self,
+        schema: Schema,
+        config: MsSqlClientConfiguration,
+        capabilities: DestinationCapabilitiesContext,
+    ) -> None:
+        sql_client = OdbcMsSqlClient(
+            config.normalize_dataset_name(schema),
+            config.normalize_staging_dataset_name(schema),
+            config.credentials,
+            capabilities,
+        )
+        super(MsSqlJobClient, self).__init__(schema, config, sql_client)
+        self.config: MsSqlClientConfiguration = config
+        self.sql_client = sql_client
+        self.active_hints = HINT_TO_MSSQL_ATTR if self.config.create_indexes else {}
+        self.type_mapper = capabilities.get_type_mapper()
+
+
+class MsSqlDestImpl(dlt.destinations.mssql):
+    @property
+    def client_class(self):
+        return MsSqlClient
+
+
 class MsSQLDestination(GenericSqlDestination):
     def dlt_dest(self, uri: str, **kwargs):
-        return
+        return MsSqlDestImpl(credentials=uri, **kwargs)


 class DatabricksDestination(GenericSqlDestination):
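The new `handle_datetimeoffset` converter decodes the nine-field binary struct pyodbc returns for SQL Server's DATETIMEOFFSET type (ODBC type code -155). A self-contained check of that decoding, packing the example tuple from the code's own comment and round-tripping it:

import datetime
import struct

def handle_datetimeoffset(dto_value: bytes) -> datetime.datetime:
    # same unpacking as the converter above: year, month, day, hour,
    # minute, second, nanoseconds, tz-offset hours, tz-offset minutes
    tup = struct.unpack("<6hI2h", dto_value)
    return datetime.datetime(
        tup[0], tup[1], tup[2], tup[3], tup[4], tup[5], tup[6] // 1000,
        datetime.timezone(datetime.timedelta(hours=tup[7], minutes=tup[8])),
    )

raw = struct.pack("<6hI2h", 2017, 3, 16, 10, 35, 18, 500000000, -6, 0)
print(handle_datetimeoffset(raw))  # 2017-03-16 10:35:18.500000-06:00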
ingestr/src/factory.py
CHANGED
ingestr/src/linear/__init__.py
CHANGED
@@ -1,38 +1,9 @@
-from typing import Any, Dict, Iterable, Iterator
+from typing import Any, Dict, Iterable, Iterator

 import dlt
 import pendulum
-import requests
-
-LINEAR_GRAPHQL_ENDPOINT = "https://api.linear.app/graphql"
-
-
-def _graphql(
-    api_key: str, query: str, variables: Optional[Dict[str, Any]] = None
-) -> Dict[str, Any]:
-    headers = {"Authorization": api_key, "Content-Type": "application/json"}
-    response = requests.post(
-        LINEAR_GRAPHQL_ENDPOINT,
-        json={"query": query, "variables": variables or {}},
-        headers=headers,
-    )
-    response.raise_for_status()
-    payload = response.json()
-    if "errors" in payload:
-        raise ValueError(str(payload["errors"]))
-    return payload["data"]
-
-
-def _paginate(api_key: str, query: str, root: str) -> Iterator[Dict[str, Any]]:
-    cursor: Optional[str] = None
-    while True:
-        data = _graphql(api_key, query, {"cursor": cursor})[root]
-        for item in data["nodes"]:
-            yield item
-        if not data["pageInfo"]["hasNextPage"]:
-            break
-        cursor = data["pageInfo"]["endCursor"]

+from .helpers import _paginate, _normalize_issue, _normalize_team

 ISSUES_QUERY = """
 query Issues($cursor: String) {
@@ -43,6 +14,17 @@ query Issues($cursor: String) {
       description
       createdAt
       updatedAt
+      creator { id }
+      assignee { id }
+      state { id }
+      labels { nodes { id } }
+      cycle { id }
+      project { id }
+      subtasks: children { nodes { id title } }
+      comments(first: 250) { nodes { id body } }
+      priority
+      attachments { nodes { id } }
+      subscribers { nodes { id } }
     }
     pageInfo { hasNextPage endCursor }
 }
@@ -58,6 +40,10 @@ query Projects($cursor: String) {
       description
       createdAt
       updatedAt
+      health
+      priority
+      targetDate
+      lead { id }
     }
     pageInfo { hasNextPage endCursor }
 }
@@ -72,6 +58,11 @@ query Teams($cursor: String) {
      name
      key
      description
+      updatedAt
+      createdAt
+      memberships { nodes { id } }
+      members { nodes { id } }
+      projects { nodes { id } }
     }
     pageInfo { hasNextPage endCursor }
 }
@@ -124,7 +115,7 @@ def linear_source(
         for item in _paginate(api_key, ISSUES_QUERY, "issues"):
             if pendulum.parse(item["updatedAt"]) >= current_start_date:
                 if pendulum.parse(item["updatedAt"]) <= current_end_date:
-                    yield item
+                    yield _normalize_issue(item)

     @dlt.resource(name="projects", primary_key="id", write_disposition="merge")
     def projects(
@@ -152,8 +143,29 @@ def linear_source(
                     yield item

     @dlt.resource(name="teams", primary_key="id", write_disposition="merge")
-    def teams(
-
+    def teams(
+        updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
+            "updatedAt",
+            initial_value=start_date.isoformat(),
+            end_value=end_date.isoformat() if end_date else None,
+            range_start="closed",
+            range_end="closed",
+        ),
+    ) -> Iterator[Dict[str, Any]]:
+        print(start_date)
+        if updated_at.last_value:
+            current_start_date = pendulum.parse(updated_at.last_value)
+        else:
+            current_start_date = pendulum.parse(start_date)
+        print(current_start_date)
+
+        if updated_at.end_value:
+            current_end_date = pendulum.parse(updated_at.end_value)
+        else:
+            current_end_date = pendulum.now(tz="UTC")
+
+        for item in _paginate(api_key, TEAMS_QUERY, "teams"):
+            if pendulum.parse(item["updatedAt"]) >= current_start_date:
+                if pendulum.parse(item["updatedAt"]) <= current_end_date:
+                    yield _normalize_team(item)

     @dlt.resource(name="users", primary_key="id", write_disposition="merge")
     def users(
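The queries above are paged by `_paginate` from the new helpers module, which feeds `pageInfo.endCursor` back in as `$cursor`. A hedged sketch of a single page fetch against Linear's GraphQL endpoint (the connection's pagination arguments and the API key are assumptions; the endpoint and header shape come from the diff):

import requests

QUERY = """
query Teams($cursor: String) {
  teams(after: $cursor) {
    nodes { id name }
    pageInfo { hasNextPage endCursor }
  }
}
"""

resp = requests.post(
    "https://api.linear.app/graphql",
    json={"query": QUERY, "variables": {"cursor": None}},
    headers={"Authorization": "lin_api_...", "Content-Type": "application/json"},
)
resp.raise_for_status()
page = resp.json()["data"]["teams"]
print(len(page["nodes"]), page["pageInfo"]["endCursor"])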
ingestr/src/linear/helpers.py
ADDED
@@ -0,0 +1,60 @@
+import json
+from typing import Any, Dict, Iterator, Optional
+
+import requests
+
+LINEAR_GRAPHQL_ENDPOINT = "https://api.linear.app/graphql"
+
+def _graphql(
+    api_key: str, query: str, variables: Optional[Dict[str, Any]] = None
+) -> Dict[str, Any]:
+    headers = {"Authorization": api_key, "Content-Type": "application/json"}
+    response = requests.post(
+        LINEAR_GRAPHQL_ENDPOINT,
+        json={"query": query, "variables": variables or {}},
+        headers=headers,
+    )
+    response.raise_for_status()
+    payload = response.json()
+    if "errors" in payload:
+        raise ValueError(str(payload["errors"]))
+    return payload["data"]
+
+def _paginate(api_key: str, query: str, root: str) -> Iterator[Dict[str, Any]]:
+    cursor: Optional[str] = None
+    while True:
+        data = _graphql(api_key, query, {"cursor": cursor})[root]
+        for item in data["nodes"]:
+            yield item
+        if not data["pageInfo"]["hasNextPage"]:
+            break
+        cursor = data["pageInfo"]["endCursor"]
+
+def _normalize_issue(item: Dict[str, Any]) -> Dict[str, Any]:
+    field_mapping = {
+        "assignee": "assignee_id",
+        "creator": "creator_id",
+        "state": "state_id",
+        "cycle": "cycle_id",
+        "project": "project_id",
+    }
+    for key, value in field_mapping.items():
+        if item.get(key):
+            item[value] = item[key]["id"]
+            del item[key]
+        else:
+            item[value] = None
+            del item[key]
+    json_fields = ["comments", "subscribers", "attachments", "labels", "subtasks", "projects", "memberships", "members"]
+    for field in json_fields:
+        if item.get(field):
+            item[f"{field}"] = item[field].get("nodes", [])
+
+    return item
+
+def _normalize_team(item: Dict[str, Any]) -> Dict[str, Any]:
+    json_fields = ["memberships", "members", "projects"]
+    for field in json_fields:
+        if item.get(field):
+            item[f"{field}"] = item[field].get("nodes", [])
+    return item
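To see what `_normalize_issue` does to a raw GraphQL node: single relations collapse to `*_id` scalars and connection wrappers are replaced by their `nodes` lists. A worked example on a made-up record:

from ingestr.src.linear.helpers import _normalize_issue

raw = {
    "id": "ISS-1",
    "assignee": {"id": "user-7"},
    "creator": None,
    "state": {"id": "state-2"},
    "cycle": None,
    "project": {"id": "proj-9"},
    "labels": {"nodes": [{"id": "lbl-1"}]},
}
print(_normalize_issue(raw))
# {'id': 'ISS-1', 'labels': [{'id': 'lbl-1'}], 'assignee_id': 'user-7',
#  'creator_id': None, 'state_id': 'state-2', 'cycle_id': None,
#  'project_id': 'proj-9'}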
ingestr/src/sources.py
CHANGED
@@ -258,8 +258,53 @@ class SqlSource:
         # override the query adapters, the only one we want is the one here in the case of custom queries
         query_adapters = [custom_query_variable_subsitution(query_value, kwargs)]

+        credentials = ConnectionStringCredentials(uri)
+        if uri.startswith("mssql://"):
+            parsed_uri = urlparse(uri)
+            params = parse_qs(parsed_uri.query)
+            params = {k.lower(): v for k, v in params.items()}
+            if params.get("authentication") == ["ActiveDirectoryAccessToken"]:
+                import pyodbc  # type: ignore
+                from sqlalchemy import create_engine
+
+                from ingestr.src.destinations import (
+                    OdbcMsSqlClient,
+                    handle_datetimeoffset,
+                )
+
+                cfg = {
+                    "DRIVER": params.get("driver", ["ODBC Driver 18 for SQL Server"])[
+                        0
+                    ],
+                    "SERVER": f"{parsed_uri.hostname},{parsed_uri.port or 1433}",
+                    "DATABASE": parsed_uri.path.lstrip("/"),
+                }
+                for k, v in params.items():
+                    if k.lower() not in ["driver", "authentication", "connect_timeout"]:
+                        cfg[k.upper()] = v[0]
+
+                token = OdbcMsSqlClient.serialize_token(None, parsed_uri.password)  # type: ignore[arg-type]
+                dsn = ";".join([f"{k}={v}" for k, v in cfg.items()])
+
+                def creator():
+                    connection = pyodbc.connect(
+                        dsn,
+                        autocommit=True,
+                        timeout=kwargs.get("connect_timeout", 30),
+                        attrs_before={
+                            OdbcMsSqlClient.SQL_COPT_SS_ACCESS_TOKEN: token,
+                        },
+                    )
+                    connection.add_output_converter(-155, handle_datetimeoffset)
+                    return connection
+
+                credentials = create_engine(
+                    "mssql+pyodbc://",
+                    creator=creator,
+                )
+
         builder_res = self.table_builder(
-            credentials=
+            credentials=credentials,
             schema=table_fields.dataset,
             table=table_fields.table,
             incremental=incremental,
@@ -2915,7 +2960,7 @@ class ZoomSource:

         from ingestr.src.zoom import zoom_source

-        if table not in {"meetings"}:
+        if table not in {"meetings", "users", "participants"}:
             raise UnsupportedResourceError(table, "Zoom")

         return zoom_source(
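The new branch means a SQL Server source can authenticate with a Microsoft Entra ID access token instead of a password. A hedged sketch of the URI shape that takes this path (host, database, and token are placeholders; the query parameter names and the driver fallback come from the diff):

# The token rides in the password slot; `authentication=ActiveDirectoryAccessToken`
# switches SqlSource to the pyodbc `creator`-based engine built above. The
# driver parameter falls back to "ODBC Driver 18 for SQL Server" when omitted.
uri = (
    "mssql://user:eyJ0eXAiOiJKV1Qi...@myserver.database.windows.net:1433/mydb"
    "?authentication=ActiveDirectoryAccessToken"
    "&driver=ODBC+Driver+18+for+SQL+Server"
)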
ingestr/src/zoom/__init__.py
CHANGED
@@ -42,14 +42,58 @@ def zoom_source(
             end_dt = pendulum.now("UTC")
         else:
             end_dt = pendulum.parse(datetime.end_value)
+
         base_params: Dict[str, Any] = {
             "type": "scheduled",
             "page_size": 300,
             "from": start_dt.to_date_string(),
             "to": end_dt.to_date_string(),
         }
+
         for user in client.get_users():
             user_id = user["id"]
             yield from client.get_meetings(user_id, base_params)

-
+    @dlt.resource(write_disposition="merge", primary_key="id")
+    def users() -> Iterable[TDataItem]:
+        yield from client.get_users()
+
+    @dlt.resource(write_disposition="merge", primary_key="id")
+    def participants(
+        datetime: dlt.sources.incremental[TAnyDateTime] = dlt.sources.incremental(
+            "join_time",
+            initial_value=start_date.isoformat(),
+            end_value=end_date.isoformat() if end_date is not None else None,
+            range_start="closed",
+            range_end="closed",
+        ),
+    ) -> Iterable[TDataItem]:
+        if datetime.last_value:
+            start_dt = pendulum.parse(datetime.last_value)
+        else:
+            start_dt = pendulum.parse(start_date)
+
+        if end_date is None:
+            end_dt = pendulum.now("UTC")
+        else:
+            end_dt = pendulum.parse(datetime.end_value)
+
+        participant_params: Dict[str, Any] = {
+            "page_size": 300,
+        }
+        meeting_params = {
+            "type": "previous_meetings",
+            "page_size": 300,
+        }
+        for user in client.get_users():
+            user_id = user["id"]
+            for meeting in client.get_meetings(user_id=user_id, params=meeting_params):
+                meeting_id = meeting["id"]
+                yield from client.get_participants(
+                    meeting_id=meeting_id,
+                    params=participant_params,
+                    start_date=start_dt,
+                    end_date=end_dt,
+                )
+
+    return meetings, users, participants
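The `participants` resource resolves its incremental cursor to a closed `[start_dt, end_dt]` window and keeps only rows whose `join_time` falls inside it. The same filter in isolation, on made-up rows:

import pendulum

start_dt = pendulum.parse("2024-01-01T00:00:00Z")
end_dt = pendulum.parse("2024-01-31T23:59:59Z")

rows = [
    {"join_time": "2024-01-15T10:00:00Z"},  # inside the window: kept
    {"join_time": "2024-02-02T09:00:00Z"},  # after end_dt: dropped
]
kept = [r for r in rows if start_dt <= pendulum.parse(r["join_time"]) <= end_dt]
print(kept)  # [{'join_time': '2024-01-15T10:00:00Z'}]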
ingestr/src/zoom/helpers.py
CHANGED
@@ -1,6 +1,8 @@
 import time
 from typing import Any, Dict, Iterator, Optional

+import pendulum
+
 from ingestr.src.http_client import create_client

@@ -47,6 +49,7 @@ class ZoomClient:

     def get_users(self) -> Iterator[Dict[str, Any]]:
         url = f"{self.base_url}/users"
+
         params = {"page_size": 1000}
         while True:
             response = self.session.get(url, headers=self._headers(), params=params)
@@ -59,6 +62,7 @@ class ZoomClient:
             break
         params["next_page_token"] = token

+    # https://developers.zoom.us/docs/api/rest/reference/zoom-api/methods/#operation/meetings
     def get_meetings(
         self, user_id: str, params: Dict[str, Any]
     ) -> Iterator[Dict[str, Any]]:
@@ -74,3 +78,25 @@ class ZoomClient:
             if not token:
                 break
             params["next_page_token"] = token
+
+    # https://developers.zoom.us/docs/api/rest/reference/zoom-api/methods/#operation/reportMeetingParticipants
+    def get_participants(
+        self,
+        meeting_id: str,
+        params: Dict[str, Any],
+        start_date: pendulum.DateTime,
+        end_date: pendulum.DateTime,
+    ) -> Iterator[Dict[str, Any]]:
+        url = f"{self.base_url}/report/meetings/{meeting_id}/participants"
+        while True:
+            response = self.session.get(url, headers=self._headers(), params=params)
+            response.raise_for_status()
+            data = response.json()
+            for item in data.get("participants", []):
+                join_time = pendulum.parse(item["join_time"])
+                if join_time >= start_date and join_time <= end_date:
+                    yield item
+            token = data.get("next_page_token")
+            if not token:
+                break
+            params["next_page_token"] = token
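All three ZoomClient methods page the same way: follow `next_page_token` until the API stops returning one. The loop extracted as a generic sketch (session construction and auth headers are assumed, not shown in this diff):

import requests

def paginate(session: requests.Session, url: str, headers: dict,
             params: dict, items_key: str):
    # Generic form of the next_page_token loop used by get_users,
    # get_meetings, and get_participants above.
    while True:
        response = session.get(url, headers=headers, params=params)
        response.raise_for_status()
        data = response.json()
        yield from data.get(items_key, [])
        token = data.get("next_page_token")
        if not token:
            break
        params["next_page_token"] = token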
{ingestr-0.13.63.dist-info → ingestr-0.13.65.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.63
+Version: 0.13.65
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
{ingestr-0.13.63.dist-info → ingestr-0.13.65.dist-info}/RECORD
CHANGED
@@ -2,16 +2,16 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
 ingestr/main.py,sha256=taDyHyaVSpB17iNLl8zA0gmr4CqDO-MSTQX1CaRBB9U,26364
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
-ingestr/src/buildinfo.py,sha256=
-ingestr/src/destinations.py,sha256=
+ingestr/src/buildinfo.py,sha256=RDAMEy23q-LmXSYODsQMAghvn5syzLPD4mQO_GpxC0c,21
+ingestr/src/destinations.py,sha256=ZJTbTn1K9oXinL19dTGQDUrft5C9fjrpSlTw1CLQhuM,21749
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=
+ingestr/src/factory.py,sha256=AJCvlK4M1sIpAAks1K-xsR_uxziIxru74mj572zixhg,6546
 ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
 ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
-ingestr/src/sources.py,sha256=
+ingestr/src/sources.py,sha256=K554sV-RW29vj0c84r_rR0yZTl5HVvQ9vqbQOkJ9E7M,103386
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -31,8 +31,8 @@ ingestr/src/arrow/__init__.py,sha256=8fEntgHseKjFMiPQIzxYzw_raicNsEgnveLi1IzBca0
 ingestr/src/asana_source/__init__.py,sha256=QwQTCb5PXts8I4wLHG9UfRP-5ChfjSe88XAVfxMV5Ag,8183
 ingestr/src/asana_source/helpers.py,sha256=PukcdDQWIGqnGxuuobbLw4hUy4-t6gxXg_XywR7Lg9M,375
 ingestr/src/asana_source/settings.py,sha256=-2tpdkwh04RvLKFvwQodnFLYn9MaxOO1hsebGnDQMTU,2829
-ingestr/src/attio/__init__.py,sha256=
-ingestr/src/attio/helpers.py,sha256=
+ingestr/src/attio/__init__.py,sha256=CLejJjp5vGkt6r18nfNNZ-Xjc1SZgQ5IlcBW5XFQR90,3243
+ingestr/src/attio/helpers.py,sha256=fCySmG5E6Iyh3Nm9a-HGbHNedxPH_2_otXYMTQsCibw,2185
 ingestr/src/chess/__init__.py,sha256=y0Q8aKBigeKf3N7wuB_gadMQjVJzBPUT8Jhp1ObEWjk,6812
 ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
 ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
@@ -81,7 +81,8 @@ ingestr/src/kinesis/helpers.py,sha256=SO2cFmWNGcykUYmjHdfxWsOQSkLQXyhFtfWnkcUOM0
 ingestr/src/klaviyo/__init__.py,sha256=o_noUgbxLk36s4f9W56_ibPorF0n7kVapPUlV0p-jfA,7875
 ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
 ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
-ingestr/src/linear/__init__.py,sha256=
+ingestr/src/linear/__init__.py,sha256=attlRyodShvAZ5dmDJXgoKrYhwElpLMQTSaRaAGEqC0,5941
+ingestr/src/linear/helpers.py,sha256=VR_CBgTfMVTH6ULcSLKyrssGoJpJx8VFZrmBeYZzFfc,1995
 ingestr/src/linkedin_ads/__init__.py,sha256=CAPWFyV24loziiphbLmODxZUXZJwm4JxlFkr56q0jfo,1855
 ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffWJxmAadZMbpTvf0xkQc,198
 ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnbLfxzds,4498
@@ -135,8 +136,8 @@ ingestr/src/zendesk/helpers/__init__.py,sha256=YTJejCiUjfIcsj9FrkY0l-JGYDI7RRte1
 ingestr/src/zendesk/helpers/api_helpers.py,sha256=dMkNn4ZQXgJTDOXAAXdmRt41phNFoRhYyPaLJih0pZY,4184
 ingestr/src/zendesk/helpers/credentials.py,sha256=EWyi0ZlxWFgd1huD86KNF4dApLHgmabqWksFpEg1cf0,1332
 ingestr/src/zendesk/helpers/talk_api.py,sha256=TSVSOErsBZvxcX91LMhAgvy6yLSYvpuVfOyKViOHtvA,4718
-ingestr/src/zoom/__init__.py,sha256=
-ingestr/src/zoom/helpers.py,sha256=
+ingestr/src/zoom/__init__.py,sha256=xkkqeWeNkD3K45j1WvCrn0YAwA1U04z01193YJMcqwU,3280
+ingestr/src/zoom/helpers.py,sha256=63DypaYdeEUpq7W9AJkKRVpE0z0YmlzOZP-9c1La6cc,3710
 ingestr/testdata/.gitignore,sha256=DFzYYOpqdTiT7S1HjCT-jffZSmEvFZge295_upAB0FY,13
 ingestr/testdata/create_replace.csv,sha256=TQDbOSkRKq9ZZv1d68Qjwh94aIyUQ-oEwxpJIrd3YK8,1060
 ingestr/testdata/delete_insert_expected.csv,sha256=wbj7uboVWwm3sNMh1n7f4-OKFEQJv1s96snjEHp9nkg,336
@@ -146,8 +147,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
 ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
-ingestr-0.13.63.dist-info/METADATA,sha256=
-ingestr-0.13.63.dist-info/WHEEL,sha256=
-ingestr-0.13.63.dist-info/entry_points.txt,sha256=
-ingestr-0.13.63.dist-info/licenses/LICENSE.md,sha256=
-ingestr-0.13.63.dist-info/RECORD,,
+ingestr-0.13.65.dist-info/METADATA,sha256=PWjju7xvb3O9Ya0IRwj-zti34_sN6sGSY3YbROP3KKs,15027
+ingestr-0.13.65.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.65.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.65.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.65.dist-info/RECORD,,
{ingestr-0.13.63.dist-info → ingestr-0.13.65.dist-info}/WHEEL
File without changes
{ingestr-0.13.63.dist-info → ingestr-0.13.65.dist-info}/entry_points.txt
File without changes
{ingestr-0.13.63.dist-info → ingestr-0.13.65.dist-info}/licenses/LICENSE.md
File without changes