ingestr 0.13.62__py3-none-any.whl → 0.13.64__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.13.62"
1
+ version = "v0.13.64"
@@ -1,19 +1,31 @@
1
1
  import abc
2
2
  import base64
3
3
  import csv
4
+ import datetime
4
5
  import json
5
6
  import os
6
7
  import shutil
8
+ import struct
7
9
  import tempfile
8
10
  from urllib.parse import parse_qs, quote, urlparse
9
11
 
10
12
  import dlt
11
13
  import dlt.destinations.impl.filesystem.filesystem
12
14
  from dlt.common.configuration.specs import AwsCredentials
15
+ from dlt.common.destination.capabilities import DestinationCapabilitiesContext
16
+ from dlt.common.schema import Schema
13
17
  from dlt.common.storages.configuration import FileSystemCredentials
14
18
  from dlt.destinations.impl.clickhouse.configuration import (
15
19
  ClickHouseCredentials,
16
20
  )
21
+ from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration
22
+ from dlt.destinations.impl.mssql.mssql import (
23
+ HINT_TO_MSSQL_ATTR,
24
+ MsSqlJobClient,
25
+ )
26
+ from dlt.destinations.impl.mssql.sql_client import (
27
+ PyOdbcMsSqlClient,
28
+ )
17
29
 
18
30
  from ingestr.src.errors import MissingValueError
19
31
  from ingestr.src.loader import load_dlt_file
@@ -143,9 +155,89 @@ class DuckDBDestination(GenericSqlDestination):
143
155
  return dlt.destinations.duckdb(uri, **kwargs)
144
156
 
145
157
 
158
def handle_datetimeoffset(dto_value: "bytes | None") -> "datetime.datetime | None":
    """Convert a raw SQL Server DATETIMEOFFSET buffer into an aware datetime.

    Meant to be registered as a pyodbc output converter. pyodbc invokes
    converters with ``None`` for SQL NULL, so ``None`` is passed through
    unchanged instead of being handed to ``struct.unpack`` (which would raise
    ``TypeError``).

    Args:
        dto_value: The packed little-endian value (six 16-bit ints, one
            unsigned 32-bit fraction, two 16-bit timezone offsets), or
            ``None`` for a NULL column.

    Returns:
        A timezone-aware ``datetime.datetime``, or ``None`` when the input is
        ``None``.

    Raises:
        struct.error: If the buffer does not match the expected layout.
    """
    if dto_value is None:
        return None

    # ref: https://github.com/mkleehammer/pyodbc/issues/134#issuecomment-281739794
    (
        year,
        month,
        day,
        hour,
        minute,
        second,
        fraction,
        tz_hours,
        tz_minutes,
    ) = struct.unpack(
        "<6hI2h", dto_value
    )  # e.g., (2017, 3, 16, 10, 35, 18, 500000000, -6, 0)

    utc_offset = datetime.timedelta(hours=tz_hours, minutes=tz_minutes)
    return datetime.datetime(
        year,
        month,
        day,
        hour,
        minute,
        second,
        # The fraction is scaled down by 1000 to fit datetime's microsecond
        # field (500000000 in the example above becomes 500000).
        fraction // 1000,
        datetime.timezone(utc_offset),
    )
173
+
174
+
175
class OdbcMsSqlClient(PyOdbcMsSqlClient):
    """pyodbc MSSQL client that can also connect with an access token.

    When the ODBC settings request ``ActiveDirectoryAccessToken``
    authentication, the value stored under ``PWD`` is treated as the token and
    passed to ``pyodbc.connect`` through ``attrs_before``. Every other
    authentication mode is delegated to the parent implementation.
    """

    # Key used in ``attrs_before`` to hand the serialized token to pyodbc.
    SQL_COPT_SS_ACCESS_TOKEN = 1256
    # DSN entries left out of the connection string on the token path.
    SKIP_CREDENTIALS = {"PWD", "AUTHENTICATION", "UID"}

    def open_connection(self):
        """Open the connection, store it on ``self._conn`` and return it."""
        odbc_settings = self.credentials._get_odbc_dsn_dict()
        auth_mode = odbc_settings.get("AUTHENTICATION", "").strip().lower()
        if auth_mode != "activedirectoryaccesstoken":
            return super().open_connection()

        import pyodbc  # type: ignore

        dsn_parts = []
        for key, value in odbc_settings.items():
            if key in self.SKIP_CREDENTIALS:
                continue
            dsn_parts.append(f"{key}={value}")
        connection_string = ";".join(dsn_parts)

        connect_options = {
            "timeout": self.credentials.connect_timeout,
            "attrs_before": {
                self.SQL_COPT_SS_ACCESS_TOKEN: self.serialize_token(
                    odbc_settings["PWD"]
                ),
            },
        }
        connection = pyodbc.connect(connection_string, **connect_options)
        self._conn = connection

        # https://github.com/mkleehammer/pyodbc/wiki/Using-an-Output-Converter-function
        connection.add_output_converter(-155, handle_datetimeoffset)
        connection.autocommit = True
        return self._conn

    def serialize_token(self, token):
        """Return ``token`` as UTF-16-LE bytes prefixed with their length.

        The length prefix is a little-endian 4-byte signed integer counting
        the encoded bytes, not the characters.
        """
        # https://github.com/mkleehammer/pyodbc/issues/228#issuecomment-494773723
        payload = token.encode("utf_16_le")
        length_prefix = struct.pack("<i", len(payload))
        return length_prefix + payload
210
+
211
+
212
class MsSqlClient(MsSqlJobClient):
    """MSSQL job client wired to ``OdbcMsSqlClient`` as its SQL client."""

    def __init__(
        self,
        schema: Schema,
        config: MsSqlClientConfiguration,
        capabilities: DestinationCapabilitiesContext,
    ) -> None:
        """Build the ODBC SQL client and initialise the job client around it.

        Args:
            schema: Schema used to normalise the dataset names.
            config: MSSQL client configuration (credentials, index settings).
            capabilities: Destination capabilities; also supplies the type mapper.
        """
        dataset_name = config.normalize_dataset_name(schema)
        staging_dataset_name = config.normalize_staging_dataset_name(schema)
        odbc_client = OdbcMsSqlClient(
            dataset_name,
            staging_dataset_name,
            config.credentials,
            capabilities,
        )

        # Initialisation starts *after* MsSqlJobClient in the MRO, so that
        # class's own __init__ is skipped and this client is installed instead.
        # NOTE(review): presumably MsSqlJobClient.__init__ would build its own
        # SQL client - confirm against the dlt version in use.
        super(MsSqlJobClient, self).__init__(schema, config, odbc_client)

        self.config: MsSqlClientConfiguration = config
        self.sql_client = odbc_client
        if self.config.create_indexes:
            self.active_hints = HINT_TO_MSSQL_ATTR
        else:
            self.active_hints = {}
        self.type_mapper = capabilities.get_type_mapper()
230
+
231
+
232
class MsSqlDestImpl(dlt.destinations.mssql):
    """dlt ``mssql`` destination that reports ``MsSqlClient`` as its client class."""

    @property
    def client_class(self):
        """Return the job client class used for this destination."""
        job_client_cls = MsSqlClient
        return job_client_cls
236
+
237
+
146
238
  class MsSQLDestination(GenericSqlDestination):
147
239
  def dlt_dest(self, uri: str, **kwargs):
148
- return dlt.destinations.mssql(credentials=uri, **kwargs)
240
+ return MsSqlDestImpl(credentials=uri, **kwargs)
149
241
 
150
242
 
151
243
  class DatabricksDestination(GenericSqlDestination):
ingestr/src/factory.py CHANGED
@@ -79,6 +79,7 @@ SQL_SOURCE_SCHEMES = [
79
79
  "crate",
80
80
  "duckdb",
81
81
  "mssql",
82
+ "mssql+pyodbc",
82
83
  "mysql",
83
84
  "mysql+pymysql",
84
85
  "mysql+mysqlconnector",
ingestr/src/sources.py CHANGED
@@ -258,8 +258,53 @@ class SqlSource:
258
258
  # override the query adapters, the only one we want is the one here in the case of custom queries
259
259
  query_adapters = [custom_query_variable_subsitution(query_value, kwargs)]
260
260
 
261
+ credentials = ConnectionStringCredentials(uri)
262
+ if uri.startswith("mssql://"):
263
+ parsed_uri = urlparse(uri)
264
+ params = parse_qs(parsed_uri.query)
265
+ params = {k.lower(): v for k, v in params.items()}
266
+ if params.get("authentication") == ["ActiveDirectoryAccessToken"]:
267
+ import pyodbc # type: ignore
268
+ from sqlalchemy import create_engine
269
+
270
+ from ingestr.src.destinations import (
271
+ OdbcMsSqlClient,
272
+ handle_datetimeoffset,
273
+ )
274
+
275
+ cfg = {
276
+ "DRIVER": params.get("driver", ["ODBC Driver 18 for SQL Server"])[
277
+ 0
278
+ ],
279
+ "SERVER": f"{parsed_uri.hostname},{parsed_uri.port or 1433}",
280
+ "DATABASE": parsed_uri.path.lstrip("/"),
281
+ }
282
+ for k, v in params.items():
283
+ if k.lower() not in ["driver", "authentication", "connect_timeout"]:
284
+ cfg[k.upper()] = v[0]
285
+
286
+ token = OdbcMsSqlClient.serialize_token(None, parsed_uri.password) # type: ignore[arg-type]
287
+ dsn = ";".join([f"{k}={v}" for k, v in cfg.items()])
288
+
289
+ def creator():
290
+ connection = pyodbc.connect(
291
+ dsn,
292
+ autocommit=True,
293
+ timeout=kwargs.get("connect_timeout", 30),
294
+ attrs_before={
295
+ OdbcMsSqlClient.SQL_COPT_SS_ACCESS_TOKEN: token,
296
+ },
297
+ )
298
+ connection.add_output_converter(-155, handle_datetimeoffset)
299
+ return connection
300
+
301
+ credentials = create_engine(
302
+ "mssql+pyodbc://",
303
+ creator=creator,
304
+ )
305
+
261
306
  builder_res = self.table_builder(
262
- credentials=ConnectionStringCredentials(uri),
307
+ credentials=credentials,
263
308
  schema=table_fields.dataset,
264
309
  table=table_fields.table,
265
310
  incremental=incremental,
@@ -2812,7 +2857,7 @@ class IsocPulseSource:
2812
2857
  src = pulse_source(
2813
2858
  token=token[0],
2814
2859
  start_date=start_date.strftime("%Y-%m-%d"),
2815
- end_date=str(end_date) if end_date else None,
2860
+ end_date=end_date.strftime("%Y-%m-%d") if end_date else None,
2816
2861
  metric=metric,
2817
2862
  opts=opts,
2818
2863
  )
@@ -2915,7 +2960,7 @@ class ZoomSource:
2915
2960
 
2916
2961
  from ingestr.src.zoom import zoom_source
2917
2962
 
2918
- if table not in {"meetings"}:
2963
+ if table not in {"meetings", "users", "participants"}:
2919
2964
  raise UnsupportedResourceError(table, "Zoom")
2920
2965
 
2921
2966
  return zoom_source(
@@ -42,14 +42,58 @@ def zoom_source(
42
42
  end_dt = pendulum.now("UTC")
43
43
  else:
44
44
  end_dt = pendulum.parse(datetime.end_value)
45
+
45
46
  base_params: Dict[str, Any] = {
46
47
  "type": "scheduled",
47
48
  "page_size": 300,
48
49
  "from": start_dt.to_date_string(),
49
50
  "to": end_dt.to_date_string(),
50
51
  }
52
+
51
53
  for user in client.get_users():
52
54
  user_id = user["id"]
53
55
  yield from client.get_meetings(user_id, base_params)
54
56
 
55
- return meetings
57
@dlt.resource(write_disposition="merge", primary_key="id")
def users() -> Iterable[TDataItem]:
    """Yield every item produced by ``client.get_users()``.

    Declared as a dlt resource with the ``merge`` write disposition and
    ``id`` as the primary key.
    """
    yield from client.get_users()
60
+
61
@dlt.resource(write_disposition="merge", primary_key="id")
def participants(
    datetime: dlt.sources.incremental[TAnyDateTime] = dlt.sources.incremental(
        "join_time",
        initial_value=start_date.isoformat(),
        end_value=end_date.isoformat() if end_date is not None else None,
        range_start="closed",
        range_end="closed",
    ),
) -> Iterable[TDataItem]:
    """Yield participants of every user's previous meetings.

    Iterates over all users, then over each user's ``previous_meetings``,
    and yields the participants that ``client.get_participants`` returns for
    the window between the incremental start and the end bound.

    Args:
        datetime: dlt incremental cursor on ``join_time``; its ``last_value``
            (when set) is the lower bound and its ``end_value`` the upper
            bound when ``end_date`` was given.
    """
    if datetime.last_value:
        start_dt = pendulum.parse(datetime.last_value)
    else:
        start_dt = pendulum.parse(start_date)

    if end_date is None:
        end_dt = pendulum.now("UTC")
    else:
        end_dt = pendulum.parse(datetime.end_value)

    for user in client.get_users():
        user_id = user["id"]
        # The client's paginated getters write "next_page_token" into the
        # dict they receive. A fresh dict per request keeps a token left over
        # from one user's (or meeting's) listing out of the next first request.
        meeting_params: Dict[str, Any] = {
            "type": "previous_meetings",
            "page_size": 300,
        }
        for meeting in client.get_meetings(user_id=user_id, params=meeting_params):
            participant_params: Dict[str, Any] = {
                "page_size": 300,
            }
            yield from client.get_participants(
                meeting_id=meeting["id"],
                params=participant_params,
                start_date=start_dt,
                end_date=end_dt,
            )
98
+
99
+ return meetings, users, participants
@@ -1,6 +1,8 @@
1
1
  import time
2
2
  from typing import Any, Dict, Iterator, Optional
3
3
 
4
+ import pendulum
5
+
4
6
  from ingestr.src.http_client import create_client
5
7
 
6
8
 
@@ -47,6 +49,7 @@ class ZoomClient:
47
49
 
48
50
  def get_users(self) -> Iterator[Dict[str, Any]]:
49
51
  url = f"{self.base_url}/users"
52
+
50
53
  params = {"page_size": 1000}
51
54
  while True:
52
55
  response = self.session.get(url, headers=self._headers(), params=params)
@@ -59,6 +62,7 @@ class ZoomClient:
59
62
  break
60
63
  params["next_page_token"] = token
61
64
 
65
+ # https://developers.zoom.us/docs/api/rest/reference/zoom-api/methods/#operation/meetings
62
66
  def get_meetings(
63
67
  self, user_id: str, params: Dict[str, Any]
64
68
  ) -> Iterator[Dict[str, Any]]:
@@ -74,3 +78,25 @@ class ZoomClient:
74
78
  if not token:
75
79
  break
76
80
  params["next_page_token"] = token
81
+
82
+ # https://developers.zoom.us/docs/api/rest/reference/zoom-api/methods/#operation/reportMeetingParticipants
83
def get_participants(
    self,
    meeting_id: str,
    params: Dict[str, Any],
    start_date: pendulum.DateTime,
    end_date: pendulum.DateTime,
) -> Iterator[Dict[str, Any]]:
    """Yield a meeting's report participants who joined inside a time window.

    Follows ``next_page_token`` until the API stops returning one.

    Args:
        meeting_id: Meeting whose participant report is requested.
        params: Base query parameters. The dict is copied and is not
            modified by this call.
        start_date: Inclusive lower bound for ``join_time``.
        end_date: Inclusive upper bound for ``join_time``.

    Yields:
        Participant dicts whose ``join_time`` lies within the window.

    Raises:
        Whatever ``response.raise_for_status()`` raises on an HTTP error
        status.
    """
    url = f"{self.base_url}/report/meetings/{meeting_id}/participants"
    # Paginate on a private copy so the page token written below never leaks
    # into the caller's dict, which may be reused for the next meeting.
    query: Dict[str, Any] = dict(params)
    while True:
        response = self.session.get(url, headers=self._headers(), params=query)
        response.raise_for_status()
        data = response.json()
        for item in data.get("participants", []):
            join_time = pendulum.parse(item["join_time"])
            if start_date <= join_time <= end_date:
                yield item
        token = data.get("next_page_token")
        if not token:
            break
        query["next_page_token"] = token
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.62
3
+ Version: 0.13.64
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -2,16 +2,16 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
2
2
  ingestr/main.py,sha256=taDyHyaVSpB17iNLl8zA0gmr4CqDO-MSTQX1CaRBB9U,26364
3
3
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
4
4
  ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
5
- ingestr/src/buildinfo.py,sha256=TQVet9YuexpAbyia8_nOwytK_io6rEVmB-flmAr4z8E,21
6
- ingestr/src/destinations.py,sha256=YU7c5cNqViCIJ9NDhgGuorh5jMvLi7yViPWpEJ57Xx0,18788
5
+ ingestr/src/buildinfo.py,sha256=PuIV-cer5xdwwdtvFJ-yA_CjyfoO-SckFPSRHGfPM4I,21
6
+ ingestr/src/destinations.py,sha256=ZJTbTn1K9oXinL19dTGQDUrft5C9fjrpSlTw1CLQhuM,21749
7
7
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
8
- ingestr/src/factory.py,sha256=nYXINj54QtMGuQv9oCXbHO7DFuAudqZfijkSvSXEVj8,6526
8
+ ingestr/src/factory.py,sha256=AJCvlK4M1sIpAAks1K-xsR_uxziIxru74mj572zixhg,6546
9
9
  ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
10
10
  ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
11
11
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
12
12
  ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
13
13
  ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
14
- ingestr/src/sources.py,sha256=nTzCi_RZhUesBkXyUTAIoAUgj5iZSQJ_D8mId8rq8mE,101467
14
+ ingestr/src/sources.py,sha256=K554sV-RW29vj0c84r_rR0yZTl5HVvQ9vqbQOkJ9E7M,103386
15
15
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
16
16
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
17
17
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -135,8 +135,8 @@ ingestr/src/zendesk/helpers/__init__.py,sha256=YTJejCiUjfIcsj9FrkY0l-JGYDI7RRte1
135
135
  ingestr/src/zendesk/helpers/api_helpers.py,sha256=dMkNn4ZQXgJTDOXAAXdmRt41phNFoRhYyPaLJih0pZY,4184
136
136
  ingestr/src/zendesk/helpers/credentials.py,sha256=EWyi0ZlxWFgd1huD86KNF4dApLHgmabqWksFpEg1cf0,1332
137
137
  ingestr/src/zendesk/helpers/talk_api.py,sha256=TSVSOErsBZvxcX91LMhAgvy6yLSYvpuVfOyKViOHtvA,4718
138
- ingestr/src/zoom/__init__.py,sha256=6NdHXLv438FLD-cUEgmgnuJi__70-C88bTGOLqYHqbQ,1736
139
- ingestr/src/zoom/helpers.py,sha256=Y6fjIpQTWUAkKXJKfwjJnZc6wlFlqzJfWqq34WZhrcU,2669
138
+ ingestr/src/zoom/__init__.py,sha256=xkkqeWeNkD3K45j1WvCrn0YAwA1U04z01193YJMcqwU,3280
139
+ ingestr/src/zoom/helpers.py,sha256=63DypaYdeEUpq7W9AJkKRVpE0z0YmlzOZP-9c1La6cc,3710
140
140
  ingestr/testdata/.gitignore,sha256=DFzYYOpqdTiT7S1HjCT-jffZSmEvFZge295_upAB0FY,13
141
141
  ingestr/testdata/create_replace.csv,sha256=TQDbOSkRKq9ZZv1d68Qjwh94aIyUQ-oEwxpJIrd3YK8,1060
142
142
  ingestr/testdata/delete_insert_expected.csv,sha256=wbj7uboVWwm3sNMh1n7f4-OKFEQJv1s96snjEHp9nkg,336
@@ -146,8 +146,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
146
146
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
147
147
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
148
148
  ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
149
- ingestr-0.13.62.dist-info/METADATA,sha256=GdxhHlw_v3U_5vL1ZTbDFxRMr7RLBnWDA-dVwG74OtQ,15027
150
- ingestr-0.13.62.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
151
- ingestr-0.13.62.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
152
- ingestr-0.13.62.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
153
- ingestr-0.13.62.dist-info/RECORD,,
149
+ ingestr-0.13.64.dist-info/METADATA,sha256=QaBM1vQbQPYKgzDga2AY6uKibQbd1SkavE3dw92Pw-o,15027
150
+ ingestr-0.13.64.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
151
+ ingestr-0.13.64.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
152
+ ingestr-0.13.64.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
153
+ ingestr-0.13.64.dist-info/RECORD,,