ingestr 0.13.60-py3-none-any.whl → 0.13.62-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
- version = "v0.13.60"
+ version = "v0.13.62"
ingestr/src/destinations.py CHANGED
@@ -1,3 +1,4 @@
+ import abc
  import base64
  import csv
  import json
@@ -9,6 +10,7 @@ from urllib.parse import parse_qs, quote, urlparse
  import dlt
  import dlt.destinations.impl.filesystem.filesystem
  from dlt.common.configuration.specs import AwsCredentials
+ from dlt.common.storages.configuration import FileSystemCredentials
  from dlt.destinations.impl.clickhouse.configuration import (
      ClickHouseCredentials,
  )
@@ -111,6 +113,14 @@ class BigQueryDestination:
      pass


+ class CrateDBDestination(GenericSqlDestination):
+     def dlt_dest(self, uri: str, **kwargs):
+         uri = uri.replace("cratedb://", "postgres://")
+         import dlt_cratedb.impl.cratedb.factory
+
+         return dlt_cratedb.impl.cratedb.factory.cratedb(credentials=uri, **kwargs)
+
+
  class PostgresDestination(GenericSqlDestination):
      def dlt_dest(self, uri: str, **kwargs):
          return dlt.destinations.postgres(credentials=uri, **kwargs)
@@ -386,43 +396,62 @@ class ClickhouseDestination:
      pass


- class S3FSClient(dlt.destinations.impl.filesystem.filesystem.FilesystemClient):
+ class BlobFSClient(dlt.destinations.impl.filesystem.filesystem.FilesystemClient):
      @property
      def dataset_path(self):
          # override to remove dataset path
          return self.bucket_path


- class S3FS(dlt.destinations.filesystem):
+ class BlobFS(dlt.destinations.filesystem):
      @property
      def client_class(self):
-         return S3FSClient
+         return BlobFSClient


- class S3Destination:
+ class SqliteDestination(GenericSqlDestination):
      def dlt_dest(self, uri: str, **kwargs):
-         parsed_uri = urlparse(uri)
-         params = parse_qs(parsed_uri.query)
+         return dlt.destinations.sqlalchemy(credentials=uri)

-         access_key_id = params.get("access_key_id", [None])[0]
-         if access_key_id is None:
-             raise MissingValueError("access_key_id", "S3")
+     def dlt_run_params(self, uri: str, table: str, **kwargs):
+         return {
+             # https://dlthub.com/docs/dlt-ecosystem/destinations/sqlalchemy#dataset-files
+             "dataset_name": "main",
+             "table_name": table,
+         }

-         secret_access_key = params.get("secret_access_key", [None])[0]
-         if secret_access_key is None:
-             raise MissingValueError("secret_access_key", "S3")

-         endpoint_url = params.get("endpoint_url", [None])[0]
-         if endpoint_url is not None:
-             parsed_endpoint = urlparse(endpoint_url)
-             if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
-                 raise ValueError("Invalid endpoint_url. Must be a valid URL.")
+ class MySqlDestination(GenericSqlDestination):
+     def dlt_dest(self, uri: str, **kwargs):
+         return dlt.destinations.sqlalchemy(credentials=uri)

-         creds = AwsCredentials(
-             aws_access_key_id=access_key_id,
-             aws_secret_access_key=secret_access_key,
-             endpoint_url=endpoint_url,
-         )
+     def dlt_run_params(self, uri: str, table: str, **kwargs):
+         parsed = urlparse(uri)
+         database = parsed.path.lstrip("/")
+         if not database:
+             raise ValueError("You need to specify a database")
+         return {
+             "dataset_name": database,
+             "table_name": table,
+         }
+
+
+ class BlobStorageDestination(abc.ABC):
+     @abc.abstractmethod
+     def credentials(self, params: dict) -> FileSystemCredentials:
+         """Build credentials for the blob storage destination."""
+         pass
+
+     @property
+     @abc.abstractmethod
+     def protocol(self) -> str:
+         """The protocol used for the blob storage destination."""
+         pass
+
+     def dlt_dest(self, uri: str, **kwargs):
+         parsed_uri = urlparse(uri)
+         params = parse_qs(parsed_uri.query)
+         creds = self.credentials(params)

          dest_table = kwargs["dest_table"]

@@ -442,7 +471,7 @@ class S3Destination:
          base_path = "/".join(table_parts[:-1])

          opts = {
-             "bucket_url": f"s3://{base_path}",
+             "bucket_url": f"{self.protocol}://{base_path}",
              "credentials": creds,
              # supresses dlt warnings about dataset name normalization.
              # we don't use dataset names in S3 so it's fine to disable this.
@@ -452,7 +481,7 @@ class S3Destination:
          if layout is not None:
              opts["layout"] = layout

-         return S3FS(**opts)  # type: ignore
+         return BlobFS(**opts)  # type: ignore

      def validate_table(self, table: str):
          table = table.strip("/ ")
@@ -470,28 +499,56 @@ class S3Destination:
      pass


- class SqliteDestination(GenericSqlDestination):
-     def dlt_dest(self, uri: str, **kwargs):
-         return dlt.destinations.sqlalchemy(credentials=uri)
+ class S3Destination(BlobStorageDestination):
+     @property
+     def protocol(self) -> str:
+         return "s3"

-     def dlt_run_params(self, uri: str, table: str, **kwargs):
-         return {
-             # https://dlthub.com/docs/dlt-ecosystem/destinations/sqlalchemy#dataset-files
-             "dataset_name": "main",
-             "table_name": table,
-         }
+     def credentials(self, params: dict) -> FileSystemCredentials:
+         access_key_id = params.get("access_key_id", [None])[0]
+         if access_key_id is None:
+             raise MissingValueError("access_key_id", "S3")

+         secret_access_key = params.get("secret_access_key", [None])[0]
+         if secret_access_key is None:
+             raise MissingValueError("secret_access_key", "S3")

- class MySqlDestination(GenericSqlDestination):
-     def dlt_dest(self, uri: str, **kwargs):
-         return dlt.destinations.sqlalchemy(credentials=uri)
+         endpoint_url = params.get("endpoint_url", [None])[0]
+         if endpoint_url is not None:
+             parsed_endpoint = urlparse(endpoint_url)
+             if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
+                 raise ValueError("Invalid endpoint_url. Must be a valid URL.")

-     def dlt_run_params(self, uri: str, table: str, **kwargs):
-         parsed = urlparse(uri)
-         database = parsed.path.lstrip("/")
-         if not database:
-             raise ValueError("You need to specify a database")
-         return {
-             "dataset_name": database,
-             "table_name": table,
-         }
+         return AwsCredentials(
+             aws_access_key_id=access_key_id,
+             aws_secret_access_key=secret_access_key,
+             endpoint_url=endpoint_url,
+         )
+
+
+ class GCSDestination(BlobStorageDestination):
+     @property
+     def protocol(self) -> str:
+         return "gs"
+
+     def credentials(self, params: dict) -> FileSystemCredentials:
+         """Builds GCS credentials from the provided parameters."""
+         credentials_path = params.get("credentials_path")
+         credentials_base64 = params.get("credentials_base64")
+         credentials_available = any(
+             map(
+                 lambda x: x is not None,
+                 [credentials_path, credentials_base64],
+             )
+         )
+         if credentials_available is False:
+             raise MissingValueError("credentials_path or credentials_base64", "GCS")
+
+         credentials = None
+         if credentials_path:
+             with open(credentials_path[0], "r") as f:
+                 credentials = json.load(f)
+         else:
+             credentials = json.loads(base64.b64decode(credentials_base64[0]).decode())  # type: ignore
+
+         return credentials
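
The refactor above extracts the shared filesystem wiring into the BlobStorageDestination ABC, so S3Destination and GCSDestination reduce to a protocol string plus a credentials builder. A minimal sketch of what a further backend could look like under this contract (AzureBlobDestination is hypothetical and not part of this release; it assumes dlt's AzureCredentials spec is available as shown):

    from dlt.common.configuration.specs import AzureCredentials
    from dlt.common.storages.configuration import FileSystemCredentials

    from ingestr.src.destinations import BlobStorageDestination


    class AzureBlobDestination(BlobStorageDestination):
        @property
        def protocol(self) -> str:
            # fsspec scheme for Azure Blob Storage
            return "az"

        def credentials(self, params: dict) -> FileSystemCredentials:
            # params holds parse_qs output: each value is a list of strings
            account_name = params.get("account_name", [None])[0]
            account_key = params.get("account_key", [None])[0]
            if account_name is None or account_key is None:
                raise ValueError("account_name and account_key are required")
            return AzureCredentials(
                azure_storage_account_name=account_name,
                azure_storage_account_key=account_key,
            )
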
ingestr/src/factory.py CHANGED
@@ -7,9 +7,11 @@ from ingestr.src.destinations import (
      AthenaDestination,
      BigQueryDestination,
      ClickhouseDestination,
+     CrateDBDestination,
      CsvDestination,
      DatabricksDestination,
      DuckDBDestination,
+     GCSDestination,
      MsSQLDestination,
      MySqlDestination,
      PostgresDestination,
@@ -46,6 +48,7 @@ from ingestr.src.sources import (
      KafkaSource,
      KinesisSource,
      KlaviyoSource,
+     LinearSource,
      LinkedInAdsSource,
      LocalCsvSource,
      MixpanelSource,
@@ -68,6 +71,7 @@ from ingestr.src.sources import (
      TikTokSource,
      TrustpilotSource,
      ZendeskSource,
+     ZoomSource,
  )

  SQL_SOURCE_SCHEMES = [
@@ -160,6 +164,7 @@ class SourceDestinationFactory:
          "appstore": AppleAppStoreSource,
          "gs": GCSSource,
          "linkedinads": LinkedInAdsSource,
+         "linear": LinearSource,
          "applovin": AppLovinSource,
          "applovinmax": ApplovinMaxSource,
          "salesforce": SalesforceSource,
@@ -178,9 +183,11 @@ class SourceDestinationFactory:
          "smartsheet": SmartsheetSource,
          "sftp": SFTPSource,
          "pinterest": PinterestSource,
+         "zoom": ZoomSource,
      }
      destinations: Dict[str, Type[DestinationProtocol]] = {
          "bigquery": BigQueryDestination,
+         "cratedb": CrateDBDestination,
          "databricks": DatabricksDestination,
          "duckdb": DuckDBDestination,
          "mssql": MsSQLDestination,
@@ -197,6 +204,7 @@ class SourceDestinationFactory:
          "clickhouse+native": ClickhouseDestination,
          "clickhouse": ClickhouseDestination,
          "s3": S3Destination,
+         "gs": GCSDestination,
          "sqlite": SqliteDestination,
          "mysql": MySqlDestination,
          "mysql+pymysql": MySqlDestination,
ingestr/src/isoc_pulse/__init__.py CHANGED
@@ -73,7 +73,7 @@ def pulse_source(
              "write_disposition": "merge",
              "primary_key": "date",
          },
-         "resources": resources, # type:ignore
+         "resources": resources,  # type:ignore
      }
      res = rest_api_resources(config)
      if metric == "net_loss":
ingestr/src/linear/__init__.py ADDED
@@ -0,0 +1,183 @@
+ from typing import Any, Dict, Iterable, Iterator, Optional
+
+ import dlt
+ import pendulum
+ import requests
+
+ LINEAR_GRAPHQL_ENDPOINT = "https://api.linear.app/graphql"
+
+
+ def _graphql(
+     api_key: str, query: str, variables: Optional[Dict[str, Any]] = None
+ ) -> Dict[str, Any]:
+     headers = {"Authorization": api_key, "Content-Type": "application/json"}
+     response = requests.post(
+         LINEAR_GRAPHQL_ENDPOINT,
+         json={"query": query, "variables": variables or {}},
+         headers=headers,
+     )
+     response.raise_for_status()
+     payload = response.json()
+     if "errors" in payload:
+         raise ValueError(str(payload["errors"]))
+     return payload["data"]
+
+
+ def _paginate(api_key: str, query: str, root: str) -> Iterator[Dict[str, Any]]:
+     cursor: Optional[str] = None
+     while True:
+         data = _graphql(api_key, query, {"cursor": cursor})[root]
+         for item in data["nodes"]:
+             yield item
+         if not data["pageInfo"]["hasNextPage"]:
+             break
+         cursor = data["pageInfo"]["endCursor"]
+
+
+ ISSUES_QUERY = """
+ query Issues($cursor: String) {
+   issues(first: 50, after: $cursor) {
+     nodes {
+       id
+       title
+       description
+       createdAt
+       updatedAt
+     }
+     pageInfo { hasNextPage endCursor }
+   }
+ }
+ """
+
+ PROJECTS_QUERY = """
+ query Projects($cursor: String) {
+   projects(first: 50, after: $cursor) {
+     nodes {
+       id
+       name
+       description
+       createdAt
+       updatedAt
+     }
+     pageInfo { hasNextPage endCursor }
+   }
+ }
+ """
+
+ TEAMS_QUERY = """
+ query Teams($cursor: String) {
+   teams(first: 50, after: $cursor) {
+     nodes {
+       id
+       name
+       key
+       description
+     }
+     pageInfo { hasNextPage endCursor }
+   }
+ }
+ """
+
+ USERS_QUERY = """
+ query Users($cursor: String) {
+   users(first: 50, after: $cursor) {
+     nodes {
+       id
+       name
+       displayName
+       email
+       createdAt
+       updatedAt
+     }
+     pageInfo { hasNextPage endCursor }
+   }
+ }
+ """
+
+
+ @dlt.source(name="linear", max_table_nesting=0)
+ def linear_source(
+     api_key: str,
+     start_date: pendulum.DateTime,
+     end_date: pendulum.DateTime | None = None,
+ ) -> Iterable[dlt.sources.DltResource]:
+     @dlt.resource(name="issues", primary_key="id", write_disposition="merge")
+     def issues(
+         updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
+             "updatedAt",
+             initial_value=start_date.isoformat(),
+             end_value=end_date.isoformat() if end_date else None,
+             range_start="closed",
+             range_end="closed",
+         ),
+     ) -> Iterator[Dict[str, Any]]:
+         if updated_at.last_value:
+             current_start_date = pendulum.parse(updated_at.last_value)
+         else:
+             current_start_date = pendulum.parse(start_date)
+
+         if updated_at.end_value:
+             current_end_date = pendulum.parse(updated_at.end_value)
+         else:
+             current_end_date = pendulum.now(tz="UTC")
+
+         for item in _paginate(api_key, ISSUES_QUERY, "issues"):
+             if pendulum.parse(item["updatedAt"]) >= current_start_date:
+                 if pendulum.parse(item["updatedAt"]) <= current_end_date:
+                     yield item
+
+     @dlt.resource(name="projects", primary_key="id", write_disposition="merge")
+     def projects(
+         updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
+             "updatedAt",
+             initial_value=start_date.isoformat(),
+             end_value=end_date.isoformat() if end_date else None,
+             range_start="closed",
+             range_end="closed",
+         ),
+     ) -> Iterator[Dict[str, Any]]:
+         if updated_at.last_value:
+             current_start_date = pendulum.parse(updated_at.last_value)
+         else:
+             current_start_date = pendulum.parse(start_date)
+
+         if updated_at.end_value:
+             current_end_date = pendulum.parse(updated_at.end_value)
+         else:
+             current_end_date = pendulum.now(tz="UTC")
+
+         for item in _paginate(api_key, PROJECTS_QUERY, "projects"):
+             if pendulum.parse(item["updatedAt"]) >= current_start_date:
+                 if pendulum.parse(item["updatedAt"]) <= current_end_date:
+                     yield item
+
+     @dlt.resource(name="teams", primary_key="id", write_disposition="merge")
+     def teams() -> Iterator[Dict[str, Any]]:
+         yield from _paginate(api_key, TEAMS_QUERY, "teams")
+
+     @dlt.resource(name="users", primary_key="id", write_disposition="merge")
+     def users(
+         updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
+             "updatedAt",
+             initial_value=start_date.isoformat(),
+             end_value=end_date.isoformat() if end_date else None,
+             range_start="closed",
+             range_end="closed",
+         ),
+     ) -> Iterator[Dict[str, Any]]:
+         if updated_at.last_value:
+             current_start_date = pendulum.parse(updated_at.last_value)
+         else:
+             current_start_date = pendulum.parse(start_date)
+
+         if updated_at.end_value:
+             current_end_date = pendulum.parse(updated_at.end_value)
+         else:
+             current_end_date = pendulum.now(tz="UTC")
+
+         for item in _paginate(api_key, USERS_QUERY, "users"):
+             if pendulum.parse(item["updatedAt"]) >= current_start_date:
+                 if pendulum.parse(item["updatedAt"]) <= current_end_date:
+                     yield item
+
+     return issues, projects, teams, users
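
The source above pages through Linear's GraphQL API with cursor-based pagination and filters rows client-side against the incremental updatedAt window. A hedged standalone usage sketch via dlt's public pipeline API (ingestr normally drives this through LinearSource; the API key and pipeline names are placeholders):

    import dlt
    import pendulum

    from ingestr.src.linear import linear_source

    pipeline = dlt.pipeline(
        pipeline_name="linear_demo",
        destination="duckdb",
        dataset_name="linear",
    )
    # load only the issues resource, starting from Jan 1, 2024
    source = linear_source(
        api_key="lin_api_XXX",
        start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
    ).with_resources("issues")
    print(pipeline.run(source))
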
ingestr/src/sources.py CHANGED
@@ -1885,7 +1885,7 @@ class GCSSource:
              endpoint = blob.parse_endpoint(path_to_file)
          except blob.UnsupportedEndpointError:
              raise ValueError(
-                 "S3 Source only supports specific formats files: csv, jsonl, parquet"
+                 "GCS Source only supports specific formats files: csv, jsonl, parquet"
              )
          except Exception as e:
              raise ValueError(
@@ -2851,3 +2851,77 @@ class PinterestSource:
              start_date=start_date,
              end_date=end_date,
          ).with_resources(table)
+
+
+ class LinearSource:
+     def handles_incrementality(self) -> bool:
+         return True
+
+     def dlt_source(self, uri: str, table: str, **kwargs):
+         parsed_uri = urlparse(uri)
+         params = parse_qs(parsed_uri.query)
+         api_key = params.get("api_key")
+         if api_key is None:
+             raise MissingValueError("api_key", "Linear")
+
+         if table not in ["issues", "projects", "teams", "users"]:
+             raise UnsupportedResourceError(table, "Linear")
+
+         start_date = kwargs.get("interval_start")
+         if start_date is not None:
+             start_date = ensure_pendulum_datetime(start_date)
+         else:
+             start_date = pendulum.datetime(2020, 1, 1).in_tz("UTC")
+
+         end_date = kwargs.get("interval_end")
+         if end_date is not None:
+             end_date = end_date = ensure_pendulum_datetime(end_date).in_tz("UTC")
+
+         from ingestr.src.linear import linear_source
+
+         return linear_source(
+             api_key=api_key[0],
+             start_date=start_date,
+             end_date=end_date,
+         ).with_resources(table)
+
+
+ class ZoomSource:
+     def handles_incrementality(self) -> bool:
+         return True
+
+     def dlt_source(self, uri: str, table: str, **kwargs):
+         parsed = urlparse(uri)
+         params = parse_qs(parsed.query)
+         client_id = params.get("client_id")
+         client_secret = params.get("client_secret")
+         account_id = params.get("account_id")
+
+         if not (client_id and client_secret and account_id):
+             raise MissingValueError(
+                 "client_id/client_secret/account_id",
+                 "Zoom",
+             )
+
+         start_date = kwargs.get("interval_start")
+         if start_date is not None:
+             start_date = ensure_pendulum_datetime(start_date)
+         else:
+             start_date = pendulum.datetime(2020, 1, 26).in_tz("UTC")
+
+         end_date = kwargs.get("interval_end")
+         if end_date is not None:
+             end_date = end_date = ensure_pendulum_datetime(end_date).in_tz("UTC")
+
+         from ingestr.src.zoom import zoom_source
+
+         if table not in {"meetings"}:
+             raise UnsupportedResourceError(table, "Zoom")
+
+         return zoom_source(
+             client_id=client_id[0],
+             client_secret=client_secret[0],
+             account_id=account_id[0],
+             start_date=start_date,
+             end_date=end_date,
+         ).with_resources(table)
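
Both new adapters pull their configuration from URI query parameters via parse_qs, so each value arrives as a single-element list (hence the [0] indexing above). Hedged examples of URIs that satisfy this parsing, with placeholder credentials:

    # Linear: only api_key is required; tables: issues, projects, teams, users
    linear_uri = "linear://?api_key=lin_api_XXX"

    # Zoom: Server-to-Server OAuth app credentials; table: meetings
    zoom_uri = (
        "zoom://?client_id=CLIENT_ID"
        "&client_secret=CLIENT_SECRET"
        "&account_id=ACCOUNT_ID"
    )
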
ingestr/src/zoom/__init__.py ADDED
@@ -0,0 +1,55 @@
+ from typing import Any, Dict, Iterable, Sequence
+
+ import dlt
+ import pendulum
+ from dlt.common.typing import TAnyDateTime, TDataItem
+ from dlt.sources import DltResource
+
+ from .helpers import ZoomClient
+
+
+ @dlt.source(name="zoom", max_table_nesting=0)
+ def zoom_source(
+     client_id: str,
+     client_secret: str,
+     account_id: str,
+     start_date: pendulum.DateTime,
+     end_date: pendulum.DateTime | None = None,
+ ) -> Sequence[DltResource]:
+     """Create a Zoom source with meetings resource for all users in the account."""
+     client = ZoomClient(
+         client_id=client_id,
+         client_secret=client_secret,
+         account_id=account_id,
+     )
+
+     @dlt.resource(write_disposition="merge", primary_key="id")
+     def meetings(
+         datetime: dlt.sources.incremental[TAnyDateTime] = dlt.sources.incremental(
+             "start_time",
+             initial_value=start_date.isoformat(),
+             end_value=end_date.isoformat() if end_date is not None else None,
+             range_start="closed",
+             range_end="closed",
+         ),
+     ) -> Iterable[TDataItem]:
+         if datetime.last_value:
+             start_dt = pendulum.parse(datetime.last_value)
+         else:
+             start_dt = pendulum.parse(start_date)
+
+         if end_date is None:
+             end_dt = pendulum.now("UTC")
+         else:
+             end_dt = pendulum.parse(datetime.end_value)
+         base_params: Dict[str, Any] = {
+             "type": "scheduled",
+             "page_size": 300,
+             "from": start_dt.to_date_string(),
+             "to": end_dt.to_date_string(),
+         }
+         for user in client.get_users():
+             user_id = user["id"]
+             yield from client.get_meetings(user_id, base_params)
+
+     return meetings
ingestr/src/zoom/helpers.py ADDED
@@ -0,0 +1,76 @@
+ import time
+ from typing import Any, Dict, Iterator, Optional
+
+ from ingestr.src.http_client import create_client
+
+
+ class ZoomClient:
+     """Minimal Zoom API client supporting Server-to-Server OAuth."""
+
+     def __init__(
+         self,
+         client_id: Optional[str] = None,
+         client_secret: Optional[str] = None,
+         account_id: Optional[str] = None,
+     ) -> None:
+         self.client_id = client_id
+         self.client_secret = client_secret
+         self.account_id = account_id
+         self.token_expires_at: float = 0
+         self.base_url = "https://api.zoom.us/v2"
+         self.session = create_client()
+         self._refresh_access_token()
+
+     def _refresh_access_token(self) -> None:
+         token_url = "https://zoom.us/oauth/token"
+         auth = (self.client_id, self.client_secret)
+         resp = self.session.post(
+             token_url,
+             params={"grant_type": "account_credentials", "account_id": self.account_id},
+             auth=auth,
+         )
+         resp.raise_for_status()
+         data = resp.json()
+         self.access_token = data.get("access_token")
+         self.token_expires_at = time.time() + data.get("expires_in", 3600)
+
+     def _ensure_token(self) -> None:
+         if self.access_token is None or self.token_expires_at <= time.time():
+             self._refresh_access_token()
+
+     def _headers(self) -> Dict[str, str]:
+         self._ensure_token()
+         return {
+             "Authorization": f"Bearer {self.access_token}",
+             "Accept": "application/json",
+         }
+
+     def get_users(self) -> Iterator[Dict[str, Any]]:
+         url = f"{self.base_url}/users"
+         params = {"page_size": 1000}
+         while True:
+             response = self.session.get(url, headers=self._headers(), params=params)
+             response.raise_for_status()
+             data = response.json()
+             for user in data.get("users", []):
+                 yield user
+             token = data.get("next_page_token")
+             if not token:
+                 break
+             params["next_page_token"] = token
+
+     def get_meetings(
+         self, user_id: str, params: Dict[str, Any]
+     ) -> Iterator[Dict[str, Any]]:
+         url = f"{self.base_url}/users/{user_id}/meetings"
+         while True:
+             response = self.session.get(url, headers=self._headers(), params=params)
+             response.raise_for_status()
+             data = response.json()
+             for item in data.get("meetings", []):
+                 item["zoom_user_id"] = user_id
+                 yield item
+             token = data.get("next_page_token")
+             if not token:
+                 break
+             params["next_page_token"] = token
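
ZoomClient caches its Server-to-Server OAuth token and transparently re-requests it once expires_in elapses; both list endpoints follow Zoom's next_page_token pagination. A hedged direct-usage sketch with placeholder credentials:

    from ingestr.src.zoom.helpers import ZoomClient

    client = ZoomClient(
        client_id="CLIENT_ID",
        client_secret="CLIENT_SECRET",
        account_id="ACCOUNT_ID",
    )
    # list each user's scheduled meetings for January 2024; pass a copy of the
    # params per user, since get_meetings mutates them while paginating
    window = {"type": "scheduled", "page_size": 300, "from": "2024-01-01", "to": "2024-01-31"}
    for user in client.get_users():
        for meeting in client.get_meetings(user["id"], dict(window)):
            print(meeting["zoom_user_id"], meeting["id"], meeting.get("topic"))
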
{ingestr-0.13.60.dist-info → ingestr-0.13.62.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ingestr
- Version: 0.13.60
+ Version: 0.13.62
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -47,6 +47,7 @@ Requires-Dist: databricks-sqlalchemy==1.0.2
  Requires-Dist: dataclasses-json==0.6.7
  Requires-Dist: decorator==5.2.1
  Requires-Dist: deprecation==2.1.0
+ Requires-Dist: dlt-cratedb==0.0.1
  Requires-Dist: dlt==1.10.0
  Requires-Dist: dnspython==2.7.0
  Requires-Dist: duckdb-engine==0.17.0
@@ -305,7 +306,7 @@ Pull requests are welcome. However, please open an issue first to discuss what y
  <tr>
      <td>CrateDB</td>
      <td>✅</td>
-     <td>❌</td>
+     <td>✅</td>
  </tr>
  <tr>
      <td>Databricks</td>
{ingestr-0.13.60.dist-info → ingestr-0.13.62.dist-info}/RECORD RENAMED
@@ -2,16 +2,16 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
  ingestr/main.py,sha256=taDyHyaVSpB17iNLl8zA0gmr4CqDO-MSTQX1CaRBB9U,26364
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
  ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
- ingestr/src/buildinfo.py,sha256=1sTup4WLO36DuLnh5cnxtmEDBjKKYxAOSisEvjELy1w,21
- ingestr/src/destinations.py,sha256=TcxM2rcwHfgMMP6U0yRNcfWKlEzkBbZbqCIDww7lkTY,16882
+ ingestr/src/buildinfo.py,sha256=TQVet9YuexpAbyia8_nOwytK_io6rEVmB-flmAr4z8E,21
+ ingestr/src/destinations.py,sha256=YU7c5cNqViCIJ9NDhgGuorh5jMvLi7yViPWpEJ57Xx0,18788
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
- ingestr/src/factory.py,sha256=OKqjYqvHhgaOF48-eSNSabcfXt4Gmr1yZ8cFGizXh0g,6319
+ ingestr/src/factory.py,sha256=nYXINj54QtMGuQv9oCXbHO7DFuAudqZfijkSvSXEVj8,6526
  ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
  ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
  ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
  ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
- ingestr/src/sources.py,sha256=sJmiiInFb-KCPsaIy4qus6lx59MDCOobWgxJ7lfKH08,99047
+ ingestr/src/sources.py,sha256=nTzCi_RZhUesBkXyUTAIoAUgj5iZSQJ_D8mId8rq8mE,101467
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -73,7 +73,7 @@ ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOIN
  ingestr/src/hubspot/__init__.py,sha256=wqHefhc_YRI5dNFCcpvH-UUilNThE49sbGouSBiHYsw,11776
  ingestr/src/hubspot/helpers.py,sha256=k2b-lhxqBNKHoOSHoHegFSsk8xxjjGA0I04V0XyX2b4,7883
  ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
- ingestr/src/isoc_pulse/__init__.py,sha256=WDgKBn15gyQheXE6oJ_2OuMUQwKPbAjflKAsnucu7u8,4647
+ ingestr/src/isoc_pulse/__init__.py,sha256=9b4eN4faatpiwTuRNPuYcEt1hEFDEjua9XhfakUigBk,4648
  ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
  ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
  ingestr/src/kinesis/__init__.py,sha256=YretSz4F28tbkcPhd55mBp2Xk7XE9unyWx0nmvl8iEc,6235
@@ -81,6 +81,7 @@ ingestr/src/kinesis/helpers.py,sha256=SO2cFmWNGcykUYmjHdfxWsOQSkLQXyhFtfWnkcUOM0
  ingestr/src/klaviyo/__init__.py,sha256=o_noUgbxLk36s4f9W56_ibPorF0n7kVapPUlV0p-jfA,7875
  ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
  ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
+ ingestr/src/linear/__init__.py,sha256=ITMLsuLjrGYx3bTsEK1cdPUkowJYCdAII_ucci_lGDQ,5422
  ingestr/src/linkedin_ads/__init__.py,sha256=CAPWFyV24loziiphbLmODxZUXZJwm4JxlFkr56q0jfo,1855
  ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffWJxmAadZMbpTvf0xkQc,198
  ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnbLfxzds,4498
@@ -134,6 +135,8 @@ ingestr/src/zendesk/helpers/__init__.py,sha256=YTJejCiUjfIcsj9FrkY0l-JGYDI7RRte1
  ingestr/src/zendesk/helpers/api_helpers.py,sha256=dMkNn4ZQXgJTDOXAAXdmRt41phNFoRhYyPaLJih0pZY,4184
  ingestr/src/zendesk/helpers/credentials.py,sha256=EWyi0ZlxWFgd1huD86KNF4dApLHgmabqWksFpEg1cf0,1332
  ingestr/src/zendesk/helpers/talk_api.py,sha256=TSVSOErsBZvxcX91LMhAgvy6yLSYvpuVfOyKViOHtvA,4718
+ ingestr/src/zoom/__init__.py,sha256=6NdHXLv438FLD-cUEgmgnuJi__70-C88bTGOLqYHqbQ,1736
+ ingestr/src/zoom/helpers.py,sha256=Y6fjIpQTWUAkKXJKfwjJnZc6wlFlqzJfWqq34WZhrcU,2669
  ingestr/testdata/.gitignore,sha256=DFzYYOpqdTiT7S1HjCT-jffZSmEvFZge295_upAB0FY,13
  ingestr/testdata/create_replace.csv,sha256=TQDbOSkRKq9ZZv1d68Qjwh94aIyUQ-oEwxpJIrd3YK8,1060
  ingestr/testdata/delete_insert_expected.csv,sha256=wbj7uboVWwm3sNMh1n7f4-OKFEQJv1s96snjEHp9nkg,336
@@ -143,8 +146,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
  ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
- ingestr-0.13.60.dist-info/METADATA,sha256=FwdcfGIPPRKlSV8wJX1HAqHriGUZBl_XXi0Yco8O874,14993
- ingestr-0.13.60.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- ingestr-0.13.60.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
- ingestr-0.13.60.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
- ingestr-0.13.60.dist-info/RECORD,,
+ ingestr-0.13.62.dist-info/METADATA,sha256=GdxhHlw_v3U_5vL1ZTbDFxRMr7RLBnWDA-dVwG74OtQ,15027
+ ingestr-0.13.62.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ ingestr-0.13.62.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ ingestr-0.13.62.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ ingestr-0.13.62.dist-info/RECORD,,