ingestr 0.13.60__py3-none-any.whl → 0.13.62__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/destinations.py +102 -45
- ingestr/src/factory.py +8 -0
- ingestr/src/isoc_pulse/__init__.py +1 -1
- ingestr/src/linear/__init__.py +183 -0
- ingestr/src/sources.py +75 -1
- ingestr/src/zoom/__init__.py +55 -0
- ingestr/src/zoom/helpers.py +76 -0
- {ingestr-0.13.60.dist-info → ingestr-0.13.62.dist-info}/METADATA +3 -2
- {ingestr-0.13.60.dist-info → ingestr-0.13.62.dist-info}/RECORD +13 -10
- {ingestr-0.13.60.dist-info → ingestr-0.13.62.dist-info}/WHEEL +0 -0
- {ingestr-0.13.60.dist-info → ingestr-0.13.62.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.60.dist-info → ingestr-0.13.62.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/buildinfo.py
CHANGED
@@ -1 +1 @@
-version = "v0.13.60"
+version = "v0.13.62"
ingestr/src/destinations.py
CHANGED
@@ -1,3 +1,4 @@
+import abc
 import base64
 import csv
 import json
@@ -9,6 +10,7 @@ from urllib.parse import parse_qs, quote, urlparse
 import dlt
 import dlt.destinations.impl.filesystem.filesystem
 from dlt.common.configuration.specs import AwsCredentials
+from dlt.common.storages.configuration import FileSystemCredentials
 from dlt.destinations.impl.clickhouse.configuration import (
     ClickHouseCredentials,
 )
@@ -111,6 +113,14 @@ class BigQueryDestination:
         pass


+class CrateDBDestination(GenericSqlDestination):
+    def dlt_dest(self, uri: str, **kwargs):
+        uri = uri.replace("cratedb://", "postgres://")
+        import dlt_cratedb.impl.cratedb.factory
+
+        return dlt_cratedb.impl.cratedb.factory.cratedb(credentials=uri, **kwargs)
+
+
 class PostgresDestination(GenericSqlDestination):
     def dlt_dest(self, uri: str, **kwargs):
         return dlt.destinations.postgres(credentials=uri, **kwargs)
@@ -386,43 +396,62 @@ class ClickhouseDestination:
         pass


-class …
+class BlobFSClient(dlt.destinations.impl.filesystem.filesystem.FilesystemClient):
     @property
     def dataset_path(self):
         # override to remove dataset path
         return self.bucket_path


-class …
+class BlobFS(dlt.destinations.filesystem):
     @property
     def client_class(self):
-        return …
+        return BlobFSClient


-class …
+class SqliteDestination(GenericSqlDestination):
     def dlt_dest(self, uri: str, **kwargs):
-
-        params = parse_qs(parsed_uri.query)
+        return dlt.destinations.sqlalchemy(credentials=uri)

-
-
-
+    def dlt_run_params(self, uri: str, table: str, **kwargs):
+        return {
+            # https://dlthub.com/docs/dlt-ecosystem/destinations/sqlalchemy#dataset-files
+            "dataset_name": "main",
+            "table_name": table,
+        }

-        secret_access_key = params.get("secret_access_key", [None])[0]
-        if secret_access_key is None:
-            raise MissingValueError("secret_access_key", "S3")

-
-
-
-        if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
-            raise ValueError("Invalid endpoint_url. Must be a valid URL.")
+class MySqlDestination(GenericSqlDestination):
+    def dlt_dest(self, uri: str, **kwargs):
+        return dlt.destinations.sqlalchemy(credentials=uri)

-
-
-
-
-
+    def dlt_run_params(self, uri: str, table: str, **kwargs):
+        parsed = urlparse(uri)
+        database = parsed.path.lstrip("/")
+        if not database:
+            raise ValueError("You need to specify a database")
+        return {
+            "dataset_name": database,
+            "table_name": table,
+        }
+
+
+class BlobStorageDestination(abc.ABC):
+    @abc.abstractmethod
+    def credentials(self, params: dict) -> FileSystemCredentials:
+        """Build credentials for the blob storage destination."""
+        pass
+
+    @property
+    @abc.abstractmethod
+    def protocol(self) -> str:
+        """The protocol used for the blob storage destination."""
+        pass
+
+    def dlt_dest(self, uri: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+        creds = self.credentials(params)

         dest_table = kwargs["dest_table"]

@@ -442,7 +471,7 @@ class S3Destination:
         base_path = "/".join(table_parts[:-1])

         opts = {
-            "bucket_url": f"…
+            "bucket_url": f"{self.protocol}://{base_path}",
             "credentials": creds,
             # supresses dlt warnings about dataset name normalization.
             # we don't use dataset names in S3 so it's fine to disable this.
@@ -452,7 +481,7 @@ class S3Destination:
         if layout is not None:
             opts["layout"] = layout

-        return …
+        return BlobFS(**opts)  # type: ignore

     def validate_table(self, table: str):
         table = table.strip("/ ")
@@ -470,28 +499,56 @@ class S3Destination:
         pass


-class …
-
-
+class S3Destination(BlobStorageDestination):
+    @property
+    def protocol(self) -> str:
+        return "s3"

-    def …
-
-
-        "…
-        "table_name": table,
-        }
+    def credentials(self, params: dict) -> FileSystemCredentials:
+        access_key_id = params.get("access_key_id", [None])[0]
+        if access_key_id is None:
+            raise MissingValueError("access_key_id", "S3")

+        secret_access_key = params.get("secret_access_key", [None])[0]
+        if secret_access_key is None:
+            raise MissingValueError("secret_access_key", "S3")

-
-
-
+        endpoint_url = params.get("endpoint_url", [None])[0]
+        if endpoint_url is not None:
+            parsed_endpoint = urlparse(endpoint_url)
+            if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
+                raise ValueError("Invalid endpoint_url. Must be a valid URL.")

-
-
-
-
-
-
-
-
-
+        return AwsCredentials(
+            aws_access_key_id=access_key_id,
+            aws_secret_access_key=secret_access_key,
+            endpoint_url=endpoint_url,
+        )
+
+
+class GCSDestination(BlobStorageDestination):
+    @property
+    def protocol(self) -> str:
+        return "gs"
+
+    def credentials(self, params: dict) -> FileSystemCredentials:
+        """Builds GCS credentials from the provided parameters."""
+        credentials_path = params.get("credentials_path")
+        credentials_base64 = params.get("credentials_base64")
+        credentials_available = any(
+            map(
+                lambda x: x is not None,
+                [credentials_path, credentials_base64],
+            )
+        )
+        if credentials_available is False:
+            raise MissingValueError("credentials_path or credentials_base64", "GCS")
+
+        credentials = None
+        if credentials_path:
+            with open(credentials_path[0], "r") as f:
+                credentials = json.load(f)
+        else:
+            credentials = json.loads(base64.b64decode(credentials_base64[0]).decode())  # type: ignore
+
+        return credentials
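
The new BlobStorageDestination base class factors the shared filesystem wiring out of the S3 path: subclasses only supply protocol() and credentials(), while the shared dlt_dest() parses the URI query string and builds the dlt filesystem destination. The following is a hedged sketch (not part of the release) of driving the new destination classes directly; all credential values and paths are placeholders, and dest_table follows the "<bucket>/<path>/<table>" shape the surrounding code expects.

    # Hedged sketch: exercising the new destination classes directly.
    from ingestr.src.destinations import CrateDBDestination, GCSDestination

    # GCS: query params carry the credentials, dest_table carries bucket + path.
    gcs = GCSDestination()
    gcs_dest = gcs.dlt_dest(
        "gs://?credentials_path=./service_account.json",  # placeholder key file
        dest_table="my-bucket/raw/events",
    )

    # CrateDB speaks the Postgres wire protocol, so the destination rewrites the
    # scheme and hands the URI to dlt-cratedb.
    crate = CrateDBDestination()
    crate_dest = crate.dlt_dest("cratedb://crate@localhost:5432/doc")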
ingestr/src/factory.py
CHANGED
@@ -7,9 +7,11 @@ from ingestr.src.destinations import (
     AthenaDestination,
     BigQueryDestination,
     ClickhouseDestination,
+    CrateDBDestination,
     CsvDestination,
     DatabricksDestination,
     DuckDBDestination,
+    GCSDestination,
     MsSQLDestination,
     MySqlDestination,
     PostgresDestination,
@@ -46,6 +48,7 @@ from ingestr.src.sources import (
     KafkaSource,
     KinesisSource,
     KlaviyoSource,
+    LinearSource,
     LinkedInAdsSource,
     LocalCsvSource,
     MixpanelSource,
@@ -68,6 +71,7 @@ from ingestr.src.sources import (
     TikTokSource,
     TrustpilotSource,
     ZendeskSource,
+    ZoomSource,
 )

 SQL_SOURCE_SCHEMES = [
@@ -160,6 +164,7 @@ class SourceDestinationFactory:
         "appstore": AppleAppStoreSource,
         "gs": GCSSource,
         "linkedinads": LinkedInAdsSource,
+        "linear": LinearSource,
         "applovin": AppLovinSource,
         "applovinmax": ApplovinMaxSource,
         "salesforce": SalesforceSource,
@@ -178,9 +183,11 @@ class SourceDestinationFactory:
         "smartsheet": SmartsheetSource,
         "sftp": SFTPSource,
         "pinterest": PinterestSource,
+        "zoom": ZoomSource,
     }
     destinations: Dict[str, Type[DestinationProtocol]] = {
         "bigquery": BigQueryDestination,
+        "cratedb": CrateDBDestination,
         "databricks": DatabricksDestination,
         "duckdb": DuckDBDestination,
         "mssql": MsSQLDestination,
@@ -197,6 +204,7 @@ class SourceDestinationFactory:
         "clickhouse+native": ClickhouseDestination,
         "clickhouse": ClickhouseDestination,
         "s3": S3Destination,
+        "gs": GCSDestination,
         "sqlite": SqliteDestination,
         "mysql": MySqlDestination,
         "mysql+pymysql": MySqlDestination,
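
The factory keys its lookup tables on the URI scheme, so these entries make linear:// and zoom:// resolvable as sources and cratedb:// and gs:// as destinations. A hedged sketch of the lookup, assuming the class-level registries shown in the diff above:

    # Hedged sketch: resolving the new schemes through the factory registries.
    from urllib.parse import urlparse
    from ingestr.src.factory import SourceDestinationFactory

    scheme = urlparse("cratedb://crate@localhost:5432/doc").scheme  # -> "cratedb"
    dest_cls = SourceDestinationFactory.destinations[scheme]        # -> CrateDBDestination
    source_cls = SourceDestinationFactory.sources["zoom"]           # -> ZoomSource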
ingestr/src/linear/__init__.py
ADDED
@@ -0,0 +1,183 @@
+from typing import Any, Dict, Iterable, Iterator, Optional
+
+import dlt
+import pendulum
+import requests
+
+LINEAR_GRAPHQL_ENDPOINT = "https://api.linear.app/graphql"
+
+
+def _graphql(
+    api_key: str, query: str, variables: Optional[Dict[str, Any]] = None
+) -> Dict[str, Any]:
+    headers = {"Authorization": api_key, "Content-Type": "application/json"}
+    response = requests.post(
+        LINEAR_GRAPHQL_ENDPOINT,
+        json={"query": query, "variables": variables or {}},
+        headers=headers,
+    )
+    response.raise_for_status()
+    payload = response.json()
+    if "errors" in payload:
+        raise ValueError(str(payload["errors"]))
+    return payload["data"]
+
+
+def _paginate(api_key: str, query: str, root: str) -> Iterator[Dict[str, Any]]:
+    cursor: Optional[str] = None
+    while True:
+        data = _graphql(api_key, query, {"cursor": cursor})[root]
+        for item in data["nodes"]:
+            yield item
+        if not data["pageInfo"]["hasNextPage"]:
+            break
+        cursor = data["pageInfo"]["endCursor"]
+
+
+ISSUES_QUERY = """
+query Issues($cursor: String) {
+  issues(first: 50, after: $cursor) {
+    nodes {
+      id
+      title
+      description
+      createdAt
+      updatedAt
+    }
+    pageInfo { hasNextPage endCursor }
+  }
+}
+"""
+
+PROJECTS_QUERY = """
+query Projects($cursor: String) {
+  projects(first: 50, after: $cursor) {
+    nodes {
+      id
+      name
+      description
+      createdAt
+      updatedAt
+    }
+    pageInfo { hasNextPage endCursor }
+  }
+}
+"""
+
+TEAMS_QUERY = """
+query Teams($cursor: String) {
+  teams(first: 50, after: $cursor) {
+    nodes {
+      id
+      name
+      key
+      description
+    }
+    pageInfo { hasNextPage endCursor }
+  }
+}
+"""
+
+USERS_QUERY = """
+query Users($cursor: String) {
+  users(first: 50, after: $cursor) {
+    nodes {
+      id
+      name
+      displayName
+      email
+      createdAt
+      updatedAt
+    }
+    pageInfo { hasNextPage endCursor }
+  }
+}
+"""
+
+
+@dlt.source(name="linear", max_table_nesting=0)
+def linear_source(
+    api_key: str,
+    start_date: pendulum.DateTime,
+    end_date: pendulum.DateTime | None = None,
+) -> Iterable[dlt.sources.DltResource]:
+    @dlt.resource(name="issues", primary_key="id", write_disposition="merge")
+    def issues(
+        updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
+            "updatedAt",
+            initial_value=start_date.isoformat(),
+            end_value=end_date.isoformat() if end_date else None,
+            range_start="closed",
+            range_end="closed",
+        ),
+    ) -> Iterator[Dict[str, Any]]:
+        if updated_at.last_value:
+            current_start_date = pendulum.parse(updated_at.last_value)
+        else:
+            current_start_date = pendulum.parse(start_date)
+
+        if updated_at.end_value:
+            current_end_date = pendulum.parse(updated_at.end_value)
+        else:
+            current_end_date = pendulum.now(tz="UTC")
+
+        for item in _paginate(api_key, ISSUES_QUERY, "issues"):
+            if pendulum.parse(item["updatedAt"]) >= current_start_date:
+                if pendulum.parse(item["updatedAt"]) <= current_end_date:
+                    yield item
+
+    @dlt.resource(name="projects", primary_key="id", write_disposition="merge")
+    def projects(
+        updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
+            "updatedAt",
+            initial_value=start_date.isoformat(),
+            end_value=end_date.isoformat() if end_date else None,
+            range_start="closed",
+            range_end="closed",
+        ),
+    ) -> Iterator[Dict[str, Any]]:
+        if updated_at.last_value:
+            current_start_date = pendulum.parse(updated_at.last_value)
+        else:
+            current_start_date = pendulum.parse(start_date)
+
+        if updated_at.end_value:
+            current_end_date = pendulum.parse(updated_at.end_value)
+        else:
+            current_end_date = pendulum.now(tz="UTC")
+
+        for item in _paginate(api_key, PROJECTS_QUERY, "projects"):
+            if pendulum.parse(item["updatedAt"]) >= current_start_date:
+                if pendulum.parse(item["updatedAt"]) <= current_end_date:
+                    yield item
+
+    @dlt.resource(name="teams", primary_key="id", write_disposition="merge")
+    def teams() -> Iterator[Dict[str, Any]]:
+        yield from _paginate(api_key, TEAMS_QUERY, "teams")
+
+    @dlt.resource(name="users", primary_key="id", write_disposition="merge")
+    def users(
+        updated_at: dlt.sources.incremental[str] = dlt.sources.incremental(
+            "updatedAt",
+            initial_value=start_date.isoformat(),
+            end_value=end_date.isoformat() if end_date else None,
+            range_start="closed",
+            range_end="closed",
+        ),
+    ) -> Iterator[Dict[str, Any]]:
+        if updated_at.last_value:
+            current_start_date = pendulum.parse(updated_at.last_value)
+        else:
+            current_start_date = pendulum.parse(start_date)
+
+        if updated_at.end_value:
+            current_end_date = pendulum.parse(updated_at.end_value)
+        else:
+            current_end_date = pendulum.now(tz="UTC")
+
+        for item in _paginate(api_key, USERS_QUERY, "users"):
+            if pendulum.parse(item["updatedAt"]) >= current_start_date:
+                if pendulum.parse(item["updatedAt"]) <= current_end_date:
+                    yield item
+
+    return issues, projects, teams, users
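
The Linear source walks the GraphQL API with cursor-based pages of 50 records and filters issues, projects, and users client-side on updatedAt. A hedged sketch (not shipped with the package) of running it through a plain dlt pipeline, assuming a valid Linear API key; resource names match those defined above.

    # Hedged sketch: load Linear issues and teams into DuckDB via dlt.
    import dlt
    import pendulum
    from ingestr.src.linear import linear_source

    pipeline = dlt.pipeline(
        pipeline_name="linear_demo", destination="duckdb", dataset_name="linear"
    )
    source = linear_source(
        api_key="lin_api_XXX",  # placeholder key
        start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
    )
    print(pipeline.run(source.with_resources("issues", "teams")))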
ingestr/src/sources.py
CHANGED
@@ -1885,7 +1885,7 @@ class GCSSource:
             endpoint = blob.parse_endpoint(path_to_file)
         except blob.UnsupportedEndpointError:
             raise ValueError(
-                "…
+                "GCS Source only supports specific formats files: csv, jsonl, parquet"
             )
         except Exception as e:
             raise ValueError(
@@ -2851,3 +2851,77 @@ class PinterestSource:
             start_date=start_date,
             end_date=end_date,
         ).with_resources(table)
+
+
+class LinearSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+        api_key = params.get("api_key")
+        if api_key is None:
+            raise MissingValueError("api_key", "Linear")
+
+        if table not in ["issues", "projects", "teams", "users"]:
+            raise UnsupportedResourceError(table, "Linear")
+
+        start_date = kwargs.get("interval_start")
+        if start_date is not None:
+            start_date = ensure_pendulum_datetime(start_date)
+        else:
+            start_date = pendulum.datetime(2020, 1, 1).in_tz("UTC")
+
+        end_date = kwargs.get("interval_end")
+        if end_date is not None:
+            end_date = end_date = ensure_pendulum_datetime(end_date).in_tz("UTC")
+
+        from ingestr.src.linear import linear_source
+
+        return linear_source(
+            api_key=api_key[0],
+            start_date=start_date,
+            end_date=end_date,
+        ).with_resources(table)
+
+
+class ZoomSource:
+    def handles_incrementality(self) -> bool:
+        return True
+
+    def dlt_source(self, uri: str, table: str, **kwargs):
+        parsed = urlparse(uri)
+        params = parse_qs(parsed.query)
+        client_id = params.get("client_id")
+        client_secret = params.get("client_secret")
+        account_id = params.get("account_id")
+
+        if not (client_id and client_secret and account_id):
+            raise MissingValueError(
+                "client_id/client_secret/account_id",
+                "Zoom",
+            )
+
+        start_date = kwargs.get("interval_start")
+        if start_date is not None:
+            start_date = ensure_pendulum_datetime(start_date)
+        else:
+            start_date = pendulum.datetime(2020, 1, 26).in_tz("UTC")
+
+        end_date = kwargs.get("interval_end")
+        if end_date is not None:
+            end_date = end_date = ensure_pendulum_datetime(end_date).in_tz("UTC")
+
+        from ingestr.src.zoom import zoom_source
+
+        if table not in {"meetings"}:
+            raise UnsupportedResourceError(table, "Zoom")
+
+        return zoom_source(
+            client_id=client_id[0],
+            client_secret=client_secret[0],
+            account_id=account_id[0],
+            start_date=start_date,
+            end_date=end_date,
+        ).with_resources(table)
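
Both adapters follow the existing pattern in sources.py: credentials come from the URI query string, the requested table is validated, and interval_start/interval_end are converted to pendulum datetimes before delegating to the dlt source. A hedged sketch of the URI shapes they accept, with placeholder credential values (note that the Zoom adapter performs a real OAuth call when constructed).

    # Hedged sketch: invoking the adapters the way the factory would.
    from ingestr.src.sources import LinearSource, ZoomSource

    linear = LinearSource().dlt_source(
        "linear://?api_key=lin_api_XXX",  # placeholder
        table="issues",
        interval_start="2024-01-01",
    )
    zoom = ZoomSource().dlt_source(
        "zoom://?client_id=XXX&client_secret=YYY&account_id=ZZZ",  # placeholders
        table="meetings",
    )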
ingestr/src/zoom/__init__.py
ADDED
@@ -0,0 +1,55 @@
+from typing import Any, Dict, Iterable, Sequence
+
+import dlt
+import pendulum
+from dlt.common.typing import TAnyDateTime, TDataItem
+from dlt.sources import DltResource
+
+from .helpers import ZoomClient
+
+
+@dlt.source(name="zoom", max_table_nesting=0)
+def zoom_source(
+    client_id: str,
+    client_secret: str,
+    account_id: str,
+    start_date: pendulum.DateTime,
+    end_date: pendulum.DateTime | None = None,
+) -> Sequence[DltResource]:
+    """Create a Zoom source with meetings resource for all users in the account."""
+    client = ZoomClient(
+        client_id=client_id,
+        client_secret=client_secret,
+        account_id=account_id,
+    )
+
+    @dlt.resource(write_disposition="merge", primary_key="id")
+    def meetings(
+        datetime: dlt.sources.incremental[TAnyDateTime] = dlt.sources.incremental(
+            "start_time",
+            initial_value=start_date.isoformat(),
+            end_value=end_date.isoformat() if end_date is not None else None,
+            range_start="closed",
+            range_end="closed",
+        ),
+    ) -> Iterable[TDataItem]:
+        if datetime.last_value:
+            start_dt = pendulum.parse(datetime.last_value)
+        else:
+            start_dt = pendulum.parse(start_date)
+
+        if end_date is None:
+            end_dt = pendulum.now("UTC")
+        else:
+            end_dt = pendulum.parse(datetime.end_value)
+        base_params: Dict[str, Any] = {
+            "type": "scheduled",
+            "page_size": 300,
+            "from": start_dt.to_date_string(),
+            "to": end_dt.to_date_string(),
+        }
+        for user in client.get_users():
+            user_id = user["id"]
+            yield from client.get_meetings(user_id, base_params)
+
+    return meetings
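
The meetings resource fans out over every user returned by ZoomClient.get_users() and windows the listing with from/to date strings derived from the incremental cursor. A hedged sketch of running the source standalone with dlt, assuming Server-to-Server OAuth app credentials (placeholders below); only the "meetings" resource is defined.

    # Hedged sketch: standalone dlt pipeline for the Zoom source.
    import dlt
    import pendulum
    from ingestr.src.zoom import zoom_source

    pipeline = dlt.pipeline(
        pipeline_name="zoom_demo", destination="duckdb", dataset_name="zoom"
    )
    src = zoom_source(
        client_id="XXX",
        client_secret="YYY",
        account_id="ZZZ",
        start_date=pendulum.datetime(2024, 1, 1, tz="UTC"),
    )
    pipeline.run(src.with_resources("meetings"))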
ingestr/src/zoom/helpers.py
ADDED
@@ -0,0 +1,76 @@
+import time
+from typing import Any, Dict, Iterator, Optional
+
+from ingestr.src.http_client import create_client
+
+
+class ZoomClient:
+    """Minimal Zoom API client supporting Server-to-Server OAuth."""
+
+    def __init__(
+        self,
+        client_id: Optional[str] = None,
+        client_secret: Optional[str] = None,
+        account_id: Optional[str] = None,
+    ) -> None:
+        self.client_id = client_id
+        self.client_secret = client_secret
+        self.account_id = account_id
+        self.token_expires_at: float = 0
+        self.base_url = "https://api.zoom.us/v2"
+        self.session = create_client()
+        self._refresh_access_token()
+
+    def _refresh_access_token(self) -> None:
+        token_url = "https://zoom.us/oauth/token"
+        auth = (self.client_id, self.client_secret)
+        resp = self.session.post(
+            token_url,
+            params={"grant_type": "account_credentials", "account_id": self.account_id},
+            auth=auth,
+        )
+        resp.raise_for_status()
+        data = resp.json()
+        self.access_token = data.get("access_token")
+        self.token_expires_at = time.time() + data.get("expires_in", 3600)
+
+    def _ensure_token(self) -> None:
+        if self.access_token is None or self.token_expires_at <= time.time():
+            self._refresh_access_token()
+
+    def _headers(self) -> Dict[str, str]:
+        self._ensure_token()
+        return {
+            "Authorization": f"Bearer {self.access_token}",
+            "Accept": "application/json",
+        }
+
+    def get_users(self) -> Iterator[Dict[str, Any]]:
+        url = f"{self.base_url}/users"
+        params = {"page_size": 1000}
+        while True:
+            response = self.session.get(url, headers=self._headers(), params=params)
+            response.raise_for_status()
+            data = response.json()
+            for user in data.get("users", []):
+                yield user
+            token = data.get("next_page_token")
+            if not token:
+                break
+            params["next_page_token"] = token
+
+    def get_meetings(
+        self, user_id: str, params: Dict[str, Any]
+    ) -> Iterator[Dict[str, Any]]:
+        url = f"{self.base_url}/users/{user_id}/meetings"
+        while True:
+            response = self.session.get(url, headers=self._headers(), params=params)
+            response.raise_for_status()
+            data = response.json()
+            for item in data.get("meetings", []):
+                item["zoom_user_id"] = user_id
+                yield item
+            token = data.get("next_page_token")
+            if not token:
+                break
+            params["next_page_token"] = token
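
ZoomClient fetches an account-credentials token eagerly in __init__, refreshes it when it expires, and pages both endpoints via next_page_token. A hedged sketch of using the client on its own, with placeholder credentials; field names such as "topic" come from the Zoom API and are accessed defensively here.

    # Hedged sketch: driving ZoomClient directly, outside of dlt.
    from ingestr.src.zoom.helpers import ZoomClient

    client = ZoomClient(client_id="XXX", client_secret="YYY", account_id="ZZZ")
    for user in client.get_users():
        meetings = client.get_meetings(
            user["id"], {"type": "scheduled", "page_size": 300}
        )
        for meeting in meetings:
            print(meeting["id"], meeting.get("topic"))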
{ingestr-0.13.60.dist-info → ingestr-0.13.62.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ingestr
-Version: 0.13.60
+Version: 0.13.62
 Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
 Project-URL: Homepage, https://github.com/bruin-data/ingestr
 Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -47,6 +47,7 @@ Requires-Dist: databricks-sqlalchemy==1.0.2
 Requires-Dist: dataclasses-json==0.6.7
 Requires-Dist: decorator==5.2.1
 Requires-Dist: deprecation==2.1.0
+Requires-Dist: dlt-cratedb==0.0.1
 Requires-Dist: dlt==1.10.0
 Requires-Dist: dnspython==2.7.0
 Requires-Dist: duckdb-engine==0.17.0
@@ -305,7 +306,7 @@ Pull requests are welcome. However, please open an issue first to discuss what y
     <tr>
         <td>CrateDB</td>
         <td>✅</td>
-        <td…
+        <td>✅</td>
     </tr>
     <tr>
         <td>Databricks</td>
{ingestr-0.13.60.dist-info → ingestr-0.13.62.dist-info}/RECORD
CHANGED
@@ -2,16 +2,16 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
 ingestr/main.py,sha256=taDyHyaVSpB17iNLl8zA0gmr4CqDO-MSTQX1CaRBB9U,26364
 ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
 ingestr/src/blob.py,sha256=UUWMjHUuoR9xP1XZQ6UANQmnMVyDx3d0X4-2FQC271I,2138
-ingestr/src/buildinfo.py,sha256=…
-ingestr/src/destinations.py,sha256=…
+ingestr/src/buildinfo.py,sha256=TQVet9YuexpAbyia8_nOwytK_io6rEVmB-flmAr4z8E,21
+ingestr/src/destinations.py,sha256=YU7c5cNqViCIJ9NDhgGuorh5jMvLi7yViPWpEJ57Xx0,18788
 ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
-ingestr/src/factory.py,sha256=…
+ingestr/src/factory.py,sha256=nYXINj54QtMGuQv9oCXbHO7DFuAudqZfijkSvSXEVj8,6526
 ingestr/src/filters.py,sha256=LLecXe9QkLFkFLUZ92OXNdcANr1a8edDxrflc2ko_KA,1452
 ingestr/src/http_client.py,sha256=bxqsk6nJNXCo-79gW04B53DQO-yr25vaSsqP0AKtjx4,732
 ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
 ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
 ingestr/src/resource.py,sha256=ZqmZxFQVGlF8rFPhBiUB08HES0yoTj8sZ--jKfaaVps,1164
-ingestr/src/sources.py,sha256=…
+ingestr/src/sources.py,sha256=nTzCi_RZhUesBkXyUTAIoAUgj5iZSQJ_D8mId8rq8mE,101467
 ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
 ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
 ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -73,7 +73,7 @@ ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOIN
 ingestr/src/hubspot/__init__.py,sha256=wqHefhc_YRI5dNFCcpvH-UUilNThE49sbGouSBiHYsw,11776
 ingestr/src/hubspot/helpers.py,sha256=k2b-lhxqBNKHoOSHoHegFSsk8xxjjGA0I04V0XyX2b4,7883
 ingestr/src/hubspot/settings.py,sha256=i73MkSiJfRLMFLfiJgYdhp-rhymHTfoqFzZ4uOJdFJM,2456
-ingestr/src/isoc_pulse/__init__.py,sha256=…
+ingestr/src/isoc_pulse/__init__.py,sha256=9b4eN4faatpiwTuRNPuYcEt1hEFDEjua9XhfakUigBk,4648
 ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
 ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
 ingestr/src/kinesis/__init__.py,sha256=YretSz4F28tbkcPhd55mBp2Xk7XE9unyWx0nmvl8iEc,6235
@@ -81,6 +81,7 @@ ingestr/src/kinesis/helpers.py,sha256=SO2cFmWNGcykUYmjHdfxWsOQSkLQXyhFtfWnkcUOM0
 ingestr/src/klaviyo/__init__.py,sha256=o_noUgbxLk36s4f9W56_ibPorF0n7kVapPUlV0p-jfA,7875
 ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
 ingestr/src/klaviyo/helpers.py,sha256=_i-SHffhv25feLDcjy6Blj1UxYLISCwVCMgGtrlnYHk,496
+ingestr/src/linear/__init__.py,sha256=ITMLsuLjrGYx3bTsEK1cdPUkowJYCdAII_ucci_lGDQ,5422
 ingestr/src/linkedin_ads/__init__.py,sha256=CAPWFyV24loziiphbLmODxZUXZJwm4JxlFkr56q0jfo,1855
 ingestr/src/linkedin_ads/dimension_time_enum.py,sha256=EmHRdkFyTAfo4chGjThrwqffWJxmAadZMbpTvf0xkQc,198
 ingestr/src/linkedin_ads/helpers.py,sha256=eUWudRVlXl4kqIhfXQ1eVsUpZwJn7UFqKSpnbLfxzds,4498
@@ -134,6 +135,8 @@ ingestr/src/zendesk/helpers/__init__.py,sha256=YTJejCiUjfIcsj9FrkY0l-JGYDI7RRte1
 ingestr/src/zendesk/helpers/api_helpers.py,sha256=dMkNn4ZQXgJTDOXAAXdmRt41phNFoRhYyPaLJih0pZY,4184
 ingestr/src/zendesk/helpers/credentials.py,sha256=EWyi0ZlxWFgd1huD86KNF4dApLHgmabqWksFpEg1cf0,1332
 ingestr/src/zendesk/helpers/talk_api.py,sha256=TSVSOErsBZvxcX91LMhAgvy6yLSYvpuVfOyKViOHtvA,4718
+ingestr/src/zoom/__init__.py,sha256=6NdHXLv438FLD-cUEgmgnuJi__70-C88bTGOLqYHqbQ,1736
+ingestr/src/zoom/helpers.py,sha256=Y6fjIpQTWUAkKXJKfwjJnZc6wlFlqzJfWqq34WZhrcU,2669
 ingestr/testdata/.gitignore,sha256=DFzYYOpqdTiT7S1HjCT-jffZSmEvFZge295_upAB0FY,13
 ingestr/testdata/create_replace.csv,sha256=TQDbOSkRKq9ZZv1d68Qjwh94aIyUQ-oEwxpJIrd3YK8,1060
 ingestr/testdata/delete_insert_expected.csv,sha256=wbj7uboVWwm3sNMh1n7f4-OKFEQJv1s96snjEHp9nkg,336
@@ -143,8 +146,8 @@ ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ
 ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
 ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
 ingestr/tests/unit/test_smartsheets.py,sha256=eiC2CCO4iNJcuN36ONvqmEDryCA1bA1REpayHpu42lk,5058
-ingestr-0.13.…
-ingestr-0.13.…
-ingestr-0.13.…
-ingestr-0.13.…
-ingestr-0.13.…
+ingestr-0.13.62.dist-info/METADATA,sha256=GdxhHlw_v3U_5vL1ZTbDFxRMr7RLBnWDA-dVwG74OtQ,15027
+ingestr-0.13.62.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ingestr-0.13.62.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ingestr-0.13.62.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ingestr-0.13.62.dist-info/RECORD,,
{ingestr-0.13.60.dist-info → ingestr-0.13.62.dist-info}/WHEEL
File without changes
{ingestr-0.13.60.dist-info → ingestr-0.13.62.dist-info}/entry_points.txt
File without changes
{ingestr-0.13.60.dist-info → ingestr-0.13.62.dist-info}/licenses/LICENSE.md
File without changes