ingestr 0.13.42__py3-none-any.whl → 0.13.44__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/src/attio/__init__.py +100 -0
- ingestr/src/attio/helpers.py +54 -0
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/destinations.py +19 -3
- ingestr/src/factory.py +2 -0
- ingestr/src/http_client.py +17 -0
- ingestr/src/sources.py +27 -0
- {ingestr-0.13.42.dist-info → ingestr-0.13.44.dist-info}/METADATA +1 -1
- {ingestr-0.13.42.dist-info → ingestr-0.13.44.dist-info}/RECORD +12 -9
- {ingestr-0.13.42.dist-info → ingestr-0.13.44.dist-info}/WHEEL +0 -0
- {ingestr-0.13.42.dist-info → ingestr-0.13.44.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.42.dist-info → ingestr-0.13.44.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
from typing import Iterable, Iterator
|
|
2
|
+
|
|
3
|
+
import dlt
|
|
4
|
+
from dlt.sources import DltResource
|
|
5
|
+
|
|
6
|
+
from .helpers import AttioClient
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dlt.source(max_table_nesting=0)
def attio_source(
    api_key: str,
    params: list[str],
) -> Iterable[DltResource]:
    """Build the dlt resources that read from Attio.

    Args:
        api_key: Key passed to ``AttioClient``.
        params: Extra table arguments. Each resource checks for itself how
            many entries it expects and raises ``ValueError`` otherwise.

    Returns:
        The ``objects``, ``records``, ``lists``, ``list_entries`` and
        ``all_list_entries`` resources, in that order.
    """
    client = AttioClient(api_key)

    @dlt.resource(
        name="objects",
        write_disposition="replace",
        columns={
            "created_at": {"data_type": "timestamp", "partition": True},
        },
    )
    def fetch_objects() -> Iterator[dict]:
        """Yield the ``objects`` listing; takes no table arguments."""
        if params:
            raise ValueError("Objects table must be in the format `objects`")

        yield client.fetch_data("objects", "get")

    @dlt.resource(
        name="records",
        write_disposition="replace",
        columns={
            "created_at": {"data_type": "timestamp", "partition": True},
        },
    )
    def fetch_records() -> Iterator[dict]:
        """Yield the records of the single object named in ``params``."""
        if len(params) != 1:
            raise ValueError(
                "Records table must be in the format `records:{object_api_slug}`"
            )

        (object_slug,) = params
        yield client.fetch_data(f"objects/{object_slug}/records/query", "post")

    @dlt.resource(
        name="lists",
        write_disposition="replace",
        columns={
            "created_at": {"data_type": "timestamp", "partition": True},
        },
    )
    def fetch_lists() -> Iterator[dict]:
        """Yield the ``lists`` listing."""
        yield client.fetch_data("lists", "get")

    @dlt.resource(
        name="list_entries",
        write_disposition="replace",
        columns={
            "created_at": {"data_type": "timestamp", "partition": True},
        },
    )
    def fetch_list_entries() -> Iterator[dict]:
        """Yield the entries of the single list named in ``params``."""
        if len(params) != 1:
            raise ValueError(
                "List entries table must be in the format `list_entries:{list_id}`"
            )

        (list_id,) = params
        yield client.fetch_data(f"lists/{list_id}/entries/query", "post")

    @dlt.resource(
        name="all_list_entries",
        write_disposition="replace",
        columns={
            "created_at": {"data_type": "timestamp", "partition": True},
        },
    )
    def fetch_all_list_entries() -> Iterator[dict]:
        """Yield the entries of every list whose parent objects include ``params[0]``."""
        if len(params) != 1:
            raise ValueError(
                "All list entries table must be in the format `all_list_entries:{object_api_slug}`"
            )

        parent_slug = params[0]
        for attio_list in client.fetch_data("lists", "get"):
            # Lists attached to other parent objects are skipped.
            if parent_slug not in attio_list["parent_object"]:
                continue
            entries_path = f"lists/{attio_list['id']['list_id']}/entries/query"
            yield from client.fetch_data(entries_path, "post")

    return (
        fetch_objects,
        fetch_records,
        fetch_lists,
        fetch_list_entries,
        fetch_all_list_entries,
    )
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
from ingestr.src.http_client import create_client
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class AttioClient:
    """Small HTTP client for the Attio v2 API with offset-based pagination."""

    def __init__(self, api_key: str):
        """Prepare the base URL, auth headers and HTTP session.

        Args:
            api_key: Attio API key, sent as a ``Bearer`` token on every request.
        """
        self.base_url = "https://api.attio.com/v2"
        self.headers = {
            "Accept": "application/json",
            "Authorization": f"Bearer {api_key}",
        }
        self.client = create_client()

    def fetch_data(self, path: str, method: str, limit: int = 1000, params=None):
        """Yield every item found under ``path``, page by page.

        Args:
            path: Endpoint path relative to ``base_url`` (no leading slash).
            method: ``"get"`` issues a GET; any other value issues a POST.
            limit: Page size requested from the API.
            params: Extra request parameters merged into every page request.
                ``limit`` and ``offset`` are always overridden.

        Yields:
            Each element of the response's ``data`` list after it has been
            passed through ``flatten_item``.

        Raises:
            Exception: If the response status is not 200 or the payload has
                no ``data`` key.
        """
        url = f"{self.base_url}/{path}"
        extra = {} if params is None else params
        offset = 0
        while True:
            page_args = {**extra, "limit": limit, "offset": offset}
            if method == "get":
                response = self.client.get(
                    url, headers=self.headers, params=page_args
                )
            else:
                # The POST ``.../query`` endpoints take limit/offset from the
                # JSON request body. Sent as URL query parameters they are
                # not applied, the server falls back to its default page
                # size, and the short page below ends the loop early, which
                # silently truncates the result set.
                response = self.client.post(
                    url, headers=self.headers, json=page_args
                )

            if response.status_code != 200:
                raise Exception(f"HTTP {response.status_code} error: {response.text}")

            response_data = response.json()
            if "data" not in response_data:
                # Carry the payload in the error instead of printing it.
                raise Exception(
                    "Attio API returned a response without the expected data: "
                    f"{response_data}"
                )

            data = response_data["data"]
            for item in data:
                yield flatten_item(item)

            # A page shorter than the requested size is the last one.
            if len(data) < limit:
                break
            offset += limit
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def flatten_item(item: dict) -> dict:
    """Copy the entries of ``item["id"]`` onto ``item`` itself.

    The dict is modified in place and is also returned. An item without an
    ``"id"`` key is returned unchanged. Existing top-level keys with the
    same name as an id field are overwritten.
    """
    if "id" not in item:
        return item
    item.update(item["id"].items())
    return item
|
ingestr/src/buildinfo.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
version = "v0.13.42"
|
|
1
|
+
version = "v0.13.44"
|
ingestr/src/destinations.py
CHANGED
|
@@ -413,6 +413,10 @@ class S3Destination:
|
|
|
413
413
|
raise MissingValueError("secret_access_key", "S3")
|
|
414
414
|
|
|
415
415
|
endpoint_url = params.get("endpoint_url", [None])[0]
|
|
416
|
+
if endpoint_url is not None:
|
|
417
|
+
parsed_endpoint = urlparse(endpoint_url)
|
|
418
|
+
if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
|
|
419
|
+
raise ValueError("Invalid endpoint_url. Must be a valid URL.")
|
|
416
420
|
|
|
417
421
|
creds = AwsCredentials(
|
|
418
422
|
aws_access_key_id=access_key_id,
|
|
@@ -420,8 +424,21 @@ class S3Destination:
|
|
|
420
424
|
endpoint_url=endpoint_url,
|
|
421
425
|
)
|
|
422
426
|
|
|
423
|
-
dest_table =
|
|
427
|
+
dest_table = kwargs["dest_table"]
|
|
428
|
+
|
|
429
|
+
# only validate if dest_table is not a full URI
|
|
430
|
+
if not parsed_uri.netloc:
|
|
431
|
+
dest_table = self.validate_table(dest_table)
|
|
432
|
+
|
|
424
433
|
table_parts = dest_table.split("/")
|
|
434
|
+
|
|
435
|
+
if parsed_uri.path.strip("/"):
|
|
436
|
+
path_parts = parsed_uri.path.strip("/ ").split("/")
|
|
437
|
+
table_parts = path_parts + table_parts
|
|
438
|
+
|
|
439
|
+
if parsed_uri.netloc:
|
|
440
|
+
table_parts.insert(0, parsed_uri.netloc.strip())
|
|
441
|
+
|
|
425
442
|
base_path = "/".join(table_parts[:-1])
|
|
426
443
|
|
|
427
444
|
opts = {
|
|
@@ -444,10 +461,9 @@ class S3Destination:
|
|
|
444
461
|
return table
|
|
445
462
|
|
|
446
463
|
def dlt_run_params(self, uri: str, table: str, **kwargs):
|
|
447
|
-
table = self.validate_table(table)
|
|
448
464
|
table_parts = table.split("/")
|
|
449
465
|
return {
|
|
450
|
-
"table_name": table_parts[-1],
|
|
466
|
+
"table_name": table_parts[-1].strip(),
|
|
451
467
|
}
|
|
452
468
|
|
|
453
469
|
def post_load(self) -> None:
|
ingestr/src/factory.py
CHANGED
|
@@ -26,6 +26,7 @@ from ingestr.src.sources import (
|
|
|
26
26
|
AppsflyerSource,
|
|
27
27
|
ArrowMemoryMappedSource,
|
|
28
28
|
AsanaSource,
|
|
29
|
+
AttioSource,
|
|
29
30
|
ChessSource,
|
|
30
31
|
DynamoDBSource,
|
|
31
32
|
ElasticsearchSource,
|
|
@@ -157,6 +158,7 @@ class SourceDestinationFactory:
|
|
|
157
158
|
"freshdesk": FreshdeskSource,
|
|
158
159
|
"phantombuster": PhantombusterSource,
|
|
159
160
|
"elasticsearch": ElasticsearchSource,
|
|
161
|
+
"attio": AttioSource,
|
|
160
162
|
}
|
|
161
163
|
destinations: Dict[str, Type[DestinationProtocol]] = {
|
|
162
164
|
"bigquery": BigQueryDestination,
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from dlt.sources.helpers.requests import Client
|
|
3
|
+
|
|
4
|
+
def create_client() -> requests.Session:
    """Return the session of a freshly configured dlt ``Client``.

    The client is created with ``raise_for_status=False``, uses
    ``retry_on_limit`` as its retry condition and allows up to 12 attempts
    per request (``request_max_attempts=12``).
    """
    client = Client(
        raise_for_status=False,
        retry_condition=retry_on_limit,
        request_max_attempts=12,
    )
    return client.session
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def retry_on_limit(
    response: requests.Response | None, exception: BaseException | None
) -> bool:
    """Tell the HTTP client whether a request should be retried.

    Only a response whose status code is 502 asks for a retry. When there
    is no response at all the answer is ``False``. ``exception`` is accepted
    but never inspected.
    """
    return response is not None and response.status_code == 502
|
ingestr/src/sources.py
CHANGED
|
@@ -21,6 +21,7 @@ from urllib.parse import ParseResult, parse_qs, quote, urlencode, urlparse
|
|
|
21
21
|
import pendulum
|
|
22
22
|
from dlt.common.time import ensure_pendulum_datetime
|
|
23
23
|
from dlt.extract import Incremental
|
|
24
|
+
from dlt.extract.exceptions import ResourcesNotFoundError
|
|
24
25
|
from dlt.sources import incremental as dlt_incremental
|
|
25
26
|
from dlt.sources.credentials import (
|
|
26
27
|
ConnectionStringCredentials,
|
|
@@ -2392,3 +2393,29 @@ class ElasticsearchSource:
|
|
|
2392
2393
|
verify_certs=verify_certs,
|
|
2393
2394
|
incremental=incremental,
|
|
2394
2395
|
).with_resources(table)
|
|
2396
|
+
|
|
2397
|
+
|
|
2398
|
+
class AttioSource:
    """Source definition that maps an Attio URI and table spec to a dlt source."""

    def handles_incrementality(self) -> bool:
        """Always return ``True``."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Create the Attio dlt source restricted to the requested resource.

        Args:
            uri: Source URI; its query string must carry ``api_key``.
            table: ``name`` or ``name:arg[:arg...]``. Spaces are removed
                before splitting on ``:``.
            **kwargs: Accepted but not used here.

        Raises:
            MissingValueError: If the URI has no ``api_key`` query parameter.
            UnsupportedResourceError: If the source has no resource with the
                requested name.
        """
        api_key_values = parse_qs(urlparse(uri).query).get("api_key")
        if api_key_values is None:
            raise MissingValueError("api_key", "Attio")

        # First segment names the resource, the rest are its arguments.
        resource_name, *resource_args = table.replace(" ", "").split(":")

        from ingestr.src.attio import attio_source

        try:
            source = attio_source(api_key=api_key_values[0], params=resource_args)
            return source.with_resources(resource_name)
        except ResourcesNotFoundError:
            raise UnsupportedResourceError(resource_name, "Attio")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.13.42
|
|
3
|
+
Version: 0.13.44
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -2,15 +2,16 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
|
|
|
2
2
|
ingestr/main.py,sha256=Pe_rzwcDRKIYa7baEVUAAPOHyqQbX29RUexMl0F_S1k,25273
|
|
3
3
|
ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
|
|
4
4
|
ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
|
|
5
|
-
ingestr/src/buildinfo.py,sha256=
|
|
6
|
-
ingestr/src/destinations.py,sha256=
|
|
5
|
+
ingestr/src/buildinfo.py,sha256=OPkL7VFHFQshQEKAR1PO1Cv4UtyYa4D2hv4hxj0DVK8,21
|
|
6
|
+
ingestr/src/destinations.py,sha256=fC6W_1lqNDVKRimUYDhAnEy4LP3yk8OfAHVViRbGW94,16015
|
|
7
7
|
ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
|
|
8
|
-
ingestr/src/factory.py,sha256=
|
|
8
|
+
ingestr/src/factory.py,sha256=c5WfqmRrXFj1PddnBOzTzzZUHJ-Fb42cvCvsBEqn6Yo,5682
|
|
9
9
|
ingestr/src/filters.py,sha256=C-_TIVkF_cxZBgG-Run2Oyn0TAhJgA8IWXZ-OPY3uek,1136
|
|
10
|
+
ingestr/src/http_client.py,sha256=dbAAf6Ptxf9pSn9RmNSHL4HEn5xhFimWjE2JZ67J00o,434
|
|
10
11
|
ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
|
|
11
12
|
ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
|
|
12
13
|
ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
|
|
13
|
-
ingestr/src/sources.py,sha256=
|
|
14
|
+
ingestr/src/sources.py,sha256=HZQGWPuDdf4sVq91KuaJ9p7MtSfjWue8d2vknZpIwg8,83456
|
|
14
15
|
ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
|
|
15
16
|
ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
|
|
16
17
|
ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
|
|
@@ -30,6 +31,8 @@ ingestr/src/arrow/__init__.py,sha256=8fEntgHseKjFMiPQIzxYzw_raicNsEgnveLi1IzBca0
|
|
|
30
31
|
ingestr/src/asana_source/__init__.py,sha256=QwQTCb5PXts8I4wLHG9UfRP-5ChfjSe88XAVfxMV5Ag,8183
|
|
31
32
|
ingestr/src/asana_source/helpers.py,sha256=PukcdDQWIGqnGxuuobbLw4hUy4-t6gxXg_XywR7Lg9M,375
|
|
32
33
|
ingestr/src/asana_source/settings.py,sha256=-2tpdkwh04RvLKFvwQodnFLYn9MaxOO1hsebGnDQMTU,2829
|
|
34
|
+
ingestr/src/attio/__init__.py,sha256=Dvr9rSZUlw6HGsqF7iabUit-xRC17Nv6QcmA4cRF2wA,2864
|
|
35
|
+
ingestr/src/attio/helpers.py,sha256=QvB-0BV_Z-cvMTBZDwOCuhxY1cB5PraPdrDkNyQ5TsM,1715
|
|
33
36
|
ingestr/src/chess/__init__.py,sha256=y0Q8aKBigeKf3N7wuB_gadMQjVJzBPUT8Jhp1ObEWjk,6812
|
|
34
37
|
ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
|
|
35
38
|
ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
|
|
@@ -128,8 +131,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
|
|
|
128
131
|
ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
|
|
129
132
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
130
133
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
131
|
-
ingestr-0.13.
|
|
132
|
-
ingestr-0.13.
|
|
133
|
-
ingestr-0.13.
|
|
134
|
-
ingestr-0.13.
|
|
135
|
-
ingestr-0.13.
|
|
134
|
+
ingestr-0.13.44.dist-info/METADATA,sha256=oIJqHNawuqTK_coB0V7N0SozLIaC8pSc36AulUDGq3Q,13852
|
|
135
|
+
ingestr-0.13.44.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
136
|
+
ingestr-0.13.44.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
137
|
+
ingestr-0.13.44.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
138
|
+
ingestr-0.13.44.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|