ingestr 0.13.42__py3-none-any.whl → 0.13.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

@@ -0,0 +1,100 @@
1
+ from typing import Iterable, Iterator
2
+
3
+ import dlt
4
+ from dlt.sources import DltResource
5
+
6
+ from .helpers import AttioClient
7
+
8
+
9
@dlt.source(max_table_nesting=0)
def attio_source(
    api_key: str,
    params: list[str],
) -> Iterable[DltResource]:
    """Build the dlt source that exposes Attio data as five resources.

    Args:
        api_key: Attio API key; handed to ``AttioClient``.
        params: Extra segments of the requested table spec (the parts after
            the table name). Each resource validates the count it expects
            when it is iterated, not when the source is constructed.

    Returns:
        The resources ``objects``, ``records``, ``lists``, ``list_entries``
        and ``all_list_entries``, in that order. All use the ``replace``
        write disposition and hint ``created_at`` as a partitioned timestamp.
    """
    client = AttioClient(api_key)

    def _created_at_hint() -> dict:
        # A fresh dict per resource, so no two resources share a hints object.
        return {"created_at": {"data_type": "timestamp", "partition": True}}

    @dlt.resource(
        name="objects",
        write_disposition="replace",
        columns=_created_at_hint(),
    )
    def fetch_objects() -> Iterator[dict]:
        """Yield the item generator for every Attio object definition."""
        if len(params) > 0:
            raise ValueError("Objects table must be in the format `objects`")

        yield client.fetch_data("objects", "get")

    @dlt.resource(
        name="records",
        write_disposition="replace",
        columns=_created_at_hint(),
    )
    def fetch_records() -> Iterator[dict]:
        """Yield the item generator for the records of one object."""
        if len(params) != 1:
            raise ValueError(
                "Records table must be in the format `records:{object_api_slug}`"
            )

        object_slug = params[0]
        yield client.fetch_data(f"objects/{object_slug}/records/query", "post")

    @dlt.resource(
        name="lists",
        write_disposition="replace",
        columns=_created_at_hint(),
    )
    def fetch_lists() -> Iterator[dict]:
        """Yield the item generator for every list.

        NOTE(review): unlike the sibling resources, ``params`` is not
        validated here -- confirm whether that is intentional.
        """
        yield client.fetch_data("lists", "get")

    @dlt.resource(
        name="list_entries",
        write_disposition="replace",
        columns=_created_at_hint(),
    )
    def fetch_list_entries() -> Iterator[dict]:
        """Yield the item generator for the entries of one list."""
        if len(params) != 1:
            raise ValueError(
                "List entries table must be in the format `list_entries:{list_id}`"
            )

        list_id = params[0]
        yield client.fetch_data(f"lists/{list_id}/entries/query", "post")

    @dlt.resource(
        name="all_list_entries",
        write_disposition="replace",
        columns=_created_at_hint(),
    )
    def fetch_all_list_entries() -> Iterator[dict]:
        """Yield entries of every list whose ``parent_object`` contains the slug."""
        if len(params) != 1:
            raise ValueError(
                "All list entries table must be in the format `all_list_entries:{object_api_slug}`"
            )

        wanted_object = params[0]
        for attio_list in client.fetch_data("lists", "get"):
            # Skip lists that do not belong to the requested object.
            if wanted_object not in attio_list["parent_object"]:
                continue
            entries_path = f"lists/{attio_list['id']['list_id']}/entries/query"
            yield from client.fetch_data(entries_path, "post")

    return (
        fetch_objects,
        fetch_records,
        fetch_lists,
        fetch_list_entries,
        fetch_all_list_entries,
    )
@@ -0,0 +1,54 @@
1
+ from ingestr.src.http_client import create_client
2
+
3
+
4
class AttioClient:
    """Small HTTP client for the Attio v2 API with offset-based pagination."""

    def __init__(self, api_key: str):
        """Store the base URL and bearer-token headers and create the session.

        Args:
            api_key: Attio API key, sent as ``Authorization: Bearer <key>``.
        """
        self.base_url = "https://api.attio.com/v2"
        self.headers = {
            "Accept": "application/json",
            "Authorization": f"Bearer {api_key}",
        }
        self.client = create_client()

    def fetch_data(self, path: str, method: str, limit: int = 1000, params=None):
        """Yield every item returned by ``path``, following offset pagination.

        Args:
            path: Endpoint path relative to ``base_url``, without a leading
                slash.
            method: ``"get"`` issues a GET; any other value issues a POST.
            limit: Page size requested from the API; must be positive.
            params: Optional extra parameters, sent alongside ``limit`` and
                ``offset`` (query string for GET, JSON body for POST).

        Yields:
            Each element of the response's ``data`` array, passed through
            ``flatten_item``.

        Raises:
            ValueError: If ``limit`` is not positive.
            Exception: On a non-200 status, or when the response body has no
                ``data`` key.
        """
        if limit < 1:
            # With limit <= 0 the "short page" stop condition below can never
            # be met, so the loop would never terminate.
            raise ValueError("limit must be a positive integer")

        url = f"{self.base_url}/{path}"
        extra_args = {} if params is None else params
        offset = 0
        while True:
            page_args = {**extra_args, "limit": limit, "offset": offset}
            if method == "get":
                response = self.client.get(
                    url, headers=self.headers, params=page_args
                )
            else:
                # The Attio query endpoints document `limit`/`offset` as
                # request-body fields. Sent as URL query parameters they are
                # presumably ignored, which would cap results at one
                # default-sized page.
                response = self.client.post(
                    url, headers=self.headers, json=page_args
                )

            if response.status_code != 200:
                raise Exception(f"HTTP {response.status_code} error: {response.text}")

            payload = response.json()
            if "data" not in payload:
                # Carry the payload in the error rather than printing it.
                raise Exception(
                    "Attio API returned a response without the expected data: "
                    f"{payload}"
                )

            page = payload["data"]
            for item in page:
                yield flatten_item(item)

            # A short page means the collection is exhausted.
            if len(page) < limit:
                break
            offset += limit
+
49
+
50
def flatten_item(item: dict) -> dict:
    """Copy the entries of ``item["id"]`` onto ``item`` itself.

    The original ``id`` entry is kept. ``item`` is modified in place and also
    returned. Items without an ``id`` key, or whose ``id`` is not a mapping,
    are returned unchanged instead of raising ``AttributeError``.

    Args:
        item: One element of an API response's ``data`` array.

    Returns:
        The same ``item`` object, with the ``id`` sub-keys promoted to the
        top level.
    """
    identifiers = item.get("id")
    if isinstance(identifiers, dict):
        item.update(identifiers)
    return item
ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.13.42"
1
+ version = "v0.13.44"
@@ -413,6 +413,10 @@ class S3Destination:
413
413
  raise MissingValueError("secret_access_key", "S3")
414
414
 
415
415
  endpoint_url = params.get("endpoint_url", [None])[0]
416
+ if endpoint_url is not None:
417
+ parsed_endpoint = urlparse(endpoint_url)
418
+ if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
419
+ raise ValueError("Invalid endpoint_url. Must be a valid URL.")
416
420
 
417
421
  creds = AwsCredentials(
418
422
  aws_access_key_id=access_key_id,
@@ -420,8 +424,21 @@ class S3Destination:
420
424
  endpoint_url=endpoint_url,
421
425
  )
422
426
 
423
- dest_table = self.validate_table(kwargs["dest_table"])
427
+ dest_table = kwargs["dest_table"]
428
+
429
+ # only validate if dest_table is not a full URI
430
+ if not parsed_uri.netloc:
431
+ dest_table = self.validate_table(dest_table)
432
+
424
433
  table_parts = dest_table.split("/")
434
+
435
+ if parsed_uri.path.strip("/"):
436
+ path_parts = parsed_uri.path.strip("/ ").split("/")
437
+ table_parts = path_parts + table_parts
438
+
439
+ if parsed_uri.netloc:
440
+ table_parts.insert(0, parsed_uri.netloc.strip())
441
+
425
442
  base_path = "/".join(table_parts[:-1])
426
443
 
427
444
  opts = {
@@ -444,10 +461,9 @@ class S3Destination:
444
461
  return table
445
462
 
446
463
  def dlt_run_params(self, uri: str, table: str, **kwargs):
447
- table = self.validate_table(table)
448
464
  table_parts = table.split("/")
449
465
  return {
450
- "table_name": table_parts[-1],
466
+ "table_name": table_parts[-1].strip(),
451
467
  }
452
468
 
453
469
  def post_load(self) -> None:
ingestr/src/factory.py CHANGED
@@ -26,6 +26,7 @@ from ingestr.src.sources import (
26
26
  AppsflyerSource,
27
27
  ArrowMemoryMappedSource,
28
28
  AsanaSource,
29
+ AttioSource,
29
30
  ChessSource,
30
31
  DynamoDBSource,
31
32
  ElasticsearchSource,
@@ -157,6 +158,7 @@ class SourceDestinationFactory:
157
158
  "freshdesk": FreshdeskSource,
158
159
  "phantombuster": PhantombusterSource,
159
160
  "elasticsearch": ElasticsearchSource,
161
+ "attio": AttioSource,
160
162
  }
161
163
  destinations: Dict[str, Type[DestinationProtocol]] = {
162
164
  "bigquery": BigQueryDestination,
@@ -0,0 +1,17 @@
1
+ import requests
2
+ from dlt.sources.helpers.requests import Client
3
+
4
def create_client() -> requests.Session:
    """Return the session of a dlt requests ``Client`` configured for retries.

    The client is created with ``raise_for_status=False``, up to 12 request
    attempts, and ``retry_on_limit`` as the predicate deciding whether a
    response should be retried.
    """
    retrying_client = Client(
        raise_for_status=False,
        retry_condition=retry_on_limit,
        request_max_attempts=12,
    )
    return retrying_client.session
10
+
11
+
12
def retry_on_limit(
    response: "requests.Response | None", exception: "BaseException | None"
) -> bool:
    """Decide whether a request should be retried.

    Args:
        response: The HTTP response, or ``None`` when no response was
            received.
        exception: The exception raised by the request, if any; not consulted.

    Returns:
        ``True`` only when a response was received and its status code is
        502; ``False`` otherwise.
    """
    # The annotations above are quoted on purpose: an unquoted `X | None` is
    # evaluated when the function is defined and raises TypeError on Python
    # versions older than 3.10.
    # NOTE(review): despite the name, only HTTP 502 triggers a retry -- confirm
    # whether rate-limit responses (e.g. 429) should be included.
    if response is None:
        return False
    return response.status_code == 502
ingestr/src/sources.py CHANGED
@@ -21,6 +21,7 @@ from urllib.parse import ParseResult, parse_qs, quote, urlencode, urlparse
21
21
  import pendulum
22
22
  from dlt.common.time import ensure_pendulum_datetime
23
23
  from dlt.extract import Incremental
24
+ from dlt.extract.exceptions import ResourcesNotFoundError
24
25
  from dlt.sources import incremental as dlt_incremental
25
26
  from dlt.sources.credentials import (
26
27
  ConnectionStringCredentials,
@@ -2392,3 +2393,29 @@ class ElasticsearchSource:
2392
2393
  verify_certs=verify_certs,
2393
2394
  incremental=incremental,
2394
2395
  ).with_resources(table)
2396
+
2397
+
2398
class AttioSource:
    """Source adapter that turns an Attio URI and table spec into a dlt source."""

    def handles_incrementality(self) -> bool:
        """Report that this source handles incrementality itself."""
        return True

    def dlt_source(self, uri: str, table: str, **kwargs):
        """Create the Attio dlt source restricted to the requested resource.

        Args:
            uri: Source URI; its query string must carry ``api_key``.
            table: Table spec of the form ``name`` or ``name:param[:param...]``.
                Spaces are stripped before the spec is split on ``:``.
            **kwargs: Accepted but not used.

        Raises:
            MissingValueError: If the URI has no ``api_key`` query parameter.
            UnsupportedResourceError: If ``name`` is not a known resource.
        """
        api_keys = parse_qs(urlparse(uri).query).get("api_key")
        if api_keys is None:
            raise MissingValueError("api_key", "Attio")

        # First segment selects the resource; the rest are its parameters.
        table_name, *params = table.replace(" ", "").split(":")

        from ingestr.src.attio import attio_source

        try:
            source = attio_source(api_key=api_keys[0], params=params)
            return source.with_resources(table_name)
        except ResourcesNotFoundError:
            raise UnsupportedResourceError(table_name, "Attio")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.42
3
+ Version: 0.13.44
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -2,15 +2,16 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
2
2
  ingestr/main.py,sha256=Pe_rzwcDRKIYa7baEVUAAPOHyqQbX29RUexMl0F_S1k,25273
3
3
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
4
4
  ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
5
- ingestr/src/buildinfo.py,sha256=O7TKwcOrZv6D3qO1eCdzlVqCphHukEF6_O786P7Z1yo,21
6
- ingestr/src/destinations.py,sha256=MctbeJUyNr0DRB0XYt2xAbEKkHZ40-nXXEOYCs4KuoE,15420
5
+ ingestr/src/buildinfo.py,sha256=OPkL7VFHFQshQEKAR1PO1Cv4UtyYa4D2hv4hxj0DVK8,21
6
+ ingestr/src/destinations.py,sha256=fC6W_1lqNDVKRimUYDhAnEy4LP3yk8OfAHVViRbGW94,16015
7
7
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
8
- ingestr/src/factory.py,sha256=l9PpW4cVTvhOLXhb7jha5CdPY7YT_Uhj4Ac9RndB7fM,5635
8
+ ingestr/src/factory.py,sha256=c5WfqmRrXFj1PddnBOzTzzZUHJ-Fb42cvCvsBEqn6Yo,5682
9
9
  ingestr/src/filters.py,sha256=C-_TIVkF_cxZBgG-Run2Oyn0TAhJgA8IWXZ-OPY3uek,1136
10
+ ingestr/src/http_client.py,sha256=dbAAf6Ptxf9pSn9RmNSHL4HEn5xhFimWjE2JZ67J00o,434
10
11
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
11
12
  ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
12
13
  ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
13
- ingestr/src/sources.py,sha256=SWZAa6bokLurQRPtH7rxi8K-GSVLp_p9Ig1ArGRsxCo,82635
14
+ ingestr/src/sources.py,sha256=HZQGWPuDdf4sVq91KuaJ9p7MtSfjWue8d2vknZpIwg8,83456
14
15
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
15
16
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
16
17
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -30,6 +31,8 @@ ingestr/src/arrow/__init__.py,sha256=8fEntgHseKjFMiPQIzxYzw_raicNsEgnveLi1IzBca0
30
31
  ingestr/src/asana_source/__init__.py,sha256=QwQTCb5PXts8I4wLHG9UfRP-5ChfjSe88XAVfxMV5Ag,8183
31
32
  ingestr/src/asana_source/helpers.py,sha256=PukcdDQWIGqnGxuuobbLw4hUy4-t6gxXg_XywR7Lg9M,375
32
33
  ingestr/src/asana_source/settings.py,sha256=-2tpdkwh04RvLKFvwQodnFLYn9MaxOO1hsebGnDQMTU,2829
34
+ ingestr/src/attio/__init__.py,sha256=Dvr9rSZUlw6HGsqF7iabUit-xRC17Nv6QcmA4cRF2wA,2864
35
+ ingestr/src/attio/helpers.py,sha256=QvB-0BV_Z-cvMTBZDwOCuhxY1cB5PraPdrDkNyQ5TsM,1715
33
36
  ingestr/src/chess/__init__.py,sha256=y0Q8aKBigeKf3N7wuB_gadMQjVJzBPUT8Jhp1ObEWjk,6812
34
37
  ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,549
35
38
  ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
@@ -128,8 +131,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
128
131
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
129
132
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
130
133
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
131
- ingestr-0.13.42.dist-info/METADATA,sha256=Njczb9BZLigMPvPeGS7gzh3OiBR5yxRU47huQPGvW-I,13852
132
- ingestr-0.13.42.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
133
- ingestr-0.13.42.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
134
- ingestr-0.13.42.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
135
- ingestr-0.13.42.dist-info/RECORD,,
134
+ ingestr-0.13.44.dist-info/METADATA,sha256=oIJqHNawuqTK_coB0V7N0SozLIaC8pSc36AulUDGq3Q,13852
135
+ ingestr-0.13.44.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
136
+ ingestr-0.13.44.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
137
+ ingestr-0.13.44.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
138
+ ingestr-0.13.44.dist-info/RECORD,,