ingestr 0.13.18__py3-none-any.whl → 0.13.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. Click here for more details.

ingestr/main.py CHANGED
@@ -8,6 +8,9 @@ from rich.console import Console
8
8
  from rich.status import Status
9
9
  from typing_extensions import Annotated
10
10
 
11
+ import ingestr.src.partition as partition
12
+ import ingestr.src.resource as resource
13
+ from ingestr.src.destinations import AthenaDestination
11
14
  from ingestr.src.filters import cast_set_to_list
12
15
  from ingestr.src.telemetry.event import track
13
16
 
@@ -357,14 +360,6 @@ def ingest(
357
360
  )
358
361
  raise typer.Abort()
359
362
 
360
- def run_on_resource(source, executable):
361
- if hasattr(source, "selected_resources") and source.selected_resources:
362
- resource_names = list(source.selected_resources.keys())
363
- for res in resource_names:
364
- executable(source.resources[res])
365
- else:
366
- executable(source)
367
-
368
363
  def parse_columns(columns: list[str]) -> dict[str, TDataType]:
369
364
  from typing import cast, get_args
370
365
 
@@ -553,20 +548,23 @@ def ingest(
553
548
  sql_exclude_columns=sql_exclude_columns,
554
549
  )
555
550
 
556
- run_on_resource(dlt_source, lambda x: x.add_map(cast_set_to_list))
551
+ resource.for_each(dlt_source, lambda x: x.add_map(cast_set_to_list))
557
552
 
558
553
  def col_h(x):
559
554
  if column_hints:
560
555
  x.apply_hints(columns=column_hints)
561
556
 
562
- run_on_resource(dlt_source, col_h)
557
+ resource.for_each(dlt_source, col_h)
558
+
559
+ if isinstance(destination, AthenaDestination) and partition_by:
560
+ partition.apply_athena_hints(dlt_source, partition_by, column_hints)
563
561
 
564
562
  if original_incremental_strategy == IncrementalStrategy.delete_insert:
565
563
 
566
564
  def set_primary_key(x):
567
565
  x.incremental.primary_key = ()
568
566
 
569
- run_on_resource(dlt_source, set_primary_key)
567
+ resource.for_each(dlt_source, set_primary_key)
570
568
 
571
569
  if (
572
570
  factory.destination_scheme in PARQUET_SUPPORTED_DESTINATIONS
ingestr/src/adjust/adjust_helpers.py CHANGED
@@ -82,7 +82,7 @@ class AdjustAPI:
82
82
  items = result.get("rows", [])
83
83
  yield items
84
84
  else:
85
- raise HTTPError(f"Request failed with status code: {response.status_code}")
85
+ raise HTTPError(f"Request failed with status code: {response.status_code}, {response.text}.")
86
86
 
87
87
  def fetch_events(self):
88
88
  headers = {"Authorization": f"Bearer {self.api_key}"}
@@ -93,7 +93,7 @@ class AdjustAPI:
93
93
  result = response.json()
94
94
  yield result
95
95
  else:
96
- raise HTTPError(f"Request failed with status code: {response.status_code}")
96
+ raise HTTPError(f"Request failed with status code: {response.status_code}, {response.text}.")
97
97
 
98
98
 
99
99
  def parse_filters(filters_raw: str) -> dict:
ingestr/src/blob.py CHANGED
@@ -1,6 +1,6 @@
1
1
  import warnings
2
2
  from typing import Tuple, TypeAlias
3
- from urllib.parse import ParseResult
3
+ from urllib.parse import ParseResult, urlparse
4
4
 
5
5
  BucketName: TypeAlias = str
6
6
  FileGlob: TypeAlias = str
@@ -14,13 +14,16 @@ def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
14
14
  Supports the following Forms:
15
15
  - uri: "gs://"
16
16
  table: "bucket-name/file-glob"
17
+ - uri: "gs://uri-bucket-name" (uri-bucket-name is preferred)
18
+ table: "gs://table-bucket-name/file-glob"
19
+ - uri: "gs://"
20
+ table: "gs://bucket-name/file-glob"
17
21
  - uri: gs://bucket-name/file-glob
18
22
  table: None
19
23
  - uri: "gs://bucket-name"
20
24
  table: "file-glob"
21
25
 
22
- The first form is the prefered method. Other forms are supported
23
- for backward compatibility, but discouraged.
26
+ The first form is the prefered method. Other forms are supported but discouraged.
24
27
  """
25
28
 
26
29
  table = table.strip()
@@ -34,15 +37,15 @@ def parse_uri(uri: ParseResult, table: str) -> Tuple[BucketName, FileGlob]:
34
37
  )
35
38
  return host, uri.path.lstrip("/")
36
39
 
40
+ table_uri = urlparse(table)
41
+
37
42
  if host != "":
38
- warnings.warn(
39
- f"Using the form '{uri.scheme}://bucket-name' is deprecated and will be removed in future versions.",
40
- DeprecationWarning,
41
- stacklevel=2,
42
- )
43
- return host, table.lstrip("/")
43
+ return host, table_uri.path.lstrip("/")
44
+
45
+ if table_uri.hostname:
46
+ return table_uri.hostname, table_uri.path.lstrip("/")
44
47
 
45
- parts = table.lstrip("/").split("/", maxsplit=1)
48
+ parts = table_uri.path.lstrip("/").split("/", maxsplit=1)
46
49
  if len(parts) != 2:
47
50
  return "", parts[0]
48
51
 
ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
1
- version = "v0.13.18"
1
+ version = "v0.13.19"
ingestr/src/kinesis/__init__.py CHANGED
@@ -16,6 +16,7 @@ from .helpers import get_shard_iterator, max_sequence_by_shard
16
16
  name=lambda args: args["stream_name"],
17
17
  primary_key="kinesis_msg_id",
18
18
  standalone=True,
19
+ max_table_nesting=0
19
20
  )
20
21
  def kinesis_stream(
21
22
  stream_name: str,
ingestr/src/partition.py ADDED
@@ -0,0 +1,31 @@
1
+ from typing import Dict
2
+
3
+ from dlt.common.schema.typing import TColumnSchema
4
+ from dlt.destinations.adapters import athena_adapter, athena_partition
5
+ from dlt.sources import DltResource, DltSource
6
+
7
+ import ingestr.src.resource as resource
8
+
9
+
10
+ def apply_athena_hints(
11
+ source: DltSource | DltResource,
12
+ partition_column: str,
13
+ additional_hints: Dict[str, TColumnSchema] = {},
14
+ ) -> None:
15
+ def _apply_partition_hint(resource: DltResource) -> None:
16
+
17
+ columns = resource.columns if resource.columns else {}
18
+
19
+ partition_hint = (
20
+ columns.get(partition_column) # type: ignore
21
+ or additional_hints.get(partition_column)
22
+ )
23
+
24
+ athena_adapter(
25
+ resource,
26
+ athena_partition.day(partition_column)
27
+ if partition_hint and partition_hint.get("data_type") in ("timestamp", "date")
28
+ else partition_column,
29
+ )
30
+
31
+ resource.for_each(source, _apply_partition_hint)
ingestr/src/resource.py ADDED
@@ -0,0 +1,17 @@
1
+ from typing import Callable
2
+
3
+ from dlt.sources import DltResource, DltSource
4
+
5
+
6
+ def for_each(
7
+ source: DltSource | DltResource, ex: Callable[[DltResource], None | DltResource]
8
+ ):
9
+ """
10
+ Apply a function to each resource in a source.
11
+ """
12
+ if hasattr(source, "selected_resources") and source.selected_resources:
13
+ resource_names = list(source.selected_resources.keys())
14
+ for res in resource_names:
15
+ ex(source.resources[res]) # type: ignore[union-attr]
16
+ else:
17
+ ex(source) # type: ignore[arg-type]
{ingestr-0.13.18.dist-info → ingestr-0.13.19.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ingestr
3
- Version: 0.13.18
3
+ Version: 0.13.19
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
{ingestr-0.13.18.dist-info → ingestr-0.13.19.dist-info}/RECORD CHANGED
@@ -1,18 +1,20 @@
1
- ingestr/main.py,sha256=ufn8AcM2ID80ChUApJzYDjnQaurMXOkYfTm6GzAggSQ,24746
1
+ ingestr/main.py,sha256=0KTNvWPaMrEVcHN6p8Vvffhui8e6OjlNY9UrsGbh36I,24715
2
2
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
3
- ingestr/src/blob.py,sha256=LtEZWoUhm5i2aKerdgEpLtNCf3fdhGGMM4td-LRZVbY,1407
4
- ingestr/src/buildinfo.py,sha256=Ph2-6uM0ocE2IFQ-YyOxltsHqhrIhOawp3Pr6vUGaWE,21
3
+ ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
4
+ ingestr/src/buildinfo.py,sha256=68wPE6UM1mnzvFqCjAYCZcJWG8TjiDQV8y4j0wLlD0U,21
5
5
  ingestr/src/destinations.py,sha256=vrGij4qMPCdXTMIimROWBJFqzOqCM4DFmgyubgSHejA,11279
6
6
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
7
7
  ingestr/src/factory.py,sha256=Si3xQuaqiwR_LMtxg2rA93MkDYpq_BnVWTfBsNVnFIA,5198
8
8
  ingestr/src/filters.py,sha256=0JQXeAr2APFMnW2sd-6BlAMWv93bXV17j8b5MM8sHmM,580
9
9
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
10
+ ingestr/src/partition.py,sha256=ZqcCTz6xrSurgJDSZYn-_gaBaEXj4peV7N1wYe3IiQk,953
11
+ ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
10
12
  ingestr/src/sources.py,sha256=y6zFGGbi5FvrdQ89e0t1ud3BWNN2kvrNu2iuXb9wu6g,70977
11
13
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
12
14
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
13
15
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
14
16
  ingestr/src/adjust/__init__.py,sha256=ULjtJqrNS6XDvUyGl0tjl12-tLyXlCgeFe2icTbtu3Q,3255
15
- ingestr/src/adjust/adjust_helpers.py,sha256=av97NPSn-hQtTbAC0vUSCAWYePmOiG5R-DGdMssm7FQ,3646
17
+ ingestr/src/adjust/adjust_helpers.py,sha256=8PbDTmO3dnPrVGBA1qCmkJamVZOLN52oE1gkSsfDeZM,3682
16
18
  ingestr/src/airtable/__init__.py,sha256=GHWYrjI2qhs_JihdNJysB0Ni3bzqT_MLXn_S9_Q5zRA,2775
17
19
  ingestr/src/applovin/__init__.py,sha256=X_YCLppPrnL8KXfYWICE_uDfMzHHH3JZ-DBGZ1RlaOI,6984
18
20
  ingestr/src/applovin_max/__init__.py,sha256=o0aL4jBZqwK528MVw9dS1G5EZbF4tx6_Ef0IfqkhAT0,3294
@@ -61,7 +63,7 @@ ingestr/src/hubspot/helpers.py,sha256=PTn-UHJv1ENIvA5azUTaHCmFXgmHLJC1tUatQ1N-KF
61
63
  ingestr/src/hubspot/settings.py,sha256=9P1OKiRL88kl_m8n1HhuG-Qpq9VGbqPLn5Q0QYneToU,2193
62
64
  ingestr/src/kafka/__init__.py,sha256=wMCXdiraeKd1Kssi9WcVCGZaNGm2tJEtnNyuB4aR5_k,3541
63
65
  ingestr/src/kafka/helpers.py,sha256=V9WcVn3PKnEpggArHda4vnAcaV8VDuh__dSmRviJb5Y,7502
64
- ingestr/src/kinesis/__init__.py,sha256=Bm0S9BvWDHZUhOc8WKTeawORRgldmJsb0Y3XNHpuJ-c,6205
66
+ ingestr/src/kinesis/__init__.py,sha256=6Sqg8VQp0yhz8xCm1LPvciOUT8SrD6u5Klmfu7p4Y8c,6229
65
67
  ingestr/src/kinesis/helpers.py,sha256=aF0GCDKSectaaW8XPdERY_6bUs0ky19dcBs24ZFn-o0,2473
66
68
  ingestr/src/klaviyo/_init_.py,sha256=ucWHqBe8DQvXVpbmxKFAV5ljpCFb4ps_2QTD0OSiWxY,7905
67
69
  ingestr/src/klaviyo/client.py,sha256=tPj79ia7AW0ZOJhzlKNPCliGbdojRNwUFp8HvB2ym5s,7434
@@ -110,8 +112,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
110
112
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
111
113
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
112
114
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
113
- ingestr-0.13.18.dist-info/METADATA,sha256=HYn4_5kTZd5RoQgkz06WF4-x12T8RVJtoQe6fyHQwYk,13569
114
- ingestr-0.13.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
115
- ingestr-0.13.18.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
116
- ingestr-0.13.18.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
117
- ingestr-0.13.18.dist-info/RECORD,,
115
+ ingestr-0.13.19.dist-info/METADATA,sha256=eA-R_U1t7kGuLAZoUOf3QX3PfYHlNAEy0YRGe9nZYfk,13569
116
+ ingestr-0.13.19.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
117
+ ingestr-0.13.19.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
118
+ ingestr-0.13.19.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
119
+ ingestr-0.13.19.dist-info/RECORD,,