ingestr 0.6.6__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. (The original page linked to more details here; the link target was not preserved in this text export.)

@@ -1,39 +1,73 @@
1
- from typing import TYPE_CHECKING, Any, Optional, Sequence, Type
1
+ from typing import (
2
+ TYPE_CHECKING,
3
+ Any,
4
+ Callable,
5
+ List,
6
+ Literal,
7
+ Optional,
8
+ Type,
9
+ Union,
10
+ )
2
11
 
3
12
  from dlt.common import logger
4
- from dlt.common.configuration import with_config
5
- from dlt.common.destination import DestinationCapabilitiesContext
6
13
  from dlt.common.schema.typing import TColumnSchema, TTableSchemaColumns
7
14
  from sqlalchemy import Column, Table
8
15
  from sqlalchemy.engine import Row
9
16
  from sqlalchemy.sql import Select, sqltypes
17
+ from sqlalchemy.sql.sqltypes import TypeEngine
10
18
  from typing_extensions import TypeAlias
11
19
 
20
+ ReflectionLevel = Literal["minimal", "full", "full_with_precision"]
21
+
22
+
12
23
  # optionally create generics with any so they can be imported by dlt importer
13
24
  if TYPE_CHECKING:
14
25
  SelectAny: TypeAlias = Select[Any]
15
26
  ColumnAny: TypeAlias = Column[Any]
16
27
  RowAny: TypeAlias = Row[Any]
28
+ TypeEngineAny = TypeEngine[Any]
17
29
  else:
18
30
  SelectAny: TypeAlias = Type[Any]
19
31
  ColumnAny: TypeAlias = Type[Any]
20
32
  RowAny: TypeAlias = Type[Any]
33
+ TypeEngineAny = Type[Any]
34
+
35
+
36
+ TTypeAdapter = Callable[
37
+ [TypeEngineAny], Optional[Union[TypeEngineAny, Type[TypeEngineAny]]]
38
+ ]
21
39
 
22
40
 
23
41
  def sqla_col_to_column_schema(
24
- sql_col: ColumnAny, add_precision: bool = False
42
+ sql_col: ColumnAny,
43
+ reflection_level: ReflectionLevel,
44
+ type_adapter_callback: Optional[TTypeAdapter] = None,
25
45
  ) -> Optional[TColumnSchema]:
26
46
  """Infer dlt schema column type from an sqlalchemy type.
27
47
 
28
48
  If `add_precision` is set, precision and scale is inferred from that types that support it,
29
49
  such as numeric, varchar, int, bigint. Numeric (decimal) types have always precision added.
30
50
  """
31
- sql_t = sql_col.type
32
51
  col: TColumnSchema = {
33
52
  "name": sql_col.name,
34
- "data_type": None, # set that later
35
53
  "nullable": sql_col.nullable,
36
54
  }
55
+ if reflection_level == "minimal":
56
+ return col
57
+
58
+ sql_t = sql_col.type
59
+
60
+ if type_adapter_callback:
61
+ sql_t = type_adapter_callback(sql_t) # type: ignore[assignment]
62
+ # Check if sqla type class rather than instance is returned
63
+ if sql_t is not None and isinstance(sql_t, type):
64
+ sql_t = sql_t()
65
+
66
+ if sql_t is None:
67
+ # Column ignored by callback
68
+ return col
69
+
70
+ add_precision = reflection_level == "full_with_precision"
37
71
 
38
72
  if isinstance(sql_t, sqltypes.SmallInteger):
39
73
  col["data_type"] = "bigint"
@@ -77,86 +111,29 @@ def sqla_col_to_column_schema(
77
111
  col["data_type"] = "bool"
78
112
  else:
79
113
  logger.warning(
80
- f"A column with name {sql_col.name} contains unknown data type {sql_t} which cannot be mapped to `dlt` data type. When using sqlalchemy backend such data will be passed to the normalizer. In case of `pyarrow` backend such data will be ignored. In case of other backends, the behavior is backend-specific."
114
+ f"A column with name {sql_col.name} contains unknown data type {sql_t} which cannot be mapped to `dlt` data type. When using sqlalchemy backend such data will be passed to the normalizer. In case of `pyarrow` and `pandas` backend, data types are detected from numpy ndarrays. In case of other backends, the behavior is backend-specific."
81
115
  )
82
- col = None
83
- if col:
84
- return {key: value for key, value in col.items() if value is not None} # type: ignore[return-value]
85
- return None
86
116
 
117
+ return {key: value for key, value in col.items() if value is not None} # type: ignore[return-value]
87
118
 
88
- def table_to_columns(table: Table, add_precision: bool = False) -> TTableSchemaColumns:
89
- """Convert an sqlalchemy table to a dlt table schema.
90
119
 
91
- Adds precision to columns when `add_precision` is set.
92
- """
93
- return {
94
- col["name"]: col
95
- for col in (sqla_col_to_column_schema(c, add_precision) for c in table.columns)
96
- if col is not None
97
- }
120
+ def get_primary_key(table: Table) -> Optional[List[str]]:
121
+ """Create primary key or return None if no key defined"""
122
+ primary_key = [c.name for c in table.primary_key]
123
+ return primary_key if len(primary_key) > 0 else None
98
124
 
99
125
 
100
- @with_config
101
- def columns_to_arrow(
102
- columns_schema: TTableSchemaColumns,
103
- caps: DestinationCapabilitiesContext = None,
104
- tz: str = "UTC",
105
- ) -> Any:
106
- """Converts `column_schema` to arrow schema using `caps` and `tz`. `caps` are injected from the container - which
107
- is always the case if run within the pipeline. This will generate arrow schema compatible with the destination.
108
- Otherwise generic capabilities are used
109
- """
110
- from dlt.common.destination.capabilities import DestinationCapabilitiesContext
111
- from dlt.common.libs.pyarrow import get_py_arrow_datatype
112
- from dlt.common.libs.pyarrow import pyarrow as pa
113
-
114
- return pa.schema(
115
- [
116
- pa.field(
117
- name,
118
- get_py_arrow_datatype(
119
- schema_item,
120
- caps or DestinationCapabilitiesContext.generic_capabilities(),
121
- tz,
122
- ),
123
- nullable=schema_item.get("nullable", True),
124
- )
125
- for name, schema_item in columns_schema.items()
126
- ]
127
- )
128
-
129
-
130
- def row_tuples_to_arrow(
131
- rows: Sequence[RowAny], columns: TTableSchemaColumns, tz: str
132
- ) -> Any:
133
- import numpy as np
134
- from dlt.common.libs.pyarrow import pyarrow as pa
135
-
136
- arrow_schema = columns_to_arrow(columns, tz=tz)
137
-
138
- try:
139
- from pandas._libs import lib
140
-
141
- pivoted_rows = lib.to_object_array_tuples(rows).T # type: ignore[attr-defined]
142
- except ImportError:
143
- logger.info(
144
- "Pandas not installed, reverting to numpy.asarray to create a table which is slower"
126
+ def table_to_columns(
127
+ table: Table,
128
+ reflection_level: ReflectionLevel = "full",
129
+ type_conversion_fallback: Optional[TTypeAdapter] = None,
130
+ ) -> TTableSchemaColumns:
131
+ """Convert an sqlalchemy table to a dlt table schema."""
132
+ return {
133
+ col["name"]: col
134
+ for col in (
135
+ sqla_col_to_column_schema(c, reflection_level, type_conversion_fallback)
136
+ for c in table.columns
145
137
  )
146
- pivoted_rows = np.asarray(rows, dtype="object", order="k").T # type: ignore[call-overload]
147
-
148
- columnar = {
149
- col: dat.ravel()
150
- for col, dat in zip(columns, np.vsplit(pivoted_rows, len(columns)))
138
+ if col is not None
151
139
  }
152
- for idx in range(0, len(arrow_schema.names)):
153
- field = arrow_schema.field(idx)
154
- py_type = type(rows[0][idx])
155
- # cast double / float ndarrays to decimals if type mismatch, looks like decimals and floats are often mixed up in dialects
156
- if pa.types.is_decimal(field.type) and issubclass(py_type, (str, float)):
157
- logger.warning(
158
- f"Field {field.name} was reflected as decimal type, but rows contains {py_type.__name__}. Additional cast is required which may slow down arrow table generation."
159
- )
160
- float_array = pa.array(columnar[field.name], type=pa.float64())
161
- columnar[field.name] = float_array.cast(field.type, safe=False)
162
- return pa.Table.from_pydict(columnar, schema=arrow_schema)
ingestr/src/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.6.6"
1
+ __version__ = "0.7.0"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: ingestr
3
- Version: 0.6.6
3
+ Version: 0.7.0
4
4
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
5
5
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
6
6
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -16,7 +16,7 @@ Classifier: Topic :: Database
16
16
  Requires-Python: >=3.9
17
17
  Requires-Dist: cx-oracle==8.3.0
18
18
  Requires-Dist: databricks-sql-connector==2.9.3
19
- Requires-Dist: dlt==0.4.12
19
+ Requires-Dist: dlt==0.5.1
20
20
  Requires-Dist: duckdb-engine==0.11.5
21
21
  Requires-Dist: duckdb==0.10.2
22
22
  Requires-Dist: google-api-python-client==2.130.0
@@ -1,13 +1,9 @@
1
- ingestr/main.py,sha256=PvZtqrlHO3aSFtdhIdLAhzsOLG1hP_ql24vof76SFaI,14862
2
- ingestr/main_test.py,sha256=MDV2Eo86W_CcxGgEkYYoBc6xIXjVMER4hMhgAdxXYMc,28464
1
+ ingestr/main.py,sha256=JYgh3rZSO9n_Ko2D_9BLaF_cGQHrLSywK1WH6XioefQ,15961
3
2
  ingestr/src/destinations.py,sha256=2SfPMjtTelPmzQmc3zNs8xGcKIPuGn_hoZFIBUuhjXI,6338
4
- ingestr/src/destinations_test.py,sha256=rgEk8EpAntFbSOwXovC4prv3RA22mwq8pIO6sZ_rYzg,4212
5
- ingestr/src/factory.py,sha256=w6xb8scWa_uG4otoMjqvqcx99hofrN2YcLzaB7QSgYo,3545
6
- ingestr/src/factory_test.py,sha256=X9sFkvNByWChIcyeDt1QiIPMIzGNKb7M5A_GUE0-nnI,664
7
- ingestr/src/sources.py,sha256=sLT8CDiLz6aTekm6QZCXatZAdTyzCabqjJesnEe88GU,10061
8
- ingestr/src/sources_test.py,sha256=wZopz4tFtFnOiCEO8pZW816Nj86pLlR5fEqfq0TtWMQ,3629
3
+ ingestr/src/factory.py,sha256=XuT_8LvWd7gBxOjoD_NiG-jtPvHNQ9nqOeoCJzhRb6Y,3630
4
+ ingestr/src/sources.py,sha256=QbSvECvGbHJKOpE9_dbq11343pA5ajsS9BPPPab1ivw,10007
9
5
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
10
- ingestr/src/version.py,sha256=I3h5MyD10PkOUQEBnR6L9ja7s4WeTEg8rRjRKTCWYWQ,22
6
+ ingestr/src/version.py,sha256=RaANGbRu5e-vehwXI1-Qe2ggPPfs1TQaZj072JdbLk4,22
11
7
  ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
12
8
  ingestr/src/google_sheets/__init__.py,sha256=5qlX-6ilx5MW7klC7B_0jGSxloQSLkSESTh4nlY3Aos,6643
13
9
  ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
@@ -15,7 +11,6 @@ ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9
15
11
  ingestr/src/google_sheets/helpers/data_processing.py,sha256=WYO6z4XjGcG0Hat2J2enb-eLX5mSNVb2vaqRE83FBWU,11000
16
12
  ingestr/src/gorgias/__init__.py,sha256=BzX9X1Yc_1Mch6NP1pn26hjRIiaadErgHxkdJHw4P3o,21227
17
13
  ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
18
- ingestr/src/gorgias/helpers_test.py,sha256=kSR2nhB8U8HZ8pgDnd7HvXlzojmBnpOm8fTKHJvvKGY,1580
19
14
  ingestr/src/mongodb/__init__.py,sha256=E7SDeCyYNkYZZ_RFhjCRDZUGpKtaxpPG5sFSmKJV62U,4336
20
15
  ingestr/src/mongodb/helpers.py,sha256=80vtAeNyUn1iMN0CeLrTlKqYN6I6fHF81Kd2UuE8Kns,5653
21
16
  ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
@@ -27,10 +22,11 @@ ingestr/src/shopify/__init__.py,sha256=EWjpvZz7K6Pms7uUoqqkM4Wj0XeE2NrDvVp4BNM8d
27
22
  ingestr/src/shopify/exceptions.py,sha256=BhV3lIVWeBt8Eh4CWGW_REFJpGCzvW6-62yZrBWa3nQ,50
28
23
  ingestr/src/shopify/helpers.py,sha256=OO_Tw-HwVLnRhwT3vqUWEQEEcWIS9KWE6VDDe8BCC2w,4972
29
24
  ingestr/src/shopify/settings.py,sha256=StY0EPr7wFJ7KzRRDN4TKxV0_gkIS1wPj2eR4AYSsDk,141
30
- ingestr/src/sql_database/__init__.py,sha256=S5MVJr8juPSs61C2D7pInsTwNEHetChK6RjjhPAD0Lg,8845
31
- ingestr/src/sql_database/helpers.py,sha256=tbn-GjjBIVu3hVVh5vrUSiZqQ32_Tp0oBP5Fvv_wY4E,8986
25
+ ingestr/src/sql_database/__init__.py,sha256=HEqY6U-YzzbeZ8avIthj-Fatm2C3i3jqYs5DAIAu4Ss,11511
26
+ ingestr/src/sql_database/arrow_helpers.py,sha256=yze1X3A9nUQA4HeuFDDWrfJVkCq8Uo5UyDo_zhJtI60,5699
27
+ ingestr/src/sql_database/helpers.py,sha256=6o8e2_8MIuj3qlo40a2E6ns3gyK18ei1jCePONrMUjI,10191
32
28
  ingestr/src/sql_database/override.py,sha256=xbKGDztCzvrhJ5kJTXERal3LA56bEeVug4_rrTs8DgA,333
33
- ingestr/src/sql_database/schema_types.py,sha256=foGHh4iGagGLfS7nF3uGYhBjqgX0jlrjj0XYE1T3nSs,6592
29
+ ingestr/src/sql_database/schema_types.py,sha256=qXTanvFPE8wMCSDzQWPDi5yqaO-llfrFXjiGJALI4NA,5013
34
30
  ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
35
31
  ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
36
32
  ingestr/testdata/.gitignore,sha256=DFzYYOpqdTiT7S1HjCT-jffZSmEvFZge295_upAB0FY,13
@@ -41,8 +37,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
41
37
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
42
38
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
43
39
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
44
- ingestr-0.6.6.dist-info/METADATA,sha256=o9eKbLqKlQ-TkYoaLHFLeQAjuraP9UZhM1jsCL2v7Gg,5830
45
- ingestr-0.6.6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
46
- ingestr-0.6.6.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
47
- ingestr-0.6.6.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
48
- ingestr-0.6.6.dist-info/RECORD,,
40
+ ingestr-0.7.0.dist-info/METADATA,sha256=MmQ_futv2ZZbVg4hdbCErSAgs2AobplIUr4vMErTXEI,5829
41
+ ingestr-0.7.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
42
+ ingestr-0.7.0.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
43
+ ingestr-0.7.0.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
44
+ ingestr-0.7.0.dist-info/RECORD,,