ingestr 0.9.5__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic. See the registry's advisory page for this release for more details.

@@ -1,139 +0,0 @@
1
- from typing import (
2
- TYPE_CHECKING,
3
- Any,
4
- Callable,
5
- List,
6
- Literal,
7
- Optional,
8
- Type,
9
- Union,
10
- )
11
-
12
- from dlt.common import logger
13
- from dlt.common.schema.typing import TColumnSchema, TTableSchemaColumns
14
- from sqlalchemy import Column, Table
15
- from sqlalchemy.engine import Row
16
- from sqlalchemy.sql import Select, sqltypes
17
- from sqlalchemy.sql.sqltypes import TypeEngine
18
- from typing_extensions import TypeAlias
19
-
20
- ReflectionLevel = Literal["minimal", "full", "full_with_precision"]
21
-
22
-
23
- # optionally create generics with any so they can be imported by dlt importer
24
- if TYPE_CHECKING:
25
- SelectAny: TypeAlias = Select[Any]
26
- ColumnAny: TypeAlias = Column[Any]
27
- RowAny: TypeAlias = Row[Any]
28
- TypeEngineAny = TypeEngine[Any]
29
- else:
30
- SelectAny: TypeAlias = Type[Any]
31
- ColumnAny: TypeAlias = Type[Any]
32
- RowAny: TypeAlias = Type[Any]
33
- TypeEngineAny = Type[Any]
34
-
35
-
36
- TTypeAdapter = Callable[
37
- [TypeEngineAny], Optional[Union[TypeEngineAny, Type[TypeEngineAny]]]
38
- ]
39
-
40
-
41
def sqla_col_to_column_schema(
    sql_col: ColumnAny,
    reflection_level: ReflectionLevel,
    type_adapter_callback: Optional[TTypeAdapter] = None,
) -> Optional[TColumnSchema]:
    """Build a dlt column schema from a reflected sqlalchemy column.

    With ``"minimal"`` reflection only name and nullability are emitted;
    ``"full"`` additionally maps the sqlalchemy type to a dlt data type, and
    ``"full_with_precision"`` also carries precision/scale for types that
    support them. Numeric (decimal) columns always get precision/scale when
    available, regardless of the precision setting.
    """
    column_schema: TColumnSchema = {
        "name": sql_col.name,
        "nullable": sql_col.nullable,
    }
    if reflection_level == "minimal":
        return column_schema

    sql_t = sql_col.type

    if type_adapter_callback:
        sql_t = type_adapter_callback(sql_t)  # type: ignore[assignment]
        # The callback may hand back a type class rather than an instance;
        # instantiate it so the isinstance checks below behave uniformly.
        if sql_t is not None and isinstance(sql_t, type):
            sql_t = sql_t()

    if sql_t is None:
        # The callback asked us to skip type mapping for this column.
        return column_schema

    with_precision = reflection_level == "full_with_precision"

    # NOTE: branch order matters — e.g. SmallInteger subclasses Integer, so
    # the more specific checks must come first.
    if isinstance(sql_t, sqltypes.SmallInteger):
        column_schema["data_type"] = "bigint"
        if with_precision:
            column_schema["precision"] = 32
    elif isinstance(sql_t, sqltypes.Integer):
        column_schema["data_type"] = "bigint"
    elif isinstance(sql_t, sqltypes.Numeric):
        # The dlt type follows what the dialect actually returns, not the
        # metadata reflected from the database: Numeric types returned as
        # floats map to "double", those returned as decimals map to "decimal".
        if sql_t.asdecimal is False:
            column_schema["data_type"] = "double"
        else:
            column_schema["data_type"] = "decimal"
            if sql_t.precision is not None:
                column_schema["precision"] = sql_t.precision
                # Scale is only meaningful together with a precision.
                if sql_t.scale is not None:
                    column_schema["scale"] = sql_t.scale
                elif sql_t.decimal_return_scale is not None:
                    column_schema["scale"] = sql_t.decimal_return_scale
    elif isinstance(sql_t, sqltypes.String):
        column_schema["data_type"] = "text"
        if with_precision and sql_t.length:
            column_schema["precision"] = sql_t.length
    elif isinstance(sql_t, sqltypes._Binary):
        column_schema["data_type"] = "binary"
        if with_precision and sql_t.length:
            column_schema["precision"] = sql_t.length
    elif isinstance(sql_t, sqltypes.DateTime):
        column_schema["data_type"] = "timestamp"
    elif isinstance(sql_t, sqltypes.Date):
        column_schema["data_type"] = "date"
    elif isinstance(sql_t, sqltypes.Time):
        column_schema["data_type"] = "time"
    elif isinstance(sql_t, sqltypes.JSON):
        column_schema["data_type"] = "complex"
    elif isinstance(sql_t, sqltypes.Boolean):
        column_schema["data_type"] = "bool"
    else:
        logger.warning(
            f"A column with name {sql_col.name} contains unknown data type {sql_t} which cannot be mapped to `dlt` data type. When using sqlalchemy backend such data will be passed to the normalizer. In case of `pyarrow` and `pandas` backend, data types are detected from numpy ndarrays. In case of other backends, the behavior is backend-specific."
        )

    # Drop keys whose value is None so the resulting schema stays sparse.
    return {key: value for key, value in column_schema.items() if value is not None}  # type: ignore[return-value]
118
-
119
-
120
def get_primary_key(table: Table) -> Optional[List[str]]:
    """Return the names of the table's primary-key columns, or ``None`` when
    the table defines no primary key."""
    key_columns = [key_col.name for key_col in table.primary_key]
    if not key_columns:
        return None
    return key_columns
124
-
125
-
126
def table_to_columns(
    table: Table,
    reflection_level: ReflectionLevel = "full",
    type_conversion_fallback: Optional[TTypeAdapter] = None,
) -> TTableSchemaColumns:
    """Convert an sqlalchemy table to a dlt table schema.

    Columns for which the type adapter callback returns nothing usable are
    still included (as name/nullability-only entries); only columns whose
    schema conversion yields ``None`` are dropped entirely.
    """
    columns: TTableSchemaColumns = {}
    for sql_column in table.columns:
        schema_col = sqla_col_to_column_schema(
            sql_column, reflection_level, type_conversion_fallback
        )
        if schema_col is not None:
            columns[schema_col["name"]] = schema_col
    return columns