ingestr 0.9.4__py3-none-any.whl → 0.10.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/main.py +156 -40
- ingestr/src/adjust/__init__.py +1 -1
- ingestr/src/filters.py +21 -0
- ingestr/src/gorgias/__init__.py +17 -17
- ingestr/src/shopify/__init__.py +42 -42
- ingestr/src/slack/__init__.py +2 -2
- ingestr/src/sources.py +34 -7
- ingestr/src/version.py +1 -1
- ingestr/src/zendesk/__init__.py +2 -2
- {ingestr-0.9.4.dist-info → ingestr-0.10.0.dist-info}/METADATA +18 -18
- {ingestr-0.9.4.dist-info → ingestr-0.10.0.dist-info}/RECORD +14 -18
- ingestr/src/sql_database/__init__.py +0 -206
- ingestr/src/sql_database/arrow_helpers.py +0 -139
- ingestr/src/sql_database/helpers.py +0 -282
- ingestr/src/sql_database/override.py +0 -10
- ingestr/src/sql_database/schema_types.py +0 -139
- {ingestr-0.9.4.dist-info → ingestr-0.10.0.dist-info}/WHEEL +0 -0
- {ingestr-0.9.4.dist-info → ingestr-0.10.0.dist-info}/entry_points.txt +0 -0
- {ingestr-0.9.4.dist-info → ingestr-0.10.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -1,139 +0,0 @@
|
|
|
1
|
-
from typing import (
|
|
2
|
-
TYPE_CHECKING,
|
|
3
|
-
Any,
|
|
4
|
-
Callable,
|
|
5
|
-
List,
|
|
6
|
-
Literal,
|
|
7
|
-
Optional,
|
|
8
|
-
Type,
|
|
9
|
-
Union,
|
|
10
|
-
)
|
|
11
|
-
|
|
12
|
-
from dlt.common import logger
|
|
13
|
-
from dlt.common.schema.typing import TColumnSchema, TTableSchemaColumns
|
|
14
|
-
from sqlalchemy import Column, Table
|
|
15
|
-
from sqlalchemy.engine import Row
|
|
16
|
-
from sqlalchemy.sql import Select, sqltypes
|
|
17
|
-
from sqlalchemy.sql.sqltypes import TypeEngine
|
|
18
|
-
from typing_extensions import TypeAlias
|
|
19
|
-
|
|
20
|
-
# How much column metadata to reflect from the database:
#   "minimal"             -> column names and nullability only
#   "full"                -> names, nullability, and dlt data types
#   "full_with_precision" -> "full" plus precision/scale where supported
ReflectionLevel = Literal["minimal", "full", "full_with_precision"]


# optionally create generics with any so they can be imported by dlt importer
if TYPE_CHECKING:
    # Under static type checking use the real sqlalchemy generics
    # parametrized with Any.
    SelectAny: TypeAlias = Select[Any]
    ColumnAny: TypeAlias = Column[Any]
    RowAny: TypeAlias = Row[Any]
    TypeEngineAny = TypeEngine[Any]
else:
    # At runtime fall back to plain Type[Any] placeholders — presumably
    # because the sqlalchemy classes are not subscriptable on all supported
    # versions (TODO confirm against the pinned sqlalchemy version).
    SelectAny: TypeAlias = Type[Any]
    ColumnAny: TypeAlias = Type[Any]
    RowAny: TypeAlias = Type[Any]
    TypeEngineAny = Type[Any]


# Callback that may substitute a sqlalchemy type with another type (instance
# or class), or return None to leave the column's type uninferred.
TTypeAdapter = Callable[
    [TypeEngineAny], Optional[Union[TypeEngineAny, Type[TypeEngineAny]]]
]
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
def sqla_col_to_column_schema(
    sql_col: ColumnAny,
    reflection_level: ReflectionLevel,
    type_adapter_callback: Optional[TTypeAdapter] = None,
) -> Optional[TColumnSchema]:
    """Infer a dlt schema column from a sqlalchemy column.

    Args:
        sql_col: the reflected sqlalchemy column.
        reflection_level: "minimal" emits only name/nullability; "full" also
            infers the dlt data type; "full_with_precision" additionally adds
            precision/scale for types that support it (numeric, varchar,
            binary, integers). Decimal columns always carry precision/scale
            when the reflected type provides them.
        type_adapter_callback: optional hook that may replace the sqlalchemy
            type (instance or class) before inference, or return None to
            skip type inference for this column entirely.

    Returns:
        A column schema dict with None-valued keys stripped. The dict always
        contains "name" and "nullable"; "data_type" (and possibly
        "precision"/"scale") are present only for recognized types at
        reflection levels above "minimal".
    """
    col: TColumnSchema = {
        "name": sql_col.name,
        "nullable": sql_col.nullable,
    }
    if reflection_level == "minimal":
        # Name/nullability only — no type inference requested.
        return col

    sql_t = sql_col.type

    if type_adapter_callback:
        sql_t = type_adapter_callback(sql_t)  # type: ignore[assignment]
        # Check if sqla type class rather than instance is returned
        if sql_t is not None and isinstance(sql_t, type):
            sql_t = sql_t()

    if sql_t is None:
        # Column ignored by callback
        return col

    add_precision = reflection_level == "full_with_precision"

    # NOTE: branch order matters — SmallInteger subclasses Integer in
    # sqlalchemy, so it must be tested first to get its narrower precision.
    if isinstance(sql_t, sqltypes.SmallInteger):
        col["data_type"] = "bigint"
        if add_precision:
            col["precision"] = 32
    elif isinstance(sql_t, sqltypes.Integer):
        col["data_type"] = "bigint"
    elif isinstance(sql_t, sqltypes.Numeric):
        # dlt column type depends on the data returned by the sql alchemy dialect
        # and not on the metadata reflected in the database. all Numeric types
        # that are returned as floats will assume "double" type
        # and returned as decimals will assume "decimal" type
        if sql_t.asdecimal is False:
            col["data_type"] = "double"
        else:
            col["data_type"] = "decimal"
            if sql_t.precision is not None:
                col["precision"] = sql_t.precision
                # must have a precision for any meaningful scale
                if sql_t.scale is not None:
                    col["scale"] = sql_t.scale
                elif sql_t.decimal_return_scale is not None:
                    col["scale"] = sql_t.decimal_return_scale
    elif isinstance(sql_t, sqltypes.String):
        col["data_type"] = "text"
        if add_precision and sql_t.length:
            col["precision"] = sql_t.length
    elif isinstance(sql_t, sqltypes._Binary):
        col["data_type"] = "binary"
        if add_precision and sql_t.length:
            col["precision"] = sql_t.length
    elif isinstance(sql_t, sqltypes.DateTime):
        col["data_type"] = "timestamp"
    elif isinstance(sql_t, sqltypes.Date):
        col["data_type"] = "date"
    elif isinstance(sql_t, sqltypes.Time):
        col["data_type"] = "time"
    elif isinstance(sql_t, sqltypes.JSON):
        col["data_type"] = "complex"
    elif isinstance(sql_t, sqltypes.Boolean):
        col["data_type"] = "bool"
    else:
        # Unrecognized type: leave "data_type" unset and let the backend decide.
        logger.warning(
            f"A column with name {sql_col.name} contains unknown data type {sql_t} which cannot be mapped to `dlt` data type. When using sqlalchemy backend such data will be passed to the normalizer. In case of `pyarrow` and `pandas` backend, data types are detected from numpy ndarrays. In case of other backends, the behavior is backend-specific."
        )

    # Strip None values (e.g. nullable=None on some reflected columns).
    return {key: value for key, value in col.items() if value is not None}  # type: ignore[return-value]
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
def get_primary_key(table: Table) -> Optional[List[str]]:
    """Return the primary-key column names of *table*, or None when the
    table defines no primary key."""
    key_columns: List[str] = []
    for key_column in table.primary_key:
        key_columns.append(key_column.name)
    if not key_columns:
        return None
    return key_columns
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
def table_to_columns(
    table: Table,
    reflection_level: ReflectionLevel = "full",
    type_conversion_fallback: Optional[TTypeAdapter] = None,
) -> TTableSchemaColumns:
    """Convert an sqlalchemy table to a dlt table schema.

    Maps each reflected column through ``sqla_col_to_column_schema`` and
    returns a dict keyed by column name; columns whose schema comes back
    as None are omitted.
    """
    schema_columns: TTableSchemaColumns = {}
    for sql_column in table.columns:
        schema_col = sqla_col_to_column_schema(
            sql_column, reflection_level, type_conversion_fallback
        )
        if schema_col is None:
            continue
        schema_columns[schema_col["name"]] = schema_col
    return schema_columns
|
|
File without changes
|
|
File without changes
|
|
File without changes
|