ingestr 0.6.6__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/main.py +29 -4
- ingestr/src/factory.py +2 -0
- ingestr/src/sources.py +1 -3
- ingestr/src/sql_database/__init__.py +62 -28
- ingestr/src/sql_database/arrow_helpers.py +139 -0
- ingestr/src/sql_database/helpers.py +57 -33
- ingestr/src/sql_database/schema_types.py +58 -81
- ingestr/src/version.py +1 -1
- {ingestr-0.6.6.dist-info → ingestr-0.7.0.dist-info}/METADATA +2 -2
- {ingestr-0.6.6.dist-info → ingestr-0.7.0.dist-info}/RECORD +13 -17
- ingestr/main_test.py +0 -875
- ingestr/src/destinations_test.py +0 -113
- ingestr/src/factory_test.py +0 -13
- ingestr/src/gorgias/helpers_test.py +0 -45
- ingestr/src/sources_test.py +0 -96
- {ingestr-0.6.6.dist-info → ingestr-0.7.0.dist-info}/WHEEL +0 -0
- {ingestr-0.6.6.dist-info → ingestr-0.7.0.dist-info}/entry_points.txt +0 -0
- {ingestr-0.6.6.dist-info → ingestr-0.7.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -1,39 +1,73 @@
|
|
|
1
|
-
from typing import
|
|
1
|
+
from typing import (
|
|
2
|
+
TYPE_CHECKING,
|
|
3
|
+
Any,
|
|
4
|
+
Callable,
|
|
5
|
+
List,
|
|
6
|
+
Literal,
|
|
7
|
+
Optional,
|
|
8
|
+
Type,
|
|
9
|
+
Union,
|
|
10
|
+
)
|
|
2
11
|
|
|
3
12
|
from dlt.common import logger
|
|
4
|
-
from dlt.common.configuration import with_config
|
|
5
|
-
from dlt.common.destination import DestinationCapabilitiesContext
|
|
6
13
|
from dlt.common.schema.typing import TColumnSchema, TTableSchemaColumns
|
|
7
14
|
from sqlalchemy import Column, Table
|
|
8
15
|
from sqlalchemy.engine import Row
|
|
9
16
|
from sqlalchemy.sql import Select, sqltypes
|
|
17
|
+
from sqlalchemy.sql.sqltypes import TypeEngine
|
|
10
18
|
from typing_extensions import TypeAlias
|
|
11
19
|
|
|
20
|
+
ReflectionLevel = Literal["minimal", "full", "full_with_precision"]
|
|
21
|
+
|
|
22
|
+
|
|
12
23
|
# optionally create generics with any so they can be imported by dlt importer
|
|
13
24
|
if TYPE_CHECKING:
|
|
14
25
|
SelectAny: TypeAlias = Select[Any]
|
|
15
26
|
ColumnAny: TypeAlias = Column[Any]
|
|
16
27
|
RowAny: TypeAlias = Row[Any]
|
|
28
|
+
TypeEngineAny = TypeEngine[Any]
|
|
17
29
|
else:
|
|
18
30
|
SelectAny: TypeAlias = Type[Any]
|
|
19
31
|
ColumnAny: TypeAlias = Type[Any]
|
|
20
32
|
RowAny: TypeAlias = Type[Any]
|
|
33
|
+
TypeEngineAny = Type[Any]
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
TTypeAdapter = Callable[
|
|
37
|
+
[TypeEngineAny], Optional[Union[TypeEngineAny, Type[TypeEngineAny]]]
|
|
38
|
+
]
|
|
21
39
|
|
|
22
40
|
|
|
23
41
|
def sqla_col_to_column_schema(
|
|
24
|
-
sql_col: ColumnAny,
|
|
42
|
+
sql_col: ColumnAny,
|
|
43
|
+
reflection_level: ReflectionLevel,
|
|
44
|
+
type_adapter_callback: Optional[TTypeAdapter] = None,
|
|
25
45
|
) -> Optional[TColumnSchema]:
|
|
26
46
|
"""Infer dlt schema column type from an sqlalchemy type.
|
|
27
47
|
|
|
28
48
|
If `add_precision` is set, precision and scale is inferred from that types that support it,
|
|
29
49
|
such as numeric, varchar, int, bigint. Numeric (decimal) types have always precision added.
|
|
30
50
|
"""
|
|
31
|
-
sql_t = sql_col.type
|
|
32
51
|
col: TColumnSchema = {
|
|
33
52
|
"name": sql_col.name,
|
|
34
|
-
"data_type": None, # set that later
|
|
35
53
|
"nullable": sql_col.nullable,
|
|
36
54
|
}
|
|
55
|
+
if reflection_level == "minimal":
|
|
56
|
+
return col
|
|
57
|
+
|
|
58
|
+
sql_t = sql_col.type
|
|
59
|
+
|
|
60
|
+
if type_adapter_callback:
|
|
61
|
+
sql_t = type_adapter_callback(sql_t) # type: ignore[assignment]
|
|
62
|
+
# Check if sqla type class rather than instance is returned
|
|
63
|
+
if sql_t is not None and isinstance(sql_t, type):
|
|
64
|
+
sql_t = sql_t()
|
|
65
|
+
|
|
66
|
+
if sql_t is None:
|
|
67
|
+
# Column ignored by callback
|
|
68
|
+
return col
|
|
69
|
+
|
|
70
|
+
add_precision = reflection_level == "full_with_precision"
|
|
37
71
|
|
|
38
72
|
if isinstance(sql_t, sqltypes.SmallInteger):
|
|
39
73
|
col["data_type"] = "bigint"
|
|
@@ -77,86 +111,29 @@ def sqla_col_to_column_schema(
|
|
|
77
111
|
col["data_type"] = "bool"
|
|
78
112
|
else:
|
|
79
113
|
logger.warning(
|
|
80
|
-
f"A column with name {sql_col.name} contains unknown data type {sql_t} which cannot be mapped to `dlt` data type. When using sqlalchemy backend such data will be passed to the normalizer. In case of `pyarrow` backend
|
|
114
|
+
f"A column with name {sql_col.name} contains unknown data type {sql_t} which cannot be mapped to `dlt` data type. When using sqlalchemy backend such data will be passed to the normalizer. In case of `pyarrow` and `pandas` backend, data types are detected from numpy ndarrays. In case of other backends, the behavior is backend-specific."
|
|
81
115
|
)
|
|
82
|
-
col = None
|
|
83
|
-
if col:
|
|
84
|
-
return {key: value for key, value in col.items() if value is not None} # type: ignore[return-value]
|
|
85
|
-
return None
|
|
86
116
|
|
|
117
|
+
return {key: value for key, value in col.items() if value is not None} # type: ignore[return-value]
|
|
87
118
|
|
|
88
|
-
def table_to_columns(table: Table, add_precision: bool = False) -> TTableSchemaColumns:
|
|
89
|
-
"""Convert an sqlalchemy table to a dlt table schema.
|
|
90
119
|
|
|
91
|
-
|
|
92
|
-
"""
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
for col in (sqla_col_to_column_schema(c, add_precision) for c in table.columns)
|
|
96
|
-
if col is not None
|
|
97
|
-
}
|
|
120
|
+
def get_primary_key(table: Table) -> Optional[List[str]]:
|
|
121
|
+
"""Create primary key or return None if no key defined"""
|
|
122
|
+
primary_key = [c.name for c in table.primary_key]
|
|
123
|
+
return primary_key if len(primary_key) > 0 else None
|
|
98
124
|
|
|
99
125
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
from dlt.common.libs.pyarrow import get_py_arrow_datatype
|
|
112
|
-
from dlt.common.libs.pyarrow import pyarrow as pa
|
|
113
|
-
|
|
114
|
-
return pa.schema(
|
|
115
|
-
[
|
|
116
|
-
pa.field(
|
|
117
|
-
name,
|
|
118
|
-
get_py_arrow_datatype(
|
|
119
|
-
schema_item,
|
|
120
|
-
caps or DestinationCapabilitiesContext.generic_capabilities(),
|
|
121
|
-
tz,
|
|
122
|
-
),
|
|
123
|
-
nullable=schema_item.get("nullable", True),
|
|
124
|
-
)
|
|
125
|
-
for name, schema_item in columns_schema.items()
|
|
126
|
-
]
|
|
127
|
-
)
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
def row_tuples_to_arrow(
|
|
131
|
-
rows: Sequence[RowAny], columns: TTableSchemaColumns, tz: str
|
|
132
|
-
) -> Any:
|
|
133
|
-
import numpy as np
|
|
134
|
-
from dlt.common.libs.pyarrow import pyarrow as pa
|
|
135
|
-
|
|
136
|
-
arrow_schema = columns_to_arrow(columns, tz=tz)
|
|
137
|
-
|
|
138
|
-
try:
|
|
139
|
-
from pandas._libs import lib
|
|
140
|
-
|
|
141
|
-
pivoted_rows = lib.to_object_array_tuples(rows).T # type: ignore[attr-defined]
|
|
142
|
-
except ImportError:
|
|
143
|
-
logger.info(
|
|
144
|
-
"Pandas not installed, reverting to numpy.asarray to create a table which is slower"
|
|
126
|
+
def table_to_columns(
|
|
127
|
+
table: Table,
|
|
128
|
+
reflection_level: ReflectionLevel = "full",
|
|
129
|
+
type_conversion_fallback: Optional[TTypeAdapter] = None,
|
|
130
|
+
) -> TTableSchemaColumns:
|
|
131
|
+
"""Convert an sqlalchemy table to a dlt table schema."""
|
|
132
|
+
return {
|
|
133
|
+
col["name"]: col
|
|
134
|
+
for col in (
|
|
135
|
+
sqla_col_to_column_schema(c, reflection_level, type_conversion_fallback)
|
|
136
|
+
for c in table.columns
|
|
145
137
|
)
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
columnar = {
|
|
149
|
-
col: dat.ravel()
|
|
150
|
-
for col, dat in zip(columns, np.vsplit(pivoted_rows, len(columns)))
|
|
138
|
+
if col is not None
|
|
151
139
|
}
|
|
152
|
-
for idx in range(0, len(arrow_schema.names)):
|
|
153
|
-
field = arrow_schema.field(idx)
|
|
154
|
-
py_type = type(rows[0][idx])
|
|
155
|
-
# cast double / float ndarrays to decimals if type mismatch, looks like decimals and floats are often mixed up in dialects
|
|
156
|
-
if pa.types.is_decimal(field.type) and issubclass(py_type, (str, float)):
|
|
157
|
-
logger.warning(
|
|
158
|
-
f"Field {field.name} was reflected as decimal type, but rows contains {py_type.__name__}. Additional cast is required which may slow down arrow table generation."
|
|
159
|
-
)
|
|
160
|
-
float_array = pa.array(columnar[field.name], type=pa.float64())
|
|
161
|
-
columnar[field.name] = float_array.cast(field.type, safe=False)
|
|
162
|
-
return pa.Table.from_pydict(columnar, schema=arrow_schema)
|
ingestr/src/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.6.6"
|
|
1
|
+
__version__ = "0.7.0"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: ingestr
|
|
3
|
-
Version: 0.6.6
|
|
3
|
+
Version: 0.7.0
|
|
4
4
|
Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
|
|
5
5
|
Project-URL: Homepage, https://github.com/bruin-data/ingestr
|
|
6
6
|
Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
|
|
@@ -16,7 +16,7 @@ Classifier: Topic :: Database
|
|
|
16
16
|
Requires-Python: >=3.9
|
|
17
17
|
Requires-Dist: cx-oracle==8.3.0
|
|
18
18
|
Requires-Dist: databricks-sql-connector==2.9.3
|
|
19
|
-
Requires-Dist: dlt==0.
|
|
19
|
+
Requires-Dist: dlt==0.5.1
|
|
20
20
|
Requires-Dist: duckdb-engine==0.11.5
|
|
21
21
|
Requires-Dist: duckdb==0.10.2
|
|
22
22
|
Requires-Dist: google-api-python-client==2.130.0
|
|
@@ -1,13 +1,9 @@
|
|
|
1
|
-
ingestr/main.py,sha256=
|
|
2
|
-
ingestr/main_test.py,sha256=MDV2Eo86W_CcxGgEkYYoBc6xIXjVMER4hMhgAdxXYMc,28464
|
|
1
|
+
ingestr/main.py,sha256=JYgh3rZSO9n_Ko2D_9BLaF_cGQHrLSywK1WH6XioefQ,15961
|
|
3
2
|
ingestr/src/destinations.py,sha256=2SfPMjtTelPmzQmc3zNs8xGcKIPuGn_hoZFIBUuhjXI,6338
|
|
4
|
-
ingestr/src/
|
|
5
|
-
ingestr/src/
|
|
6
|
-
ingestr/src/factory_test.py,sha256=X9sFkvNByWChIcyeDt1QiIPMIzGNKb7M5A_GUE0-nnI,664
|
|
7
|
-
ingestr/src/sources.py,sha256=sLT8CDiLz6aTekm6QZCXatZAdTyzCabqjJesnEe88GU,10061
|
|
8
|
-
ingestr/src/sources_test.py,sha256=wZopz4tFtFnOiCEO8pZW816Nj86pLlR5fEqfq0TtWMQ,3629
|
|
3
|
+
ingestr/src/factory.py,sha256=XuT_8LvWd7gBxOjoD_NiG-jtPvHNQ9nqOeoCJzhRb6Y,3630
|
|
4
|
+
ingestr/src/sources.py,sha256=QbSvECvGbHJKOpE9_dbq11343pA5ajsS9BPPPab1ivw,10007
|
|
9
5
|
ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
|
|
10
|
-
ingestr/src/version.py,sha256=
|
|
6
|
+
ingestr/src/version.py,sha256=RaANGbRu5e-vehwXI1-Qe2ggPPfs1TQaZj072JdbLk4,22
|
|
11
7
|
ingestr/src/google_sheets/README.md,sha256=wFQhvmGpRA38Ba2N_WIax6duyD4c7c_pwvvprRfQDnw,5470
|
|
12
8
|
ingestr/src/google_sheets/__init__.py,sha256=5qlX-6ilx5MW7klC7B_0jGSxloQSLkSESTh4nlY3Aos,6643
|
|
13
9
|
ingestr/src/google_sheets/helpers/__init__.py,sha256=5hXZrZK8cMO3UOuL-s4OKOpdACdihQD0hYYlSEu-iQ8,35
|
|
@@ -15,7 +11,6 @@ ingestr/src/google_sheets/helpers/api_calls.py,sha256=RiVfdacbaneszhmuhYilkJnkc9
|
|
|
15
11
|
ingestr/src/google_sheets/helpers/data_processing.py,sha256=WYO6z4XjGcG0Hat2J2enb-eLX5mSNVb2vaqRE83FBWU,11000
|
|
16
12
|
ingestr/src/gorgias/__init__.py,sha256=BzX9X1Yc_1Mch6NP1pn26hjRIiaadErgHxkdJHw4P3o,21227
|
|
17
13
|
ingestr/src/gorgias/helpers.py,sha256=DamuijnvhGY9hysQO4txrVMf4izkGbh5qfBKImdOINE,5427
|
|
18
|
-
ingestr/src/gorgias/helpers_test.py,sha256=kSR2nhB8U8HZ8pgDnd7HvXlzojmBnpOm8fTKHJvvKGY,1580
|
|
19
14
|
ingestr/src/mongodb/__init__.py,sha256=E7SDeCyYNkYZZ_RFhjCRDZUGpKtaxpPG5sFSmKJV62U,4336
|
|
20
15
|
ingestr/src/mongodb/helpers.py,sha256=80vtAeNyUn1iMN0CeLrTlKqYN6I6fHF81Kd2UuE8Kns,5653
|
|
21
16
|
ingestr/src/notion/__init__.py,sha256=36wUui8finbc85ObkRMq8boMraXMUehdABN_AMe_hzA,1834
|
|
@@ -27,10 +22,11 @@ ingestr/src/shopify/__init__.py,sha256=EWjpvZz7K6Pms7uUoqqkM4Wj0XeE2NrDvVp4BNM8d
|
|
|
27
22
|
ingestr/src/shopify/exceptions.py,sha256=BhV3lIVWeBt8Eh4CWGW_REFJpGCzvW6-62yZrBWa3nQ,50
|
|
28
23
|
ingestr/src/shopify/helpers.py,sha256=OO_Tw-HwVLnRhwT3vqUWEQEEcWIS9KWE6VDDe8BCC2w,4972
|
|
29
24
|
ingestr/src/shopify/settings.py,sha256=StY0EPr7wFJ7KzRRDN4TKxV0_gkIS1wPj2eR4AYSsDk,141
|
|
30
|
-
ingestr/src/sql_database/__init__.py,sha256=
|
|
31
|
-
ingestr/src/sql_database/
|
|
25
|
+
ingestr/src/sql_database/__init__.py,sha256=HEqY6U-YzzbeZ8avIthj-Fatm2C3i3jqYs5DAIAu4Ss,11511
|
|
26
|
+
ingestr/src/sql_database/arrow_helpers.py,sha256=yze1X3A9nUQA4HeuFDDWrfJVkCq8Uo5UyDo_zhJtI60,5699
|
|
27
|
+
ingestr/src/sql_database/helpers.py,sha256=6o8e2_8MIuj3qlo40a2E6ns3gyK18ei1jCePONrMUjI,10191
|
|
32
28
|
ingestr/src/sql_database/override.py,sha256=xbKGDztCzvrhJ5kJTXERal3LA56bEeVug4_rrTs8DgA,333
|
|
33
|
-
ingestr/src/sql_database/schema_types.py,sha256=
|
|
29
|
+
ingestr/src/sql_database/schema_types.py,sha256=qXTanvFPE8wMCSDzQWPDi5yqaO-llfrFXjiGJALI4NA,5013
|
|
34
30
|
ingestr/src/telemetry/event.py,sha256=MpWc5tt0lSJ1pWKe9HQ11BHrcPBxSH40l4wjZi9u0tI,924
|
|
35
31
|
ingestr/src/testdata/fakebqcredentials.json,sha256=scc6TUc963KAbKTLZCfcmqVzbtzDCW1_8JNRnyAXyy8,628
|
|
36
32
|
ingestr/testdata/.gitignore,sha256=DFzYYOpqdTiT7S1HjCT-jffZSmEvFZge295_upAB0FY,13
|
|
@@ -41,8 +37,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
|
|
|
41
37
|
ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
|
|
42
38
|
ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
|
|
43
39
|
ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
|
|
44
|
-
ingestr-0.
|
|
45
|
-
ingestr-0.
|
|
46
|
-
ingestr-0.
|
|
47
|
-
ingestr-0.
|
|
48
|
-
ingestr-0.
|
|
40
|
+
ingestr-0.7.0.dist-info/METADATA,sha256=MmQ_futv2ZZbVg4hdbCErSAgs2AobplIUr4vMErTXEI,5829
|
|
41
|
+
ingestr-0.7.0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
42
|
+
ingestr-0.7.0.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
|
|
43
|
+
ingestr-0.7.0.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
|
|
44
|
+
ingestr-0.7.0.dist-info/RECORD,,
|