ingestr 0.9.5__py3-none-any.whl → 0.10.0rc0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ingestr might be problematic.

@@ -1,282 +0,0 @@
-"""SQL database source helpers"""
-
-import operator
-import warnings
-from typing import (
-    Any,
-    Callable,
-    Dict,
-    Iterator,
-    Literal,
-    Optional,
-    Union,
-)
-
-import dlt
-from dlt.common.configuration.specs import BaseConfiguration, configspec
-from dlt.common.exceptions import MissingDependencyException
-from dlt.common.schema import TTableSchemaColumns
-from dlt.common.typing import TDataItem, TSortOrder
-from sqlalchemy import create_engine
-from sqlalchemy.engine import Engine
-from sqlalchemy.exc import CompileError
-
-from .arrow_helpers import row_tuples_to_arrow
-from .override import IngestrConnectionStringCredentials as ConnectionStringCredentials
-from .schema_types import (
-    ReflectionLevel,
-    SelectAny,
-    Table,
-    TTypeAdapter,
-    get_primary_key,
-    table_to_columns,
-)
-
-TableBackend = Literal["sqlalchemy", "pyarrow", "pandas", "connectorx"]
-
-
-class TableLoader:
-    def __init__(
-        self,
-        engine: Engine,
-        backend: TableBackend,
-        table: Table,
-        columns: TTableSchemaColumns,
-        chunk_size: int = 1000,
-        incremental: Optional[dlt.sources.incremental[Any]] = None,
-    ) -> None:
-        self.engine = engine
-        self.backend = backend
-        self.table = table
-        self.columns = columns
-        self.chunk_size = chunk_size
-        self.incremental = incremental
-        if incremental:
-            try:
-                self.cursor_column = table.c[incremental.cursor_path]
-            except KeyError as e:
-                raise KeyError(
-                    f"Cursor column '{incremental.cursor_path}' does not exist in table '{table.name}'"
-                ) from e
-            self.last_value = incremental.last_value
-            self.end_value = incremental.end_value
-            self.row_order: TSortOrder = self.incremental.row_order
-        else:
-            self.cursor_column = None
-            self.last_value = None
-            self.end_value = None
-            self.row_order = None
-
-    def make_query(self) -> SelectAny:
-        table = self.table
-        query = table.select()
-        if not self.incremental:
-            return query
-
-        last_value_func = self.incremental.last_value_func
-
-        # generate where
-        if (
-            last_value_func is max
-        ):  # query ordered and filtered according to the last_value function
-            filter_op = operator.ge
-            filter_op_end = operator.lt
-        elif last_value_func is min:
-            filter_op = operator.le
-            filter_op_end = operator.gt
-        else:  # custom last_value: load everything and let incremental handle filtering
-            return query
-
-        if self.last_value is not None:
-            query = query.where(filter_op(self.cursor_column, self.last_value))
-            if self.end_value is not None:
-                query = query.where(filter_op_end(self.cursor_column, self.end_value))
-
-        # generate order by from declared row order
-        order_by = None
-        if (self.row_order == "asc" and last_value_func is max) or (
-            self.row_order == "desc" and last_value_func is min
-        ):
-            order_by = self.cursor_column.asc()
-        elif (self.row_order == "asc" and last_value_func is min) or (
-            self.row_order == "desc" and last_value_func is max
-        ):
-            order_by = self.cursor_column.desc()
-        if order_by is not None:
-            query = query.order_by(order_by)
-
-        return query
-
-    def load_rows(self, backend_kwargs: Dict[str, Any] = None) -> Iterator[TDataItem]:
-        # make a copy of the kwargs
-        backend_kwargs = dict(backend_kwargs or {})
-        query = self.make_query()
-        if self.backend == "connectorx":
-            yield from self._load_rows_connectorx(query, backend_kwargs)
-        else:
-            yield from self._load_rows(query, backend_kwargs)
-
-    def _load_rows(self, query: SelectAny, backend_kwargs: Dict[str, Any]) -> TDataItem:
-        with self.engine.connect() as conn:
-            result = conn.execution_options(yield_per=self.chunk_size).execute(query)
-            # NOTE: the cursor returns non-normalized column names, which may be
-            # useful with dialects such as Oracle that normalize column names
-            # columns = [c[0] for c in result.cursor.description]
-            columns = list(result.keys())
-            for partition in result.partitions(size=self.chunk_size):
-                if self.backend == "sqlalchemy":
-                    yield [dict(row._mapping) for row in partition]
-                elif self.backend == "pandas":
-                    from dlt.common.libs.pandas_sql import _wrap_result
-
-                    df = _wrap_result(
-                        partition,
-                        columns,
-                        **{"dtype_backend": "pyarrow", **backend_kwargs},
-                    )
-                    yield df
-                elif self.backend == "pyarrow":
-                    yield row_tuples_to_arrow(
-                        partition, self.columns, tz=backend_kwargs.get("tz", "UTC")
-                    )
-
-    def _load_rows_connectorx(
-        self, query: SelectAny, backend_kwargs: Dict[str, Any]
-    ) -> Iterator[TDataItem]:
-        try:
-            import connectorx as cx  # type: ignore
-        except ImportError:
-            raise MissingDependencyException(
-                "Connector X table backend", ["connectorx"]
-            )
-
-        # default settings
-        backend_kwargs = {
-            "return_type": "arrow2",
-            "protocol": "binary",
-            **backend_kwargs,
-        }
-        conn = backend_kwargs.pop(
-            "conn",
-            self.engine.url._replace(
-                drivername=self.engine.url.get_backend_name()
-            ).render_as_string(hide_password=False),
-        )
-        try:
-            query_str = str(
-                query.compile(self.engine, compile_kwargs={"literal_binds": True})
-            )
-        except CompileError as ex:
-            raise NotImplementedError(
-                f"Query for table {self.table.name} could not be compiled to a string to execute on ConnectorX. If you are on SQLAlchemy 1.4.x, the exception is caused by literals that cannot be rendered; upgrade to 2.x: {str(ex)}"
-            ) from ex
-        df = cx.read_sql(conn, query_str, **backend_kwargs)
-        yield df
-
-
-def table_rows(
-    engine: Engine,
-    table: Table,
-    chunk_size: int,
-    backend: TableBackend,
-    incremental: Optional[dlt.sources.incremental[Any]] = None,
-    defer_table_reflect: bool = False,
-    table_adapter_callback: Callable[[Table], None] = None,
-    reflection_level: ReflectionLevel = "minimal",
-    backend_kwargs: Dict[str, Any] = None,
-    type_adapter_callback: Optional[TTypeAdapter] = None,
-) -> Iterator[TDataItem]:
-    columns: TTableSchemaColumns = None
-    if defer_table_reflect:
-        table = Table(
-            table.name, table.metadata, autoload_with=engine, extend_existing=True
-        )
-        if table_adapter_callback:
-            table_adapter_callback(table)
-        columns = table_to_columns(table, reflection_level, type_adapter_callback)
-
-        # set the primary_key in the incremental
-        if incremental and incremental.primary_key is None:
-            primary_key = get_primary_key(table)
-            if primary_key is not None:
-                incremental.primary_key = primary_key
-        # yield an empty record to set hints
-        yield dlt.mark.with_hints(
-            [],
-            dlt.mark.make_hints(
-                primary_key=get_primary_key(table),
-                columns=columns,
-            ),
-        )
-    else:
-        # table was already reflected
-        columns = table_to_columns(table, reflection_level, type_adapter_callback)
-
-    loader = TableLoader(
-        engine, backend, table, columns, incremental=incremental, chunk_size=chunk_size
-    )
-    yield from loader.load_rows(backend_kwargs)
-
-
-def engine_from_credentials(
-    credentials: Union[ConnectionStringCredentials, Engine, str], **backend_kwargs: Any
-) -> Engine:
-    if isinstance(credentials, Engine):
-        return credentials
-    if isinstance(credentials, ConnectionStringCredentials):
-        credentials = credentials.to_native_representation()
-    return create_engine(credentials, **backend_kwargs)
-
-
-def unwrap_json_connector_x(field: str) -> TDataItem:
-    """Creates a transform function, to be added with `add_map`, that unwraps JSON columns
-    ingested via connectorx. Such columns are additionally quoted, and SQL NULL is rendered as the JSON string "null".
-    """
-    import pyarrow as pa
-    import pyarrow.compute as pc
-
-    def _unwrap(table: TDataItem) -> TDataItem:
-        col_index = table.column_names.index(field)
-        # remove quotes
-        column = pc.replace_substring_regex(table[field], '"(.*)"', "\\1")
-        # convert json null to null
-        column = pc.replace_with_mask(
-            column,
-            pc.equal(column, "null").combine_chunks(),
-            pa.scalar(None, pa.large_string()),
-        )
-        return table.set_column(col_index, table.schema.field(col_index), column)
-
-    return _unwrap
-
-
-def _detect_precision_hints_deprecated(value: Optional[bool]) -> None:
-    if value is None:
-        return
-
-    msg = "`detect_precision_hints` argument is deprecated and will be removed in a future release. "
-    if value:
-        msg += "Use `reflection_level='full_with_precision'` which has the same effect instead."
-
-    warnings.warn(
-        msg,
-        DeprecationWarning,
-    )
-
-
-@configspec
-class SqlDatabaseTableConfiguration(BaseConfiguration):
-    incremental: Optional[dlt.sources.incremental] = None  # type: ignore[type-arg]
-
-
-@configspec
-class SqlTableResourceConfiguration(BaseConfiguration):
-    credentials: Union[ConnectionStringCredentials, Engine, str] = None
-    table: str = None
-    schema: Optional[str] = None
-    incremental: Optional[dlt.sources.incremental] = None  # type: ignore[type-arg]
-    chunk_size: int = 50000
-    backend: TableBackend = "sqlalchemy"
-    detect_precision_hints: Optional[bool] = None
-    defer_table_reflect: Optional[bool] = False
-    reflection_level: Optional[ReflectionLevel] = "full"
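
For context on the removed helper: TableLoader.make_query turns the incremental cursor state into a WHERE clause plus an optional ORDER BY. Below is a minimal, self-contained sketch of that logic in plain SQLAlchemy; the events table, the cursor values, and the in-memory SQLite engine are illustrative stand-ins, not part of the package.

import operator

import sqlalchemy as sa

# Illustrative schema: an integer "id" plays the incremental cursor column.
engine = sa.create_engine("sqlite://")
metadata = sa.MetaData()
events = sa.Table("events", metadata, sa.Column("id", sa.Integer, primary_key=True))
metadata.create_all(engine)

# Stand-ins for incremental state when last_value_func is max:
last_value, end_value = 100, 200
filter_op, filter_op_end = operator.ge, operator.lt

query = events.select()
query = query.where(filter_op(events.c.id, last_value))     # id >= last_value
query = query.where(filter_op_end(events.c.id, end_value))  # id < end_value
query = query.order_by(events.c.id.asc())                   # row_order == "asc"

# Rendering with literal_binds mirrors the connectorx code path above.
print(query.compile(engine, compile_kwargs={"literal_binds": True}))
# SELECT events.id FROM events
# WHERE events.id >= 100 AND events.id < 200 ORDER BY events.id ASC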
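
The pyarrow calls inside unwrap_json_connector_x can likewise be exercised on their own. A minimal sketch, assuming pyarrow is installed; the sample values are made up.

import pyarrow as pa
import pyarrow.compute as pc

# connectorx returns JSON columns quoted, and SQL NULL as the string "null".
column = pa.chunked_array([pa.array(['"{"a": 1}"', "null"], pa.large_string())])

# Strip the outer quotes, exactly as _unwrap does.
column = pc.replace_substring_regex(column, '"(.*)"', "\\1")
# Turn the literal string "null" into a real NULL.
column = pc.replace_with_mask(
    column,
    pc.equal(column, "null").combine_chunks(),
    pa.scalar(None, pa.large_string()),
)
print(column.to_pylist())  # ['{"a": 1}', None]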
@@ -1,10 +0,0 @@
-from typing import Optional
-
-from dlt.common.configuration.specs.base_configuration import configspec
-from dlt.sources.credentials import ConnectionStringCredentials
-
-
-@configspec(init=False)
-class IngestrConnectionStringCredentials(ConnectionStringCredentials):
-    username: Optional[str] = None
-    database: Optional[str] = None
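
This override exists because the base dlt ConnectionStringCredentials treats username and database as required, while URIs for engines such as SQLite or DuckDB may omit them. A minimal sketch of the behavior it enables; the class is re-declared here so the snippet is self-contained, the connection string is made up, and exact resolution behavior depends on the dlt version.

from typing import Optional

from dlt.common.configuration.specs.base_configuration import configspec
from dlt.sources.credentials import ConnectionStringCredentials

# Mirrors the override above for illustration.
@configspec(init=False)
class RelaxedCredentials(ConnectionStringCredentials):
    username: Optional[str] = None
    database: Optional[str] = None

creds = RelaxedCredentials()
creds.parse_native_representation("duckdb:///local.duckdb")
print(creds.drivername, creds.username)  # duckdb None
print(creds.to_native_representation())  # duckdb:///local.duckdb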
@@ -1,139 +0,0 @@
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    List,
-    Literal,
-    Optional,
-    Type,
-    Union,
-)
-
-from dlt.common import logger
-from dlt.common.schema.typing import TColumnSchema, TTableSchemaColumns
-from sqlalchemy import Column, Table
-from sqlalchemy.engine import Row
-from sqlalchemy.sql import Select, sqltypes
-from sqlalchemy.sql.sqltypes import TypeEngine
-from typing_extensions import TypeAlias
-
-ReflectionLevel = Literal["minimal", "full", "full_with_precision"]
-
-
-# optionally create generics with Any so they can be imported by the dlt importer
-if TYPE_CHECKING:
-    SelectAny: TypeAlias = Select[Any]
-    ColumnAny: TypeAlias = Column[Any]
-    RowAny: TypeAlias = Row[Any]
-    TypeEngineAny = TypeEngine[Any]
-else:
-    SelectAny: TypeAlias = Type[Any]
-    ColumnAny: TypeAlias = Type[Any]
-    RowAny: TypeAlias = Type[Any]
-    TypeEngineAny = Type[Any]
-
-
-TTypeAdapter = Callable[
-    [TypeEngineAny], Optional[Union[TypeEngineAny, Type[TypeEngineAny]]]
-]
-
-
-def sqla_col_to_column_schema(
-    sql_col: ColumnAny,
-    reflection_level: ReflectionLevel,
-    type_adapter_callback: Optional[TTypeAdapter] = None,
-) -> Optional[TColumnSchema]:
-    """Infer a dlt schema column type from an sqlalchemy type.
-
-    With `reflection_level="full_with_precision"`, precision and scale are inferred from
-    types that support them, such as numeric, varchar, int, bigint. Numeric (decimal) types always have precision added.
-    """
-    col: TColumnSchema = {
-        "name": sql_col.name,
-        "nullable": sql_col.nullable,
-    }
-    if reflection_level == "minimal":
-        return col
-
-    sql_t = sql_col.type
-
-    if type_adapter_callback:
-        sql_t = type_adapter_callback(sql_t)  # type: ignore[assignment]
-        # check whether an sqla type class rather than an instance was returned
-        if sql_t is not None and isinstance(sql_t, type):
-            sql_t = sql_t()
-
-    if sql_t is None:
-        # column ignored by the callback
-        return col
-
-    add_precision = reflection_level == "full_with_precision"
-
-    if isinstance(sql_t, sqltypes.SmallInteger):
-        col["data_type"] = "bigint"
-        if add_precision:
-            col["precision"] = 32
-    elif isinstance(sql_t, sqltypes.Integer):
-        col["data_type"] = "bigint"
-    elif isinstance(sql_t, sqltypes.Numeric):
-        # the dlt column type depends on the data returned by the sqlalchemy dialect,
-        # not on the metadata reflected from the database: Numeric types returned
-        # as floats assume the "double" type, those returned
-        # as decimals assume the "decimal" type
-        if sql_t.asdecimal is False:
-            col["data_type"] = "double"
-        else:
-            col["data_type"] = "decimal"
-            if sql_t.precision is not None:
-                col["precision"] = sql_t.precision
-                # must have a precision for any meaningful scale
-                if sql_t.scale is not None:
-                    col["scale"] = sql_t.scale
-                elif sql_t.decimal_return_scale is not None:
-                    col["scale"] = sql_t.decimal_return_scale
-    elif isinstance(sql_t, sqltypes.String):
-        col["data_type"] = "text"
-        if add_precision and sql_t.length:
-            col["precision"] = sql_t.length
-    elif isinstance(sql_t, sqltypes._Binary):
-        col["data_type"] = "binary"
-        if add_precision and sql_t.length:
-            col["precision"] = sql_t.length
-    elif isinstance(sql_t, sqltypes.DateTime):
-        col["data_type"] = "timestamp"
-    elif isinstance(sql_t, sqltypes.Date):
-        col["data_type"] = "date"
-    elif isinstance(sql_t, sqltypes.Time):
-        col["data_type"] = "time"
-    elif isinstance(sql_t, sqltypes.JSON):
-        col["data_type"] = "complex"
-    elif isinstance(sql_t, sqltypes.Boolean):
-        col["data_type"] = "bool"
-    else:
-        logger.warning(
-            f"A column with name {sql_col.name} contains unknown data type {sql_t} which cannot be mapped to a `dlt` data type. When using the sqlalchemy backend, such data will be passed to the normalizer. With the `pyarrow` and `pandas` backends, data types are detected from numpy ndarrays. With other backends, the behavior is backend-specific."
-        )
-
-    return {key: value for key, value in col.items() if value is not None}  # type: ignore[return-value]
-
-
-def get_primary_key(table: Table) -> Optional[List[str]]:
-    """Create a primary key or return None if no key is defined"""
-    primary_key = [c.name for c in table.primary_key]
-    return primary_key if len(primary_key) > 0 else None
-
-
-def table_to_columns(
-    table: Table,
-    reflection_level: ReflectionLevel = "full",
-    type_conversion_fallback: Optional[TTypeAdapter] = None,
-) -> TTableSchemaColumns:
-    """Convert an sqlalchemy table to a dlt table schema."""
-    return {
-        col["name"]: col
-        for col in (
-            sqla_col_to_column_schema(c, reflection_level, type_conversion_fallback)
-            for c in table.columns
-        )
-        if col is not None
-    }
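
TTypeAdapter above is the hook for coercing SQLAlchemy types the mapper does not recognize. A minimal sketch, assuming a Postgres UUID column should be loaded as text; the callback and table names are illustrative. Returning a type class (rather than an instance) is allowed because sqla_col_to_column_schema instantiates it, as the code above shows.

import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import UUID

def uuid_as_text(sql_type):
    """Illustrative TTypeAdapter: map Postgres UUID columns to text."""
    if isinstance(sql_type, UUID):
        return sa.Text  # a class is fine: the helper instantiates it
    return sql_type  # leave every other type untouched

# Hypothetical column to exercise the mapping:
meta = sa.MetaData()
docs = sa.Table("docs", meta, sa.Column("doc_id", UUID(), nullable=False))

# sqla_col_to_column_schema(docs.c.doc_id, "full", uuid_as_text) would yield:
# {"name": "doc_id", "nullable": False, "data_type": "text"}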