ingestr 0.13.2__py3-none-any.whl → 0.14.104__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ingestr/conftest.py +72 -0
- ingestr/main.py +134 -87
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/adjust/adjust_helpers.py +7 -3
- ingestr/src/airtable/__init__.py +3 -2
- ingestr/src/allium/__init__.py +128 -0
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin/__init__.py +262 -0
- ingestr/src/applovin_max/__init__.py +117 -0
- ingestr/src/appsflyer/__init__.py +325 -0
- ingestr/src/appsflyer/client.py +49 -45
- ingestr/src/appstore/__init__.py +1 -0
- ingestr/src/arrow/__init__.py +9 -1
- ingestr/src/asana_source/__init__.py +1 -1
- ingestr/src/attio/__init__.py +102 -0
- ingestr/src/attio/helpers.py +65 -0
- ingestr/src/blob.py +38 -11
- ingestr/src/buildinfo.py +1 -0
- ingestr/src/chess/__init__.py +1 -1
- ingestr/src/clickup/__init__.py +85 -0
- ingestr/src/clickup/helpers.py +47 -0
- ingestr/src/collector/spinner.py +43 -0
- ingestr/src/couchbase_source/__init__.py +118 -0
- ingestr/src/couchbase_source/helpers.py +135 -0
- ingestr/src/cursor/__init__.py +83 -0
- ingestr/src/cursor/helpers.py +188 -0
- ingestr/src/destinations.py +520 -33
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/__init__.py +80 -0
- ingestr/src/elasticsearch/helpers.py +138 -0
- ingestr/src/errors.py +8 -0
- ingestr/src/facebook_ads/__init__.py +47 -28
- ingestr/src/facebook_ads/helpers.py +59 -37
- ingestr/src/facebook_ads/settings.py +2 -0
- ingestr/src/facebook_ads/utils.py +39 -0
- ingestr/src/factory.py +116 -2
- ingestr/src/filesystem/__init__.py +8 -3
- ingestr/src/filters.py +46 -3
- ingestr/src/fluxx/__init__.py +9906 -0
- ingestr/src/fluxx/helpers.py +209 -0
- ingestr/src/frankfurter/__init__.py +157 -0
- ingestr/src/frankfurter/helpers.py +48 -0
- ingestr/src/freshdesk/__init__.py +89 -0
- ingestr/src/freshdesk/freshdesk_client.py +137 -0
- ingestr/src/freshdesk/settings.py +9 -0
- ingestr/src/fundraiseup/__init__.py +95 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/github/__init__.py +41 -6
- ingestr/src/github/helpers.py +5 -5
- ingestr/src/google_analytics/__init__.py +22 -4
- ingestr/src/google_analytics/helpers.py +124 -6
- ingestr/src/google_sheets/__init__.py +4 -4
- ingestr/src/google_sheets/helpers/data_processing.py +2 -2
- ingestr/src/hostaway/__init__.py +302 -0
- ingestr/src/hostaway/client.py +288 -0
- ingestr/src/http/__init__.py +35 -0
- ingestr/src/http/readers.py +114 -0
- ingestr/src/http_client.py +24 -0
- ingestr/src/hubspot/__init__.py +66 -23
- ingestr/src/hubspot/helpers.py +52 -22
- ingestr/src/hubspot/settings.py +14 -7
- ingestr/src/influxdb/__init__.py +46 -0
- ingestr/src/influxdb/client.py +34 -0
- ingestr/src/intercom/__init__.py +142 -0
- ingestr/src/intercom/helpers.py +674 -0
- ingestr/src/intercom/settings.py +279 -0
- ingestr/src/isoc_pulse/__init__.py +159 -0
- ingestr/src/jira_source/__init__.py +340 -0
- ingestr/src/jira_source/helpers.py +439 -0
- ingestr/src/jira_source/settings.py +170 -0
- ingestr/src/kafka/__init__.py +4 -1
- ingestr/src/kinesis/__init__.py +139 -0
- ingestr/src/kinesis/helpers.py +82 -0
- ingestr/src/klaviyo/{_init_.py → __init__.py} +5 -6
- ingestr/src/linear/__init__.py +634 -0
- ingestr/src/linear/helpers.py +111 -0
- ingestr/src/linkedin_ads/helpers.py +0 -1
- ingestr/src/loader.py +69 -0
- ingestr/src/mailchimp/__init__.py +126 -0
- ingestr/src/mailchimp/helpers.py +226 -0
- ingestr/src/mailchimp/settings.py +164 -0
- ingestr/src/masking.py +344 -0
- ingestr/src/mixpanel/__init__.py +62 -0
- ingestr/src/mixpanel/client.py +99 -0
- ingestr/src/monday/__init__.py +246 -0
- ingestr/src/monday/helpers.py +392 -0
- ingestr/src/monday/settings.py +328 -0
- ingestr/src/mongodb/__init__.py +72 -8
- ingestr/src/mongodb/helpers.py +915 -38
- ingestr/src/partition.py +32 -0
- ingestr/src/personio/__init__.py +331 -0
- ingestr/src/personio/helpers.py +86 -0
- ingestr/src/phantombuster/__init__.py +65 -0
- ingestr/src/phantombuster/client.py +87 -0
- ingestr/src/pinterest/__init__.py +82 -0
- ingestr/src/pipedrive/__init__.py +198 -0
- ingestr/src/pipedrive/helpers/__init__.py +23 -0
- ingestr/src/pipedrive/helpers/custom_fields_munger.py +102 -0
- ingestr/src/pipedrive/helpers/pages.py +115 -0
- ingestr/src/pipedrive/settings.py +27 -0
- ingestr/src/pipedrive/typing.py +3 -0
- ingestr/src/plusvibeai/__init__.py +335 -0
- ingestr/src/plusvibeai/helpers.py +544 -0
- ingestr/src/plusvibeai/settings.py +252 -0
- ingestr/src/quickbooks/__init__.py +117 -0
- ingestr/src/resource.py +40 -0
- ingestr/src/revenuecat/__init__.py +83 -0
- ingestr/src/revenuecat/helpers.py +237 -0
- ingestr/src/salesforce/__init__.py +156 -0
- ingestr/src/salesforce/helpers.py +64 -0
- ingestr/src/shopify/__init__.py +1 -17
- ingestr/src/smartsheets/__init__.py +82 -0
- ingestr/src/snapchat_ads/__init__.py +489 -0
- ingestr/src/snapchat_ads/client.py +72 -0
- ingestr/src/snapchat_ads/helpers.py +535 -0
- ingestr/src/socrata_source/__init__.py +83 -0
- ingestr/src/socrata_source/helpers.py +85 -0
- ingestr/src/socrata_source/settings.py +8 -0
- ingestr/src/solidgate/__init__.py +219 -0
- ingestr/src/solidgate/helpers.py +154 -0
- ingestr/src/sources.py +3132 -212
- ingestr/src/stripe_analytics/__init__.py +49 -21
- ingestr/src/stripe_analytics/helpers.py +286 -1
- ingestr/src/stripe_analytics/settings.py +62 -10
- ingestr/src/telemetry/event.py +10 -9
- ingestr/src/tiktok_ads/__init__.py +12 -6
- ingestr/src/tiktok_ads/tiktok_helpers.py +0 -1
- ingestr/src/trustpilot/__init__.py +48 -0
- ingestr/src/trustpilot/client.py +48 -0
- ingestr/src/version.py +6 -1
- ingestr/src/wise/__init__.py +68 -0
- ingestr/src/wise/client.py +63 -0
- ingestr/src/zoom/__init__.py +99 -0
- ingestr/src/zoom/helpers.py +102 -0
- ingestr/tests/unit/test_smartsheets.py +133 -0
- ingestr-0.14.104.dist-info/METADATA +563 -0
- ingestr-0.14.104.dist-info/RECORD +203 -0
- ingestr/src/appsflyer/_init_.py +0 -24
- ingestr-0.13.2.dist-info/METADATA +0 -302
- ingestr-0.13.2.dist-info/RECORD +0 -107
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/WHEEL +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.2.dist-info → ingestr-0.14.104.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/destinations.py
CHANGED
@@ -1,21 +1,44 @@
+import abc
 import base64
 import csv
+import datetime
 import json
 import os
 import shutil
+import struct
 import tempfile
 from urllib.parse import parse_qs, quote, urlparse

 import dlt
-import
+import dlt.destinations.impl.filesystem.filesystem
 from dlt.common.configuration.specs import AwsCredentials
+from dlt.common.destination.capabilities import DestinationCapabilitiesContext
+from dlt.common.schema import Schema
+from dlt.common.storages.configuration import FileSystemCredentials
 from dlt.destinations.impl.clickhouse.configuration import (
     ClickHouseCredentials,
 )

+from ingestr.src.elasticsearch.helpers import elasticsearch_insert
+from ingestr.src.errors import MissingValueError
+from ingestr.src.loader import load_dlt_file
+from ingestr.src.mongodb.helpers import mongodb_insert
+

 class GenericSqlDestination:
     def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
+        if uri.startswith("databricks://"):
+            p = urlparse(uri)
+            q = parse_qs(p.query)
+            schema = q.get("schema", [None])[0]
+            if not schema:
+                raise ValueError("Databricks requires schema in the URI.")
+            res = {
+                "dataset_name": schema,
+                "table_name": table,
+            }
+            return res
+
         table_fields = table.split(".")
         if len(table_fields) != 2:
             raise ValueError("Table name must be in the format <schema>.<table>")
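Note: the new `databricks://` branch in `GenericSqlDestination.dlt_run_params` takes the dlt dataset name from the `schema` query parameter instead of requiring a `<schema>.<table>` table identifier. A minimal sketch of that parsing, with a made-up URI:

    from urllib.parse import parse_qs, urlparse

    # Hypothetical URI, for illustration only.
    uri = "databricks://token:dapi123@adb-1234.azuredatabricks.net?http_path=/sql/1.0/wh&schema=analytics"
    q = parse_qs(urlparse(uri).query)
    schema = q.get("schema", [None])[0]
    print(schema)  # "analytics" -> dlt_run_params returns {"dataset_name": "analytics", "table_name": table}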
@@ -59,9 +82,30 @@ class BigQueryDestination:
                 base64.b64decode(credentials_base64[0]).decode("utf-8")
             )

+        staging_bucket = kwargs.get("staging_bucket", None)
+        if staging_bucket:
+            if not staging_bucket.startswith("gs://"):
+                raise ValueError("Staging bucket must start with gs://")
+
+            os.environ["DESTINATION__FILESYSTEM__BUCKET_URL"] = staging_bucket
+            os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__PROJECT_ID"] = (
+                credentials.get("project_id", None)
+            )
+            os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__PRIVATE_KEY"] = (
+                credentials.get("private_key", None)
+            )
+            os.environ["DESTINATION__FILESYSTEM__CREDENTIALS__CLIENT_EMAIL"] = (
+                credentials.get("client_email", None)
+            )
+
+        project_id = None
+        if source_fields.hostname:
+            project_id = source_fields.hostname
+
         return dlt.destinations.bigquery(
             credentials=credentials,  # type: ignore
-            location=location,
+            location=location,  # type: ignore
+            project_id=project_id,
             **kwargs,
         )

@@ -77,12 +121,24 @@ class BigQueryDestination:
             "table_name": table_fields[-1],
         }

+        staging_bucket = kwargs.get("staging_bucket", None)
+        if staging_bucket:
+            res["staging"] = "filesystem"
+
         return res

     def post_load(self):
         pass


+class CrateDBDestination(GenericSqlDestination):
+    def dlt_dest(self, uri: str, **kwargs):
+        uri = uri.replace("cratedb://", "postgres://")
+        import dlt_cratedb.impl.cratedb.factory
+
+        return dlt_cratedb.impl.cratedb.factory.cratedb(credentials=uri, **kwargs)
+
+
 class PostgresDestination(GenericSqlDestination):
     def dlt_dest(self, uri: str, **kwargs):
         return dlt.destinations.postgres(credentials=uri, **kwargs)
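Note: the two BigQuery hunks above add optional GCS staging: `dlt_dest` validates the `gs://` bucket and exports the `DESTINATION__FILESYSTEM__*` variables from the service-account JSON, while `dlt_run_params` marks the run as filesystem-staged. A hedged usage sketch (the URI and bucket are placeholders, and the call shape is not verified against the full file):

    dest = BigQueryDestination()
    params = dest.dlt_run_params(
        uri="bigquery://my-project?credentials_path=/tmp/sa.json",  # placeholder URI
        table="raw.events",
        staging_bucket="gs://my-staging-bucket",
    )
    # params now carries "staging": "filesystem" alongside the dataset/table fields.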
@@ -105,14 +161,149 @@ class DuckDBDestination(GenericSqlDestination):
         return dlt.destinations.duckdb(uri, **kwargs)


+class MotherduckDestination(GenericSqlDestination):
+    def dlt_dest(self, uri: str, **kwargs):
+        from urllib.parse import parse_qs, urlparse
+
+        parsed = urlparse(uri)
+        query = parse_qs(parsed.query)
+        token = query.get("token", [None])[0]
+        from dlt.destinations.impl.motherduck.configuration import MotherDuckCredentials
+
+        creds = {
+            "password": token,
+        }
+        if parsed.path.lstrip("/"):
+            creds["database"] = parsed.path.lstrip("/")
+
+        return dlt.destinations.motherduck(MotherDuckCredentials(creds), **kwargs)
+
+
+def handle_datetimeoffset(dto_value: bytes) -> datetime.datetime:
+    # ref: https://github.com/mkleehammer/pyodbc/issues/134#issuecomment-281739794
+    tup = struct.unpack(
+        "<6hI2h", dto_value
+    )  # e.g., (2017, 3, 16, 10, 35, 18, 500000000, -6, 0)
+    return datetime.datetime(
+        tup[0],
+        tup[1],
+        tup[2],
+        tup[3],
+        tup[4],
+        tup[5],
+        tup[6] // 1000,
+        datetime.timezone(datetime.timedelta(hours=tup[7], minutes=tup[8])),
+    )
+
+
+# MSSQL_COPT_SS_ACCESS_TOKEN is a connection attribute used to pass
+# an Azure Active Directory access token to the SQL Server ODBC driver.
+MSSQL_COPT_SS_ACCESS_TOKEN = 1256
+
+
+def serialize_azure_token(token):
+    # https://github.com/mkleehammer/pyodbc/issues/228#issuecomment-494773723
+    encoded = token.encode("utf_16_le")
+    return struct.pack("<i", len(encoded)) + encoded
+
+
+def build_mssql_dest():
+    # https://github.com/bruin-data/ingestr/issues/293
+
+    from dlt.destinations.impl.mssql.configuration import MsSqlClientConfiguration
+    from dlt.destinations.impl.mssql.mssql import (
+        HINT_TO_MSSQL_ATTR,
+        MsSqlJobClient,
+    )
+    from dlt.destinations.impl.mssql.sql_client import (
+        PyOdbcMsSqlClient,
+    )
+
+    class OdbcMsSqlClient(PyOdbcMsSqlClient):
+        SKIP_CREDENTIALS = {"PWD", "AUTHENTICATION", "UID"}
+
+        def open_connection(self):
+            cfg = self.credentials._get_odbc_dsn_dict()
+            if (
+                cfg.get("AUTHENTICATION", "").strip().lower()
+                != "activedirectoryaccesstoken"
+            ):
+                return super().open_connection()
+
+            import pyodbc  # type: ignore
+
+            dsn = ";".join(
+                [f"{k}={v}" for k, v in cfg.items() if k not in self.SKIP_CREDENTIALS]
+            )
+
+            self._conn = pyodbc.connect(
+                dsn,
+                timeout=self.credentials.connect_timeout,
+                attrs_before={
+                    MSSQL_COPT_SS_ACCESS_TOKEN: serialize_azure_token(cfg["PWD"]),
+                },
+            )
+
+            # https://github.com/mkleehammer/pyodbc/wiki/Using-an-Output-Converter-function
+            self._conn.add_output_converter(-155, handle_datetimeoffset)
+            self._conn.autocommit = True
+            return self._conn
+
+    class MsSqlClient(MsSqlJobClient):
+        def __init__(
+            self,
+            schema: Schema,
+            config: MsSqlClientConfiguration,
+            capabilities: DestinationCapabilitiesContext,
+        ) -> None:
+            sql_client = OdbcMsSqlClient(
+                config.normalize_dataset_name(schema),
+                config.normalize_staging_dataset_name(schema),
+                config.credentials,
+                capabilities,
+            )
+            super(MsSqlJobClient, self).__init__(schema, config, sql_client)
+            self.config: MsSqlClientConfiguration = config
+            self.sql_client = sql_client
+            self.active_hints = HINT_TO_MSSQL_ATTR if self.config.create_indexes else {}
+            self.type_mapper = capabilities.get_type_mapper()
+
+    class MsSqlDestImpl(dlt.destinations.mssql):
+        @property
+        def client_class(self):
+            return MsSqlClient
+
+    return MsSqlDestImpl
+
+
 class MsSQLDestination(GenericSqlDestination):
     def dlt_dest(self, uri: str, **kwargs):
-
+        cls = build_mssql_dest()
+        return cls(credentials=uri, **kwargs)


 class DatabricksDestination(GenericSqlDestination):
     def dlt_dest(self, uri: str, **kwargs):
-
+        p = urlparse(uri)
+        q = parse_qs(p.query)
+        access_token = p.password
+        server_hostname = p.hostname
+        http_path = q.get("http_path", [None])[0]
+        catalog = q.get("catalog", [None])[0]
+        schema = q.get("schema", [None])[0]
+
+        creds = {
+            "access_token": access_token,
+            "server_hostname": server_hostname,
+            "http_path": http_path,
+            "catalog": catalog,
+            "schema": schema,
+        }
+
+        return dlt.destinations.databricks(
+            credentials=creds,
+            **kwargs,
+        )


 class SynapseDestination(GenericSqlDestination):
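Note: `handle_datetimeoffset` decodes SQL Server DATETIMEOFFSET values that pyodbc delivers as a packed struct, and `serialize_azure_token` length-prefixes a UTF-16-LE token the way the ODBC driver expects. A small self-check, assuming the two helpers from the hunk above are in scope:

    import struct

    # Pack the sample tuple from the code comment: (2017, 3, 16, 10, 35, 18, 500000000, -6, 0).
    raw = struct.pack("<6hI2h", 2017, 3, 16, 10, 35, 18, 500000000, -6, 0)
    print(handle_datetimeoffset(raw))  # 2017-03-16 10:35:18.500000-06:00

    blob = serialize_azure_token("token")  # placeholder token string
    assert blob[:4] == struct.pack("<i", 2 * len("token"))  # little-endian byte-length prefix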
@@ -184,11 +375,9 @@ class CsvDestination(GenericSqlDestination):
         if output_path.count("/") > 1:
             os.makedirs(os.path.dirname(output_path), exist_ok=True)

-        table = pyarrow.parquet.read_table(first_file_path)
-        rows = table.to_pylist()
         with open(output_path, "w", newline="") as csv_file:
             csv_writer = None
-            for row in
+            for row in load_dlt_file(first_file_path):
                 row = filter_keys(row)
                 if csv_writer is None:
                     csv_writer = csv.DictWriter(csv_file, fieldnames=row.keys())
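Note: the CSV destination now streams rows through `load_dlt_file` instead of materializing the whole load file with pyarrow, while still creating the `DictWriter` lazily from the first row. A reduced, standalone sketch of that pattern (an in-memory iterator stands in for `load_dlt_file`):

    import csv
    import io

    rows = iter([{"id": 1, "name": "a"}, {"id": 2, "name": "b"}])  # stand-in for load_dlt_file(path)
    buf = io.StringIO()
    writer = None
    for row in rows:
        if writer is None:
            writer = csv.DictWriter(buf, fieldnames=row.keys())
            writer.writeheader()
        writer.writerow(row)
    print(buf.getvalue())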
@@ -211,43 +400,64 @@ class AthenaDestination:
         if not bucket.startswith("s3://"):
             bucket = f"s3://{bucket}"

-
-        if query_result_path:
-            if not query_result_path.startswith("s3://"):
-                query_result_path = f"s3://{query_result_path}"
-        else:
-            query_result_path = bucket
+        bucket = bucket.rstrip("/")

-
-        if not
-            raise ValueError("
+        dest_table = kwargs.get("dest_table", None)
+        if not dest_table:
+            raise ValueError("A destination table is required to connect to Athena.")

-
-        if
-            raise ValueError(
+        dest_table_fields = dest_table.split(".")
+        if len(dest_table_fields) != 2:
+            raise ValueError(
+                f"Table name must be in the format <schema>.<table>, given: {dest_table}"
+            )

-
+        query_result_path = f"{bucket}/{dest_table_fields[0]}_staging/metadata"

+        access_key_id = source_params.get("access_key_id", [None])[0]
+        secret_access_key = source_params.get("secret_access_key", [None])[0]
+        session_token = source_params.get("session_token", [None])[0]
+        profile_name = source_params.get("profile", ["default"])[0]
         region_name = source_params.get("region_name", [None])[0]
+
+        if not access_key_id and not secret_access_key:
+            import botocore.session  # type: ignore
+
+            session = botocore.session.Session(profile=profile_name)
+            default = session.get_credentials()
+            if not profile_name:
+                raise ValueError(
+                    "You have to either provide access_key_id and secret_access_key pair or a valid AWS profile name."
+                )
+            access_key_id = default.access_key
+            secret_access_key = default.secret_key
+            session_token = default.token
+            if region_name is None:
+                region_name = session.get_config_variable("region")
+
         if not region_name:
             raise ValueError("The region_name is required to connect to Athena.")

         os.environ["DESTINATION__BUCKET_URL"] = bucket
-
-
-
-
+        if access_key_id and secret_access_key:
+            os.environ["DESTINATION__CREDENTIALS__AWS_ACCESS_KEY_ID"] = access_key_id
+            os.environ["DESTINATION__CREDENTIALS__AWS_SECRET_ACCESS_KEY"] = (
+                secret_access_key
+            )
+        if session_token:
+            os.environ["DESTINATION__CREDENTIALS__AWS_SESSION_TOKEN"] = session_token

-        credentials = AwsCredentials(
-            aws_access_key_id=access_key_id,
-            aws_secret_access_key=secret_access_key,
-            region_name=region_name,
-        )
         return dlt.destinations.athena(
             query_result_bucket=query_result_path,
-            athena_work_group=
-            credentials=
+            athena_work_group=source_params.get("workgroup", [None])[0],  # type: ignore
+            credentials=AwsCredentials(
+                aws_access_key_id=access_key_id,  # type: ignore
+                aws_secret_access_key=secret_access_key,  # type: ignore
+                aws_session_token=session_token,
+                region_name=region_name,
+            ),
             destination_name=bucket,
+            force_iceberg=True,
         )

     def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
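Note: the rewritten Athena destination derives the query-result path from the destination schema (`{bucket}/{schema}_staging/metadata`) and resolves credentials from the URI parameters first, falling back to a botocore profile. A hedged sketch of that fallback, limited to documented botocore calls:

    import botocore.session

    profile_name = "default"  # the hunk defaults the "profile" parameter to "default"
    session = botocore.session.Session(profile=profile_name)
    creds = session.get_credentials()  # None if the profile resolves no credentials
    if creds is not None:
        access_key_id, secret_access_key, session_token = creds.access_key, creds.secret_key, creds.token
    region_name = session.get_config_variable("region")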
@@ -297,14 +507,16 @@ class ClickhouseDestination:
             raise ValueError(
                 "The TCP port of the ClickHouse server is required to establish a connection."
            )
-
+
         query_params = parse_qs(parsed_uri.query)
         secure = int(query_params["secure"][0]) if "secure" in query_params else 1

         http_port = (
             int(query_params["http_port"][0])
             if "http_port" in query_params
-            else 8443
+            else 8443
+            if secure == 1
+            else 8123
         )

         if secure not in (0, 1):
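Note: the ClickHouse HTTP port default now tracks the `secure` flag: 8443 when TLS is on (the default), 8123 when `secure=0`, unless `http_port` is set explicitly. For example:

    from urllib.parse import parse_qs

    query_params = parse_qs("secure=0")  # e.g. clickhouse://...?secure=0
    secure = int(query_params["secure"][0]) if "secure" in query_params else 1
    http_port = (
        int(query_params["http_port"][0])
        if "http_port" in query_params
        else 8443 if secure == 1 else 8123
    )
    print(http_port)  # 8123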
@@ -335,3 +547,278 @@ class ClickhouseDestination:

     def post_load(self):
         pass
+
+
+class BlobFSClient(dlt.destinations.impl.filesystem.filesystem.FilesystemClient):
+    @property
+    def dataset_path(self):
+        # override to remove dataset path
+        return self.bucket_path
+
+
+class BlobFS(dlt.destinations.filesystem):
+    @property
+    def client_class(self):
+        return BlobFSClient
+
+
+class SqliteDestination(GenericSqlDestination):
+    def dlt_dest(self, uri: str, **kwargs):
+        return dlt.destinations.sqlalchemy(credentials=uri)
+
+    def dlt_run_params(self, uri: str, table: str, **kwargs):
+        return {
+            # https://dlthub.com/docs/dlt-ecosystem/destinations/sqlalchemy#dataset-files
+            "dataset_name": "main",
+            "table_name": table,
+        }
+
+
+class MySqlDestination(GenericSqlDestination):
+    def dlt_dest(self, uri: str, **kwargs):
+        return dlt.destinations.sqlalchemy(credentials=uri)
+
+    def dlt_run_params(self, uri: str, table: str, **kwargs):
+        parsed = urlparse(uri)
+        database = parsed.path.lstrip("/")
+        if not database:
+            raise ValueError("You need to specify a database")
+        return {
+            "dataset_name": database,
+            "table_name": table,
+        }
+
+
+class TrinoTypeMapper:
+    """Custom type mapper for Trino to handle unsupported types."""
+
+    @staticmethod
+    def create_type_mapper():
+        """Create a custom type mapper for Trino."""
+        from dlt.destinations.impl.sqlalchemy.type_mapper import SqlalchemyTypeMapper
+        from sqlalchemy import BigInteger, Text
+        from sqlalchemy.sql import sqltypes
+
+        class CustomTrinoTypeMapper(SqlalchemyTypeMapper):
+            """Custom type mapper that converts unsupported Trino types."""
+
+            def to_destination_type(self, column, table=None):
+                # Handle special cases before calling parent
+                data_type = column.get("data_type", "")
+
+                # Convert JSON to VARCHAR for Trino's Iceberg catalog
+                if data_type == "json":
+                    # Use TEXT (unlimited VARCHAR) for JSON data
+                    return Text()
+
+                # Convert BINARY to VARCHAR
+                if data_type == "binary":
+                    return Text()
+
+                # Handle integer types - always use BIGINT for Trino
+                # Note: dlt uses "bigint" internally, not "integer"
+                if data_type in ["bigint", "integer", "int"]:
+                    return BigInteger()
+
+                # For other types, try parent mapper
+                try:
+                    type_ = super().to_destination_type(column, table)
+                except Exception:
+                    # If parent can't handle it, default to TEXT
+                    return Text()
+
+                # Convert any INTEGER type to BIGINT
+                if isinstance(type_, sqltypes.Integer) and not isinstance(
+                    type_, sqltypes.BigInteger
+                ):
+                    return BigInteger()
+
+                # Ensure VARCHAR types don't have constraints that Trino doesn't support
+                if isinstance(type_, sqltypes.String):
+                    # Return TEXT for unlimited string
+                    return Text()
+
+                return type_
+
+        return CustomTrinoTypeMapper
+
+
+class TrinoDestination(GenericSqlDestination):
+    def dlt_dest(self, uri: str, **kwargs):
+        # Import required modules
+        from dlt.destinations.impl.sqlalchemy.factory import (
+            sqlalchemy as sqlalchemy_factory,
+        )
+
+        # Create the destination with custom type mapper
+        # We need to use the factory to properly configure the type mapper
+        dest = sqlalchemy_factory(
+            credentials=uri, type_mapper=TrinoTypeMapper.create_type_mapper(), **kwargs
+        )
+
+        return dest
+
+
+class BlobStorageDestination(abc.ABC):
+    @abc.abstractmethod
+    def credentials(self, params: dict) -> FileSystemCredentials:
+        """Build credentials for the blob storage destination."""
+        pass
+
+    @property
+    @abc.abstractmethod
+    def protocol(self) -> str:
+        """The protocol used for the blob storage destination."""
+        pass
+
+    def dlt_dest(self, uri: str, **kwargs):
+        parsed_uri = urlparse(uri)
+        params = parse_qs(parsed_uri.query)
+        creds = self.credentials(params)
+
+        dest_table = kwargs["dest_table"]
+
+        # only validate if dest_table is not a full URI
+        if not parsed_uri.netloc:
+            dest_table = self.validate_table(dest_table)
+
+        table_parts = dest_table.split("/")
+
+        if parsed_uri.path.strip("/"):
+            path_parts = parsed_uri.path.strip("/ ").split("/")
+            table_parts = path_parts + table_parts
+
+        if parsed_uri.netloc:
+            table_parts.insert(0, parsed_uri.netloc.strip())
+
+        base_path = "/".join(table_parts[:-1])
+
+        opts = {
+            "bucket_url": f"{self.protocol}://{base_path}",
+            "credentials": creds,
+            # supresses dlt warnings about dataset name normalization.
+            # we don't use dataset names in S3 so it's fine to disable this.
+            "enable_dataset_name_normalization": False,
+        }
+        layout = params.get("layout", [None])[0]
+        if layout is not None:
+            opts["layout"] = layout
+
+        return BlobFS(**opts)  # type: ignore
+
+    def validate_table(self, table: str):
+        table = table.strip("/ ")
+        if len(table.split("/")) < 2:
+            raise ValueError("Table name must be in the format {bucket-name}/{path}")
+        return table
+
+    def dlt_run_params(self, uri: str, table: str, **kwargs):
+        table_parts = table.split("/")
+        return {
+            "table_name": table_parts[-1].strip(),
+        }
+
+    def post_load(self) -> None:
+        pass
+
+
+class S3Destination(BlobStorageDestination):
+    @property
+    def protocol(self) -> str:
+        return "s3"
+
+    def credentials(self, params: dict) -> FileSystemCredentials:
+        access_key_id = params.get("access_key_id", [None])[0]
+        if access_key_id is None:
+            raise MissingValueError("access_key_id", "S3")
+
+        secret_access_key = params.get("secret_access_key", [None])[0]
+        if secret_access_key is None:
+            raise MissingValueError("secret_access_key", "S3")
+
+        endpoint_url = params.get("endpoint_url", [None])[0]
+        if endpoint_url is not None:
+            parsed_endpoint = urlparse(endpoint_url)
+            if not parsed_endpoint.scheme or not parsed_endpoint.netloc:
+                raise ValueError("Invalid endpoint_url. Must be a valid URL.")
+
+        return AwsCredentials(
+            aws_access_key_id=access_key_id,
+            aws_secret_access_key=secret_access_key,
+            endpoint_url=endpoint_url,
+        )
+
+
+class GCSDestination(BlobStorageDestination):
+    @property
+    def protocol(self) -> str:
+        return "gs"
+
+    def credentials(self, params: dict) -> FileSystemCredentials:
+        """Builds GCS credentials from the provided parameters."""
+        credentials_path = params.get("credentials_path")
+        credentials_base64 = params.get("credentials_base64")
+        credentials_available = any(
+            map(
+                lambda x: x is not None,
+                [credentials_path, credentials_base64],
+            )
+        )
+        if credentials_available is False:
+            raise MissingValueError("credentials_path or credentials_base64", "GCS")
+
+        credentials = None
+        if credentials_path:
+            with open(credentials_path[0], "r") as f:
+                credentials = json.load(f)
+        else:
+            credentials = json.loads(base64.b64decode(credentials_base64[0]).decode())  # type: ignore
+
+        return credentials
+
+
+class ElasticsearchDestination:
+    def dlt_dest(self, uri: str, **kwargs):
+        from urllib.parse import urlparse
+
+        parsed_uri = urlparse(uri)
+
+        # Extract connection details from URI
+        scheme = parsed_uri.scheme or "http"
+        host = parsed_uri.hostname or "localhost"
+        port = parsed_uri.port or 9200
+        username = parsed_uri.username
+        password = parsed_uri.password
+
+        # Build connection string
+        if username and password:
+            connection_string = f"{scheme}://{username}:{password}@{host}:{port}"
+        else:
+            connection_string = f"{scheme}://{host}:{port}"
+
+        # Add query parameters if any
+        if parsed_uri.query:
+            connection_string += f"?{parsed_uri.query}"
+
+        return elasticsearch_insert(connection_string=connection_string)
+
+    def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
+        return {
+            "table_name": table,
+        }
+
+    def post_load(self):
+        pass
+
+
+class MongoDBDestination:
+    def dlt_dest(self, uri: str, **kwargs):
+        return mongodb_insert(uri)
+
+    def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
+        return {
+            "table_name": table,
+        }
+
+    def post_load(self):
+        pass
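Note: for the new blob-storage destinations (`S3Destination`, `GCSDestination`), the destination table doubles as the object path: everything before the last `/` becomes the filesystem bucket URL and the final segment becomes the dlt table name. A small sketch of that split, with a made-up bucket and path:

    dest_table = "my-bucket/exports/events"  # {bucket-name}/{path}, as validate_table expects
    table_parts = dest_table.split("/")
    bucket_url = "s3://" + "/".join(table_parts[:-1])
    table_name = table_parts[-1].strip()
    print(bucket_url, table_name)  # s3://my-bucket/exports events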