ingestr 0.13.93__py3-none-any.whl → 0.14.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of ingestr might be problematic; see the registry's advisory page for more details.

@@ -19,13 +19,14 @@ from dlt.destinations.impl.clickhouse.configuration import (
19
19
  ClickHouseCredentials,
20
20
  )
21
21
 
22
+ from ingestr.src.elasticsearch.helpers import elasticsearch_insert
22
23
  from ingestr.src.errors import MissingValueError
23
24
  from ingestr.src.loader import load_dlt_file
25
+ from ingestr.src.mongodb.helpers import mongodb_insert
24
26
 
25
27
 
26
28
  class GenericSqlDestination:
27
29
  def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
28
-
29
30
  if uri.startswith("databricks://"):
30
31
  p = urlparse(uri)
31
32
  q = parse_qs(p.query)
@@ -37,8 +38,8 @@ class GenericSqlDestination:
37
38
  "table_name": table,
38
39
  }
39
40
  return res
40
-
41
- table_fields = table.split(".")
41
+
42
+ table_fields = table.split(".")
42
43
  if len(table_fields) != 2:
43
44
  raise ValueError("Table name must be in the format <schema>.<table>")
44
45
 
@@ -290,7 +291,6 @@ class DatabricksDestination(GenericSqlDestination):
290
291
  http_path = q.get("http_path", [None])[0]
291
292
  catalog = q.get("catalog", [None])[0]
292
293
  schema = q.get("schema", [None])[0]
293
-
294
294
 
295
295
  creds = {
296
296
  "access_token": access_token,
@@ -299,14 +299,12 @@ class DatabricksDestination(GenericSqlDestination):
299
299
  "catalog": catalog,
300
300
  "schema": schema,
301
301
  }
302
-
302
+
303
303
  return dlt.destinations.databricks(
304
304
  credentials=creds,
305
305
  **kwargs,
306
306
  )
307
-
308
307
 
309
-
310
308
 
311
309
  class SynapseDestination(GenericSqlDestination):
312
310
  def dlt_dest(self, uri: str, **kwargs):
@@ -591,6 +589,76 @@ class MySqlDestination(GenericSqlDestination):
591
589
  }
592
590
 
593
591
 
592
class TrinoTypeMapper:
    """Custom type mapper for Trino to handle unsupported types."""

    @staticmethod
    def create_type_mapper():
        """Create a custom type mapper for Trino."""
        from dlt.destinations.impl.sqlalchemy.type_mapper import SqlalchemyTypeMapper
        from sqlalchemy import BigInteger, Text
        from sqlalchemy.sql import sqltypes

        class CustomTrinoTypeMapper(SqlalchemyTypeMapper):
            """Custom type mapper that converts unsupported Trino types."""

            def to_destination_type(self, column, table=None):
                declared = column.get("data_type", "")

                # JSON and BINARY are not storable as-is in Trino's Iceberg
                # catalog — both become TEXT (unlimited VARCHAR).
                if declared in ("json", "binary"):
                    return Text()

                # dlt spells integers as "bigint" internally; accept the
                # other spellings too and always widen to BIGINT for Trino.
                if declared in ("bigint", "integer", "int"):
                    return BigInteger()

                # Anything else goes through the stock sqlalchemy mapper.
                try:
                    mapped = super().to_destination_type(column, table)
                except Exception:
                    # Parent mapper has no rule for this type — fall back
                    # to TEXT rather than failing the load.
                    return Text()

                # Widen any narrower INTEGER the parent produced to BIGINT.
                is_narrow_int = isinstance(mapped, sqltypes.Integer) and not isinstance(
                    mapped, sqltypes.BigInteger
                )
                if is_narrow_int:
                    return BigInteger()

                # Strip length constraints Trino doesn't support by using
                # unlimited TEXT for every string type.
                if isinstance(mapped, sqltypes.String):
                    return Text()

                return mapped

        return CustomTrinoTypeMapper
644
+
645
+
646
class TrinoDestination(GenericSqlDestination):
    """Trino destination built on dlt's generic sqlalchemy destination."""

    def dlt_dest(self, uri: str, **kwargs):
        """Return a dlt sqlalchemy destination configured for Trino.

        The sqlalchemy factory is used directly so the Trino-specific type
        mapper can be injected into the destination configuration.
        """
        from dlt.destinations.impl.sqlalchemy.factory import (
            sqlalchemy as sqlalchemy_factory,
        )

        return sqlalchemy_factory(
            credentials=uri,
            type_mapper=TrinoTypeMapper.create_type_mapper(),
            **kwargs,
        )
660
+
661
+
594
662
  class BlobStorageDestination(abc.ABC):
595
663
  @abc.abstractmethod
596
664
  def credentials(self, params: dict) -> FileSystemCredentials:
@@ -707,3 +775,73 @@ class GCSDestination(BlobStorageDestination):
707
775
  credentials = json.loads(base64.b64decode(credentials_base64[0]).decode()) # type: ignore
708
776
 
709
777
  return credentials
778
+
779
+
780
class ElasticsearchDestination:
    """Destination that writes rows into Elasticsearch via ``elasticsearch_insert``."""

    @staticmethod
    def _build_connection_string(uri: str) -> str:
        """Rebuild a normalized Elasticsearch connection string from *uri*.

        Defaults: scheme ``http``, host ``localhost``, port ``9200``.
        Query parameters are carried over verbatim.

        Fix over the previous inline version: a URI carrying a username but
        no password (e.g. token-style auth) no longer silently drops the
        credentials.
        """
        from urllib.parse import urlparse

        parsed_uri = urlparse(uri)

        scheme = parsed_uri.scheme or "http"
        host = parsed_uri.hostname or "localhost"
        port = parsed_uri.port or 9200
        username = parsed_uri.username
        password = parsed_uri.password

        if username and password:
            connection_string = f"{scheme}://{username}:{password}@{host}:{port}"
        elif username:
            # Username without password: keep the user info instead of
            # discarding it entirely.
            connection_string = f"{scheme}://{username}@{host}:{port}"
        else:
            connection_string = f"{scheme}://{host}:{port}"

        if parsed_uri.query:
            connection_string += f"?{parsed_uri.query}"

        return connection_string

    def dlt_dest(self, uri: str, **kwargs):
        """Create the dlt custom destination for the given Elasticsearch *uri*."""
        return elasticsearch_insert(
            connection_string=self._build_connection_string(uri)
        )

    def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
        """Map the requested *table* to the target index name for dlt."""
        return {
            "table_name": table,
        }

    def post_load(self) -> None:
        """No post-load work is needed for Elasticsearch."""
        pass
812
+
813
+
814
class MongoDBDestination:
    """Destination that writes rows into MongoDB via ``mongodb_insert``."""

    @staticmethod
    def _build_target(uri: str) -> tuple:
        """Return ``(connection_string, database)`` parsed from *uri*.

        Defaults: host ``localhost``, port ``27017``, database
        ``ingestr_db`` when the URI path is empty. Query parameters are
        carried over verbatim.

        Fix over the previous inline version: a URI carrying a username but
        no password no longer silently drops the credentials, and the path
        is stripped only once.
        """
        from urllib.parse import urlparse

        parsed_uri = urlparse(uri)

        host = parsed_uri.hostname or "localhost"
        port = parsed_uri.port or 27017
        username = parsed_uri.username
        password = parsed_uri.password
        database = parsed_uri.path.lstrip("/") or "ingestr_db"

        if username and password:
            connection_string = f"mongodb://{username}:{password}@{host}:{port}"
        elif username:
            # Username without password: keep the user info instead of
            # discarding it entirely.
            connection_string = f"mongodb://{username}@{host}:{port}"
        else:
            connection_string = f"mongodb://{host}:{port}"

        if parsed_uri.query:
            connection_string += f"?{parsed_uri.query}"

        return connection_string, database

    def dlt_dest(self, uri: str, **kwargs):
        """Create the dlt custom destination for the given MongoDB *uri*."""
        connection_string, database = self._build_target(uri)
        return mongodb_insert(connection_string, database)

    def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
        """Map the requested *table* to the target collection name for dlt."""
        return {
            "table_name": table,
        }

    def post_load(self) -> None:
        """No post-load work is needed for MongoDB."""
        pass