ingestr 0.13.93__py3-none-any.whl → 0.14.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ingestr might be problematic. Click here for more details.
- ingestr/src/adjust/__init__.py +4 -4
- ingestr/src/anthropic/__init__.py +277 -0
- ingestr/src/anthropic/helpers.py +525 -0
- ingestr/src/applovin/__init__.py +1 -1
- ingestr/src/buildinfo.py +1 -1
- ingestr/src/destinations.py +145 -7
- ingestr/src/docebo/__init__.py +589 -0
- ingestr/src/docebo/client.py +435 -0
- ingestr/src/docebo/helpers.py +97 -0
- ingestr/src/elasticsearch/helpers.py +112 -0
- ingestr/src/facebook_ads/__init__.py +10 -5
- ingestr/src/factory.py +14 -0
- ingestr/src/fluxx/__init__.py +4175 -0
- ingestr/src/frankfurter/__init__.py +157 -157
- ingestr/src/fundraiseup/__init__.py +49 -0
- ingestr/src/fundraiseup/client.py +81 -0
- ingestr/src/google_analytics/__init__.py +1 -1
- ingestr/src/mongodb/__init__.py +1 -1
- ingestr/src/mongodb/helpers.py +101 -79
- ingestr/src/sources.py +151 -4
- ingestr/tests/unit/test_smartsheets.py +1 -1
- {ingestr-0.13.93.dist-info → ingestr-0.14.0.dist-info}/METADATA +12 -7
- {ingestr-0.13.93.dist-info → ingestr-0.14.0.dist-info}/RECORD +26 -18
- {ingestr-0.13.93.dist-info → ingestr-0.14.0.dist-info}/WHEEL +0 -0
- {ingestr-0.13.93.dist-info → ingestr-0.14.0.dist-info}/entry_points.txt +0 -0
- {ingestr-0.13.93.dist-info → ingestr-0.14.0.dist-info}/licenses/LICENSE.md +0 -0
ingestr/src/destinations.py
CHANGED
|
@@ -19,13 +19,14 @@ from dlt.destinations.impl.clickhouse.configuration import (
|
|
|
19
19
|
ClickHouseCredentials,
|
|
20
20
|
)
|
|
21
21
|
|
|
22
|
+
from ingestr.src.elasticsearch.helpers import elasticsearch_insert
|
|
22
23
|
from ingestr.src.errors import MissingValueError
|
|
23
24
|
from ingestr.src.loader import load_dlt_file
|
|
25
|
+
from ingestr.src.mongodb.helpers import mongodb_insert
|
|
24
26
|
|
|
25
27
|
|
|
26
28
|
class GenericSqlDestination:
|
|
27
29
|
def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
|
|
28
|
-
|
|
29
30
|
if uri.startswith("databricks://"):
|
|
30
31
|
p = urlparse(uri)
|
|
31
32
|
q = parse_qs(p.query)
|
|
@@ -37,8 +38,8 @@ class GenericSqlDestination:
|
|
|
37
38
|
"table_name": table,
|
|
38
39
|
}
|
|
39
40
|
return res
|
|
40
|
-
|
|
41
|
-
table_fields = table.split(".")
|
|
41
|
+
|
|
42
|
+
table_fields = table.split(".")
|
|
42
43
|
if len(table_fields) != 2:
|
|
43
44
|
raise ValueError("Table name must be in the format <schema>.<table>")
|
|
44
45
|
|
|
@@ -290,7 +291,6 @@ class DatabricksDestination(GenericSqlDestination):
|
|
|
290
291
|
http_path = q.get("http_path", [None])[0]
|
|
291
292
|
catalog = q.get("catalog", [None])[0]
|
|
292
293
|
schema = q.get("schema", [None])[0]
|
|
293
|
-
|
|
294
294
|
|
|
295
295
|
creds = {
|
|
296
296
|
"access_token": access_token,
|
|
@@ -299,14 +299,12 @@ class DatabricksDestination(GenericSqlDestination):
|
|
|
299
299
|
"catalog": catalog,
|
|
300
300
|
"schema": schema,
|
|
301
301
|
}
|
|
302
|
-
|
|
302
|
+
|
|
303
303
|
return dlt.destinations.databricks(
|
|
304
304
|
credentials=creds,
|
|
305
305
|
**kwargs,
|
|
306
306
|
)
|
|
307
|
-
|
|
308
307
|
|
|
309
|
-
|
|
310
308
|
|
|
311
309
|
class SynapseDestination(GenericSqlDestination):
|
|
312
310
|
def dlt_dest(self, uri: str, **kwargs):
|
|
@@ -591,6 +589,76 @@ class MySqlDestination(GenericSqlDestination):
|
|
|
591
589
|
}
|
|
592
590
|
|
|
593
591
|
|
|
592
|
+
class TrinoTypeMapper:
    """Holder for the factory that builds the Trino-specific dlt type mapper."""

    @staticmethod
    def create_type_mapper():
        """Return a ``SqlalchemyTypeMapper`` subclass adjusted for Trino.

        The class itself is returned, not an instance. All dlt/SQLAlchemy
        imports are local to this function.
        """
        from dlt.destinations.impl.sqlalchemy.type_mapper import SqlalchemyTypeMapper
        from sqlalchemy import BigInteger, Text
        from sqlalchemy.sql import sqltypes

        class CustomTrinoTypeMapper(SqlalchemyTypeMapper):
            """Type mapper that replaces types Trino cannot handle."""

            def to_destination_type(self, column, table=None):
                """Map a dlt column to a SQLAlchemy type usable with Trino.

                JSON and binary columns become ``Text``, every integer kind
                becomes ``BigInteger``, string types lose their length
                constraint, and anything the parent mapper rejects falls
                back to ``Text``.
                """
                kind = column.get("data_type", "")

                # JSON (for Trino's Iceberg catalog) and binary data are both
                # stored as unbounded text.
                if kind == "json" or kind == "binary":
                    return Text()

                # dlt reports integers as "bigint"; the other spellings are
                # accepted too and all map to BIGINT.
                if kind in ("bigint", "integer", "int"):
                    return BigInteger()

                # Everything else is delegated to the stock mapper; a failure
                # there degrades to TEXT instead of aborting the load.
                try:
                    mapped = super().to_destination_type(column, table)
                except Exception:
                    return Text()

                # Widen any non-BIGINT integer type the parent produced.
                is_narrow_integer = isinstance(
                    mapped, sqltypes.Integer
                ) and not isinstance(mapped, sqltypes.BigInteger)
                if is_narrow_integer:
                    return BigInteger()

                # String types are replaced by unconstrained TEXT.
                if isinstance(mapped, sqltypes.String):
                    return Text()

                return mapped

        return CustomTrinoTypeMapper
|
|
644
|
+
|
|
645
|
+
|
|
646
|
+
class TrinoDestination(GenericSqlDestination):
    """Trino destination backed by dlt's SQLAlchemy destination factory."""

    def dlt_dest(self, uri: str, **kwargs):
        """Build the dlt SQLAlchemy destination for Trino.

        ``uri`` is handed to the factory as its credentials, the custom Trino
        type mapper is attached, and any extra keyword arguments are
        forwarded unchanged.
        """
        from dlt.destinations.impl.sqlalchemy.factory import (
            sqlalchemy as build_sqlalchemy_destination,
        )

        # The type mapper has to go through the factory so that it is
        # configured on the destination itself.
        trino_type_mapper = TrinoTypeMapper.create_type_mapper()
        return build_sqlalchemy_destination(
            credentials=uri,
            type_mapper=trino_type_mapper,
            **kwargs,
        )
|
|
660
|
+
|
|
661
|
+
|
|
594
662
|
class BlobStorageDestination(abc.ABC):
|
|
595
663
|
@abc.abstractmethod
|
|
596
664
|
def credentials(self, params: dict) -> FileSystemCredentials:
|
|
@@ -707,3 +775,73 @@ class GCSDestination(BlobStorageDestination):
|
|
|
707
775
|
credentials = json.loads(base64.b64decode(credentials_base64[0]).decode()) # type: ignore
|
|
708
776
|
|
|
709
777
|
return credentials
|
|
778
|
+
|
|
779
|
+
|
|
780
|
+
class ElasticsearchDestination:
    """Destination that hands data to ``elasticsearch_insert``."""

    def dlt_dest(self, uri: str, **kwargs):
        """Normalize ``uri`` and return the ``elasticsearch_insert`` destination.

        The URI is rebuilt as ``scheme://[user:password@]host:port[?query]``.
        Missing parts default to scheme ``http``, host ``localhost`` and port
        ``9200``. Any path component of ``uri`` is dropped, and credentials
        are only kept when both a username and a password are present.

        Args:
            uri: Connection URI supplied by the caller.
            **kwargs: Accepted for parity with the other destinations; unused.

        Returns:
            Whatever ``elasticsearch_insert`` returns for the rebuilt
            connection string.
        """
        from urllib.parse import urlparse

        parsed_uri = urlparse(uri)

        # Extract connection details from the URI, applying defaults.
        scheme = parsed_uri.scheme or "http"
        host = parsed_uri.hostname or "localhost"
        port = parsed_uri.port or 9200
        username = parsed_uri.username
        password = parsed_uri.password

        # ``hostname`` strips the brackets of an IPv6 literal; put them back
        # so the ``host:port`` pair below stays unambiguous.
        if ":" in host:
            host = f"[{host}]"

        # Build the connection string.
        if username and password:
            connection_string = f"{scheme}://{username}:{password}@{host}:{port}"
        else:
            connection_string = f"{scheme}://{host}:{port}"

        # Carry over query parameters, if any.
        if parsed_uri.query:
            connection_string += f"?{parsed_uri.query}"

        return elasticsearch_insert(connection_string=connection_string)

    def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
        """Return the run parameters: only ``table_name``, set to ``table``."""
        return {
            "table_name": table,
        }

    def post_load(self):
        """No-op hook; nothing is done after loading."""
        pass
|
|
812
|
+
|
|
813
|
+
|
|
814
|
+
class MongoDBDestination:
    """Destination that hands data to ``mongodb_insert``."""

    def dlt_dest(self, uri: str, **kwargs):
        """Normalize ``uri`` and return the ``mongodb_insert`` destination.

        The connection string is rebuilt as
        ``mongodb://[user:password@]host:port[/?query]``. The host defaults to
        ``localhost`` and the port to ``27017``. The database name is taken
        from the URI path (default ``ingestr_db``) and passed separately; it
        is not part of the connection string. Credentials are only kept when
        both a username and a password are present.

        A ``mongodb+srv`` URI keeps its scheme and is rebuilt without a port,
        because SRV connection strings do not carry one.

        Args:
            uri: Connection URI supplied by the caller.
            **kwargs: Accepted for parity with the other destinations; unused.

        Returns:
            Whatever ``mongodb_insert`` returns for the connection string and
            database name.
        """
        from urllib.parse import urlparse

        parsed_uri = urlparse(uri)

        # Extract connection details from the URI, applying defaults.
        host = parsed_uri.hostname or "localhost"
        username = parsed_uri.username
        password = parsed_uri.password
        database = parsed_uri.path.lstrip("/") or "ingestr_db"

        # ``hostname`` strips the brackets of an IPv6 literal; put them back
        # so the ``host:port`` pair below stays unambiguous.
        if ":" in host:
            host = f"[{host}]"

        if parsed_uri.scheme == "mongodb+srv":
            # Rewriting an SRV URI to mongodb://host:27017 would bypass the
            # DNS seed-list lookup, so keep the scheme and leave the port out.
            scheme = "mongodb+srv"
            address = host
        else:
            scheme = "mongodb"
            address = f"{host}:{parsed_uri.port or 27017}"

        # Build the connection string.
        if username and password:
            connection_string = f"{scheme}://{username}:{password}@{address}"
        else:
            connection_string = f"{scheme}://{address}"

        # Carry over query parameters, if any. The connection-string format
        # expects a "/" between the host list and the options; without it,
        # drivers that enforce the rule reject the URI.
        if parsed_uri.query:
            connection_string += f"/?{parsed_uri.query}"

        return mongodb_insert(connection_string, database)

    def dlt_run_params(self, uri: str, table: str, **kwargs) -> dict:
        """Return the run parameters: only ``table_name``, set to ``table``."""
        return {
            "table_name": table,
        }

    def post_load(self):
        """No-op hook; nothing is done after loading."""
        pass
|