ingestr 0.13.39__py3-none-any.whl → 0.13.41__py3-none-any.whl

This diff represents the content of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

ingestr/src/buildinfo.py CHANGED
@@ -1 +1 @@
- version = "v0.13.39"
+ version = "v0.13.41"
ingestr/src/elasticsearch/__init__.py ADDED
@@ -0,0 +1,80 @@
+ from datetime import date, datetime
+ from typing import Any, Optional
+
+ import dlt
+ import pendulum
+ from dlt.common.time import ensure_pendulum_datetime
+ from pendulum import parse
+
+ from elasticsearch import Elasticsearch
+
+
+ @dlt.source
+ def elasticsearch_source(
+     connection_url: str,
+     index: str,
+     verify_certs: bool,
+     incremental: Optional[dlt.sources.incremental] = None,
+ ):
+     client = Elasticsearch(connection_url, verify_certs=verify_certs)
+
+     @dlt.resource(
+         name=index, primary_key="id", write_disposition="merge", incremental=incremental
+     )
+     def get_documents(incremental=incremental):
+         body = {"query": {"match_all": {}}}
+
+         if incremental:
+             start_value = incremental.last_value
+             range_filter = {"gte": start_value}
+             if incremental.end_value is not None:
+                 range_filter["lt"] = incremental.end_value
+             body = {"query": {"range": {incremental.cursor_path: range_filter}}}
+
+         page = client.search(index=index, scroll="5m", size=5, body=body)
+
+         sid = page["_scroll_id"]
+         hits = page["hits"]["hits"]
+
+         if not hits:
+             return
+
+         # fetching first page (via .search)
+         for doc in hits:
+             doc_data = {"id": doc["_id"], **doc["_source"]}
+             if incremental:
+                 doc_data[incremental.cursor_path] = convert_elasticsearch_objs(
+                     doc_data[incremental.cursor_path]
+                 )
+             yield doc_data
+
+         while True:
+             # fetching page 2 and other pages (via .scroll)
+             page = client.scroll(scroll_id=sid, scroll="5m")
+             sid = page["_scroll_id"]
+             hits = page["hits"]["hits"]
+             if not hits:
+                 break
+             for doc in hits:
+                 doc_data = {"id": doc["_id"], **doc["_source"]}
+                 if incremental:
+                     doc_data[incremental.cursor_path] = convert_elasticsearch_objs(
+                         doc_data[incremental.cursor_path]
+                     )
+                 yield doc_data
+
+         client.clear_scroll(scroll_id=sid)
+
+     return get_documents
+
+
+ def convert_elasticsearch_objs(value: Any) -> Any:
+     if isinstance(value, str):
+         parsed_date = parse(value, strict=False)
+         if parsed_date is not None:
+             if isinstance(
+                 parsed_date,
+                 (pendulum.DateTime, pendulum.Date, datetime, date, str, float, int),
+             ):
+                 return ensure_pendulum_datetime(parsed_date)
+     return value
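
For context, a minimal sketch (not part of the release) of how this new source can be driven directly with dlt; the index name "events", the cursor field "updated_at", and the DuckDB destination are illustrative assumptions:

import dlt

from ingestr.src.elasticsearch import elasticsearch_source

# Hypothetical index and cursor field; assumes a reachable local cluster.
source = elasticsearch_source(
    connection_url="http://localhost:9200",
    index="events",
    verify_certs=False,
    incremental=dlt.sources.incremental("updated_at"),
)

pipeline = dlt.pipeline(pipeline_name="es_demo", destination="duckdb", dataset_name="raw")
print(pipeline.run(source.with_resources("events")))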
ingestr/src/factory.py CHANGED
@@ -28,6 +28,7 @@ from ingestr.src.sources import (
      AsanaSource,
      ChessSource,
      DynamoDBSource,
+     ElasticsearchSource,
      FacebookAdsSource,
      FrankfurterSource,
      FreshdeskSource,
@@ -78,6 +79,7 @@ SQL_SOURCE_SCHEMES = [
      "clickhouse",
      "databricks",
      "db2",
+     "spanner",
  ]
  
  
@@ -153,6 +155,7 @@ class SourceDestinationFactory:
          "frankfurter": FrankfurterSource,
          "freshdesk": FreshdeskSource,
          "phantombuster": PhantombusterSource,
+         "elasticsearch": ElasticsearchSource,
      }
      destinations: Dict[str, Type[DestinationProtocol]] = {
          "bigquery": BigQueryDestination,
ingestr/src/sources.py CHANGED
@@ -52,7 +52,10 @@ class SqlSource:
      def dlt_source(self, uri: str, table: str, **kwargs):
          table_fields = TableDefinition(dataset="custom", table="custom")
          if not table.startswith("query:"):
-             table_fields = table_string_to_dataclass(table)
+             if uri.startswith("spanner://"):
+                 table_fields = TableDefinition(dataset="", table=table)
+             else:
+                 table_fields = table_string_to_dataclass(table)
  
          incremental = None
          if kwargs.get("incremental_key"):
@@ -113,6 +116,45 @@
          if uri.startswith("db2://"):
              uri = uri.replace("db2://", "db2+ibm_db://")
  
+         if uri.startswith("spanner://"):
+             parsed_uri = urlparse(uri)
+             query_params = parse_qs(parsed_uri.query)
+
+             project_id_param = query_params.get("project_id")
+             instance_id_param = query_params.get("instance_id")
+             database_param = query_params.get("database")
+
+             cred_path = query_params.get("credentials_path")
+             cred_base64 = query_params.get("credentials_base64")
+
+             if not project_id_param or not instance_id_param or not database_param:
+                 raise ValueError(
+                     "project_id, instance_id and database are required in the URI to get data from Google Spanner"
+                 )
+
+             project_id = project_id_param[0]
+             instance_id = instance_id_param[0]
+             database = database_param[0]
+
+             if not cred_path and not cred_base64:
+                 raise ValueError(
+                     "credentials_path or credentials_base64 is required in the URI to get data from Google Spanner"
+                 )
+             if cred_path:
+                 os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = cred_path[0]
+             elif cred_base64:
+                 credentials = json.loads(
+                     base64.b64decode(cred_base64[0]).decode("utf-8")
+                 )
+                 temp = tempfile.NamedTemporaryFile(
+                     mode="w", delete=False, suffix=".json"
+                 )
+                 json.dump(credentials, temp)
+                 temp.close()
+                 os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = temp.name
+
+             uri = f"spanner+spanner:///projects/{project_id}/instances/{instance_id}/databases/{database}"
+
          from dlt.common.libs.sql_alchemy import (
              Engine,
              MetaData,
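
As a standalone illustration (not shipped in the package), the branch above rewrites a spanner:// source URI into the sqlalchemy-spanner dialect URL; the project, instance, and database names below are placeholders:

from urllib.parse import parse_qs, urlparse

uri = (
    "spanner://?project_id=my-project"
    "&instance_id=my-instance"
    "&database=my-db"
    "&credentials_path=/path/to/service-account.json"
)
params = parse_qs(urlparse(uri).query)
rewritten = (
    f"spanner+spanner:///projects/{params['project_id'][0]}"
    f"/instances/{params['instance_id'][0]}"
    f"/databases/{params['database'][0]}"
)
# Prints: spanner+spanner:///projects/my-project/instances/my-instance/databases/my-db
print(rewritten)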
@@ -2298,3 +2340,55 @@ class PhantombusterSource:
              start_date=start_date,
              end_date=end_date,
          ).with_resources(table_name)
+
+
+ class ElasticsearchSource:
+     def handles_incrementality(self) -> bool:
+         return False
+
+     def dlt_source(self, uri: str, table: str, **kwargs):
+         from ingestr.src.elasticsearch import elasticsearch_source
+
+         incremental = None
+         if kwargs.get("incremental_key"):
+             start_value = kwargs.get("interval_start")
+             end_value = kwargs.get("interval_end")
+
+             incremental = dlt_incremental(
+                 kwargs.get("incremental_key", ""),
+                 initial_value=start_value,
+                 end_value=end_value,
+                 range_end="closed",
+                 range_start="closed",
+             )
+
+         # elasticsearch://localhost:9200?secure=true&verify_certs=false
+         parsed = urlparse(uri)
+
+         index = table
+         if not index:
+             raise ValueError(
+                 "Table name must be provided which is the index name in elasticsearch"
+             )
+
+         query_params = parsed.query
+         params = parse_qs(query_params)
+
+         secure = True
+         if "secure" in params:
+             secure = params["secure"][0].capitalize() == "True"
+
+         verify_certs = True
+         if "verify_certs" in params:
+             verify_certs = params["verify_certs"][0].capitalize() == "True"
+
+         scheme = "https" if secure else "http"
+         netloc = parsed.netloc
+         connection_url = f"{scheme}://{netloc}"
+
+         return elasticsearch_source(
+             connection_url=connection_url,
+             index=index,
+             verify_certs=verify_certs,
+             incremental=incremental,
+         ).with_resources(table)
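
Similarly, a hedged sketch of how ElasticsearchSource maps a source URI to the connection URL it hands to the client; the host and flag values are illustrative:

from urllib.parse import parse_qs, urlparse

uri = "elasticsearch://localhost:9200?secure=false&verify_certs=false"
parsed = urlparse(uri)
params = parse_qs(parsed.query)

# secure defaults to True (https) when the flag is absent from the URI.
secure = params.get("secure", ["true"])[0].capitalize() == "True"
scheme = "https" if secure else "http"
# Prints: http://localhost:9200
print(f"{scheme}://{parsed.netloc}")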
ingestr-0.13.39.dist-info/METADATA → ingestr-0.13.41.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ingestr
- Version: 0.13.39
+ Version: 0.13.41
  Summary: ingestr is a command-line application that ingests data from various sources and stores them in any database.
  Project-URL: Homepage, https://github.com/bruin-data/ingestr
  Project-URL: Issues, https://github.com/bruin-data/ingestr/issues
@@ -50,6 +50,8 @@ Requires-Dist: dlt==1.10.0
  Requires-Dist: dnspython==2.7.0
  Requires-Dist: duckdb-engine==0.17.0
  Requires-Dist: duckdb==1.2.1
+ Requires-Dist: elastic-transport==8.17.1
+ Requires-Dist: elasticsearch==8.10.1
  Requires-Dist: et-xmlfile==2.0.0
  Requires-Dist: facebook-business==20.0.0
  Requires-Dist: filelock==3.17.0
@@ -70,11 +72,14 @@ Requires-Dist: google-auth==2.38.0
  Requires-Dist: google-cloud-bigquery-storage==2.24.0
  Requires-Dist: google-cloud-bigquery==3.30.0
  Requires-Dist: google-cloud-core==2.4.2
+ Requires-Dist: google-cloud-spanner==3.54.0
  Requires-Dist: google-cloud-storage==3.1.0
  Requires-Dist: google-crc32c==1.6.0
  Requires-Dist: google-resumable-media==2.7.2
  Requires-Dist: googleapis-common-protos==1.69.0
  Requires-Dist: greenlet==3.2.2
+ Requires-Dist: grpc-google-iam-v1==0.14.2
+ Requires-Dist: grpc-interceptor==0.15.4
  Requires-Dist: grpcio-status==1.62.3
  Requires-Dist: grpcio==1.70.0
  Requires-Dist: hdbcli==2.23.27
@@ -166,9 +171,11 @@ Requires-Dist: soupsieve==2.6
  Requires-Dist: sqlalchemy-bigquery==1.12.1
  Requires-Dist: sqlalchemy-hana==2.0.0
  Requires-Dist: sqlalchemy-redshift==0.8.14
+ Requires-Dist: sqlalchemy-spanner==1.11.0
  Requires-Dist: sqlalchemy2-stubs==0.0.2a38
  Requires-Dist: sqlalchemy==1.4.52
  Requires-Dist: sqlglot==26.12.1
+ Requires-Dist: sqlparse==0.5.3
  Requires-Dist: stripe==10.7.0
  Requires-Dist: tenacity==9.0.0
  Requires-Dist: thrift==0.16.0
ingestr-0.13.39.dist-info/RECORD → ingestr-0.13.41.dist-info/RECORD RENAMED
@@ -2,15 +2,15 @@ ingestr/conftest.py,sha256=Q03FIJIZpLBbpj55cfCHIKEjc1FCvWJhMF2cidUJKQU,1748
  ingestr/main.py,sha256=Pe_rzwcDRKIYa7baEVUAAPOHyqQbX29RUexMl0F_S1k,25273
  ingestr/src/.gitignore,sha256=8cX1AZTSI0TcdZFGTmS_oyBjpfCzhOEt0DdAo2dFIY8,203
  ingestr/src/blob.py,sha256=onMe5ZHxPXTdcB_s2oGNdMo-XQJ3ajwOsWE9eSTGFmc,1495
- ingestr/src/buildinfo.py,sha256=edyodue-Rkn4zTwWVR9OU0dSsDXVokKw_KKllrI1amM,21
+ ingestr/src/buildinfo.py,sha256=AK7sGUNx6CPDKJOXeMexFRun9bjpyv2c9t4DII73Pes,21
  ingestr/src/destinations.py,sha256=MctbeJUyNr0DRB0XYt2xAbEKkHZ40-nXXEOYCs4KuoE,15420
  ingestr/src/errors.py,sha256=Ufs4_DfE77_E3vnA1fOQdi6cmuLVNm7_SbFLkL1XPGk,686
- ingestr/src/factory.py,sha256=j-FKRBEBZVLT_DEn-SCu9KEvaab3BchEV5hzTjpree8,5511
+ ingestr/src/factory.py,sha256=KJKIL9q7kU4oAVXy5o0wDwLAU0nG9y0xC8D7HzksYak,5597
  ingestr/src/filters.py,sha256=C-_TIVkF_cxZBgG-Run2Oyn0TAhJgA8IWXZ-OPY3uek,1136
  ingestr/src/loader.py,sha256=9NaWAyfkXdqAZSS-N72Iwo36Lbx4PyqIfaaH1dNdkFs,1712
  ingestr/src/partition.py,sha256=BrIP6wFJvyR7Nus_3ElnfxknUXeCipK_E_bB8kZowfc,969
  ingestr/src/resource.py,sha256=XG-sbBapFVEM7OhHQFQRTdTLlh-mHB-N4V1t8F8Tsww,543
- ingestr/src/sources.py,sha256=vppNI75ucM0EtW2kP5ldKyhc4Pij_hGVmKlZ9DNL4g0,79181
+ ingestr/src/sources.py,sha256=SWZAa6bokLurQRPtH7rxi8K-GSVLp_p9Ig1ArGRsxCo,82635
  ingestr/src/table_definition.py,sha256=REbAbqdlmUMUuRh8nEQRreWjPVOQ5ZcfqGkScKdCrmk,390
  ingestr/src/time.py,sha256=H_Fk2J4ShXyUM-EMY7MqCLZQhlnZMZvO952bmZPc4yE,254
  ingestr/src/version.py,sha256=J_2xgZ0mKlvuHcjdKCx2nlioneLH0I47JiU_Slr_Nwc,189
@@ -35,6 +35,7 @@ ingestr/src/chess/helpers.py,sha256=v1HTImOMjAF7AzZUPDIuHu00e7ut0o5y1kWcVYo4QZw,
  ingestr/src/chess/settings.py,sha256=p0RlCGgtXUacPDEvZmwzSWmzX0Apj1riwfz-nrMK89k,158
  ingestr/src/collector/spinner.py,sha256=_ZUqF5MI43hVIULdjF5s5mrAZbhEFXaiWirQmrv3Yk4,1201
  ingestr/src/dynamodb/__init__.py,sha256=swhxkeYBbJ35jn1IghCtvYWT2BM33KynVCh_oR4z28A,2264
+ ingestr/src/elasticsearch/__init__.py,sha256=m-q93HgUmTwGDUwHOjHawstWL06TC3WIX3H05szybrY,2556
  ingestr/src/facebook_ads/__init__.py,sha256=reEpSr4BaKA1wO3qVgCH51gW-TgWkbJ_g24UIhJWbac,9286
  ingestr/src/facebook_ads/exceptions.py,sha256=4Nlbc0Mv3i5g-9AoyT-n1PIa8IDi3VCTfEAzholx4Wc,115
  ingestr/src/facebook_ads/helpers.py,sha256=ZLbNHiKer5lPb4g3_435XeBJr57Wv0o1KTyBA1mQ100,9068
@@ -127,8 +128,8 @@ ingestr/testdata/delete_insert_part2.csv,sha256=B_KUzpzbNdDY_n7wWop1mT2cz36TmayS
  ingestr/testdata/merge_expected.csv,sha256=DReHqWGnQMsf2PBv_Q2pfjsgvikYFnf1zYcQZ7ZqYN0,276
  ingestr/testdata/merge_part1.csv,sha256=Pw8Z9IDKcNU0qQHx1z6BUf4rF_-SxKGFOvymCt4OY9I,185
  ingestr/testdata/merge_part2.csv,sha256=T_GiWxA81SN63_tMOIuemcvboEFeAmbKc7xRXvL9esw,287
- ingestr-0.13.39.dist-info/METADATA,sha256=goY5MW5AzJwYQ0cbwTmlNxZgCP1QRSt6ROmBOImESIM,13575
- ingestr-0.13.39.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- ingestr-0.13.39.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
- ingestr-0.13.39.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
- ingestr-0.13.39.dist-info/RECORD,,
+ ingestr-0.13.41.dist-info/METADATA,sha256=UzGBs9s0Kr6R1xji_ULG5Tuc383Klx2AIzfyZdXLBp4,13852
+ ingestr-0.13.41.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ ingestr-0.13.41.dist-info/entry_points.txt,sha256=oPJy0KBnPWYjDtP1k8qwAihcTLHSZokSQvRAw_wtfJM,46
+ ingestr-0.13.41.dist-info/licenses/LICENSE.md,sha256=cW8wIhn8HFE-KLStDF9jHQ1O_ARWP3kTpk_-eOccL24,1075
+ ingestr-0.13.41.dist-info/RECORD,,