adapta 3.2.6a482.dev7__tar.gz → 3.2.6a485.dev5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/PKG-INFO +2 -2
- adapta-3.2.6a485.dev5/adapta/_version.py +1 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v3/_functions.py +5 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v3/datastax_astra/_model_mappers.py +0 -2
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v3/datastax_astra/astra_client.py +33 -39
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/format.py +69 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/_models.py +15 -4
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/_qes_astra.py +3 -1
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/_qes_delta.py +6 -1
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/pyproject.toml +2 -2
- adapta-3.2.6a482.dev7/adapta/_version.py +0 -1
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/LICENSE +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/connectors/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/connectors/service_bus/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/connectors/service_bus/_connector.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/_async_logger.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/_base.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/_internal.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/_internal_logger.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/_logger_interface.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/handlers/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/handlers/datadog_api_handler.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/handlers/safe_stream_handler.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/models/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/models/_log_level.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/models/_logs_metadata.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/metrics/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/metrics/_base.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/metrics/providers/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/metrics/providers/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/metrics/providers/datadog_provider.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/ml/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/ml/_model.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/ml/mlflow/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/ml/mlflow/_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/ml/mlflow/_functions.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/process_communication/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/process_communication/_models.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/schema_management/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/schema_management/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/schema_management/schema_entity.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/_azure_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/_base.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/_local_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/aws/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/aws/_aws_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/aws/_aws_credentials.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/hashicorp_vault/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/hashicorp_vault/hashicorp_vault_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/hashicorp_vault/kubernetes_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/hashicorp_vault/oidc_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/hashicorp_vault/token_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/blob/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/blob/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/blob/azure_storage_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/blob/base.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/blob/local_storage_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/blob/s3_storage_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/cache/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/cache/_base.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/cache/redis_cache.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/azure_sql.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/models/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/models/_models.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/odbc.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/snowflake_sql.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/trino_sql.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/azure_sql.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/models/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/models/_models.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/odbc.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/snowflake_sql.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/trino_sql.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v2/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v2/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v2/_functions.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v2/_models.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v3/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v3/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v3/_models.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v2/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v2/datastax_astra/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v2/datastax_astra/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v2/datastax_astra/_models.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v2/datastax_astra/astra_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v3/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v3/datastax_astra/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v3/datastax_astra/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v3/datastax_astra/_models.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/exceptions.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/_functions.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/astra.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/aws.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/azure.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/base.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/filter_expression.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/hive.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/local.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/secrets/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/secrets/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/secrets/_base.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/secrets/azure_secret_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/secrets/hashicorp_vault_secret_storage_client.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/README.md +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/_common.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/concurrent_task_runner.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/data_structures/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/data_structures/_functions.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/decorators/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/decorators/_logging.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/decorators/_rate_limit.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/metaframe.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/python_typing/__init__.py +0 -0
- {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/python_typing/_functions.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: adapta
|
|
3
|
-
Version: 3.2.6a482.dev7
|
|
3
|
+
Version: 3.2.6a485.dev5
|
|
4
4
|
Summary: Logging, data connectors, monitoring, secret handling and general lifehacks to make data people lives easier.
|
|
5
5
|
Home-page: https://github.com/SneaksAndData/adapta
|
|
6
6
|
License: Apache 2.0
|
|
@@ -47,7 +47,7 @@ Requires-Dist: limits (>=3.7,<3.8)
|
|
|
47
47
|
Requires-Dist: mlflow-skinny (>=2.4.1,<2.5.0) ; extra == "ml"
|
|
48
48
|
Requires-Dist: pandas[performance] (>=2.0.0,<3.0)
|
|
49
49
|
Requires-Dist: pandera (>=0.20.3,<1.0)
|
|
50
|
-
Requires-Dist: polars (>=
|
|
50
|
+
Requires-Dist: polars (>=1.7,<2.0)
|
|
51
51
|
Requires-Dist: pyarrow (>=7.0)
|
|
52
52
|
Requires-Dist: pyodbc (>=4.0,<4.1) ; extra == "databases"
|
|
53
53
|
Requires-Dist: redis[hiredis] (>=4.4.0,<4.5.0) ; extra == "caching"
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = 'v3.2.6a485.dev5'
|
|
@@ -45,6 +45,7 @@ def load( # pylint: disable=R0913
|
|
|
45
45
|
columns: Optional[List[str]] = None,
|
|
46
46
|
batch_size: Optional[int] = None,
|
|
47
47
|
partition_filter_expressions: Optional[List[Tuple]] = None,
|
|
48
|
+
limit: Optional[int] = None,
|
|
48
49
|
) -> Union[MetaFrame, Iterator[MetaFrame]]:
|
|
49
50
|
"""
|
|
50
51
|
Loads Delta Lake table from Azure or AWS storage and converts it to a pandas dataframe.
|
|
@@ -60,6 +61,7 @@ def load( # pylint: disable=R0913
|
|
|
60
61
|
|
|
61
62
|
:param columns: Optional list of columns to select when reading. Defaults to all columns if not provided.
|
|
62
63
|
:param batch_size: Optional batch size when reading in batches. If not set, whole table will be loaded into memory.
|
|
64
|
+
:param limit: Optional limit on number of rows to read.
|
|
63
65
|
:param partition_filter_expressions: Optional partitions filters. Examples:
|
|
64
66
|
|
|
65
67
|
partition_filter_expressions = [("day", "=", "3")]
|
|
@@ -82,6 +84,9 @@ def load( # pylint: disable=R0913
|
|
|
82
84
|
filesystem=auth_client.get_pyarrow_filesystem(path),
|
|
83
85
|
)
|
|
84
86
|
|
|
87
|
+
if limit:
|
|
88
|
+
pyarrow_ds = pyarrow_ds.head(limit)
|
|
89
|
+
|
|
85
90
|
row_filter = (
|
|
86
91
|
compile_expression(row_filter, ArrowFilterExpression) if isinstance(row_filter, Expression) else row_filter
|
|
87
92
|
)
|
|
@@ -336,8 +336,6 @@ class PanderaPolarsMapper(CassandraModelMapper):
|
|
|
336
336
|
polars.Datetime(time_unit="us"): (columns.DateTime,),
|
|
337
337
|
polars.Datetime(time_unit="ns"): (columns.DateTime,),
|
|
338
338
|
polars.Datetime(time_unit="ms"): (columns.DateTime,),
|
|
339
|
-
polars.Datetime(time_unit="ms", time_zone='UTC'): (columns.DateTime,),
|
|
340
|
-
polars.Datetime(time_unit="us", time_zone='UTC'): (columns.DateTime,),
|
|
341
339
|
}
|
|
342
340
|
|
|
343
341
|
column_type = mapping.get(type_to_map, None)
|
|
@@ -18,6 +18,7 @@
|
|
|
18
18
|
#
|
|
19
19
|
|
|
20
20
|
import base64
|
|
21
|
+
import itertools
|
|
21
22
|
import logging
|
|
22
23
|
import math
|
|
23
24
|
import os
|
|
@@ -62,7 +63,7 @@ from adapta import __version__
|
|
|
62
63
|
from adapta.storage.distributed_object_store.v3.datastax_astra._models import SimilarityFunction, VectorSearchQuery
|
|
63
64
|
from adapta.storage.models.filter_expression import Expression, AstraFilterExpression, compile_expression
|
|
64
65
|
from adapta.utils import chunk_list, rate_limit
|
|
65
|
-
from adapta.utils.metaframe import MetaFrame, concat
|
|
66
|
+
from adapta.utils.metaframe import MetaFrame, concat
|
|
66
67
|
from adapta.storage.distributed_object_store.v3.datastax_astra._model_mappers import get_mapper
|
|
67
68
|
|
|
68
69
|
TModel = TypeVar("TModel") # pylint: disable=C0103
|
|
@@ -238,6 +239,7 @@ class AstraClient:
|
|
|
238
239
|
custom_indexes: Optional[List[str]] = None,
|
|
239
240
|
deduplicate=False,
|
|
240
241
|
num_threads: Optional[int] = None,
|
|
242
|
+
limit: Optional[int] = None,
|
|
241
243
|
) -> MetaFrame:
|
|
242
244
|
"""
|
|
243
245
|
Run a filter query on the entity of type TModel backed by table `table_name`.
|
|
@@ -262,6 +264,7 @@ class AstraClient:
|
|
|
262
264
|
:param: custom_indexes: An optional list of custom indexes, if it cannot be inferred from the data model.
|
|
263
265
|
:param: deduplicate: Optionally deduplicate query result, for example when only the partition key part of a primary key is used to fetch results.
|
|
264
266
|
:param: num_threads: Optionally run filtering using multiple threads. Setting this to -1 will cause this method to automatically evaluate number of threads based on filter expression size.
|
|
267
|
+
:param: limit: Optionally limit the number of results returned.
|
|
265
268
|
"""
|
|
266
269
|
|
|
267
270
|
@on_exception(
|
|
@@ -274,12 +277,14 @@ class AstraClient:
|
|
|
274
277
|
max_time=self._transient_error_max_wait_s,
|
|
275
278
|
raise_on_giveup=True,
|
|
276
279
|
)
|
|
277
|
-
def apply(
|
|
278
|
-
model
|
|
280
|
+
def apply(
|
|
281
|
+
model: Type[Model], key_column_filter: Dict[str, Any], columns_to_select: Optional[List[str]]
|
|
282
|
+
) -> typing.Iterable[dict]:
|
|
283
|
+
model = model.filter(**key_column_filter).limit(limit)
|
|
279
284
|
if columns_to_select:
|
|
280
|
-
|
|
285
|
+
model = model.only(select_columns)
|
|
281
286
|
|
|
282
|
-
return model
|
|
287
|
+
return (dict(v.items()) for v in list(model))
|
|
283
288
|
|
|
284
289
|
def normalize_column_name(column_name: str) -> str:
|
|
285
290
|
filter_suffix = re.findall(self._filter_pattern, column_name)
|
|
@@ -288,15 +293,6 @@ class AstraClient:
|
|
|
288
293
|
|
|
289
294
|
return column_name.replace(filter_suffix[0], "")
|
|
290
295
|
|
|
291
|
-
def to_frame(
|
|
292
|
-
model: Type[Model], key_column_filter: Dict[str, Any], columns_to_select: Optional[List[str]]
|
|
293
|
-
) -> MetaFrame:
|
|
294
|
-
return MetaFrame(
|
|
295
|
-
[dict(v.items()) for v in list(apply(model, key_column_filter, columns_to_select))],
|
|
296
|
-
convert_to_polars=lambda x: polars.DataFrame(x, schema=select_columns),
|
|
297
|
-
convert_to_pandas=lambda x: pandas.DataFrame(x, columns=select_columns),
|
|
298
|
-
)
|
|
299
|
-
|
|
300
296
|
assert (
|
|
301
297
|
self._session is not None
|
|
302
298
|
), "Please instantiate an AstraClient using with AstraClient(...) before calling this method"
|
|
@@ -325,35 +321,33 @@ class AstraClient:
|
|
|
325
321
|
else num_threads
|
|
326
322
|
)
|
|
327
323
|
with ThreadPoolExecutor(max_workers=max_threads) as tpe:
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
chunksize=max(int(len(compiled_filter_values) / num_threads), 1),
|
|
336
|
-
),
|
|
337
|
-
options=[PolarsOptions(how="diagonal_relaxed")],
|
|
324
|
+
data = tpe.map(
|
|
325
|
+
lambda args: apply(*args),
|
|
326
|
+
[
|
|
327
|
+
(cassandra_model, key_column_filter, select_columns)
|
|
328
|
+
for key_column_filter in compiled_filter_values
|
|
329
|
+
],
|
|
330
|
+
chunksize=max(int(len(compiled_filter_values) / num_threads), 1),
|
|
338
331
|
)
|
|
339
332
|
else:
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
[dict(v.items()) for v in list(apply(cassandra_model, key_column_filter, select_columns))],
|
|
344
|
-
convert_to_polars=(lambda x: polars.DataFrame(x, schema=select_columns))
|
|
345
|
-
if not deduplicate
|
|
346
|
-
else (lambda x: polars.DataFrame(x, schema=select_columns).unique()),
|
|
347
|
-
convert_to_pandas=(lambda x: pandas.DataFrame(x, columns=select_columns))
|
|
348
|
-
if not deduplicate
|
|
349
|
-
else (lambda x: pandas.DataFrame(x, columns=select_columns).drop_duplicates()),
|
|
350
|
-
)
|
|
351
|
-
for key_column_filter in compiled_filter_values
|
|
352
|
-
],
|
|
353
|
-
options=[PolarsOptions(how="diagonal_relaxed")],
|
|
333
|
+
data = (
|
|
334
|
+
apply(cassandra_model, key_column_filter, select_columns)
|
|
335
|
+
for key_column_filter in compiled_filter_values
|
|
354
336
|
)
|
|
355
337
|
|
|
356
|
-
|
|
338
|
+
data = itertools.chain.from_iterable(data)
|
|
339
|
+
if limit:
|
|
340
|
+
data = itertools.islice(data, limit)
|
|
341
|
+
|
|
342
|
+
return MetaFrame(
|
|
343
|
+
data,
|
|
344
|
+
convert_to_polars=(lambda x: polars.DataFrame(x, schema=select_columns))
|
|
345
|
+
if not deduplicate
|
|
346
|
+
else (lambda x: polars.DataFrame(x, schema=select_columns).unique()),
|
|
347
|
+
convert_to_pandas=(lambda x: pandas.DataFrame(x, columns=select_columns))
|
|
348
|
+
if not deduplicate
|
|
349
|
+
else (lambda x: pandas.DataFrame(x, columns=select_columns).drop_duplicates()),
|
|
350
|
+
)
|
|
357
351
|
|
|
358
352
|
def get_entities_raw(self, query: str) -> MetaFrame:
|
|
359
353
|
"""
|
|
@@ -193,6 +193,75 @@ class PolarsDataFrameJsonSerializationFormat(SerializationFormat[polars.DataFram
|
|
|
193
193
|
return polars.read_json(io.BytesIO(data))
|
|
194
194
|
|
|
195
195
|
|
|
196
|
+
class PolarsLazyFrameParquetSerializationFormat(SerializationFormat[polars.LazyFrame]):
|
|
197
|
+
"""
|
|
198
|
+
Serializes lazyframes as parquet format.
|
|
199
|
+
"""
|
|
200
|
+
|
|
201
|
+
def serialize(self, data: polars.LazyFrame) -> bytes:
|
|
202
|
+
"""
|
|
203
|
+
Serializes lazyframe to bytes using parquet format.
|
|
204
|
+
:param data: Lazyframe to serialize.
|
|
205
|
+
:return: Parquet serialized lazyframe as byte array.
|
|
206
|
+
"""
|
|
207
|
+
buffer = io.BytesIO()
|
|
208
|
+
data.collect().write_parquet(buffer)
|
|
209
|
+
return buffer.getvalue()
|
|
210
|
+
|
|
211
|
+
def deserialize(self, data: bytes) -> polars.LazyFrame:
|
|
212
|
+
"""
|
|
213
|
+
Deserializes lazyframe from bytes using parquet format.
|
|
214
|
+
:param data: Lazyframe to deserialize in parquet format as bytes.
|
|
215
|
+
:return: Deserialized lazyframe.
|
|
216
|
+
"""
|
|
217
|
+
return polars.scan_parquet(io.BytesIO(data))
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class PolarsLazyFrameCsvSerializationFormat(SerializationFormat[polars.LazyFrame]):
|
|
221
|
+
"""
|
|
222
|
+
Serializes lazyframes as CSV format.
|
|
223
|
+
"""
|
|
224
|
+
|
|
225
|
+
def serialize(self, data: polars.LazyFrame) -> bytes:
|
|
226
|
+
"""
|
|
227
|
+
Serializes lazyframe to bytes using CSV format.
|
|
228
|
+
:param data: Lazyframe to serialize.
|
|
229
|
+
:return: CSV serialized Lazyframe as byte array.
|
|
230
|
+
"""
|
|
231
|
+
|
|
232
|
+
return data.collect().write_csv().encode(encoding="utf-8")
|
|
233
|
+
|
|
234
|
+
def deserialize(self, data: bytes) -> polars.LazyFrame:
|
|
235
|
+
"""
|
|
236
|
+
Deserializes lazyframe from bytes using CSV format.
|
|
237
|
+
:param data: LazyFrame to deserialize in CSV format as bytes.
|
|
238
|
+
:return: Deserialized lazyframe.
|
|
239
|
+
"""
|
|
240
|
+
return polars.scan_csv(io.BytesIO(data))
|
|
241
|
+
|
|
242
|
+
|
|
243
|
+
class PolarsLazyFrameJsonSerializationFormat(SerializationFormat[polars.LazyFrame]):
|
|
244
|
+
"""
|
|
245
|
+
Serializes lazyframes as JSON format.
|
|
246
|
+
"""
|
|
247
|
+
|
|
248
|
+
def serialize(self, data: polars.LazyFrame) -> bytes:
|
|
249
|
+
"""
|
|
250
|
+
Serializes lazyframes to bytes using JSON format.
|
|
251
|
+
:param data: LazyFrame to serialize.
|
|
252
|
+
:return: JSON serialized lazyframe as byte array.
|
|
253
|
+
"""
|
|
254
|
+
return data.collect().write_ndjson().encode(encoding="utf-8")
|
|
255
|
+
|
|
256
|
+
def deserialize(self, data: bytes) -> polars.LazyFrame:
|
|
257
|
+
"""
|
|
258
|
+
Deserializes lazyframes from bytes using JSON format.
|
|
259
|
+
:param data: LazyFrame to deserialize in JSON format as bytes.
|
|
260
|
+
:return: Deserialized lazyframe.
|
|
261
|
+
"""
|
|
262
|
+
return polars.scan_ndjson(io.BytesIO(data))
|
|
263
|
+
|
|
264
|
+
|
|
196
265
|
class DictJsonSerializationFormat(SerializationFormat[dict]):
|
|
197
266
|
"""
|
|
198
267
|
Serializes dictionaries as JSON format.
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/_models.py
RENAMED
|
@@ -83,16 +83,16 @@ class QueryEnabledStore(Generic[TCredential, TSettings], ABC):
|
|
|
83
83
|
|
|
84
84
|
@abstractmethod
|
|
85
85
|
def _apply_filter(
|
|
86
|
-
self, path: DataPath, filter_expression: Expression, columns: list[str]
|
|
86
|
+
self, path: DataPath, filter_expression: Expression, columns: list[str], limit: Optional[int] = 10000
|
|
87
87
|
) -> Union[MetaFrame, Iterator[MetaFrame]]:
|
|
88
88
|
"""
|
|
89
|
-
Applies the provided filter expression to this Store and returns the result in a
|
|
89
|
+
Applies the provided filter expression to this Store and returns the result in a MetaFrame
|
|
90
90
|
"""
|
|
91
91
|
|
|
92
92
|
@abstractmethod
|
|
93
93
|
def _apply_query(self, query: str) -> Union[MetaFrame, Iterator[MetaFrame]]:
|
|
94
94
|
"""
|
|
95
|
-
Applies a plaintext query to this Store and returns the result in a
|
|
95
|
+
Applies a plaintext query to this Store and returns the result in a MetaFrame
|
|
96
96
|
"""
|
|
97
97
|
|
|
98
98
|
@classmethod
|
|
@@ -139,6 +139,7 @@ class QueryConfigurationBuilder:
|
|
|
139
139
|
self._path = path
|
|
140
140
|
self._filter_expression: Optional[Expression] = None
|
|
141
141
|
self._columns: list[str] = []
|
|
142
|
+
self._limit = 10000
|
|
142
143
|
|
|
143
144
|
def filter(self, filter_expression: Expression) -> "QueryConfigurationBuilder":
|
|
144
145
|
"""
|
|
@@ -156,10 +157,20 @@ class QueryConfigurationBuilder:
|
|
|
156
157
|
self._columns = list(columns)
|
|
157
158
|
return self
|
|
158
159
|
|
|
160
|
+
def limit(self, limit: int) -> "QueryConfigurationBuilder":
|
|
161
|
+
"""
|
|
162
|
+
Limit the number of results returned by the underlying store.
|
|
163
|
+
"""
|
|
164
|
+
self._limit = limit
|
|
165
|
+
return self
|
|
166
|
+
|
|
159
167
|
def read(self) -> Union[MetaFrame, Iterator[MetaFrame]]:
|
|
160
168
|
"""
|
|
161
169
|
Execute the query on the underlying store.
|
|
162
170
|
"""
|
|
163
171
|
return self._store._apply_filter(
|
|
164
|
-
path=self._path,
|
|
172
|
+
path=self._path,
|
|
173
|
+
filter_expression=self._filter_expression,
|
|
174
|
+
columns=self._columns,
|
|
175
|
+
limit=self._limit,
|
|
165
176
|
)
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/_qes_astra.py
RENAMED
|
@@ -78,7 +78,7 @@ class AstraQueryEnabledStore(QueryEnabledStore[AstraCredential, AstraSettings]):
|
|
|
78
78
|
self._astra_client.connect()
|
|
79
79
|
|
|
80
80
|
def _apply_filter(
|
|
81
|
-
self, path: DataPath, filter_expression: Expression, columns: list[str]
|
|
81
|
+
self, path: DataPath, filter_expression: Expression, columns: list[str], limit: Optional[int] = 10000
|
|
82
82
|
) -> Union[MetaFrame, Iterator[MetaFrame]]:
|
|
83
83
|
assert isinstance(path, AstraPath)
|
|
84
84
|
astra_path: AstraPath = path
|
|
@@ -91,6 +91,7 @@ class AstraQueryEnabledStore(QueryEnabledStore[AstraCredential, AstraSettings]):
|
|
|
91
91
|
table_name=astra_path.table,
|
|
92
92
|
select_columns=columns,
|
|
93
93
|
num_threads=-1, # auto-infer, see method documentation
|
|
94
|
+
limit=limit,
|
|
94
95
|
)
|
|
95
96
|
|
|
96
97
|
return self._astra_client.filter_entities(
|
|
@@ -100,6 +101,7 @@ class AstraQueryEnabledStore(QueryEnabledStore[AstraCredential, AstraSettings]):
|
|
|
100
101
|
table_name=astra_path.table,
|
|
101
102
|
select_columns=columns,
|
|
102
103
|
num_threads=-1, # auto-infer, see method documentation
|
|
104
|
+
limit=limit,
|
|
103
105
|
)
|
|
104
106
|
|
|
105
107
|
def _apply_query(self, query: str) -> Union[MetaFrame, Iterator[MetaFrame]]:
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/_qes_delta.py
RENAMED
|
@@ -67,13 +67,18 @@ class DeltaQueryEnabledStore(QueryEnabledStore[DeltaCredential, DeltaSettings]):
|
|
|
67
67
|
return cls(credentials=DeltaCredential.from_json(credentials), settings=DeltaSettings.from_json(settings))
|
|
68
68
|
|
|
69
69
|
def _apply_filter(
|
|
70
|
-
self,
|
|
70
|
+
self,
|
|
71
|
+
path: DataPath,
|
|
72
|
+
filter_expression: Expression,
|
|
73
|
+
columns: list[str],
|
|
74
|
+
limit: Optional[int] = 10000,
|
|
71
75
|
) -> Union[MetaFrame, Iterator[MetaFrame]]:
|
|
72
76
|
return load(
|
|
73
77
|
auth_client=self.credentials.auth_client(credentials=self.credentials.auth_client_credentials()),
|
|
74
78
|
path=path,
|
|
75
79
|
row_filter=filter_expression,
|
|
76
80
|
columns=columns,
|
|
81
|
+
limit=limit,
|
|
77
82
|
)
|
|
78
83
|
|
|
79
84
|
def _apply_query(self, query: str) -> Union[MetaFrame, Iterator[MetaFrame]]:
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "adapta"
|
|
3
|
-
version = "v3.2.6a482.dev7"
|
|
3
|
+
version = "v3.2.6a485.dev5"
|
|
4
4
|
description = "Logging, data connectors, monitoring, secret handling and general lifehacks to make data people lives easier."
|
|
5
5
|
authors = ["ECCO Sneaks & Data <esdsupport@ecco.com>"]
|
|
6
6
|
maintainers = ['GZU <gzu@ecco.com>', 'JRB <ext-jrb@ecco.com>']
|
|
@@ -17,7 +17,7 @@ pandas = { version = ">=2.0.0,<3.0", extras = ["performance"] }
|
|
|
17
17
|
pyarrow = ">=7.0"
|
|
18
18
|
dataclasses-json = "~0.6"
|
|
19
19
|
limits = "~3.7"
|
|
20
|
-
polars = ">=
|
|
20
|
+
polars = ">=1.7 <2.0"
|
|
21
21
|
pandera = ">=0.20.3 <1.0"
|
|
22
22
|
|
|
23
23
|
cassandra-driver = { version = "~3.29.1", optional = true }
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = 'v3.2.6a482.dev7'
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/metrics/providers/datadog_provider.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/aws/_aws_credentials.py
RENAMED
|
File without changes
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/hashicorp_vault/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/models/__init__.py
RENAMED
|
File without changes
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/models/_models.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/models/__init__.py
RENAMED
|
File without changes
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/models/_models.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/README.md
RENAMED
|
File without changes
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/secrets/azure_secret_client.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|