adapta 3.2.6a482.dev7__tar.gz → 3.2.6a485.dev5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133)
  1. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/PKG-INFO +2 -2
  2. adapta-3.2.6a485.dev5/adapta/_version.py +1 -0
  3. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v3/_functions.py +5 -0
  4. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v3/datastax_astra/_model_mappers.py +0 -2
  5. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v3/datastax_astra/astra_client.py +33 -39
  6. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/format.py +69 -0
  7. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/_models.py +15 -4
  8. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/_qes_astra.py +3 -1
  9. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/_qes_delta.py +6 -1
  10. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/pyproject.toml +2 -2
  11. adapta-3.2.6a482.dev7/adapta/_version.py +0 -1
  12. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/LICENSE +0 -0
  13. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/README.md +0 -0
  14. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/__init__.py +0 -0
  15. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/connectors/__init__.py +0 -0
  16. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/connectors/service_bus/__init__.py +0 -0
  17. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/connectors/service_bus/_connector.py +0 -0
  18. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/README.md +0 -0
  19. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/__init__.py +0 -0
  20. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/_async_logger.py +0 -0
  21. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/_base.py +0 -0
  22. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/_internal.py +0 -0
  23. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/_internal_logger.py +0 -0
  24. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/_logger_interface.py +0 -0
  25. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/handlers/__init__.py +0 -0
  26. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/handlers/datadog_api_handler.py +0 -0
  27. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/handlers/safe_stream_handler.py +0 -0
  28. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/models/__init__.py +0 -0
  29. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/models/_log_level.py +0 -0
  30. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/logs/models/_logs_metadata.py +0 -0
  31. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/metrics/__init__.py +0 -0
  32. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/metrics/_base.py +0 -0
  33. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/metrics/providers/README.md +0 -0
  34. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/metrics/providers/__init__.py +0 -0
  35. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/metrics/providers/datadog_provider.py +0 -0
  36. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/ml/__init__.py +0 -0
  37. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/ml/_model.py +0 -0
  38. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/ml/mlflow/__init__.py +0 -0
  39. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/ml/mlflow/_client.py +0 -0
  40. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/ml/mlflow/_functions.py +0 -0
  41. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/process_communication/__init__.py +0 -0
  42. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/process_communication/_models.py +0 -0
  43. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/schema_management/README.md +0 -0
  44. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/schema_management/__init__.py +0 -0
  45. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/schema_management/schema_entity.py +0 -0
  46. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/__init__.py +0 -0
  47. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/README.md +0 -0
  48. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/__init__.py +0 -0
  49. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/_azure_client.py +0 -0
  50. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/_base.py +0 -0
  51. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/_local_client.py +0 -0
  52. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/aws/__init__.py +0 -0
  53. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/aws/_aws_client.py +0 -0
  54. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/aws/_aws_credentials.py +0 -0
  55. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/hashicorp_vault/__init__.py +0 -0
  56. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/hashicorp_vault/hashicorp_vault_client.py +0 -0
  57. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/hashicorp_vault/kubernetes_client.py +0 -0
  58. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/hashicorp_vault/oidc_client.py +0 -0
  59. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/security/clients/hashicorp_vault/token_client.py +0 -0
  60. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/__init__.py +0 -0
  61. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/blob/README.md +0 -0
  62. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/blob/__init__.py +0 -0
  63. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/blob/azure_storage_client.py +0 -0
  64. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/blob/base.py +0 -0
  65. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/blob/local_storage_client.py +0 -0
  66. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/blob/s3_storage_client.py +0 -0
  67. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/cache/__init__.py +0 -0
  68. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/cache/_base.py +0 -0
  69. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/cache/redis_cache.py +0 -0
  70. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/__init__.py +0 -0
  71. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/README.md +0 -0
  72. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/__init__.py +0 -0
  73. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/azure_sql.py +0 -0
  74. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/models/__init__.py +0 -0
  75. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/models/_models.py +0 -0
  76. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/odbc.py +0 -0
  77. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/snowflake_sql.py +0 -0
  78. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v2/trino_sql.py +0 -0
  79. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/README.md +0 -0
  80. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/__init__.py +0 -0
  81. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/azure_sql.py +0 -0
  82. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/models/__init__.py +0 -0
  83. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/models/_models.py +0 -0
  84. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/odbc.py +0 -0
  85. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/snowflake_sql.py +0 -0
  86. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/database/v3/trino_sql.py +0 -0
  87. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/__init__.py +0 -0
  88. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v2/README.md +0 -0
  89. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v2/__init__.py +0 -0
  90. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v2/_functions.py +0 -0
  91. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v2/_models.py +0 -0
  92. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v3/README.md +0 -0
  93. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v3/__init__.py +0 -0
  94. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/delta_lake/v3/_models.py +0 -0
  95. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/__init__.py +0 -0
  96. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v2/__init__.py +0 -0
  97. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v2/datastax_astra/README.md +0 -0
  98. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v2/datastax_astra/__init__.py +0 -0
  99. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v2/datastax_astra/_models.py +0 -0
  100. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v2/datastax_astra/astra_client.py +0 -0
  101. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v3/__init__.py +0 -0
  102. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v3/datastax_astra/README.md +0 -0
  103. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v3/datastax_astra/__init__.py +0 -0
  104. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/distributed_object_store/v3/datastax_astra/_models.py +0 -0
  105. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/exceptions.py +0 -0
  106. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/__init__.py +0 -0
  107. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/_functions.py +0 -0
  108. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/astra.py +0 -0
  109. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/aws.py +0 -0
  110. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/azure.py +0 -0
  111. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/base.py +0 -0
  112. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/filter_expression.py +0 -0
  113. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/hive.py +0 -0
  114. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/models/local.py +0 -0
  115. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/README.md +0 -0
  116. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/query_enabled_store/__init__.py +0 -0
  117. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/secrets/README.md +0 -0
  118. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/secrets/__init__.py +0 -0
  119. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/secrets/_base.py +0 -0
  120. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/secrets/azure_secret_client.py +0 -0
  121. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/storage/secrets/hashicorp_vault_secret_storage_client.py +0 -0
  122. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/README.md +0 -0
  123. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/__init__.py +0 -0
  124. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/_common.py +0 -0
  125. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/concurrent_task_runner.py +0 -0
  126. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/data_structures/__init__.py +0 -0
  127. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/data_structures/_functions.py +0 -0
  128. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/decorators/__init__.py +0 -0
  129. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/decorators/_logging.py +0 -0
  130. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/decorators/_rate_limit.py +0 -0
  131. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/metaframe.py +0 -0
  132. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/python_typing/__init__.py +0 -0
  133. {adapta-3.2.6a482.dev7 → adapta-3.2.6a485.dev5}/adapta/utils/python_typing/_functions.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: adapta
3
- Version: 3.2.6a482.dev7
3
+ Version: 3.2.6a485.dev5
4
4
  Summary: Logging, data connectors, monitoring, secret handling and general lifehacks to make data people lives easier.
5
5
  Home-page: https://github.com/SneaksAndData/adapta
6
6
  License: Apache 2.0
@@ -47,7 +47,7 @@ Requires-Dist: limits (>=3.7,<3.8)
47
47
  Requires-Dist: mlflow-skinny (>=2.4.1,<2.5.0) ; extra == "ml"
48
48
  Requires-Dist: pandas[performance] (>=2.0.0,<3.0)
49
49
  Requires-Dist: pandera (>=0.20.3,<1.0)
50
- Requires-Dist: polars (>=0.20,<2.0)
50
+ Requires-Dist: polars (>=1.7,<2.0)
51
51
  Requires-Dist: pyarrow (>=7.0)
52
52
  Requires-Dist: pyodbc (>=4.0,<4.1) ; extra == "databases"
53
53
  Requires-Dist: redis[hiredis] (>=4.4.0,<4.5.0) ; extra == "caching"
@@ -0,0 +1 @@
1
+ __version__ = 'v3.2.6a485.dev5'
@@ -45,6 +45,7 @@ def load( # pylint: disable=R0913
45
45
  columns: Optional[List[str]] = None,
46
46
  batch_size: Optional[int] = None,
47
47
  partition_filter_expressions: Optional[List[Tuple]] = None,
48
+ limit: Optional[int] = None,
48
49
  ) -> Union[MetaFrame, Iterator[MetaFrame]]:
49
50
  """
50
51
  Loads Delta Lake table from Azure or AWS storage and converts it to a pandas dataframe.
@@ -60,6 +61,7 @@ def load( # pylint: disable=R0913
60
61
 
61
62
  :param columns: Optional list of columns to select when reading. Defaults to all columns if not provided.
62
63
  :param batch_size: Optional batch size when reading in batches. If not set, whole table will be loaded into memory.
64
+ :param limit: Optional limit on number of rows to read.
63
65
  :param partition_filter_expressions: Optional partitions filters. Examples:
64
66
 
65
67
  partition_filter_expressions = [("day", "=", "3")]
@@ -82,6 +84,9 @@ def load( # pylint: disable=R0913
82
84
  filesystem=auth_client.get_pyarrow_filesystem(path),
83
85
  )
84
86
 
87
+ if limit:
88
+ pyarrow_ds = pyarrow_ds.head(limit)
89
+
85
90
  row_filter = (
86
91
  compile_expression(row_filter, ArrowFilterExpression) if isinstance(row_filter, Expression) else row_filter
87
92
  )
@@ -336,8 +336,6 @@ class PanderaPolarsMapper(CassandraModelMapper):
336
336
  polars.Datetime(time_unit="us"): (columns.DateTime,),
337
337
  polars.Datetime(time_unit="ns"): (columns.DateTime,),
338
338
  polars.Datetime(time_unit="ms"): (columns.DateTime,),
339
- polars.Datetime(time_unit="ms", time_zone='UTC'): (columns.DateTime,),
340
- polars.Datetime(time_unit="us", time_zone='UTC'): (columns.DateTime,),
341
339
  }
342
340
 
343
341
  column_type = mapping.get(type_to_map, None)
@@ -18,6 +18,7 @@
18
18
  #
19
19
 
20
20
  import base64
21
+ import itertools
21
22
  import logging
22
23
  import math
23
24
  import os
@@ -62,7 +63,7 @@ from adapta import __version__
62
63
  from adapta.storage.distributed_object_store.v3.datastax_astra._models import SimilarityFunction, VectorSearchQuery
63
64
  from adapta.storage.models.filter_expression import Expression, AstraFilterExpression, compile_expression
64
65
  from adapta.utils import chunk_list, rate_limit
65
- from adapta.utils.metaframe import MetaFrame, concat, PolarsOptions
66
+ from adapta.utils.metaframe import MetaFrame, concat
66
67
  from adapta.storage.distributed_object_store.v3.datastax_astra._model_mappers import get_mapper
67
68
 
68
69
  TModel = TypeVar("TModel") # pylint: disable=C0103
@@ -238,6 +239,7 @@ class AstraClient:
238
239
  custom_indexes: Optional[List[str]] = None,
239
240
  deduplicate=False,
240
241
  num_threads: Optional[int] = None,
242
+ limit: Optional[int] = None,
241
243
  ) -> MetaFrame:
242
244
  """
243
245
  Run a filter query on the entity of type TModel backed by table `table_name`.
@@ -262,6 +264,7 @@ class AstraClient:
262
264
  :param: custom_indexes: An optional list of custom indexes, if it cannot be inferred from the data model.
263
265
  :param: deduplicate: Optionally deduplicate query result, for example when only the partition key part of a primary key is used to fetch results.
264
266
  :param: num_threads: Optionally run filtering using multiple threads. Setting this to -1 will cause this method to automatically evaluate number of threads based on filter expression size.
267
+ :param: limit: Optionally limit the number of results returned.
265
268
  """
266
269
 
267
270
  @on_exception(
@@ -274,12 +277,14 @@ class AstraClient:
274
277
  max_time=self._transient_error_max_wait_s,
275
278
  raise_on_giveup=True,
276
279
  )
277
- def apply(model: Type[Model], key_column_filter: Dict[str, Any], columns_to_select: Optional[List[str]]):
278
- model = model.filter(**key_column_filter).limit(None)
280
+ def apply(
281
+ model: Type[Model], key_column_filter: Dict[str, Any], columns_to_select: Optional[List[str]]
282
+ ) -> typing.Iterable[dict]:
283
+ model = model.filter(**key_column_filter).limit(limit)
279
284
  if columns_to_select:
280
- return model.only(select_columns)
285
+ model = model.only(select_columns)
281
286
 
282
- return model
287
+ return (dict(v.items()) for v in list(model))
283
288
 
284
289
  def normalize_column_name(column_name: str) -> str:
285
290
  filter_suffix = re.findall(self._filter_pattern, column_name)
@@ -288,15 +293,6 @@ class AstraClient:
288
293
 
289
294
  return column_name.replace(filter_suffix[0], "")
290
295
 
291
- def to_frame(
292
- model: Type[Model], key_column_filter: Dict[str, Any], columns_to_select: Optional[List[str]]
293
- ) -> MetaFrame:
294
- return MetaFrame(
295
- [dict(v.items()) for v in list(apply(model, key_column_filter, columns_to_select))],
296
- convert_to_polars=lambda x: polars.DataFrame(x, schema=select_columns),
297
- convert_to_pandas=lambda x: pandas.DataFrame(x, columns=select_columns),
298
- )
299
-
300
296
  assert (
301
297
  self._session is not None
302
298
  ), "Please instantiate an AstraClient using with AstraClient(...) before calling this method"
@@ -325,35 +321,33 @@ class AstraClient:
325
321
  else num_threads
326
322
  )
327
323
  with ThreadPoolExecutor(max_workers=max_threads) as tpe:
328
- result = concat(
329
- tpe.map(
330
- lambda args: to_frame(*args),
331
- [
332
- (cassandra_model, key_column_filter, select_columns)
333
- for key_column_filter in compiled_filter_values
334
- ],
335
- chunksize=max(int(len(compiled_filter_values) / num_threads), 1),
336
- ),
337
- options=[PolarsOptions(how="diagonal_relaxed")],
324
+ data = tpe.map(
325
+ lambda args: apply(*args),
326
+ [
327
+ (cassandra_model, key_column_filter, select_columns)
328
+ for key_column_filter in compiled_filter_values
329
+ ],
330
+ chunksize=max(int(len(compiled_filter_values) / num_threads), 1),
338
331
  )
339
332
  else:
340
- result = concat(
341
- [
342
- MetaFrame(
343
- [dict(v.items()) for v in list(apply(cassandra_model, key_column_filter, select_columns))],
344
- convert_to_polars=(lambda x: polars.DataFrame(x, schema=select_columns))
345
- if not deduplicate
346
- else (lambda x: polars.DataFrame(x, schema=select_columns).unique()),
347
- convert_to_pandas=(lambda x: pandas.DataFrame(x, columns=select_columns))
348
- if not deduplicate
349
- else (lambda x: pandas.DataFrame(x, columns=select_columns).drop_duplicates()),
350
- )
351
- for key_column_filter in compiled_filter_values
352
- ],
353
- options=[PolarsOptions(how="diagonal_relaxed")],
333
+ data = (
334
+ apply(cassandra_model, key_column_filter, select_columns)
335
+ for key_column_filter in compiled_filter_values
354
336
  )
355
337
 
356
- return result
338
+ data = itertools.chain.from_iterable(data)
339
+ if limit:
340
+ data = itertools.islice(data, limit)
341
+
342
+ return MetaFrame(
343
+ data,
344
+ convert_to_polars=(lambda x: polars.DataFrame(x, schema=select_columns))
345
+ if not deduplicate
346
+ else (lambda x: polars.DataFrame(x, schema=select_columns).unique()),
347
+ convert_to_pandas=(lambda x: pandas.DataFrame(x, columns=select_columns))
348
+ if not deduplicate
349
+ else (lambda x: pandas.DataFrame(x, columns=select_columns).drop_duplicates()),
350
+ )
357
351
 
358
352
  def get_entities_raw(self, query: str) -> MetaFrame:
359
353
  """
@@ -193,6 +193,75 @@ class PolarsDataFrameJsonSerializationFormat(SerializationFormat[polars.DataFram
193
193
  return polars.read_json(io.BytesIO(data))
194
194
 
195
195
 
196
+ class PolarsLazyFrameParquetSerializationFormat(SerializationFormat[polars.LazyFrame]):
197
+ """
198
+ Serializes lazyframes as parquet format.
199
+ """
200
+
201
+ def serialize(self, data: polars.LazyFrame) -> bytes:
202
+ """
203
+ Serializes lazyframe to bytes using parquet format.
204
+ :param data: Lazyframe to serialize.
205
+ :return: Parquet serialized lazyframe as byte array.
206
+ """
207
+ buffer = io.BytesIO()
208
+ data.collect().write_parquet(buffer)
209
+ return buffer.getvalue()
210
+
211
+ def deserialize(self, data: bytes) -> polars.LazyFrame:
212
+ """
213
+ Deserializes lazyframe from bytes using parquet format.
214
+ :param data: Lazyframe to deserialize in parquet format as bytes.
215
+ :return: Deserialized lazyframe.
216
+ """
217
+ return polars.scan_parquet(io.BytesIO(data))
218
+
219
+
220
+ class PolarsLazyFrameCsvSerializationFormat(SerializationFormat[polars.LazyFrame]):
221
+ """
222
+ Serializes lazyframes as CSV format.
223
+ """
224
+
225
+ def serialize(self, data: polars.LazyFrame) -> bytes:
226
+ """
227
+ Serializes lazyframe to bytes using CSV format.
228
+ :param data: Lazyframe to serialize.
229
+ :return: CSV serialized Lazyframe as byte array.
230
+ """
231
+
232
+ return data.collect().write_csv().encode(encoding="utf-8")
233
+
234
+ def deserialize(self, data: bytes) -> polars.LazyFrame:
235
+ """
236
+ Deserializes lazyframe from bytes using CSV format.
237
+ :param data: LazyFrame to deserialize in CSV format as bytes.
238
+ :return: Deserialized lazyframe.
239
+ """
240
+ return polars.scan_csv(io.BytesIO(data))
241
+
242
+
243
+ class PolarsLazyFrameJsonSerializationFormat(SerializationFormat[polars.LazyFrame]):
244
+ """
245
+ Serializes lazyframes as JSON format.
246
+ """
247
+
248
+ def serialize(self, data: polars.LazyFrame) -> bytes:
249
+ """
250
+ Serializes lazyframes to bytes using JSON format.
251
+ :param data: LazyFrame to serialize.
252
+ :return: JSON serialized lazyframe as byte array.
253
+ """
254
+ return data.collect().write_ndjson().encode(encoding="utf-8")
255
+
256
+ def deserialize(self, data: bytes) -> polars.LazyFrame:
257
+ """
258
+ Deserializes lazyframes from bytes using JSON format.
259
+ :param data: LazyFrame to deserialize in JSON format as bytes.
260
+ :return: Deserialized lazyframe.
261
+ """
262
+ return polars.scan_ndjson(io.BytesIO(data))
263
+
264
+
196
265
  class DictJsonSerializationFormat(SerializationFormat[dict]):
197
266
  """
198
267
  Serializes dictionaries as JSON format.
@@ -83,16 +83,16 @@ class QueryEnabledStore(Generic[TCredential, TSettings], ABC):
83
83
 
84
84
  @abstractmethod
85
85
  def _apply_filter(
86
- self, path: DataPath, filter_expression: Expression, columns: list[str]
86
+ self, path: DataPath, filter_expression: Expression, columns: list[str], limit: Optional[int] = 10000
87
87
  ) -> Union[MetaFrame, Iterator[MetaFrame]]:
88
88
  """
89
- Applies the provided filter expression to this Store and returns the result in a pandas DataFrame
89
+ Applies the provided filter expression to this Store and returns the result in a MetaFrame
90
90
  """
91
91
 
92
92
  @abstractmethod
93
93
  def _apply_query(self, query: str) -> Union[MetaFrame, Iterator[MetaFrame]]:
94
94
  """
95
- Applies a plaintext query to this Store and returns the result in a pandas DataFrame
95
+ Applies a plaintext query to this Store and returns the result in a MetaFrame
96
96
  """
97
97
 
98
98
  @classmethod
@@ -139,6 +139,7 @@ class QueryConfigurationBuilder:
139
139
  self._path = path
140
140
  self._filter_expression: Optional[Expression] = None
141
141
  self._columns: list[str] = []
142
+ self._limit = 10000
142
143
 
143
144
  def filter(self, filter_expression: Expression) -> "QueryConfigurationBuilder":
144
145
  """
@@ -156,10 +157,20 @@ class QueryConfigurationBuilder:
156
157
  self._columns = list(columns)
157
158
  return self
158
159
 
160
+ def limit(self, limit: int) -> "QueryConfigurationBuilder":
161
+ """
162
+ Limit the number of results returned by the underlying store.
163
+ """
164
+ self._limit = limit
165
+ return self
166
+
159
167
  def read(self) -> Union[MetaFrame, Iterator[MetaFrame]]:
160
168
  """
161
169
  Execute the query on the underlying store.
162
170
  """
163
171
  return self._store._apply_filter(
164
- path=self._path, filter_expression=self._filter_expression, columns=self._columns
172
+ path=self._path,
173
+ filter_expression=self._filter_expression,
174
+ columns=self._columns,
175
+ limit=self._limit,
165
176
  )
@@ -78,7 +78,7 @@ class AstraQueryEnabledStore(QueryEnabledStore[AstraCredential, AstraSettings]):
78
78
  self._astra_client.connect()
79
79
 
80
80
  def _apply_filter(
81
- self, path: DataPath, filter_expression: Expression, columns: list[str]
81
+ self, path: DataPath, filter_expression: Expression, columns: list[str], limit: Optional[int] = 10000
82
82
  ) -> Union[MetaFrame, Iterator[MetaFrame]]:
83
83
  assert isinstance(path, AstraPath)
84
84
  astra_path: AstraPath = path
@@ -91,6 +91,7 @@ class AstraQueryEnabledStore(QueryEnabledStore[AstraCredential, AstraSettings]):
91
91
  table_name=astra_path.table,
92
92
  select_columns=columns,
93
93
  num_threads=-1, # auto-infer, see method documentation
94
+ limit=limit,
94
95
  )
95
96
 
96
97
  return self._astra_client.filter_entities(
@@ -100,6 +101,7 @@ class AstraQueryEnabledStore(QueryEnabledStore[AstraCredential, AstraSettings]):
100
101
  table_name=astra_path.table,
101
102
  select_columns=columns,
102
103
  num_threads=-1, # auto-infer, see method documentation
104
+ limit=limit,
103
105
  )
104
106
 
105
107
  def _apply_query(self, query: str) -> Union[MetaFrame, Iterator[MetaFrame]]:
@@ -67,13 +67,18 @@ class DeltaQueryEnabledStore(QueryEnabledStore[DeltaCredential, DeltaSettings]):
67
67
  return cls(credentials=DeltaCredential.from_json(credentials), settings=DeltaSettings.from_json(settings))
68
68
 
69
69
  def _apply_filter(
70
- self, path: DataPath, filter_expression: Expression, columns: list[str]
70
+ self,
71
+ path: DataPath,
72
+ filter_expression: Expression,
73
+ columns: list[str],
74
+ limit: Optional[int] = 10000,
71
75
  ) -> Union[MetaFrame, Iterator[MetaFrame]]:
72
76
  return load(
73
77
  auth_client=self.credentials.auth_client(credentials=self.credentials.auth_client_credentials()),
74
78
  path=path,
75
79
  row_filter=filter_expression,
76
80
  columns=columns,
81
+ limit=limit,
77
82
  )
78
83
 
79
84
  def _apply_query(self, query: str) -> Union[MetaFrame, Iterator[MetaFrame]]:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "adapta"
3
- version = "v3.2.6a482.dev7"
3
+ version = "v3.2.6a485.dev5"
4
4
  description = "Logging, data connectors, monitoring, secret handling and general lifehacks to make data people lives easier."
5
5
  authors = ["ECCO Sneaks & Data <esdsupport@ecco.com>"]
6
6
  maintainers = ['GZU <gzu@ecco.com>', 'JRB <ext-jrb@ecco.com>']
@@ -17,7 +17,7 @@ pandas = { version = ">=2.0.0,<3.0", extras = ["performance"] }
17
17
  pyarrow = ">=7.0"
18
18
  dataclasses-json = "~0.6"
19
19
  limits = "~3.7"
20
- polars = ">=0.20,<2.0"
20
+ polars = ">=1.7 <2.0"
21
21
  pandera = ">=0.20.3 <1.0"
22
22
 
23
23
  cassandra-driver = { version = "~3.29.1", optional = true }
@@ -1 +0,0 @@
1
- __version__ = 'v3.2.6a482.dev7'
File without changes