adapta 2.11.9__py3-none-any.whl → 3.5.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (140)
  1. adapta/__init__.py +1 -1
  2. adapta/_version.py +1 -1
  3. adapta/connectors/__init__.py +1 -1
  4. adapta/connectors/service_bus/__init__.py +1 -1
  5. adapta/connectors/service_bus/_connector.py +2 -3
  6. adapta/logs/__init__.py +1 -1
  7. adapta/logs/_async_logger.py +38 -24
  8. adapta/logs/_base.py +21 -21
  9. adapta/logs/_internal.py +6 -7
  10. adapta/logs/_internal_logger.py +113 -41
  11. adapta/logs/_logger_interface.py +9 -10
  12. adapta/logs/handlers/__init__.py +1 -1
  13. adapta/logs/handlers/datadog_api_handler.py +7 -7
  14. adapta/logs/handlers/safe_stream_handler.py +4 -4
  15. adapta/logs/models/__init__.py +1 -1
  16. adapta/logs/models/_log_level.py +1 -1
  17. adapta/logs/models/_logs_metadata.py +4 -5
  18. adapta/metrics/__init__.py +1 -1
  19. adapta/metrics/_base.py +14 -15
  20. adapta/metrics/providers/__init__.py +1 -1
  21. adapta/metrics/providers/datadog_provider.py +21 -22
  22. adapta/metrics/providers/void_provider.py +34 -0
  23. adapta/ml/__init__.py +1 -1
  24. adapta/ml/_model.py +1 -1
  25. adapta/ml/mlflow/__init__.py +1 -1
  26. adapta/ml/mlflow/_client.py +101 -5
  27. adapta/ml/mlflow/_functions.py +44 -13
  28. adapta/process_communication/__init__.py +1 -1
  29. adapta/process_communication/_models.py +8 -6
  30. adapta/schema_management/README.md +0 -1
  31. adapta/schema_management/__init__.py +1 -1
  32. adapta/schema_management/schema_entity.py +3 -3
  33. adapta/security/__init__.py +1 -1
  34. adapta/security/clients/__init__.py +1 -1
  35. adapta/security/clients/_azure_client.py +14 -12
  36. adapta/security/clients/_base.py +11 -6
  37. adapta/security/clients/_local_client.py +6 -6
  38. adapta/security/clients/aws/__init__.py +1 -1
  39. adapta/security/clients/aws/_aws_client.py +12 -10
  40. adapta/security/clients/aws/_aws_credentials.py +7 -8
  41. adapta/security/clients/hashicorp_vault/__init__.py +1 -1
  42. adapta/security/clients/hashicorp_vault/hashicorp_vault_client.py +7 -6
  43. adapta/security/clients/hashicorp_vault/kubernetes_client.py +2 -2
  44. adapta/security/clients/hashicorp_vault/oidc_client.py +2 -2
  45. adapta/security/clients/hashicorp_vault/token_client.py +2 -2
  46. adapta/storage/__init__.py +1 -1
  47. adapta/storage/blob/README.md +14 -10
  48. adapta/storage/blob/__init__.py +1 -1
  49. adapta/storage/blob/azure_storage_client.py +76 -24
  50. adapta/storage/blob/base.py +15 -13
  51. adapta/storage/blob/local_storage_client.py +28 -16
  52. adapta/storage/blob/s3_storage_client.py +19 -24
  53. adapta/storage/cache/__init__.py +1 -1
  54. adapta/storage/cache/_base.py +5 -5
  55. adapta/storage/cache/redis_cache.py +5 -5
  56. adapta/storage/database/__init__.py +4 -1
  57. adapta/storage/database/{README.md → v2/README.md} +2 -0
  58. adapta/storage/database/v2/__init__.py +17 -0
  59. adapta/storage/database/v2/azure_sql.py +143 -0
  60. adapta/storage/{distributed_object_store/datastax_astra → database/v2/models}/__init__.py +5 -5
  61. adapta/storage/database/v2/models/_models.py +53 -0
  62. adapta/storage/database/{odbc.py → v2/odbc.py} +22 -13
  63. adapta/storage/database/{snowflake_sql.py → v2/snowflake_sql.py} +20 -12
  64. adapta/storage/database/{trino_sql.py → v2/trino_sql.py} +15 -6
  65. adapta/storage/database/v3/README.md +109 -0
  66. adapta/storage/database/v3/__init__.py +14 -0
  67. adapta/storage/database/{azure_sql.py → v3/azure_sql.py} +7 -9
  68. adapta/storage/database/v3/models/__init__.py +19 -0
  69. adapta/storage/database/{models → v3/models}/_models.py +2 -3
  70. adapta/storage/database/v3/odbc.py +217 -0
  71. adapta/storage/database/v3/snowflake_sql.py +241 -0
  72. adapta/storage/database/v3/trino_sql.py +154 -0
  73. adapta/storage/delta_lake/__init__.py +2 -3
  74. adapta/storage/delta_lake/{README.md → v2/README.md} +2 -0
  75. adapta/storage/delta_lake/v2/__init__.py +19 -0
  76. adapta/storage/delta_lake/{_functions.py → v2/_functions.py} +43 -27
  77. adapta/storage/delta_lake/v2/_models.py +72 -0
  78. adapta/storage/delta_lake/v3/README.md +147 -0
  79. adapta/storage/delta_lake/v3/__init__.py +20 -0
  80. adapta/storage/delta_lake/v3/_functions.py +315 -0
  81. adapta/storage/delta_lake/{_models.py → v3/_models.py} +4 -5
  82. adapta/storage/distributed_object_store/__init__.py +3 -1
  83. adapta/storage/distributed_object_store/v2/__init__.py +18 -0
  84. adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/README.md +2 -0
  85. adapta/storage/distributed_object_store/v2/datastax_astra/__init__.py +20 -0
  86. adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/_models.py +16 -0
  87. adapta/storage/distributed_object_store/{datastax_astra → v2/datastax_astra}/astra_client.py +61 -52
  88. adapta/storage/{database/models → distributed_object_store/v3}/__init__.py +4 -5
  89. adapta/storage/distributed_object_store/v3/datastax_astra/README.md +277 -0
  90. adapta/storage/distributed_object_store/v3/datastax_astra/__init__.py +20 -0
  91. adapta/storage/distributed_object_store/v3/datastax_astra/_model_mappers.py +469 -0
  92. adapta/storage/distributed_object_store/v3/datastax_astra/_models.py +134 -0
  93. adapta/storage/distributed_object_store/v3/datastax_astra/astra_client.py +569 -0
  94. adapta/storage/exceptions.py +1 -1
  95. adapta/storage/models/__init__.py +1 -1
  96. adapta/storage/models/_functions.py +5 -5
  97. adapta/storage/models/astra.py +4 -4
  98. adapta/storage/models/aws.py +1 -1
  99. adapta/storage/models/azure.py +2 -3
  100. adapta/storage/models/base.py +9 -1
  101. adapta/storage/models/enum.py +19 -0
  102. adapta/storage/models/filter_expression.py +124 -10
  103. adapta/storage/models/format.py +16 -205
  104. adapta/storage/models/formatters/__init__.py +36 -0
  105. adapta/storage/models/formatters/dict.py +43 -0
  106. adapta/storage/models/formatters/exceptions.py +7 -0
  107. adapta/storage/models/formatters/metaframe.py +48 -0
  108. adapta/storage/models/formatters/pandas.py +139 -0
  109. adapta/storage/models/formatters/pickle.py +36 -0
  110. adapta/storage/models/formatters/polars.py +240 -0
  111. adapta/storage/models/formatters/unit.py +26 -0
  112. adapta/storage/models/hive.py +24 -16
  113. adapta/storage/models/local.py +1 -1
  114. adapta/storage/models/trino.py +56 -0
  115. adapta/storage/query_enabled_store/README.md +1 -1
  116. adapta/storage/query_enabled_store/__init__.py +7 -1
  117. adapta/storage/query_enabled_store/_models.py +42 -13
  118. adapta/storage/query_enabled_store/_qes_astra.py +27 -14
  119. adapta/storage/query_enabled_store/_qes_delta.py +32 -10
  120. adapta/storage/query_enabled_store/_qes_local.py +81 -0
  121. adapta/storage/query_enabled_store/_qes_trino.py +133 -0
  122. adapta/storage/secrets/__init__.py +1 -1
  123. adapta/storage/secrets/_base.py +5 -4
  124. adapta/storage/secrets/azure_secret_client.py +3 -4
  125. adapta/storage/secrets/hashicorp_vault_secret_storage_client.py +5 -5
  126. adapta/utils/README.md +92 -0
  127. adapta/utils/__init__.py +2 -1
  128. adapta/utils/_common.py +50 -17
  129. adapta/utils/_requests.py +53 -0
  130. adapta/utils/concurrent_task_runner.py +10 -9
  131. adapta/utils/data_structures/_functions.py +6 -6
  132. adapta/utils/decorators/_logging.py +3 -3
  133. adapta/utils/decorators/_rate_limit.py +2 -2
  134. adapta/utils/metaframe.py +172 -0
  135. adapta/utils/python_typing/_functions.py +5 -10
  136. {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info}/METADATA +18 -14
  137. adapta-3.5.13.dist-info/RECORD +146 -0
  138. {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info}/WHEEL +1 -1
  139. adapta-2.11.9.dist-info/RECORD +0 -110
  140. {adapta-2.11.9.dist-info → adapta-3.5.13.dist-info/licenses}/LICENSE +0 -0
adapta/storage/delta_lake/{_functions.py → v2/_functions.py}
@@ -1,7 +1,8 @@
+ # pylint: disable=duplicate-code
  """
  Operations on Delta Lake tables.
  """
- # Copyright (c) 2023-2024. ECCO Sneaks & Data
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
@@ -19,7 +20,8 @@
  import datetime
  import hashlib
  import zlib
- from typing import Optional, Union, Iterator, List, Iterable, Tuple
+ from collections.abc import Iterator, Iterable
+ from warnings import warn

  from pandas import DataFrame, concat
  import pyarrow
@@ -30,27 +32,29 @@ from pyarrow._dataset_parquet import ParquetReadOptions # pylint: disable=E0611
  from adapta.logs import SemanticLogger
  from adapta.security.clients._base import AuthenticationClient
  from adapta.storage.models.base import DataPath
- from adapta.storage.delta_lake._models import DeltaTransaction
+ from adapta.storage.delta_lake.v2._models import DeltaTransaction
  from adapta.storage.cache import KeyValueCache
- from adapta.storage.models.format import DataFrameParquetSerializationFormat
+ from adapta.storage.models.formatters import PandasDataFrameParquetSerializationFormat
  from adapta.storage.models.filter_expression import Expression, ArrowFilterExpression, compile_expression


  def load(  # pylint: disable=R0913
      auth_client: AuthenticationClient,
      path: DataPath,
-     version: Optional[int] = None,
-     row_filter: Optional[Union[Expression, pyarrow.compute.Expression]] = None,
-     columns: Optional[List[str]] = None,
-     batch_size: Optional[int] = None,
-     partition_filter_expressions: Optional[List[Tuple]] = None,
- ) -> Union[DeltaTable, DataFrame, Iterator[DataFrame]]:
+     version: int | None = None,
+     timestamp: datetime.datetime | None = None,
+     row_filter: Expression | pyarrow.compute.Expression | None = None,
+     columns: list[str] | None = None,
+     batch_size: int | None = None,
+     partition_filter_expressions: list[tuple] | None = None,
+ ) -> DeltaTable | DataFrame | Iterator[DataFrame]:
      """
      Loads Delta Lake table from Azure or AWS storage and converts it to a pandas dataframe.

      :param auth_client: AuthenticationClient for target storage.
      :param path: Path to delta table, in HDFS format: abfss://container@account.dfs.core.windows.net/my/path
-     :param version: Optional version to read. Defaults to latest.
+     :param version: Optional version to read. Defaults to latest. If set, timestamp will be ignored.
+     :param timestamp: Optional time travel timestamp. Allows to read data as of a specific time. Ignored if version is set.
      :param row_filter: Optional filter to apply, as pyarrow expression. Example:
        from pyarrow.dataset import field as pyarrow_field

@@ -66,9 +70,21 @@ def load( # pylint: disable=R0913

      :return: A DeltaTable wrapped Rust class, pandas Dataframe or an iterator of pandas Dataframes, for batched reads.
      """
-     pyarrow_ds = DeltaTable(
-         path.to_delta_rs_path(), version=version, storage_options=auth_client.connect_storage(path)
-     ).to_pyarrow_dataset(
+     warn(
+         "You are using version 2 of the load function. "
+         "This is deprecated and will be removed in adapta version 4. "
+         "Please upgrade to version 3: adapta.storage.delta_lake.v3",
+         DeprecationWarning,
+     )
+     if version:
+         timestamp = None
+
+     pyarrow_ds = DeltaTable(path.to_delta_rs_path(), version=version, storage_options=auth_client.connect_storage(path))
+
+     if timestamp:
+         pyarrow_ds.load_as_version(timestamp)
+
+     pyarrow_ds = pyarrow_ds.to_pyarrow_dataset(
          partitions=partition_filter_expressions,
          parquet_read_options=ParquetReadOptions(coerce_int96_timestamp_unit="ms"),
          filesystem=auth_client.get_pyarrow_filesystem(path),
@@ -90,7 +106,7 @@ def load( # pylint: disable=R0913
      return pyarrow_table.to_pandas(timestamp_as_object=True)


- def history(auth_client: AuthenticationClient, path: DataPath, limit: Optional[int] = 1) -> Iterable[DeltaTransaction]:
+ def history(auth_client: AuthenticationClient, path: DataPath, limit: int | None = 1) -> Iterable[DeltaTransaction]:
      """
      Returns transaction history for the table under path.

@@ -108,10 +124,10 @@ def get_cache_key(
      auth_client: AuthenticationClient,
      path: DataPath,
      batch_size=1000,
-     version: Optional[int] = None,
-     row_filter: Optional[Expression] = None,
-     columns: Optional[List[str]] = None,
-     partition_filter_expressions: Optional[List[Tuple]] = None,
+     version: int | None = None,
+     row_filter: Expression | None = None,
+     columns: list[str] | None = None,
+     partition_filter_expressions: list[tuple] | None = None,
  ) -> str:
      """
      Returns a cache key for the path and data read arguments
@@ -156,13 +172,13 @@ def load_cached( # pylint: disable=R0913
      auth_client: AuthenticationClient,
      path: DataPath,
      cache: KeyValueCache,
-     cache_expires_after: Optional[datetime.timedelta] = datetime.timedelta(hours=1),
+     cache_expires_after: datetime.timedelta | None = datetime.timedelta(hours=1),
      batch_size=1000,
-     version: Optional[int] = None,
-     row_filter: Optional[Expression] = None,
-     columns: Optional[List[str]] = None,
-     partition_filter_expressions: Optional[List[Tuple]] = None,
-     logger: Optional[SemanticLogger] = None,
+     version: int | None = None,
+     row_filter: Expression | None = None,
+     columns: list[str] | None = None,
+     partition_filter_expressions: list[tuple] | None = None,
+     logger: SemanticLogger | None = None,
  ) -> DataFrame:
      """
      Loads Delta Lake table from an external cache and converts it to a single pandas dataframe (after applying column projections and row filters).
@@ -217,7 +233,7 @@ def load_cached( # pylint: disable=R0913
      try:
          return concat(
              [
-                 DataFrameParquetSerializationFormat().deserialize(zlib.decompress(cached_batch))
+                 PandasDataFrameParquetSerializationFormat().deserialize(zlib.decompress(cached_batch))
                  for batch_key, cached_batch in cache.get(cache_key, is_map=True).items()
                  if batch_key != b"completed"
              ]
@@ -255,7 +271,7 @@ def load_cached( # pylint: disable=R0913
            cache.include(
                key=cache_key,
                attribute=str(batch_index),
-               value=zlib.compress(DataFrameParquetSerializationFormat().serialize(batch)),
+               value=zlib.compress(PandasDataFrameParquetSerializationFormat().serialize(batch)),
            )
            for batch_index, batch in enumerate(data)
        ],
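
For reference, the hunks above give the v2 `load` function time travel (a `timestamp` argument that is ignored whenever `version` is pinned) and emit a `DeprecationWarning` pointing to `adapta.storage.delta_lake.v3`. A minimal calling sketch, assuming an Azure path as in the v3 README below and that the v2 import index re-exports `load` (that re-export is an assumption, not shown in this diff):

```python
import datetime
import os
import warnings

from adapta.security.clients import AzureClient
from adapta.storage.models.azure import AdlsGen2Path
from adapta.storage.delta_lake.v2 import load  # assumed re-export of the v2 load shown above

os.environ["PROTEUS__USE_AZURE_CREDENTIAL"] = "1"
client = AzureClient()
path = AdlsGen2Path.from_hdfs_path("abfss://container@account.dfs.core.windows.net/my/table")

with warnings.catch_warnings():
    warnings.simplefilter("default", DeprecationWarning)  # surface the v2 deprecation notice

    # read the table as of a point in time; per the hunk above, ignored if version= is also set
    df_as_of = load(client, path, timestamp=datetime.datetime(2024, 1, 1))

    # pinning an explicit version takes precedence over timestamp
    df_v5 = load(client, path, version=5)
```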
adapta/storage/delta_lake/v2/_models.py (new file)
@@ -0,0 +1,72 @@
+ # pylint: disable=duplicate-code
+ """
+ Models used by delta lake functions.
+ """
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ from dataclasses import dataclass
+ from enum import Enum
+
+
+ class DeltaOperation(Enum):
+     """
+     Possible Delta table operations.
+     """
+
+     DELETE = "DELETE"
+     UPDATE = "UPDATE"
+     WRITE = "WRITE"
+     MERGE = "MERGE"
+     CREATE_TABLE = "CREATE TABLE"
+     CREATE_TABLE_AS_SELECT = "CREATE TABLE AS SELECT"
+     CREATE_OR_REPLACE_TABLE_AS_SELECT = "CREATE OR REPLACE TABLE AS SELECT"
+     CHANGE_COLUMN = "CHANGE COLUMN"
+     VACUUM_START = "VACUUM START"
+     VACUUM_END = "VACUUM END"
+     UNDEFINED = "UNDEFINED"
+
+
+ @dataclass
+ class DeltaTransaction:
+     """
+     A subset of Delta table transaction entry properties.
+     """
+
+     version: int
+     timestamp: int
+     operation: DeltaOperation
+     operation_parameters: dict
+     read_version: int
+     is_blind_append: bool
+
+     @classmethod
+     def from_dict(cls, value: dict) -> "DeltaTransaction":
+         """
+         Converts delta transaction log entry to DeltaTransaction.
+         :param value: single entry from `describe history ...`
+         :return:
+         """
+         delta_op = value.get("operation", DeltaOperation.UNDEFINED.value)
+         supported_ops = {item.value for item in DeltaOperation}
+
+         return cls(
+             version=value.get("version", -1),
+             timestamp=value["timestamp"],
+             operation=DeltaOperation(delta_op) if delta_op in supported_ops else DeltaOperation.UNDEFINED,
+             operation_parameters=value.get("operationParameters", {}),
+             read_version=value.get("readVersion", -1),
+             is_blind_append=value.get("isBlindAppend", False),
+         )
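
`DeltaTransaction.from_dict` above maps a raw transaction-log entry onto the dataclass, degrading gracefully to `UNDEFINED` / `-1` defaults when fields are missing or unrecognised. A small illustration with a made-up history entry (in practice these entries come from `history()` in `_functions.py`; the dictionary below is hypothetical):

```python
from adapta.storage.delta_lake.v2._models import DeltaOperation, DeltaTransaction

# hypothetical entry, shaped like one element of a Delta `describe history ...` result
entry = {
    "version": 12,
    "timestamp": 1717171717000,
    "operation": "MERGE",
    "operationParameters": {"predicate": "target.id = source.id"},
    "readVersion": 11,
    "isBlindAppend": False,
}

tx = DeltaTransaction.from_dict(entry)
assert tx.operation is DeltaOperation.MERGE

# unknown operations and missing fields fall back to UNDEFINED / -1 instead of raising
unknown = DeltaTransaction.from_dict({"timestamp": 0, "operation": "RESTORE"})
assert unknown.operation is DeltaOperation.UNDEFINED and unknown.version == -1
```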
adapta/storage/delta_lake/v3/README.md (new file)
@@ -0,0 +1,147 @@
+ # Delta Lake Operations
+
+ Supported API:
+ - read a delta table as a `MetaFrame`, which can easily be converted to a `pandas.DataFrame` or `polars.DataFrame`
+ - read a delta table in batches of a provided size, each batch being a `MetaFrame`
+ - read a subset of columns from a delta table
+ - read and filter a delta table without loading all rows into memory
+
+ ## Example usage
+ Prepare a connection and load the table.
+ ### For Azure Data Lake Gen2
+
+ ```python
+ import os
+ from adapta.security.clients import AzureClient
+ from adapta.storage.models.azure import AdlsGen2Path
+ from adapta.storage.delta_lake import load
+
+ os.environ["PROTEUS__USE_AZURE_CREDENTIAL"] = "1"
+ azure_client = AzureClient()
+ adls_path = AdlsGen2Path.from_hdfs_path('abfss://container@account.dfs.core.windows.net/path/to/my/table')
+
+ # get Iterable[MetaFrame]
+ batches = load(azure_client, adls_path, batch_size=1000)
+ ```
+
+ ### For AWS Simple Storage Service (S3) or S3-Compatible Storage
+
+ ```python
+ import os
+ import deltalake
+ import pandas as pd
+ import pyarrow as pa
+ from adapta.security.clients import AwsClient
+ from adapta.security.clients.aws._aws_credentials import EnvironmentAwsCredentials
+ from adapta.storage.delta_lake import load
+
+ # Set up environment variables (placeholders for your S3/MinIO credentials)
+ os.environ["PROTEUS__AWS_ACCESS_KEY_ID"] = minio_access_key_id
+ os.environ["PROTEUS__AWS_SECRET_ACCESS_KEY"] = minio_secret_key
+ os.environ["PROTEUS__AWS_REGION"] = "eu-central-1"
+ os.environ["PROTEUS__AWS_ENDPOINT"] = "http://example.com"
+
+ # Create client
+ credentials = EnvironmentAwsCredentials()
+ aws_client = AwsClient(credentials)
+
+ # Initialize session
+ aws_client.initialize_session()
+
+ # Create a delta lake table with sample data
+ data = {
+     'Character': ['Boromir', 'Harry Potter', 'Sherlock Holmes', 'Tony Stark', 'Darth Vader'],
+     'Occupation': ['Professional succumber to temptation', 'Wizard', 'Detective', 'Iron Man', 'Sith Lord'],
+     'Catchphrase': [
+         'One does not simply walk into Mordor.',
+         'Expecto Patronum!',
+         'Elementary, my dear Watson.',
+         'I am Iron Man.',
+         'I find your lack of faith disturbing.'
+     ]
+ }
+
+ df = pd.DataFrame(data)  # Create a pandas DataFrame from the data
+ table = pa.Table.from_pandas(df)  # Convert the DataFrame to a PyArrow Table
+ path_test = '/path/to/store/locally/delta/lake/table'
+ deltalake.write_deltalake(path_test, table)  # Write the PyArrow Table to a Delta Lake table
+
+ # Save the Delta Lake table to S3 blob storage
+ # (s3_client is an adapta S3 storage client and s3_path the target S3 path, constructed elsewhere)
+ s3_client.save_data(path_test, s3_path)
+
+ # Get Iterable[MetaFrame]
+ batches = load(aws_client, s3_path, batch_size=1000)
+
+ # Print each loaded batch
+ for batch in batches:
+     print(batch.to_pandas())
+     print("\n---\n")
+
+ # The content of the Delta Lake table should be printed on the screen
+ # Character ... Catchphrase
+ # 0 Boromir ... One does not simply walk into Mordor.
+ # 1 Harry Potter ... Expecto Patronum!
+ # 2 Sherlock Holmes ... Elementary, my dear Watson.
+ # 3 Tony Stark ... I am Iron Man.
+ # 4 Darth Vader ... I find your lack of faith disturbing.
+ #
+ # [5 rows x 3 columns]
+ # ---
+ ```
+ ## Using the Filtering API
+ 1. Create generic filter expressions
+ ```python
+ from adapta.storage.models.filter_expression import FilterField
+
+ simple_filter = FilterField("my_column") == "some-value"
+ combined_filter = (FilterField("my_column") == "some-value") & (FilterField("other_column") == "another-value")
+ combined_filter_with_collection = (FilterField("my_column") == "something1") & (FilterField("other_column").isin(['else', 'nonexistent']))
+ complex_filter = (FilterField("my_column") == "something1") | (FilterField("my_other_column") == "else") & (FilterField("another_column") == 123)
+ ```
+ 2. Load and apply the expression
+ ```python
+ # simple_filtered is of type pandas.DataFrame
+ simple_filtered = load(azure_client, adls_path, row_filter=simple_filter, columns=["my_column", "my_other_column"]).to_pandas()
+ # my_column my_other_column
+ # 0 some-value 123
+ # 1 some-value another-value
+
+ print(load(azure_client, adls_path, row_filter=combined_filter, columns=["my_column", "my_other_column"]).to_pandas())
+ # my_column my_other_column
+ # 0 some-value another-value
+
+ print(load(azure_client, adls_path, row_filter=combined_filter_with_collection, columns=["my_column", "my_other_column"]).to_pandas())
+ # my_column my_other_column
+ # 0 something1 else
+ # 1 something1 nonexistent
+
+ print(load(azure_client, adls_path, row_filter=complex_filter, columns=["my_column", "my_other_column", "another_column"]).to_pandas())
+ # my_column my_other_column another_column
+ # 0 something1 else 1
+ # 1 something1 nonexistent 2
+ # 2 something1 nonexistent1 123
+
+ ```
+ ## Using with Hive paths
+ ```python
+ logger: SemanticLogger  # review the adapta.logs readme to learn how to construct a logger instance
+ os.environ['PROTEUS__HIVE_USER'] = 'delamain'
+ os.environ['PROTEUS__HIVE_PASSWORD'] = 'secret'
+ hive_path = HivePath.from_hdfs_path(
+     "hive://sqlserver@myserver.database.windows.net:1433/sparkdatalake/bronze/bronze_table")
+
+ adls_path2 = AdlsGen2Path.from_hdfs_path(hive_path.get_physical_path(logger=logger))
+
+ # get Iterable[MetaFrame]
+ batches2 = load(azure_client, adls_path2, batch_size=1000)
+
+ # read data using Redis Cache, improves read time by a factor of >10 on single-node Redis.
+ # for big tables, choose bigger batch sizes to speed up cache population. General rule:
+ # batch_size = row_count / 10
+ # if there is no cache hit, load_cached() will fall back to load() behaviour
+ r_cache = RedisCache(host="esd-superset-test.redis.cache.windows.net", database_number=1)
+ os.environ['PROTEUS__CACHE_REDIS_PASSWORD'] = '...'
+ read_raw = load_cached(azure_client, adls_path, r_cache, row_filter=simple_filter,
+                        cache_expires_after=datetime.timedelta(minutes=15), batch_size=int(1e6))
+ ```
+
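
Pulling the README's pieces together, here is a compact sketch of a filtered, column-projected batch read with the v3 API. Client construction, `FilterField`, the `load` keyword arguments and the `MetaFrame.to_pandas()` conversion all come from the examples above; the column names are purely illustrative, and feeding a batch into polars is only implied by the MetaFrame description, not shown in this diff:

```python
import os

from adapta.security.clients import AzureClient
from adapta.storage.models.azure import AdlsGen2Path
from adapta.storage.models.filter_expression import FilterField
from adapta.storage.delta_lake import load

os.environ["PROTEUS__USE_AZURE_CREDENTIAL"] = "1"
azure_client = AzureClient()
adls_path = AdlsGen2Path.from_hdfs_path("abfss://container@account.dfs.core.windows.net/path/to/my/table")

# only rows matching the filter and only the projected columns are materialised
open_eu_us = (FilterField("status") == "OPEN") & (FilterField("region").isin(["EU", "US"]))

for batch in load(
    azure_client,
    adls_path,
    row_filter=open_eu_us,
    columns=["order_id", "status", "region"],  # hypothetical column names
    batch_size=50_000,
):
    pdf = batch.to_pandas()  # each batch is a MetaFrame, converted as in the README examples
    print(len(pdf))
```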
adapta/storage/delta_lake/v3/__init__.py (new file)
@@ -0,0 +1,20 @@
+ """
+ Import index
+ """
+ # Copyright (c) 2023-2026. ECCO Data & AI and other project contributors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #   http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ from adapta.storage.delta_lake.v3._functions import *
+ from adapta.storage.delta_lake.v3._models import *
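
Because this import index star-exports `_functions` and `_models`, callers can import from the versioned package itself instead of reaching into the private modules. A sketch of the intended import style, assuming `load` and `history` keep their v2 names in `v3._functions` (suggested by the deprecation message and the moved `_models.py`, but not spelled out in this diff):

```python
# preferred: go through the import index
from adapta.storage.delta_lake.v3 import load, history  # assumed public names

# equivalent but discouraged: depends on private module layout
# from adapta.storage.delta_lake.v3._functions import load
```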