databricks4py 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. databricks4py/__init__.py +56 -0
  2. databricks4py/catalog.py +65 -0
  3. databricks4py/config/__init__.py +6 -0
  4. databricks4py/config/base.py +119 -0
  5. databricks4py/config/unity.py +72 -0
  6. databricks4py/filters/__init__.py +17 -0
  7. databricks4py/filters/base.py +154 -0
  8. databricks4py/io/__init__.py +40 -0
  9. databricks4py/io/checkpoint.py +98 -0
  10. databricks4py/io/dbfs.py +91 -0
  11. databricks4py/io/delta.py +564 -0
  12. databricks4py/io/merge.py +176 -0
  13. databricks4py/io/streaming.py +281 -0
  14. databricks4py/logging.py +39 -0
  15. databricks4py/metrics/__init__.py +22 -0
  16. databricks4py/metrics/base.py +66 -0
  17. databricks4py/metrics/delta_sink.py +75 -0
  18. databricks4py/metrics/logging_sink.py +20 -0
  19. databricks4py/migrations/__init__.py +27 -0
  20. databricks4py/migrations/alter.py +114 -0
  21. databricks4py/migrations/runner.py +241 -0
  22. databricks4py/migrations/schema_diff.py +136 -0
  23. databricks4py/migrations/validators.py +195 -0
  24. databricks4py/observability/__init__.py +24 -0
  25. databricks4py/observability/_utils.py +24 -0
  26. databricks4py/observability/batch_context.py +134 -0
  27. databricks4py/observability/health.py +223 -0
  28. databricks4py/observability/query_listener.py +236 -0
  29. databricks4py/py.typed +0 -0
  30. databricks4py/quality/__init__.py +26 -0
  31. databricks4py/quality/base.py +54 -0
  32. databricks4py/quality/expectations.py +184 -0
  33. databricks4py/quality/gate.py +90 -0
  34. databricks4py/retry.py +102 -0
  35. databricks4py/secrets.py +69 -0
  36. databricks4py/spark_session.py +68 -0
  37. databricks4py/testing/__init__.py +35 -0
  38. databricks4py/testing/assertions.py +111 -0
  39. databricks4py/testing/builders.py +127 -0
  40. databricks4py/testing/fixtures.py +134 -0
  41. databricks4py/testing/mocks.py +106 -0
  42. databricks4py/testing/temp_table.py +73 -0
  43. databricks4py/workflow.py +219 -0
  44. databricks4py-0.2.0.dist-info/METADATA +589 -0
  45. databricks4py-0.2.0.dist-info/RECORD +48 -0
  46. databricks4py-0.2.0.dist-info/WHEEL +5 -0
  47. databricks4py-0.2.0.dist-info/licenses/LICENSE +21 -0
  48. databricks4py-0.2.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,69 @@
1
+ """Databricks secret management via dbutils."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import Any
7
+
8
+ import pyspark.sql
9
+
10
+ from databricks4py.spark_session import active_fallback
11
+
12
+ __all__ = ["SecretFetcher", "inject_dbutils"]
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class SecretFetcher:
    """Read secrets from Databricks secret scopes through ``dbutils``.

    The ``dbutils`` module is kept in the :attr:`dbutils` class attribute.
    It starts out as ``None`` and has to be populated before
    :meth:`fetch_secret` is used, either with :func:`inject_dbutils` or by
    assigning the attribute directly.

    Example::

        from databricks4py.secrets import SecretFetcher, inject_dbutils
        import pyspark.dbutils  # only available on Databricks

        inject_dbutils(pyspark.dbutils)
        value = SecretFetcher.fetch_secret("my-scope", "api-key")
    """

    # The injected dbutils module; None until something has been injected.
    dbutils: Any = None

    @staticmethod
    def fetch_secret(
        secret_scope: str,
        secret_key: str,
        *,
        spark: pyspark.sql.SparkSession | None = None,
    ) -> str:
        """Look up one secret value.

        Args:
            secret_scope: Name of the secret scope.
            secret_key: Name of the key inside that scope.
            spark: SparkSession handed to ``DBUtils``; when ``None`` the
                session is resolved with ``active_fallback``.

        Returns:
            Whatever ``secrets.get`` of the created ``DBUtils`` object returns.

        Raises:
            RuntimeError: If no dbutils module has been injected.
        """
        module = SecretFetcher.dbutils
        if module is None:
            raise RuntimeError(
                "SecretFetcher.dbutils has not been set. "
                "Call inject_dbutils(pyspark.dbutils) first."
            )

        session = active_fallback(spark)
        client = module.DBUtils(session)
        return client.secrets.get(scope=secret_scope, key=secret_key)
60
+
61
+
62
def inject_dbutils(dbutils_module: Any) -> None:
    """Register the dbutils module that ``SecretFetcher`` should use.

    The module is stored on the ``SecretFetcher.dbutils`` class attribute
    and the injection is reported at debug level.

    Args:
        dbutils_module: The ``pyspark.dbutils`` module (only available on Databricks).
    """
    setattr(SecretFetcher, "dbutils", dbutils_module)
    logger.debug("Injected dbutils module: %s", dbutils_module)
@@ -0,0 +1,68 @@
1
+ """SparkSession management utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import pyspark.sql
6
+
7
+ __all__ = ["get_active", "active_fallback", "get_or_create_local_session"]
8
+
9
+
10
def get_active() -> pyspark.sql.SparkSession:
    """Return the SparkSession reported by ``SparkSession.getActiveSession()``.

    Raises:
        RuntimeError: If ``getActiveSession()`` returns ``None``.
    """
    session = pyspark.sql.SparkSession.getActiveSession()
    if session is not None:
        return session

    message = (
        "No active SparkSession found. "
        "Create one with SparkSession.builder.getOrCreate() or "
        "use get_or_create_local_session()."
    )
    raise RuntimeError(message)
24
+
25
+
26
def active_fallback(spark: pyspark.sql.SparkSession | None = None) -> pyspark.sql.SparkSession:
    """Resolve an optional ``spark`` argument to a concrete session.

    Intended for library functions that take an optional ``spark``
    keyword::

        def my_function(data, *, spark=None):
            spark = active_fallback(spark)
            ...

    Args:
        spark: The session to use, or ``None`` to look up the active one.

    Returns:
        ``spark`` itself when it is not ``None``, otherwise the result of
        ``get_active()``.

    Raises:
        RuntimeError: If ``spark`` is ``None`` and ``get_active()`` finds no session.
    """
    # Only None triggers the lookup; any other value is returned untouched.
    if spark is None:
        return get_active()
    return spark
43
+
44
+
45
def get_or_create_local_session() -> pyspark.sql.SparkSession:
    """Get or create a local SparkSession set up for Delta Lake.

    Meant for local development and testing. The builder is configured with:

    - the ``local[*]`` master
    - the Delta Lake SQL extension and the Delta catalog as ``spark_catalog``
    - ``spark-warehouse`` (a relative path) as the SQL warehouse directory

    Returns:
        The session produced by ``getOrCreate()`` on the Delta-configured builder.
    """
    # Imported lazily so that importing this module does not require ``delta``.
    from delta import configure_spark_with_delta_pip

    settings = {
        "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
        "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
        "spark.sql.warehouse.dir": "spark-warehouse",
    }

    builder = pyspark.sql.SparkSession.builder.master("local[*]")
    for option, value in settings.items():
        builder = builder.config(option, value)

    return configure_spark_with_delta_pip(builder).getOrCreate()
@@ -0,0 +1,35 @@
1
+ """Test utilities and fixtures for databricks4py.
2
+
3
+ Provides pytest fixtures and mock objects for testing Spark and
4
+ Databricks applications locally.
5
+
6
+ Usage in your ``conftest.py``::
7
+
8
+ from databricks4py.testing.fixtures import * # noqa: F401,F403
9
+ """
10
+
11
+ from databricks4py.testing.assertions import assert_frame_equal, assert_schema_equal
12
+ from databricks4py.testing.builders import DataFrameBuilder
13
+ from databricks4py.testing.fixtures import (
14
+ clear_env,
15
+ df_builder,
16
+ spark_session,
17
+ spark_session_function,
18
+ temp_delta,
19
+ )
20
+ from databricks4py.testing.mocks import MockDBUtils, MockDBUtilsModule
21
+ from databricks4py.testing.temp_table import TempDeltaTable
22
+
23
+ __all__ = [
24
+ "DataFrameBuilder",
25
+ "MockDBUtils",
26
+ "MockDBUtilsModule",
27
+ "TempDeltaTable",
28
+ "assert_frame_equal",
29
+ "assert_schema_equal",
30
+ "clear_env",
31
+ "df_builder",
32
+ "spark_session",
33
+ "spark_session_function",
34
+ "temp_delta",
35
+ ]
@@ -0,0 +1,111 @@
1
+ """DataFrame and schema assertion helpers for testing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pyspark.sql import DataFrame
6
+ from pyspark.sql.types import StructType
7
+
8
+ __all__ = ["assert_frame_equal", "assert_schema_equal"]
9
+
10
+
11
+ def assert_schema_equal(
12
+ actual: StructType,
13
+ expected: StructType,
14
+ *,
15
+ check_nullable: bool = False,
16
+ ) -> None:
17
+ """Compare two StructTypes field-by-field with a clear diff on mismatch.
18
+
19
+ Args:
20
+ actual: The actual schema.
21
+ expected: The expected schema.
22
+ check_nullable: Whether to compare nullable flags (default False).
23
+
24
+ Raises:
25
+ AssertionError: If the schemas differ.
26
+ """
27
+ actual_fields = actual.fields
28
+ expected_fields = expected.fields
29
+
30
+ if len(actual_fields) != len(expected_fields):
31
+ raise AssertionError(
32
+ f"Schema length mismatch: actual has {len(actual_fields)} fields, "
33
+ f"expected {len(expected_fields)}"
34
+ )
35
+
36
+ for i, (a, e) in enumerate(zip(actual_fields, expected_fields, strict=True)):
37
+ if a.name != e.name:
38
+ raise AssertionError(
39
+ f"Field {i}: name mismatch — actual={a.name!r}, expected={e.name!r}"
40
+ )
41
+ if a.dataType != e.dataType:
42
+ raise AssertionError(
43
+ f"Field {a.name!r}: type mismatch — actual={a.dataType}, expected={e.dataType}"
44
+ )
45
+ if check_nullable and a.nullable != e.nullable:
46
+ raise AssertionError(
47
+ f"Field {a.name!r}: nullable mismatch — actual={a.nullable}, expected={e.nullable}"
48
+ )
49
+
50
+
51
def assert_frame_equal(
    actual: DataFrame,
    expected: DataFrame,
    *,
    check_order: bool = False,
    check_schema: bool = True,
    check_nullable: bool = False,
) -> None:
    """Assert two DataFrames are equal.

    Uses Spark 3.5's ``assertDataFrameEqual`` when available, otherwise
    falls back to a manual row-by-row comparison.

    Args:
        actual: The actual DataFrame.
        expected: The expected DataFrame.
        check_order: Whether row order matters (default False).
        check_schema: Whether to compare schemas (default True). When False,
            only the collected rows are compared.
        check_nullable: Whether to compare nullable flags (default False).
            Only consulted when ``check_schema`` is True.

    Raises:
        AssertionError: If the DataFrames differ.
    """
    if check_schema:
        assert_schema_equal(actual.schema, expected.schema, check_nullable=check_nullable)

    try:
        from pyspark.testing.utils import assertDataFrameEqual

        if check_schema:
            assertDataFrameEqual(actual, expected, checkRowOrder=check_order)
        else:
            # assertDataFrameEqual runs its own schema comparison when it is
            # given two DataFrames; handing it the collected rows keeps
            # check_schema=False from being silently overridden.
            assertDataFrameEqual(
                actual.collect(), expected.collect(), checkRowOrder=check_order
            )
    except ImportError:
        # pyspark.testing is not importable: compare collected rows by hand.
        _manual_frame_compare(actual, expected, check_order=check_order)
83
+
84
+
85
+ def _manual_frame_compare(
86
+ actual: DataFrame,
87
+ expected: DataFrame,
88
+ *,
89
+ check_order: bool,
90
+ ) -> None:
91
+ actual_rows = actual.collect()
92
+ expected_rows = expected.collect()
93
+
94
+ if len(actual_rows) != len(expected_rows):
95
+ raise AssertionError(
96
+ f"Row count mismatch: actual={len(actual_rows)}, expected={len(expected_rows)}"
97
+ )
98
+
99
+ if check_order:
100
+ for i, (a, e) in enumerate(zip(actual_rows, expected_rows, strict=True)):
101
+ if a != e:
102
+ raise AssertionError(f"Row {i} mismatch:\n actual: {a}\n expected: {e}")
103
+ else:
104
+ actual_sorted = sorted(actual_rows, key=str)
105
+ expected_sorted = sorted(expected_rows, key=str)
106
+ if actual_sorted != expected_sorted:
107
+ raise AssertionError(
108
+ f"DataFrames differ (ignoring order):\n"
109
+ f" actual: {actual_sorted}\n"
110
+ f" expected: {expected_sorted}"
111
+ )
@@ -0,0 +1,127 @@
1
+ """Fluent builder for test DataFrames."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import random
6
+ from typing import Any
7
+
8
+ from pyspark.sql import DataFrame, SparkSession
9
+ from pyspark.sql.types import (
10
+ BooleanType,
11
+ DataType,
12
+ DateType,
13
+ DoubleType,
14
+ FloatType,
15
+ IntegerType,
16
+ LongType,
17
+ ShortType,
18
+ StringType,
19
+ StructField,
20
+ StructType,
21
+ TimestampType,
22
+ )
23
+
24
+ __all__ = ["DataFrameBuilder"]
25
+
26
# Lower-case short type names mapped to Spark data type instances.
# ``_resolve_type`` consults this table before falling back to Spark's own
# data type string parser. Several names are aliases for the same type.
_TYPE_MAP: dict[str, DataType] = dict(
    string=StringType(),
    int=IntegerType(),
    integer=IntegerType(),
    long=LongType(),
    bigint=LongType(),
    short=ShortType(),
    smallint=ShortType(),
    float=FloatType(),
    double=DoubleType(),
    boolean=BooleanType(),
    date=DateType(),
    timestamp=TimestampType(),
)
40
+
41
+
42
def _resolve_type(type_str: str) -> DataType:
    """Turn a type name into a Spark ``DataType``.

    The lower-cased name is looked up in ``_TYPE_MAP`` first; names that are
    not in the table are passed, unchanged, to pyspark's
    ``_parse_datatype_string``.
    """
    key = type_str.lower()
    if key in _TYPE_MAP:
        return _TYPE_MAP[key]

    # Imported only when needed: this is a private pyspark helper.
    from pyspark.sql.types import _parse_datatype_string

    return _parse_datatype_string(type_str)
49
+
50
+
51
class DataFrameBuilder:
    """Chainable helper for assembling small DataFrames in tests.

    Every ``with_*`` method returns the builder itself so calls can be
    chained; :meth:`build` creates the DataFrame.

    Example::

        df = (
            DataFrameBuilder(spark)
            .with_columns({"id": "int", "name": "string"})
            .with_rows((1, "alice"), (2, "bob"))
            .build()
        )
    """

    def __init__(self, spark: SparkSession) -> None:
        self._spark = spark
        self._schema: StructType | None = None
        self._rows: list[tuple] = []

    def with_columns(self, schema: dict[str, str]) -> DataFrameBuilder:
        """Set the schema from a ``{name: type_string}`` mapping."""
        self._schema = StructType(
            [StructField(col_name, _resolve_type(col_type)) for col_name, col_type in schema.items()]
        )
        return self

    def with_schema(self, schema: StructType) -> DataFrameBuilder:
        """Set the schema to the given StructType as-is."""
        self._schema = schema
        return self

    def with_rows(self, *rows: tuple) -> DataFrameBuilder:
        """Append the given rows to the rows collected so far."""
        self._rows.extend(rows)
        return self

    def with_sequential(self, column: str, start: int = 1, count: int = 10) -> DataFrameBuilder:
        """Replace all rows with ``count`` one-element rows counting up from ``start``.

        When no schema has been set yet, a single integer column named
        ``column`` becomes the schema; an existing schema is left untouched.
        """
        if self._schema is None:
            self._schema = StructType([StructField(column, IntegerType())])
        # zip over a single iterable yields one-element tuples.
        self._rows = list(zip(range(start, start + count)))
        return self

    def with_nulls(
        self, column: str, frequency: float = 0.1, *, seed: int | None = None
    ) -> DataFrameBuilder:
        """Set ``column`` to None in a random subset of the current rows.

        One random number is drawn per row from ``random.Random(seed)``; the
        value is nulled when the draw is below ``frequency``. Only rows that
        are already present are affected, so call this after adding rows.

        Raises:
            ValueError: If no schema is set or ``column`` is not in the schema.
        """
        if self._schema is None:
            raise ValueError("Schema must be defined before injecting nulls")

        field_names = [field.name for field in self._schema.fields]
        if column not in field_names:
            raise ValueError(f"Column {column!r} not found in schema")
        target = field_names.index(column)

        rng = random.Random(seed)
        updated: list[tuple[Any, ...]] = []
        for row in self._rows:
            cells = [*row]
            if rng.random() < frequency:
                cells[target] = None
            updated.append(tuple(cells))
        self._rows = updated
        return self

    def build(self) -> DataFrame:
        """Create the DataFrame from the collected rows and schema.

        Raises:
            ValueError: If no schema has been defined.
        """
        if self._schema is None:
            raise ValueError("No schema defined — call with_columns() or with_schema() first")
        return self._spark.createDataFrame(self._rows, schema=self._schema)
@@ -0,0 +1,134 @@
1
+ """Pytest fixtures for Spark and Delta Lake testing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import os
6
+ import shutil
7
+ from collections.abc import Callable, Generator
8
+ from typing import Any
9
+
10
+ import pyspark.sql
11
+ import pytest
12
+
13
+ from databricks4py.testing.builders import DataFrameBuilder
14
+ from databricks4py.testing.temp_table import TempDeltaTable
15
+
16
+ __all__ = [
17
+ "clear_env",
18
+ "df_builder",
19
+ "spark_session",
20
+ "spark_session_function",
21
+ "temp_delta",
22
+ ]
23
+
24
+
25
@pytest.fixture(scope="session")
def spark_session(
    tmp_path_factory: pytest.TempPathFactory,
) -> Generator[pyspark.sql.SparkSession, None, None]:
    """Session-scoped SparkSession with Delta Lake configured.

    One session is shared by the whole test run. The Spark warehouse and the
    Derby home/metastore location each get their own temporary directory from
    ``tmp_path_factory``. The session is stopped when the test session ends.
    """
    from delta import configure_spark_with_delta_pip

    warehouse_dir = str(tmp_path_factory.mktemp("spark-warehouse"))
    derby_dir = str(tmp_path_factory.mktemp("derby"))

    # Applied in this order on top of the master/app name.
    settings = {
        "spark.sql.extensions": "io.delta.sql.DeltaSparkSessionExtension",
        "spark.sql.catalog.spark_catalog": "org.apache.spark.sql.delta.catalog.DeltaCatalog",
        "spark.sql.warehouse.dir": warehouse_dir,
        "javax.jdo.option.ConnectionURL": f"jdbc:derby:{derby_dir}/metastore;create=true",
        "spark.driver.extraJavaOptions": f"-Dderby.system.home={derby_dir}",
        "spark.ui.enabled": "false",
        "spark.sql.shuffle.partitions": "2",
    }

    builder = pyspark.sql.SparkSession.builder.master("local[*]").appName("databricks4py-tests")
    for option, value in settings.items():
        builder = builder.config(option, value)

    session = configure_spark_with_delta_pip(builder).getOrCreate()
    yield session
    session.stop()
58
+
59
+
60
@pytest.fixture()
def spark_session_function(
    spark_session: pyspark.sql.SparkSession,
) -> Generator[pyspark.sql.SparkSession, None, None]:
    """Function-scoped SparkSession that cleans up between tests.

    Reuses the session-scoped SparkSession. After each test it drops every
    table, view and temporary view listed in the catalog, clears the cache
    and removes leftover directories from the warehouse directory.
    """
    yield spark_session

    catalog = spark_session.catalog

    def _quoted(identifier: str) -> str:
        # Backtick-quote so names with special characters stay valid SQL.
        return "`" + identifier.replace("`", "``") + "`"

    for db in catalog.listDatabases():
        for table in catalog.listTables(db.name):
            if table.isTemporary:
                # Temporary views do not belong to a database, so a
                # database-qualified DROP would not remove them.
                catalog.dropTempView(table.name)
                continue
            # Persistent views need DROP VIEW rather than DROP TABLE.
            object_kind = "VIEW" if table.tableType == "VIEW" else "TABLE"
            spark_session.sql(
                f"DROP {object_kind} IF EXISTS {_quoted(db.name)}.{_quoted(table.name)}"
            )

    catalog.clearCache()

    # Clean up any leftover warehouse files
    warehouse = spark_session.conf.get("spark.sql.warehouse.dir")
    if warehouse and os.path.exists(warehouse):
        for item in os.listdir(warehouse):
            item_path = os.path.join(warehouse, item)
            if os.path.isdir(item_path):
                shutil.rmtree(item_path, ignore_errors=True)
85
+
86
+
87
@pytest.fixture(autouse=True)
def clear_env() -> Generator[None, None, None]:
    """Auto-use fixture that puts ``os.environ`` back the way it was.

    A snapshot of the environment is taken before the test; afterwards the
    environment is emptied and refilled from that snapshot.
    """
    snapshot = dict(os.environ)
    yield
    os.environ.clear()
    os.environ.update(snapshot)
94
+
95
+
96
@pytest.fixture()
def df_builder(spark_session: pyspark.sql.SparkSession) -> DataFrameBuilder:
    """Provide a fresh DataFrameBuilder that uses the ``spark_session`` fixture's session."""
    builder = DataFrameBuilder(spark_session)
    return builder
100
+
101
+
102
@pytest.fixture()
def temp_delta(
    spark_session_function: pyspark.sql.SparkSession,
) -> Generator[Callable[..., TempDeltaTable], None, None]:
    """Factory fixture that creates TempDeltaTables and cleans up on exit.

    The fixture yields a keyword-only factory. Every table the factory
    creates is remembered, and when the test finishes a
    ``DROP TABLE IF EXISTS`` is issued for each remembered table name.

    Usage::

        def test_something(temp_delta):
            with temp_delta(schema={"id": "int"}, data=[(1,)]) as table:
                assert table.dataframe().count() == 1
    """
    created: list[TempDeltaTable] = []

    def _factory(
        *,
        table_name: str | None = None,
        schema: dict[str, str] | None = None,
        data: list[tuple[Any, ...]] | None = None,
    ) -> TempDeltaTable:
        table = TempDeltaTable(
            spark_session_function,
            table_name=table_name,
            schema=schema,
            data=data,
        )
        created.append(table)
        return table

    yield _factory

    for table in created:
        spark_session_function.sql(f"DROP TABLE IF EXISTS {table.table_name}")
@@ -0,0 +1,106 @@
1
+ """Mock objects for Databricks dbutils in local testing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Any
6
+
7
+ __all__ = ["MockDBUtils", "MockDBUtilsModule"]
8
+
9
+
10
class _MockSecrets:
    """In-memory stand-in for ``dbutils.secrets``."""

    def __init__(self) -> None:
        # Values are keyed by the (scope, key) pair.
        self._secrets: dict[tuple[str, str], str] = {}

    def get(self, scope: str, key: str) -> str:
        """Return the value stored for ``scope``/``key``.

        Args:
            scope: The secret scope.
            key: The secret key.

        Raises:
            KeyError: If nothing was stored for that pair.
        """
        lookup = (scope, key)
        if lookup not in self._secrets:
            raise KeyError(f"Secret not found: scope={scope!r}, key={key!r}") from None
        return self._secrets[lookup]

    def put(self, scope: str, key: str, value: str) -> None:
        """Store ``value`` under ``scope``/``key`` (for test setup)."""
        self._secrets[(scope, key)] = value
34
+
35
+
36
class _MockFS:
    """Recording stand-in for ``dbutils.fs``.

    The mutating calls touch no file system: each one appends its arguments
    to a per-operation list and returns ``True``. ``ls`` answers from the
    ``_ls_results`` mapping.
    """

    def __init__(self) -> None:
        self._copies: list[tuple[str, str, bool]] = []
        self._moves: list[tuple[str, str, bool]] = []
        self._removes: list[tuple[str, bool]] = []
        self._mkdirs: list[str] = []
        # path -> entries that ``ls`` should return for that path
        self._ls_results: dict[str, list[Any]] = {}

    def cp(self, source: str, dest: str, recurse: bool = False) -> bool:
        """Remember a copy request as ``(source, dest, recurse)``."""
        record = (source, dest, recurse)
        self._copies.append(record)
        return True

    def mv(self, source: str, dest: str, recurse: bool = False) -> bool:
        """Remember a move request as ``(source, dest, recurse)``."""
        record = (source, dest, recurse)
        self._moves.append(record)
        return True

    def rm(self, path: str, recurse: bool = False) -> bool:
        """Remember a remove request as ``(path, recurse)``."""
        record = (path, recurse)
        self._removes.append(record)
        return True

    def mkdirs(self, path: str) -> bool:
        """Remember a mkdirs request for ``path``."""
        self._mkdirs.append(path)
        return True

    def ls(self, path: str) -> list[Any]:
        """Return the entries registered for ``path``, or an empty list."""
        if path in self._ls_results:
            return self._ls_results[path]
        return []
69
+
70
+
71
class MockDBUtils:
    """Stand-in for a Databricks ``DBUtils`` object in local tests.

    Exposes two attributes, ``secrets`` and ``fs``, each backed by a mock.

    Example::

        mock = MockDBUtils()
        mock.secrets.put("my-scope", "api-key", "secret-value")
        assert mock.secrets.get("my-scope", "api-key") == "secret-value"
    """

    def __init__(self) -> None:
        # Each instance gets its own, initially empty, mocks.
        self.fs = _MockFS()
        self.secrets = _MockSecrets()
86
+
87
+
88
class MockDBUtilsModule:
    """Stand-in for the ``pyspark.dbutils`` module.

    It offers a ``DBUtils`` callable that takes a SparkSession, matching
    the way the injected module is used by ``module.DBUtils(spark)``.

    Example::

        mock_module = MockDBUtilsModule()
        dbutils = mock_module.DBUtils(spark)
        dbutils.secrets.get("scope", "key")
    """

    def __init__(self, mock_dbutils: MockDBUtils | None = None) -> None:
        # A falsy argument (including None) is replaced by a new MockDBUtils.
        if mock_dbutils:
            self._mock_dbutils = mock_dbutils
        else:
            self._mock_dbutils = MockDBUtils()

    def DBUtils(self, spark: Any = None) -> MockDBUtils:  # noqa: N802
        """Return the wrapped mock; the ``spark`` argument is not used."""
        return self._mock_dbutils