aqualisys 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
aqualisys/__init__.py ADDED
@@ -0,0 +1,19 @@
1
+ """
2
+ Aqualisys: a Polars-first data quality toolkit.
3
+
4
+ Expose the key classes so downstream users can import from `aqualisys`.
5
+ """
6
+
7
+ from .checker import DataQualityChecker, RuleBundle
8
+ from .checks.rules import AcceptedValuesRule, NotNullRule, RelationshipRule, UniqueRule
9
+ from .logging.sqlite import SQLiteRunLogger
10
+
11
+ __all__ = [
12
+ "AcceptedValuesRule",
13
+ "DataQualityChecker",
14
+ "NotNullRule",
15
+ "RelationshipRule",
16
+ "RuleBundle",
17
+ "SQLiteRunLogger",
18
+ "UniqueRule",
19
+ ]
aqualisys/checker.py ADDED
@@ -0,0 +1,98 @@
1
+ from collections.abc import Callable, Iterable, Sequence
2
+ from dataclasses import dataclass
3
+ from uuid import uuid4
4
+
5
+ try:
6
+ import polars as pl
7
+ except ModuleNotFoundError:
8
+ # pragma: no cover - optional dependency in some environments
9
+ pl = None # type: ignore
10
+
11
+ from .checks.base import BaseRule, RuleContext, RuleResult, RuleSeverity
12
+ from .logging.base import RunLogger
13
+
14
+
15
@dataclass(slots=True)
class RuleBundle:
    """A named, reusable group of rules produced on demand by a factory."""

    name: str
    description: str
    # Invoked on every rules() call; expected to yield fresh rule instances.
    rule_factory: Callable[[], Sequence[BaseRule]]

    def rules(self) -> list[BaseRule]:
        """Materialize the bundle's rules as a new list."""
        return [*self.rule_factory()]
23
+
24
+
25
@dataclass(slots=True)
class ValidationReport:
    """Outcome of one checker run over a single dataset."""

    run_id: str
    dataset_name: str
    results: list[RuleResult]

    @property
    def passed(self) -> bool:
        """True when no executed rule reported a failure."""
        return not self.failed_rules

    @property
    def failed_rules(self) -> list[RuleResult]:
        """The results of every rule that did not pass."""
        return [outcome for outcome in self.results if not outcome.passed]
38
+
39
+
40
class DataQualityChecker:
    """Coordinates rule execution and logging.

    Rules supplied directly come first, followed by the rules contributed
    by each bundle, in the order the bundles were given.
    """

    def __init__(
        self,
        rules: Iterable[BaseRule] | None = None,
        bundles: Iterable[RuleBundle] | None = None,
        logger: RunLogger | None = None,
        fail_fast: bool = False,
    ) -> None:
        collected: list[BaseRule] = list(rules) if rules else []
        if bundles:
            for bundle in bundles:
                collected.extend(bundle.rules())
        self._rules = collected
        self._logger = logger
        self._fail_fast = fail_fast

    @property
    def rules(self) -> list[BaseRule]:
        """A defensive copy of the registered rules."""
        return self._rules.copy()

    def add_rules(self, *rules: BaseRule) -> None:
        """Register additional rules after construction."""
        for rule in rules:
            self._rules.append(rule)

    def run(
        self,
        dataframe: "pl.DataFrame",
        dataset_name: str,
        run_id: str | None = None,
    ) -> ValidationReport:
        """Evaluate every registered rule against *dataframe*.

        A run id is generated when none (or an empty string) is supplied.
        With ``fail_fast`` enabled, execution stops at the first failing
        ERROR-severity rule; the run-completed log entry is still written.
        """
        if pl is None:  # pragma: no cover - guard for environments without polars
            raise RuntimeError("polars is required to run validations")

        effective_run_id = run_id if run_id else str(uuid4())
        context = RuleContext(dataset_name=dataset_name, run_id=effective_run_id)
        outcomes: list[RuleResult] = []

        if self._logger is not None:
            self._logger.log_run_started(context)

        for rule in self._rules:
            outcome = rule.evaluate(dataframe)
            outcomes.append(outcome)
            if self._logger is not None:
                self._logger.log_rule_result(context, outcome)
            abort = (
                self._fail_fast
                and not outcome.passed
                and rule.severity is RuleSeverity.ERROR
            )
            if abort:
                break

        if self._logger is not None:
            self._logger.log_run_completed(context, outcomes)

        return ValidationReport(
            run_id=effective_run_id,
            dataset_name=dataset_name,
            results=outcomes,
        )
@@ -0,0 +1,3 @@
1
+ from .rules import AcceptedValuesRule, NotNullRule, RelationshipRule, UniqueRule
2
+
3
+ __all__ = ["AcceptedValuesRule", "NotNullRule", "RelationshipRule", "UniqueRule"]
@@ -0,0 +1,55 @@
1
from collections.abc import Mapping
from dataclasses import dataclass, field
from datetime import UTC, datetime
from enum import StrEnum
from typing import Any, Protocol, runtime_checkable
6
+
7
+ try:
8
+ import polars as pl
9
+ except ModuleNotFoundError:
10
+ # pragma: no cover - polars is an optional runtime dependency in tests
11
+ pl = None # type: ignore
12
+
13
+
14
class RuleSeverity(StrEnum):
    """How serious a rule failure is.

    Only ERROR failures abort a fail-fast checker run (see
    DataQualityChecker.run); WARN failures are recorded but never abort.
    """

    ERROR = "error"
    WARN = "warn"
17
+
18
+
19
class RuleStatus(StrEnum):
    """Binary outcome of evaluating a single rule."""

    PASSED = "passed"
    FAILED = "failed"
22
+
23
+
24
@dataclass(slots=True)
class RuleResult:
    """Represents the outcome of a single rule."""

    rule_name: str  # e.g. "NotNullRule::order_id" (class name :: column)
    status: RuleStatus
    message: str  # human-readable summary of the outcome
    severity: RuleSeverity
    # Optional rule-specific counters (null_count, violation_count, ...);
    # serialized to JSON by SQLiteRunLogger.
    metrics: Mapping[str, Any] | None = None

    @property
    def passed(self) -> bool:
        """True when the rule evaluation succeeded."""
        return self.status is RuleStatus.PASSED
37
+
38
+
39
+ @dataclass(slots=True)
40
+ class RuleContext:
41
+ dataset_name: str
42
+ run_id: str
43
+ executed_at: datetime = datetime.now(tz=UTC)
44
+
45
+
46
@runtime_checkable
class BaseRule(Protocol):
    """All validation rules must follow this shape.

    ``runtime_checkable`` permits structural ``isinstance(obj, BaseRule)``
    checks; rules need not inherit from this class explicitly.
    """

    name: str  # stable identifier used in logs and reports
    description: str  # human-readable summary of what the rule checks
    severity: RuleSeverity  # controls fail-fast behavior in the checker

    def evaluate(self, df: "pl.DataFrame") -> RuleResult:  # pragma: no cover - protocol
        ...
@@ -0,0 +1,130 @@
1
+ from collections.abc import Iterable
2
+ from dataclasses import dataclass
3
+
4
+ import polars as pl
5
+
6
+ from .base import BaseRule, RuleResult, RuleSeverity, RuleStatus
7
+
8
+
9
@dataclass(slots=True)
class ColumnRule(BaseRule):
    """Shared base for rules that validate a single dataframe column."""

    column: str
    severity: RuleSeverity = RuleSeverity.ERROR
    description: str | None = None

    def __post_init__(self) -> None:
        # Any falsy description (None or empty) is replaced by a generated one.
        if not self.description:
            self.description = f"{self.__class__.__name__} on {self.column}"

    @property
    def name(self) -> str:
        """Stable identifier combining the rule class and its target column."""
        return f"{self.__class__.__name__}::{self.column}"
23
+
24
+
25
class NotNullRule(ColumnRule):
    """Fails when the target column contains any null values."""

    def evaluate(self, df: pl.DataFrame) -> RuleResult:
        """Count nulls in the column; zero nulls means the rule passes."""
        null_count = df.select(pl.col(self.column).is_null().sum().alias("nulls")).item()
        if null_count == 0:
            status = RuleStatus.PASSED
            message = "column has no nulls"
        else:
            status = RuleStatus.FAILED
            message = f"{null_count} null values found"
        return RuleResult(
            rule_name=self.name,
            status=status,
            message=message,
            severity=self.severity,
            metrics={"null_count": null_count},
        )
41
+
42
+
43
class UniqueRule(ColumnRule):
    """Fails when the target column contains duplicate values."""

    def evaluate(self, df: pl.DataFrame) -> RuleResult:
        """Compare row count against distinct count; any gap is duplicates."""
        row_count = df.height
        distinct_count = df.select(pl.col(self.column).n_unique().alias("unique")).item()
        duplicate_count = row_count - distinct_count
        if duplicate_count == 0:
            status = RuleStatus.PASSED
            message = "column values are unique"
        else:
            status = RuleStatus.FAILED
            message = f"{duplicate_count} duplicate rows found"
        return RuleResult(
            rule_name=self.name,
            status=status,
            message=message,
            severity=self.severity,
            metrics={"duplicate_count": duplicate_count},
        )
61
+
62
+
63
class AcceptedValuesRule(ColumnRule):
    """Fails when the column holds values outside an allowed set."""

    def __init__(
        self,
        column: str,
        allowed_values: Iterable[str | int | float],
        severity: RuleSeverity = RuleSeverity.ERROR,
    ):
        super().__init__(column=column, severity=severity)
        # dict.fromkeys keeps first-seen order while dropping duplicates.
        self.allowed_values = tuple(dict.fromkeys(allowed_values))

    def evaluate(self, df: pl.DataFrame) -> RuleResult:
        """Count rows whose column value is not in the allowed set."""
        offending = (
            df.filter(~pl.col(self.column).is_in(self.allowed_values))
            .select(pl.len())
            .item()
        )
        if offending == 0:
            status = RuleStatus.PASSED
            message = "column values match allowed set"
        else:
            status = RuleStatus.FAILED
            message = f"{offending} disallowed values detected"
        return RuleResult(
            rule_name=self.name,
            status=status,
            message=message,
            severity=self.severity,
            metrics={
                "violation_count": offending,
                "allowed_values": self.allowed_values,
            },
        )
95
+
96
+
97
class RelationshipRule(ColumnRule):
    """Fails when column values are absent from a reference column."""

    def __init__(
        self,
        column: str,
        reference_df: pl.DataFrame,
        reference_column: str,
        severity: RuleSeverity = RuleSeverity.ERROR,
    ) -> None:
        super().__init__(column=column, severity=severity)
        # Retain only the single reference column to limit held memory.
        self._reference_df = reference_df.select(reference_column)
        self._reference_column = reference_column

    def evaluate(self, df: pl.DataFrame) -> RuleResult:
        """Count values in *df* that do not appear in the reference column."""
        reference_set = set(self._reference_df[self._reference_column].to_list())
        missing = (
            df.filter(~pl.col(self.column).is_in(reference_set)).select(pl.len()).item()
        )
        if missing == 0:
            status = RuleStatus.PASSED
            message = "referential integrity holds"
        else:
            status = RuleStatus.FAILED
            message = f"{missing} values missing from reference {self._reference_column}"
        return RuleResult(
            rule_name=self.name,
            status=status,
            message=message,
            severity=self.severity,
            metrics={
                "violation_count": missing,
                "reference_column": self._reference_column,
                "reference_size": len(reference_set),
            },
        )
aqualisys/cli.py ADDED
@@ -0,0 +1,44 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ import click
5
+
6
+ from .config import ValidationSuiteConfig
7
+
8
+
9
@click.group()
def cli() -> None:
    """CLI entry point for running data quality suites."""
    # Intentionally empty: click dispatches to registered subcommands.
12
+
13
+
14
@cli.command("validate")
@click.argument(
    "config_path",
    type=click.Path(exists=True, dir_okay=False, path_type=Path),
)
def validate_command(config_path: Path) -> None:
    """Run the configured validation suite and emit a JSON summary."""

    # Load the dataset before building the checker, so a bad dataset path
    # fails before the run logger touches its database file.
    suite = ValidationSuiteConfig.from_yaml(config_path)
    frame = suite.load_dataframe()
    quality_checker = suite.build_checker()
    outcome = quality_checker.run(frame, dataset_name=suite.dataset_name)

    payload = {
        "run_id": outcome.run_id,
        "dataset": outcome.dataset_name,
        "passed": outcome.passed,
        "failed_rules": [failure.rule_name for failure in outcome.failed_rules],
    }
    click.echo(json.dumps(payload, indent=2))

    # Non-zero exit code lets CI pipelines fail on validation errors.
    if not outcome.passed:
        raise SystemExit(1)
37
+
38
+
39
def run() -> None:
    """Console-script entry point (registered as `aqualisys` in entry_points)."""
    cli(prog_name="aqualisys")


if __name__ == "__main__":  # pragma: no cover - script entry point
    run()
aqualisys/config.py ADDED
@@ -0,0 +1,112 @@
1
+ from collections.abc import Callable, Mapping
2
+ from dataclasses import dataclass
3
+ from pathlib import Path
4
+ from typing import Any, ClassVar
5
+
6
+ import yaml
7
+
8
+ try:
9
+ import polars as pl
10
+ except ModuleNotFoundError:
11
+ # pragma: no cover - optional dependency
12
+ pl = None # type: ignore
13
+
14
+ from .checker import DataQualityChecker
15
+ from .checks.base import BaseRule
16
+ from .checks.rules import AcceptedValuesRule, NotNullRule, RelationshipRule, UniqueRule
17
+ from .logging.sqlite import SQLiteRunLogger
18
+
19
# Contract every rule builder satisfies: raw config mapping in, rule instance out.
RuleFactory = Callable[[Mapping[str, Any]], BaseRule]
20
+
21
+
22
def _build_not_null(config: Mapping[str, Any]) -> BaseRule:
    """Build a NotNullRule from a config mapping (requires a `column` key)."""
    return NotNullRule(column=config["column"])
24
+
25
+
26
def _build_unique(config: Mapping[str, Any]) -> BaseRule:
    """Build a UniqueRule from a config mapping (requires a `column` key)."""
    return UniqueRule(column=config["column"])
28
+
29
+
30
def _build_accepted(config: Mapping[str, Any]) -> BaseRule:
    """Build an AcceptedValuesRule (requires `column` and `allowed_values` keys)."""
    return AcceptedValuesRule(
        column=config["column"],
        allowed_values=config["allowed_values"],
    )
35
+
36
+
37
def _build_relationship(config: Mapping[str, Any]) -> BaseRule:
    """Build a RelationshipRule, loading its reference dataframe from disk."""
    if pl is None:
        # pragma: no cover - config is still valid without runtime Polars
        raise RuntimeError("polars is required for relationship rules")
    reference_cfg = config["reference"]
    ref_path = Path(reference_cfg["path"])
    ref_format = reference_cfg.get("format", "parquet")
    if ref_format == "parquet":
        reference_df = pl.read_parquet(ref_path)
    elif ref_format == "csv":
        reference_df = pl.read_csv(ref_path)
    else:  # pragma: no cover - validated elsewhere
        raise ValueError(f"unsupported reference format: {ref_format}")
    return RelationshipRule(
        column=config["column"],
        reference_df=reference_df,
        reference_column=reference_cfg["column"],
    )
54
+
55
+
56
# Registry mapping a config "type" string to its builder; extend to add rule kinds.
RULE_BUILDERS: dict[str, RuleFactory] = {
    "not_null": _build_not_null,
    "unique": _build_unique,
    "accepted_values": _build_accepted,
    "relationship": _build_relationship,
}
62
+
63
+
64
@dataclass(slots=True)
class ValidationSuiteConfig:
    """Declarative description of a validation suite, loadable from YAML."""

    dataset_name: str
    dataset_path: Path
    dataset_format: str = "parquet"
    fail_fast: bool = False
    rules: list[Mapping[str, Any]] | None = None
    logger_path: Path = Path("aqualisys_runs.db")

    # Formats load_dataframe knows how to read.
    SUPPORTED_FORMATS: ClassVar[set[str]] = {"parquet", "csv"}

    @classmethod
    def from_yaml(cls, path: str | Path) -> "ValidationSuiteConfig":
        """Parse a YAML suite definition file into a config instance."""
        raw = yaml.safe_load(Path(path).read_text())
        dataset = raw["dataset"]
        logger_section = raw.get("logger", {})
        return cls(
            dataset_name=dataset["name"],
            dataset_path=Path(dataset["path"]),
            dataset_format=dataset.get("format", "parquet"),
            fail_fast=raw.get("fail_fast", False),
            rules=raw.get("rules", []),
            logger_path=Path(logger_section.get("path", "aqualisys_runs.db")),
        )

    def load_dataframe(self) -> "pl.DataFrame":
        """Read the configured dataset from disk into a Polars dataframe."""
        if pl is None:  # pragma: no cover - guard for environments lacking polars
            raise RuntimeError("polars is required to load dataframes")
        if self.dataset_format not in self.SUPPORTED_FORMATS:
            raise ValueError(f"unsupported dataset format: {self.dataset_format}")
        reader = pl.read_parquet if self.dataset_format == "parquet" else pl.read_csv
        return reader(self.dataset_path)

    def build_rules(self) -> list[BaseRule]:
        """Instantiate rule objects from the raw rule config mappings."""
        built: list[BaseRule] = []
        for rule_config in self.rules or []:
            rule_type = rule_config["type"]
            builder = RULE_BUILDERS.get(rule_type)
            if builder is None:
                raise ValueError(f"unknown rule type: {rule_type}")
            built.append(builder(rule_config))
        return built

    def build_checker(self) -> DataQualityChecker:
        """Assemble a checker wired to a SQLite run logger."""
        return DataQualityChecker(
            rules=self.build_rules(),
            logger=SQLiteRunLogger(self.logger_path),
            fail_fast=self.fail_fast,
        )
@@ -0,0 +1,3 @@
1
+ from .sqlite import SQLiteRunLogger
2
+
3
+ __all__ = ["SQLiteRunLogger"]
@@ -0,0 +1,37 @@
1
+ from abc import ABC, abstractmethod
2
+ from collections.abc import Iterable
3
+ from dataclasses import dataclass, field
4
+ from datetime import UTC, datetime
5
+
6
+ from ..checks.base import RuleContext, RuleResult
7
+
8
+
9
@dataclass(slots=True)
class RunSummary:
    """Aggregated metadata describing one validation run."""

    run_id: str  # identifier shared with the per-rule log records
    dataset_name: str
    started_at: datetime
    # Defaults to the UTC time the summary instance is created.
    finished_at: datetime = field(default_factory=lambda: datetime.now(tz=UTC))
15
+
16
+
17
class RunLogger(ABC):
    """Interface for persisting run metadata + rule outcomes.

    Callers invoke the hooks in order: ``log_run_started`` once, then
    ``log_rule_result`` per evaluated rule, then ``log_run_completed`` once
    (see DataQualityChecker.run).
    """

    @abstractmethod
    def log_run_started(self, context: RuleContext) -> None:
        """Persist metadata that a run has started."""
        ...  # pragma: no cover - interface

    @abstractmethod
    def log_rule_result(self, context: RuleContext, result: RuleResult) -> None:
        """Persist the outcome of a single rule execution."""
        ...  # pragma: no cover - interface

    @abstractmethod
    def log_run_completed(
        self,
        context: RuleContext,
        results: Iterable[RuleResult],
    ) -> None:
        """Persist that a run finished, including summary counts."""
        ...  # pragma: no cover
@@ -0,0 +1,113 @@
1
+ import json
2
+ import sqlite3
3
+ from collections.abc import Iterable
4
+ from datetime import UTC, datetime
5
+ from pathlib import Path
6
+
7
+ from ..checks.base import RuleContext, RuleResult
8
+ from .base import RunLogger
9
+
10
+
11
class SQLiteRunLogger(RunLogger):
    """Persists run + rule records to a lightweight SQLite database.

    Bug fix: ``sqlite3.Connection`` used as a context manager only manages
    the transaction (commit on success, rollback on error) -- it does NOT
    close the connection -- so the previous implementation leaked one open
    connection per logging call. Every helper here now closes explicitly
    in a ``finally`` block.
    """

    def __init__(self, db_path: str | Path = "aqualisys_runs.db") -> None:
        self.db_path = Path(db_path)
        self._ensure_schema()

    def _connect(self) -> sqlite3.Connection:
        """Open a fresh connection; WAL mode improves concurrent-read behavior."""
        conn = sqlite3.connect(self.db_path)
        conn.execute("PRAGMA journal_mode=WAL;")
        return conn

    def _execute(self, sql: str, params: tuple = ()) -> None:
        """Run one statement in its own transaction, always closing the connection."""
        conn = self._connect()
        try:
            with conn:  # commits on success, rolls back on exception
                conn.execute(sql, params)
        finally:
            conn.close()

    def _ensure_schema(self) -> None:
        """Create the runs and rule_results tables if they do not exist yet."""
        conn = self._connect()
        try:
            with conn:
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS runs
                    (
                        run_id TEXT PRIMARY KEY,
                        dataset_name TEXT NOT NULL,
                        started_at TEXT NOT NULL,
                        finished_at TEXT,
                        total_rules INTEGER DEFAULT 0,
                        failed_rules INTEGER DEFAULT 0
                    )
                """)
                conn.execute("""
                    CREATE TABLE IF NOT EXISTS rule_results
                    (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        run_id TEXT NOT NULL,
                        rule_name TEXT NOT NULL,
                        status TEXT NOT NULL,
                        severity TEXT NOT NULL,
                        message TEXT NOT NULL,
                        metrics TEXT,
                        recorded_at TEXT NOT NULL,
                        FOREIGN KEY (run_id) REFERENCES runs (run_id)
                    )
                """)
        finally:
            conn.close()

    def log_run_started(self, context: RuleContext) -> None:
        """Insert (or refresh) the run row with its start timestamp."""
        self._execute(
            """
            INSERT OR REPLACE INTO runs(run_id, dataset_name, started_at)
            VALUES (?, ?, ?)
            """,
            (context.run_id, context.dataset_name, context.executed_at.isoformat()),
        )

    def log_rule_result(self, context: RuleContext, result: RuleResult) -> None:
        """Append one rule outcome row, serializing metrics to JSON."""
        self._execute(
            """
            INSERT INTO rule_results(
                run_id,
                rule_name,
                status,
                severity,
                message,
                metrics,
                recorded_at
            )
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            (
                context.run_id,
                result.rule_name,
                result.status.value,
                result.severity.value,
                result.message,
                json.dumps(result.metrics or {}, default=str),
                datetime.now(tz=UTC).isoformat(),
            ),
        )

    def log_run_completed(
        self,
        context: RuleContext,
        results: Iterable[RuleResult],
    ) -> None:
        """Stamp the run row with completion time plus summary counts."""
        results_list = list(results)
        failed = sum(1 for result in results_list if not result.passed)
        self._execute(
            """
            UPDATE runs
            SET finished_at = ?,
                total_rules = ?,
                failed_rules = ?
            WHERE run_id = ?
            """,
            (
                datetime.now(tz=UTC).isoformat(),
                len(results_list),
                failed,
                context.run_id,
            ),
        )
@@ -0,0 +1,59 @@
1
+ Metadata-Version: 2.4
2
+ Name: aqualisys
3
+ Version: 0.1.0
4
+ Summary: Polars-first data-quality and data-validation toolkit.
5
+ Author-email: Aqualisys Maintainers <maintainers@aqualisys.dev>
6
+ License: MIT
7
+ License-File: LICENSE
8
+ Requires-Python: >=3.11
9
+ Requires-Dist: click>=8.1.7
10
+ Requires-Dist: polars>=0.20.0
11
+ Requires-Dist: pyyaml>=6.0.1
12
+ Provides-Extra: dev
13
+ Requires-Dist: black>=24.3.0; extra == 'dev'
14
+ Requires-Dist: mypy>=1.8.0; extra == 'dev'
15
+ Requires-Dist: pytest-cov>=4.1.0; extra == 'dev'
16
+ Requires-Dist: pytest>=7.4.4; extra == 'dev'
17
+ Requires-Dist: ruff>=0.2.1; extra == 'dev'
18
+ Description-Content-Type: text/markdown
19
+
20
+ # Aqualisys
21
+
22
+ Polars-first data-quality toolkit delivering deterministic validation, structured logging, and a composable rule registry.
23
+
24
+ ## Why Aqualisys?
25
+ - **Declarative rules**: ship reusable expectations such as not-null, uniqueness, accepted-values, and referential checks.
26
+ - **Deterministic logging**: every run is persisted to SQLite (JSON-friendly) for audits and debugging.
27
+ - **Pipeline-ready**: run from Python code or via `aqualisys validate configs/orders.yml` in CI.
28
+
29
+ ## Quick Start
30
+ ```bash
31
+ python -m venv .venv && source .venv/bin/activate
32
+ pip install -e .[dev]
33
+ pytest
34
+ aqualisys validate configs/orders.yml
35
+ ```
36
+
37
+ ## Usage Example
38
+ ```python
39
+ import polars as pl
40
+ from aqualisys import DataQualityChecker, NotNullRule, UniqueRule, SQLiteRunLogger
41
+
42
+ df = pl.DataFrame({"order_id": [1, 2, 3], "status": ["pending", "shipped", "shipped"]})
43
+ checker = DataQualityChecker(
44
+ rules=[NotNullRule("order_id"), UniqueRule("order_id")],
45
+ logger=SQLiteRunLogger("artifacts/example_runs.db"),
46
+ )
47
+ report = checker.run(df, dataset_name="orders")
48
+ assert report.passed
49
+ ```
50
+
51
+ ## Project Structure
52
+ - `src/aqualisys/`: library source (rules, checker, logging, CLI).
53
+ - `tests/`: pytest suites (unit + integration).
54
+ - `configs/`: sample validation suite definitions.
55
+ - `docs/`: roadmap and design notes.
56
+
57
+ See `docs/PUBLISHING.md` for uv-based build and release steps once you are ready to publish a new version.
58
+
59
+ See `docs/ROADMAP.md` for the multi-week implementation plan inspired by the Start Data Engineering guide.
@@ -0,0 +1,15 @@
1
+ aqualisys/__init__.py,sha256=uoq8i4KvvZP8LE42T0bWcAyIGYiZBB5QhAWNQptcQmI,483
2
+ aqualisys/checker.py,sha256=2-t8c-kuTxeEimMzWoXgPc_sOw3Pz5MtQVs-XZ2ocEY,2774
3
+ aqualisys/cli.py,sha256=muaadBuLCIPLxuIaPM0Bo4bVh8gjhFaMATukyUnTm_4,1082
4
+ aqualisys/config.py,sha256=qL9VLXv7ROTZrzfi3tQBaBsrugwVr_54fkR2Jqrl81I,3893
5
+ aqualisys/checks/__init__.py,sha256=LgjTJqFLU0cjrSfbsz9WArlz1zFhUusYsIBDlfry-8g,164
6
+ aqualisys/checks/base.py,sha256=TKjpufvSWwR12JtW63ubWMOFcSf3wyS5MTKQlo76eg8,1204
7
+ aqualisys/checks/rules.py,sha256=sBQWtWNbzSqFiuElcZAjqQWHw3ax07B3nHf-mcJTJk8,4303
8
+ aqualisys/logging/__init__.py,sha256=4dQpKjbidZeB6yLBP1TvcpcNQ8q4r2vZOnaptpx9nis,67
9
+ aqualisys/logging/base.py,sha256=nSIzV1uu9MVuKBEHqI0wQwgiWBVl1QaVFUSFdGp2yUY,1111
10
+ aqualisys/logging/sqlite.py,sha256=Zrcp-myNrpPqJeb1330fwGqHcP5uMbucbWQvWoPHNaA,3782
11
+ aqualisys-0.1.0.dist-info/METADATA,sha256=B18N61ONqgz3anSXYxIW30ZBGPa55HnEueIKEmXXLEc,2121
12
+ aqualisys-0.1.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
13
+ aqualisys-0.1.0.dist-info/entry_points.txt,sha256=FN2StKUy9iH4Q1-dLZk_tF4uWeFqjJ8-LRLhw8jCEmQ,48
14
+ aqualisys-0.1.0.dist-info/licenses/LICENSE,sha256=nPSYvbzst5Xo16pBTGo5Ju5BwC32lsEmKh8IPT4hxxA,1067
15
+ aqualisys-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ aqualisys = aqualisys.cli:run
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Absolentia
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.