anomx 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- anomx/__init__.py +53 -0
- anomx/__main__.py +6 -0
- anomx/_shared.py +40 -0
- anomx/cli.py +22 -0
- anomx/components/__init__.py +29 -0
- anomx/components/algorithms/__init__.py +14 -0
- anomx/components/algorithms/base.py +19 -0
- anomx/components/algorithms/contracts.py +49 -0
- anomx/components/algorithms/offline/__init__.py +28 -0
- anomx/components/algorithms/offline/catalog.py +58 -0
- anomx/components/algorithms/offline/pipeline.py +74 -0
- anomx/components/base.py +170 -0
- anomx/components/detection/__init__.py +11 -0
- anomx/components/detection/detectors/__init__.py +9 -0
- anomx/components/detection/detectors/base.py +18 -0
- anomx/components/detection/detectors/threshold.py +40 -0
- anomx/components/detection/scorers/__init__.py +9 -0
- anomx/components/detection/scorers/base.py +18 -0
- anomx/components/detection/scorers/zscore.py +41 -0
- anomx/components/models/__init__.py +13 -0
- anomx/components/models/base.py +30 -0
- anomx/components/models/forecasting/__init__.py +7 -0
- anomx/components/models/forecasting/rolling_window.py +86 -0
- anomx/components/models/reconstruction/__init__.py +7 -0
- anomx/components/models/reconstruction/pca.py +99 -0
- anomx/components/models/representation/__init__.py +7 -0
- anomx/components/models/representation/isolation_forest.py +80 -0
- anomx/data/__init__.py +10 -0
- anomx/data/connectors/__init__.py +9 -0
- anomx/data/connectors/base.py +26 -0
- anomx/data/connectors/local_fs.py +51 -0
- anomx/data/sequences/__init__.py +7 -0
- anomx/data/sequences/timeseries.py +28 -0
- anomx/datasets/__init__.py +10 -0
- anomx/datasets/core.py +136 -0
- anomx/datasets/loaders/__init__.py +5 -0
- anomx/datasets/loaders/synthetic.py +38 -0
- anomx/detectors/__init__.py +10 -0
- anomx/detectors/base.py +50 -0
- anomx/detectors/statistical.py +61 -0
- anomx/integrations/__init__.py +8 -0
- anomx/integrations/darts.py +83 -0
- anomx/models/__init__.py +16 -0
- anomx/models/base.py +35 -0
- anomx/models/forecasting/__init__.py +12 -0
- anomx/models/naive.py +64 -0
- anomx/models/reconstruction/__init__.py +7 -0
- anomx/models/representation/__init__.py +7 -0
- anomx/py.typed +1 -0
- anomx/scorers/__init__.py +10 -0
- anomx/scorers/base.py +15 -0
- anomx/scorers/statistical.py +43 -0
- anomx-0.2.0.dist-info/METADATA +169 -0
- anomx-0.2.0.dist-info/RECORD +57 -0
- anomx-0.2.0.dist-info/WHEEL +4 -0
- anomx-0.2.0.dist-info/entry_points.txt +2 -0
- anomx-0.2.0.dist-info/licenses/LICENSE +17 -0
anomx/__init__.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
"""Anomx core library for time-series anomaly detection and reusable workflows."""
|
|
2
|
+
|
|
3
|
+
from anomx.components import (
|
|
4
|
+
BaseAlgorithm,
|
|
5
|
+
BaseAnomalyModel,
|
|
6
|
+
BaseComponent,
|
|
7
|
+
BaseDetector,
|
|
8
|
+
BaseScorer,
|
|
9
|
+
IsolationForestModel,
|
|
10
|
+
PcaReconstructionModel,
|
|
11
|
+
PipelineOrchestrator,
|
|
12
|
+
RollingWindowForecastModel,
|
|
13
|
+
ThresholdDetector,
|
|
14
|
+
discover_component_payloads,
|
|
15
|
+
)
|
|
16
|
+
from anomx.data import BaseConnector, LocalFSConnector, TimeSeriesBatch
|
|
17
|
+
from anomx.datasets import TimeSeriesDataset
|
|
18
|
+
from anomx.detectors import AnomalyDetector, DetectionResult, MovingAverageDetector
|
|
19
|
+
from anomx.models import (
|
|
20
|
+
Forecast,
|
|
21
|
+
ForecastingModel,
|
|
22
|
+
NaiveSeasonalModel,
|
|
23
|
+
)
|
|
24
|
+
from anomx.scorers import AnomalyScorer, ThresholdScorer, ZScoreScorer
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"BaseAlgorithm",
|
|
28
|
+
"BaseAnomalyModel",
|
|
29
|
+
"BaseComponent",
|
|
30
|
+
"BaseConnector",
|
|
31
|
+
"BaseDetector",
|
|
32
|
+
"BaseScorer",
|
|
33
|
+
"AnomalyDetector",
|
|
34
|
+
"AnomalyScorer",
|
|
35
|
+
"DetectionResult",
|
|
36
|
+
"Forecast",
|
|
37
|
+
"ForecastingModel",
|
|
38
|
+
"IsolationForestModel",
|
|
39
|
+
"LocalFSConnector",
|
|
40
|
+
"MovingAverageDetector",
|
|
41
|
+
"NaiveSeasonalModel",
|
|
42
|
+
"PcaReconstructionModel",
|
|
43
|
+
"PipelineOrchestrator",
|
|
44
|
+
"RollingWindowForecastModel",
|
|
45
|
+
"ThresholdScorer",
|
|
46
|
+
"ThresholdDetector",
|
|
47
|
+
"TimeSeriesBatch",
|
|
48
|
+
"TimeSeriesDataset",
|
|
49
|
+
"ZScoreScorer",
|
|
50
|
+
"discover_component_payloads",
|
|
51
|
+
]
|
|
52
|
+
|
|
53
|
+
__version__ = "0.2.0"
|
anomx/__main__.py
ADDED
anomx/_shared.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Shared helpers for package-internal normalization and dataframe handling."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
import pandas as pd
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def normalize_text(value: object) -> str:
    """Return ``value`` as text with leading and trailing whitespace removed.

    ``None`` maps to the empty string. Every other value is converted with
    ``str()``, so falsy-but-meaningful inputs such as ``0`` or ``False`` keep
    their text instead of collapsing to ``""``. Interior whitespace,
    including newlines, is left untouched.
    """
    if value is None:
        return ""
    return str(value).strip()
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def normalise_component_key(value: object) -> str:
    """Build a lowercase identifier whose words are joined by underscores.

    The text form of ``value`` is lowercased, each run of characters outside
    ``a-z`` and ``0-9`` is replaced by a single underscore, and underscores
    at either end are dropped.
    """
    lowered = normalize_text(value).lower()
    return re.sub(r"[^a-z0-9]+", "_", lowered).strip("_")
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def ensure_dataframe(data: Any) -> pd.DataFrame:
    """Convert a supported tabular payload into an independent DataFrame.

    Args:
        data: A DataFrame, a Series, or anything the ``pd.DataFrame``
            constructor accepts.

    Returns:
        A DataFrame. DataFrame and Series inputs are copied so that editing
        the result never writes through to the caller's object; any other
        payload is handed to the ``pd.DataFrame`` constructor as-is.
    """
    if isinstance(data, pd.DataFrame):
        return data.copy()
    if isinstance(data, pd.Series):
        # to_frame() on its own may share its buffer with the Series, so
        # copy explicitly to match the DataFrame branch.
        return data.to_frame().copy()
    return pd.DataFrame(data)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def dataframe_to_records(frame: pd.DataFrame) -> list[dict[str, Any]]:
    """Turn a DataFrame into a list of row dictionaries.

    Works on a copy of ``frame``. Datetime-typed columns and a
    ``DatetimeIndex`` are cast to strings, then the index is moved into the
    rows as a regular column before the rows are exported.
    """
    working = frame.copy()
    is_datetime = pd.api.types.is_datetime64_any_dtype
    for name in working.columns:
        if not is_datetime(working[name]):
            continue
        working[name] = working[name].astype(str)
    if isinstance(working.index, pd.DatetimeIndex):
        working.index = working.index.astype(str)
    flattened = working.reset_index(drop=False)
    return list(flattened.to_dict(orient="records"))
|
anomx/cli.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Command line entry points for Anomx."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
|
|
7
|
+
from anomx import __version__
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def build_parser() -> argparse.ArgumentParser:
    """Create the ``anomx`` argument parser with its ``--version`` flag."""
    cli_parser = argparse.ArgumentParser(
        prog="anomx",
        description="Anomx time-series anomaly toolkit.",
    )
    cli_parser.add_argument(
        "--version",
        action="version",
        version=f"anomx {__version__}",
    )
    return cli_parser
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def main(argv: list[str] | None = None) -> int:
    """Parse ``argv``, print the usage text, and return exit status ``0``.

    ``argv`` is passed straight to ``ArgumentParser.parse_args``.
    """
    cli = build_parser()
    cli.parse_args(argv)
    cli.print_help()
    return 0
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
"""Reusable anomaly-detection components and discovery helpers."""
|
|
2
|
+
|
|
3
|
+
from anomx.components.base import BaseComponent, discover_component_payloads, iter_component_classes
|
|
4
|
+
from anomx.components.algorithms import BaseAlgorithm, PipelineAlgorithm, PipelineOrchestrator
|
|
5
|
+
from anomx.components.detection.detectors import BaseDetector, ThresholdDetector
|
|
6
|
+
from anomx.components.detection.scorers import BaseScorer, ZScoreScorer
|
|
7
|
+
from anomx.components.models import (
|
|
8
|
+
BaseAnomalyModel,
|
|
9
|
+
IsolationForestModel,
|
|
10
|
+
PcaReconstructionModel,
|
|
11
|
+
RollingWindowForecastModel,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
__all__ = [
|
|
15
|
+
"BaseAlgorithm",
|
|
16
|
+
"BaseAnomalyModel",
|
|
17
|
+
"BaseComponent",
|
|
18
|
+
"BaseDetector",
|
|
19
|
+
"BaseScorer",
|
|
20
|
+
"IsolationForestModel",
|
|
21
|
+
"PcaReconstructionModel",
|
|
22
|
+
"PipelineAlgorithm",
|
|
23
|
+
"PipelineOrchestrator",
|
|
24
|
+
"RollingWindowForecastModel",
|
|
25
|
+
"ThresholdDetector",
|
|
26
|
+
"ZScoreScorer",
|
|
27
|
+
"discover_component_payloads",
|
|
28
|
+
"iter_component_classes",
|
|
29
|
+
]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Algorithm components."""
|
|
2
|
+
|
|
3
|
+
from anomx.components.algorithms.base import BaseAlgorithm
|
|
4
|
+
from anomx.components.algorithms.contracts import JobResult, JobSpec, JobSummary
|
|
5
|
+
from anomx.components.algorithms.offline import PipelineAlgorithm, PipelineOrchestrator
|
|
6
|
+
|
|
7
|
+
__all__ = [
|
|
8
|
+
"BaseAlgorithm",
|
|
9
|
+
"JobResult",
|
|
10
|
+
"JobSpec",
|
|
11
|
+
"JobSummary",
|
|
12
|
+
"PipelineAlgorithm",
|
|
13
|
+
"PipelineOrchestrator",
|
|
14
|
+
]
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
"""Base algorithm contract."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from anomx.components.algorithms.contracts import JobResult, JobSpec
|
|
9
|
+
from anomx.components.base import BaseComponent
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class BaseAlgorithm(BaseComponent, ABC):
    """Abstract base class for reusable anomaly workflows.

    Subclasses must implement ``run_job``.
    """

    # Component category reported for every algorithm subclass.
    component_type = "algorithm"

    @abstractmethod
    def run_job(self, job_spec: JobSpec | dict[str, Any]) -> JobResult:
        """Execute an anomaly-detection workflow.

        Args:
            job_spec: The job description, either a ``JobSpec`` or a plain
                dictionary.

        Returns:
            The ``JobResult`` produced by the workflow.
        """
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
"""Contracts shared by algorithm orchestrators."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import asdict, dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass(frozen=True)
class JobSpec:
    """Immutable description of one job: component names plus settings."""

    connector: str
    model: str
    detector: str
    scorer: str
    config: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_mapping(cls, payload: dict[str, Any]) -> "JobSpec":
        """Build a spec from a plain mapping.

        Missing or falsy component names become ``""``; other values are
        converted to text and stripped. A missing or falsy ``config`` becomes
        an empty dict, otherwise it is shallow-copied.
        """

        def _name(key: str) -> str:
            return str(payload.get(key) or "").strip()

        return cls(
            connector=_name("connector"),
            model=_name("model"),
            detector=_name("detector"),
            scorer=_name("scorer"),
            config=dict(payload.get("config") or {}),
        )
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(frozen=True)
class JobSummary:
    """Immutable summary figures for one job."""

    rows_processed: int  # number of rows processed
    anomaly_count: int  # number of anomalies counted
    feature_columns: list[str] = field(default_factory=list)  # feature column names
    score_column: str = "zscore"  # name of the score column
    duration_ms: int = 0  # duration in milliseconds
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
@dataclass(frozen=True)
class JobResult:
    """Immutable outcome of one job run."""

    job_id: str  # identifier of this run
    status: str  # status label of the run
    connector: str  # connector name from the job spec
    model: str  # model name from the job spec
    scorer: str  # scorer name from the job spec
    detector: str  # detector name from the job spec
    summary: JobSummary  # summary figures for the run
    records: list[dict[str, Any]] = field(default_factory=list)  # output rows as dictionaries

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dictionary, nested summary included."""
        return asdict(self)
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Offline orchestration implementations."""
|
|
2
|
+
|
|
3
|
+
from anomx.components.algorithms.offline.catalog import (
|
|
4
|
+
COMPONENT_IMPLEMENTATIONS,
|
|
5
|
+
CONNECTOR_IMPLEMENTATIONS,
|
|
6
|
+
DETECTOR_IMPLEMENTATIONS,
|
|
7
|
+
IMPLEMENTATION_CATALOGS,
|
|
8
|
+
MODEL_IMPLEMENTATIONS,
|
|
9
|
+
SCORER_IMPLEMENTATIONS,
|
|
10
|
+
ComponentClass,
|
|
11
|
+
ComponentKind,
|
|
12
|
+
resolve_implementation,
|
|
13
|
+
)
|
|
14
|
+
from anomx.components.algorithms.offline.pipeline import PipelineAlgorithm, PipelineOrchestrator
|
|
15
|
+
|
|
16
|
+
__all__ = [
|
|
17
|
+
"COMPONENT_IMPLEMENTATIONS",
|
|
18
|
+
"CONNECTOR_IMPLEMENTATIONS",
|
|
19
|
+
"DETECTOR_IMPLEMENTATIONS",
|
|
20
|
+
"IMPLEMENTATION_CATALOGS",
|
|
21
|
+
"MODEL_IMPLEMENTATIONS",
|
|
22
|
+
"SCORER_IMPLEMENTATIONS",
|
|
23
|
+
"ComponentClass",
|
|
24
|
+
"ComponentKind",
|
|
25
|
+
"PipelineAlgorithm",
|
|
26
|
+
"PipelineOrchestrator",
|
|
27
|
+
"resolve_implementation",
|
|
28
|
+
]
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""Static implementation catalog for the default offline pipeline."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any, Literal, TypeAlias
|
|
6
|
+
|
|
7
|
+
from anomx._shared import normalise_component_key
|
|
8
|
+
from anomx.components.detection.detectors import BaseDetector, ThresholdDetector
|
|
9
|
+
from anomx.components.detection.scorers import BaseScorer, ZScoreScorer
|
|
10
|
+
from anomx.components.models import (
|
|
11
|
+
BaseAnomalyModel,
|
|
12
|
+
IsolationForestModel,
|
|
13
|
+
PcaReconstructionModel,
|
|
14
|
+
RollingWindowForecastModel,
|
|
15
|
+
)
|
|
16
|
+
from anomx.data.connectors import BaseConnector, LocalFSConnector
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
ComponentKind = Literal["connector", "model", "scorer", "detector"]
|
|
20
|
+
ComponentClass: TypeAlias = type[Any]
|
|
21
|
+
|
|
22
|
+
CONNECTOR_IMPLEMENTATIONS: dict[str, type[BaseConnector]] = {
|
|
23
|
+
"local_fs": LocalFSConnector,
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
MODEL_IMPLEMENTATIONS: dict[str, type[BaseAnomalyModel]] = {
|
|
27
|
+
"isolation_forest": IsolationForestModel,
|
|
28
|
+
"pca_reconstruction": PcaReconstructionModel,
|
|
29
|
+
"rolling_window_forecast": RollingWindowForecastModel,
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
SCORER_IMPLEMENTATIONS: dict[str, type[BaseScorer]] = {
|
|
33
|
+
"zscore": ZScoreScorer,
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
DETECTOR_IMPLEMENTATIONS: dict[str, type[BaseDetector]] = {
|
|
37
|
+
"threshold": ThresholdDetector,
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
IMPLEMENTATION_CATALOGS: dict[ComponentKind, dict[str, ComponentClass]] = {
|
|
41
|
+
"connector": CONNECTOR_IMPLEMENTATIONS,
|
|
42
|
+
"model": MODEL_IMPLEMENTATIONS,
|
|
43
|
+
"scorer": SCORER_IMPLEMENTATIONS,
|
|
44
|
+
"detector": DETECTOR_IMPLEMENTATIONS,
|
|
45
|
+
}
|
|
46
|
+
|
|
47
|
+
COMPONENT_IMPLEMENTATIONS = IMPLEMENTATION_CATALOGS
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def resolve_implementation(name: str, kind: ComponentKind) -> ComponentClass:
    """Look up the class registered under ``name`` in the catalog for ``kind``.

    ``name`` is normalised with ``normalise_component_key`` before the
    lookup.

    Raises:
        KeyError: If ``kind`` has no catalog, or the catalog has no entry
            for the normalised name. In the latter case the message lists
            the available keys.
    """
    lookup_key = normalise_component_key(name)
    registry = IMPLEMENTATION_CATALOGS[kind]
    found = registry.get(lookup_key)
    if found is None:
        choices = ", ".join(sorted(registry)) or "none"
        raise KeyError(f"Unknown {kind} implementation '{name}'. Available: {choices}.")
    return found
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
"""Default offline train-score-detect orchestration."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from time import perf_counter
|
|
6
|
+
from typing import Any, cast
|
|
7
|
+
from uuid import uuid4
|
|
8
|
+
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from anomx._shared import dataframe_to_records, ensure_dataframe
|
|
12
|
+
from anomx.components.algorithms.base import BaseAlgorithm
|
|
13
|
+
from anomx.components.algorithms.contracts import JobResult, JobSpec, JobSummary
|
|
14
|
+
from anomx.components.algorithms.offline.catalog import resolve_implementation
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
class PipelineOrchestrator(BaseAlgorithm):
    """Train, score, and classify a dataset with built-in library components."""

    component_key = "pipeline"
    component_name = "Pipeline"
    component_description = "Default offline orchestration pipeline for model, scorer, and detector components."
    component_config_schema = {
        "connector": {"type": "object"},
        "detector": {"type": "object"},
        "model": {"type": "object"},
        "scorer": {"type": "object"},
    }

    def run_job(self, job_spec: JobSpec | dict[str, Any]) -> JobResult:
        """Run connector -> model -> scorer -> detector and summarise the output.

        Args:
            job_spec: A ``JobSpec``, or a mapping that is converted with
                ``JobSpec.from_mapping``. Its ``config`` may hold one
                settings mapping per stage under the keys ``"connector"``,
                ``"model"``, ``"scorer"`` and ``"detector"``; a missing or
                null section is treated as empty.

        Returns:
            A ``JobResult`` with status ``"completed"``, a fresh UUID as job
            id, a ``JobSummary``, and the detector output as records.

        Raises:
            KeyError: Propagated from ``resolve_implementation`` when a
                component name is not in the catalog.
        """
        spec = job_spec if isinstance(job_spec, JobSpec) else JobSpec.from_mapping(job_spec)
        started_at = perf_counter()
        config = spec.config

        # Normalise each section once. An explicit ``null`` section would
        # otherwise survive ``config.get(stage, {})`` as None and break the
        # ``feature_columns`` lookup below.
        connector_config = config.get("connector") or {}
        model_config = config.get("model") or {}
        scorer_config = config.get("scorer") or {}
        detector_config = config.get("detector") or {}

        connector = cast(type, resolve_implementation(spec.connector, "connector"))()
        model = cast(type, resolve_implementation(spec.model, "model"))(model_config)
        scorer = cast(type, resolve_implementation(spec.scorer, "scorer"))(scorer_config)
        detector = cast(type, resolve_implementation(spec.detector, "detector"))(detector_config)

        raw_data = connector.read(connector_config)
        frame = ensure_dataframe(raw_data)
        model.fit(frame)
        predictions = ensure_dataframe(model.predict(frame))
        scored = ensure_dataframe(scorer.score(predictions))
        detected = ensure_dataframe(detector.detect(scored))

        # Report the configured feature columns, or fall back to every
        # numeric column of the input frame when none were configured.
        feature_columns = list(model_config.get("feature_columns") or [])
        if not feature_columns:
            feature_columns = frame.select_dtypes(include=["number"]).columns.tolist()

        # An empty boolean Series stands in when the detector output has no
        # "is_anomaly" column, so the count is 0.
        anomaly_series = detected.get("is_anomaly", pd.Series(dtype=bool))
        score_column = "zscore" if "zscore" in detected.columns else "model_score"
        anomaly_count = int(anomaly_series.sum())
        duration_ms = int((perf_counter() - started_at) * 1000)

        return JobResult(
            job_id=str(uuid4()),
            status="completed",
            connector=spec.connector,
            model=spec.model,
            scorer=spec.scorer,
            detector=spec.detector,
            summary=JobSummary(
                rows_processed=len(detected),
                anomaly_count=anomaly_count,
                feature_columns=feature_columns,
                score_column=score_column,
                duration_ms=duration_ms,
            ),
            records=dataframe_to_records(detected),
        )
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
PipelineAlgorithm = PipelineOrchestrator
|
anomx/components/base.py
ADDED
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
"""Component metadata helpers and discovery."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import inspect
|
|
6
|
+
import pkgutil
|
|
7
|
+
import re
|
|
8
|
+
from importlib import import_module
|
|
9
|
+
from pathlib import Path
|
|
10
|
+
from typing import Any, ClassVar
|
|
11
|
+
|
|
12
|
+
from anomx._shared import normalise_component_key, normalize_text
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
PROJECT_ROOT = Path(__file__).resolve().parents[3]
|
|
16
|
+
COMPONENT_PACKAGE_NAMES = (
|
|
17
|
+
"anomx.components.algorithms",
|
|
18
|
+
"anomx.components.detection",
|
|
19
|
+
"anomx.components.models",
|
|
20
|
+
)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def humanize_component_name(value: str | None) -> str:
    """Split a CamelCase identifier into space-separated words.

    A space is inserted before every uppercase ASCII letter except one at
    the very start of the text. ``None`` and blank input give ``""``.
    """
    text = str(value or "").strip()
    if text:
        return re.sub(r"(?<!^)(?=[A-Z])", " ", text).strip()
    return ""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def read_component_source_path(component_class: type[object]) -> str:
    """Return the file that defines ``component_class`` as a string path.

    The path is given relative to ``PROJECT_ROOT`` when the file lies below
    it; otherwise the resolved absolute path is returned.
    """
    located = inspect.getsourcefile(component_class)
    if not located:
        located = inspect.getfile(component_class)
    absolute = Path(located).resolve()
    try:
        relative = absolute.relative_to(PROJECT_ROOT)
    except ValueError:
        # relative_to() raises ValueError when the file is outside PROJECT_ROOT.
        return str(absolute)
    return str(relative)
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class BaseComponent:
    """Metadata interface shared by installable components.

    Subclasses describe themselves through the ``component_*`` class
    attributes. The ``get_component_*`` classmethods return cleaned values
    and fall back to derived defaults where noted.
    """

    component_type: ClassVar[str | None] = None
    component_key: ClassVar[str] = ""
    component_name: ClassVar[str] = ""
    component_description: ClassVar[str] = ""
    component_docs: ClassVar[str] = ""
    component_image_path: ClassVar[str] = ""
    component_status: ClassVar[str] = "active"
    component_default_config: ClassVar[dict[str, Any]] = {}
    component_config_schema: ClassVar[dict[str, Any]] = {}
    component_code_version: ClassVar[str] = ""

    @classmethod
    def is_component_abstract(cls) -> bool:
        """Tell whether the class is an ABC with open abstract methods or has no component type."""
        if inspect.isabstract(cls):
            return True
        return not cls.get_component_type()

    @classmethod
    def get_component_type(cls) -> str:
        """Return the normalized ``component_type`` text (may be empty)."""
        return normalize_text(getattr(cls, "component_type", ""))

    @classmethod
    def get_component_key(cls) -> str:
        """Return the normalised key, derived from key, then name, then class name."""
        raw = getattr(cls, "component_key", "")
        if not raw:
            raw = cls.get_component_name()
        if not raw:
            raw = cls.__name__
        return normalise_component_key(raw)

    @classmethod
    def get_component_name(cls) -> str:
        """Return ``component_name``, or a humanized class name when it is blank."""
        declared = normalize_text(getattr(cls, "component_name", ""))
        if declared:
            return declared
        return humanize_component_name(cls.__name__)

    @classmethod
    def get_component_docs(cls) -> str:
        """Return ``component_docs``, or the class docstring when it is blank."""
        declared = normalize_text(getattr(cls, "component_docs", ""))
        return declared or normalize_text(inspect.getdoc(cls))

    @classmethod
    def get_component_description(cls) -> str:
        """Return ``component_description``, or the first paragraph of the docs."""
        declared = normalize_text(getattr(cls, "component_description", ""))
        if declared:
            return declared
        docs = cls.get_component_docs()
        if docs:
            # Everything before the first blank line counts as the summary.
            return normalize_text(docs.split("\n\n", 1)[0])
        return ""

    @classmethod
    def get_component_image_path(cls) -> str:
        """Return the normalized ``component_image_path`` text."""
        return normalize_text(getattr(cls, "component_image_path", ""))

    @classmethod
    def get_component_status(cls) -> str:
        """Return ``component_status``, defaulting to ``"active"`` when blank."""
        declared = normalize_text(getattr(cls, "component_status", ""))
        return declared if declared else "active"

    @classmethod
    def get_component_default_config(cls) -> dict[str, Any]:
        """Return a shallow copy of ``component_default_config`` (``{}`` if not a dict)."""
        declared = getattr(cls, "component_default_config", {})
        if isinstance(declared, dict):
            return dict(declared)
        return {}

    @classmethod
    def get_component_config_schema(cls) -> dict[str, Any]:
        """Return a shallow copy of ``component_config_schema`` (``{}`` if not a dict)."""
        declared = getattr(cls, "component_config_schema", {})
        if isinstance(declared, dict):
            return dict(declared)
        return {}

    @classmethod
    def get_component_code_version(cls) -> str:
        """Return the normalized ``component_code_version`` text."""
        return normalize_text(getattr(cls, "component_code_version", ""))

    @classmethod
    def get_component_source_path(cls) -> str:
        """Return the path of the file that defines this class."""
        return read_component_source_path(cls)

    @classmethod
    def get_component_definition_payload(cls) -> dict[str, Any]:
        """Collect all component metadata into one dictionary."""
        payload = {
            "code_version": cls.get_component_code_version(),
            "component_type": cls.get_component_type(),
            "config_schema": cls.get_component_config_schema(),
            "default_config": cls.get_component_default_config(),
            "description": cls.get_component_description(),
            "docs": cls.get_component_docs(),
            "image_path": cls.get_component_image_path(),
            "key": cls.get_component_key(),
            "name": cls.get_component_name(),
            "python_class": cls.__name__,
            "python_module": cls.__module__,
            "source_path": cls.get_component_source_path(),
            "status": cls.get_component_status(),
        }
        return payload
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def import_component_modules(package_names: tuple[str, ...] = COMPONENT_PACKAGE_NAMES) -> None:
    """Import each named package and every module found beneath it.

    A name that resolves to a plain module (no ``__path__``) is imported but
    not walked.
    """
    for name in package_names:
        root = import_module(name)
        search_paths = getattr(root, "__path__", None)
        if search_paths is not None:
            prefix = f"{root.__name__}."
            for found in pkgutil.walk_packages(search_paths, prefix):
                import_module(found.name)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def iter_component_classes(package_names: tuple[str, ...] = COMPONENT_PACKAGE_NAMES):
    """Yield each concrete ``BaseComponent`` subclass exactly once.

    The packages in ``package_names`` are imported first so that their
    classes exist. The generator then walks the whole subclass tree of
    ``BaseComponent`` and skips classes whose ``is_component_abstract()``
    is true. The walk itself is not limited to classes defined inside
    ``package_names``.
    """
    import_component_modules(package_names=package_names)
    visited: set[type[object]] = set()
    stack: list[type[object]] = [BaseComponent]

    while stack:
        parent = stack.pop()
        for child in parent.__subclasses__():
            if child not in visited:
                visited.add(child)
                # Queue the child even when it is abstract, so its own
                # subclasses are still reached.
                stack.append(child)
                if not child.is_component_abstract():
                    yield child
|
|
160
|
+
|
|
161
|
+
|
|
162
|
+
def discover_component_payloads(package_names: tuple[str, ...] = COMPONENT_PACKAGE_NAMES) -> list[dict[str, Any]]:
    """Return the metadata payload of every concrete component, sorted by key.

    Payloads with an empty key are dropped. When two classes share a key,
    the one visited later replaces the earlier one.
    """
    by_key: dict[str, dict[str, Any]] = {}
    for found_class in iter_component_classes(package_names=package_names):
        definition = found_class.get_component_definition_payload()
        key = definition["key"]
        if not key:
            continue
        by_key[key] = definition
    ordered_keys = sorted(by_key)
    return [by_key[key] for key in ordered_keys]
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
"""Detection components."""
|
|
2
|
+
|
|
3
|
+
from anomx.components.detection.detectors import BaseDetector, ThresholdDetector
|
|
4
|
+
from anomx.components.detection.scorers import BaseScorer, ZScoreScorer
|
|
5
|
+
|
|
6
|
+
__all__ = [
|
|
7
|
+
"BaseDetector",
|
|
8
|
+
"BaseScorer",
|
|
9
|
+
"ThresholdDetector",
|
|
10
|
+
"ZScoreScorer",
|
|
11
|
+
]
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Base detector contract for component pipelines."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from anomx.components.base import BaseComponent
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class BaseDetector(BaseComponent, ABC):
    """Abstract base for components that convert scores into anomaly labels.

    Subclasses must implement ``detect``.
    """

    # Component category reported for every detector subclass.
    component_type = "detector"

    @abstractmethod
    def detect(self, predictions: Any) -> Any:
        """Return predictions with anomaly labels.

        Args:
            predictions: The input to label; the accepted payload type is
                left to the implementation.

        Returns:
            The predictions with anomaly labels added.
        """
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
"""Threshold-based detector component."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
import pandas as pd
|
|
8
|
+
|
|
9
|
+
from anomx._shared import ensure_dataframe
|
|
10
|
+
from anomx.components.detection.detectors.base import BaseDetector
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class ThresholdDetector(BaseDetector):
    """Flag anomalies whenever a score column crosses a fixed threshold."""

    component_key = "threshold"
    component_name = "Threshold Detector"
    component_default_config = {
        "source_column": "zscore",
        "threshold": 1.5,
    }
    component_config_schema = {
        "source_column": {"type": "string"},
        "threshold": {"type": "number"},
    }

    def __init__(self, config: dict[str, Any] | None = None) -> None:
        """Store the detector settings; ``None`` or an empty mapping means defaults."""
        self.config = config or {}

    def detect(self, predictions: Any) -> pd.DataFrame:
        """Label each row by comparing its absolute score with the threshold.

        Args:
            predictions: Tabular payload accepted by ``ensure_dataframe``. It
                must contain the configured ``source_column`` (default
                ``"zscore"``).

        Returns:
            A copy of the input frame with two added columns:
            ``is_anomaly`` (absolute score >= threshold, default ``1.5``)
            and ``severity`` (absolute score rounded to 4 decimals).

        Raises:
            KeyError: If the source column is missing from the input.
        """
        frame = ensure_dataframe(predictions)
        source_column = str(self.config.get("source_column", "zscore"))
        threshold = float(self.config.get("threshold", 1.5))
        if source_column not in frame.columns:
            raise KeyError(f"Source column '{source_column}' does not exist in detector input.")

        result = frame.copy()
        # Convert the score once, before any column is written, so both
        # derived columns are computed from the same original values.
        magnitude = result[source_column].astype(float).abs()
        result["is_anomaly"] = magnitude >= threshold
        result["severity"] = magnitude.round(4)
        return result
|