PyPI - detectkit - Versions diffs - 0.1.0__tar.gz → 0.1.2__tar.gz - Mend

detectkit 0.1.0 (tar.gz) → 0.1.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60)

{detectkit-0.1.0/detectkit.egg-info → detectkit-0.1.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: detectkit
-Version: 0.1.0
+Version: 0.1.2
 Summary: Metric monitoring with automatic anomaly detection
 Author: detectkit team
 License: MIT
@@ -67,9 +67,11 @@ Dynamic: license-file
 ## Status
-🚧 **In Active Development** - Version 0.1.0
+✅ **Production Ready** - Version 0.1.2
-This is a complete rewrite of the original detectk library with modern architecture and best practices (2025).
+Published to PyPI: https://pypi.org/project/detectkit/
+Complete rewrite with modern architecture and full documentation (2025).
 ## Features
@@ -191,14 +193,17 @@ pytest tests/ --cov=detectkit --cov-report=html
 - ⚠️ Advanced detectors (Prophet, TimesFM) - optional extras
 - ⚠️ Additional alert channels (Telegram, Email) - optional
-See [TODO.md](TODO.md) for detailed development roadmap.
 ## Documentation
-- [ARCHITECTURE.md](ARCHITECTURE.md) - System architecture and design
-- [TECHNICAL_SPEC.md](TECHNICAL_SPEC.md) - Complete technical specification (Russian)
-- [TODO.md](TODO.md) - Development roadmap
-- [CLAUDE.md](CLAUDE.md) - Development context for AI assistants
+📚 **Complete documentation available at: https://github.com/alexeiveselov92/detectkit/tree/main/docs**
+- [Getting Started](https://github.com/alexeiveselov92/detectkit/blob/main/docs/getting-started/quickstart.md) - 5-minute quickstart
+- [Configuration Guide](https://github.com/alexeiveselov92/detectkit/blob/main/docs/guides/configuration.md) - All configuration options
+- [Detectors Guide](https://github.com/alexeiveselov92/detectkit/blob/main/docs/guides/detectors.md) - Choosing the right detector
+- [Alerting Guide](https://github.com/alexeiveselov92/detectkit/blob/main/docs/guides/alerting.md) - Setting up alerts
+- [CLI Reference](https://github.com/alexeiveselov92/detectkit/blob/main/docs/reference/cli.md) - Command-line documentation
+- [Examples](https://github.com/alexeiveselov92/detectkit/tree/main/docs/examples) - Real-world monitoring scenarios
 ## Requirements

{detectkit-0.1.0 → detectkit-0.1.2}/README.md RENAMED Viewed

@@ -6,9 +6,11 @@
 ## Status
-🚧 **In Active Development** - Version 0.1.0
+✅ **Production Ready** - Version 0.1.2
-This is a complete rewrite of the original detectk library with modern architecture and best practices (2025).
+Published to PyPI: https://pypi.org/project/detectkit/
+Complete rewrite with modern architecture and full documentation (2025).
 ## Features
@@ -130,14 +132,17 @@ pytest tests/ --cov=detectkit --cov-report=html
 - ⚠️ Advanced detectors (Prophet, TimesFM) - optional extras
 - ⚠️ Additional alert channels (Telegram, Email) - optional
-See [TODO.md](TODO.md) for detailed development roadmap.
 ## Documentation
-- [ARCHITECTURE.md](ARCHITECTURE.md) - System architecture and design
-- [TECHNICAL_SPEC.md](TECHNICAL_SPEC.md) - Complete technical specification (Russian)
-- [TODO.md](TODO.md) - Development roadmap
-- [CLAUDE.md](CLAUDE.md) - Development context for AI assistants
+📚 **Complete documentation available at: https://github.com/alexeiveselov92/detectkit/tree/main/docs**
+- [Getting Started](https://github.com/alexeiveselov92/detectkit/blob/main/docs/getting-started/quickstart.md) - 5-minute quickstart
+- [Configuration Guide](https://github.com/alexeiveselov92/detectkit/blob/main/docs/guides/configuration.md) - All configuration options
+- [Detectors Guide](https://github.com/alexeiveselov92/detectkit/blob/main/docs/guides/detectors.md) - Choosing the right detector
+- [Alerting Guide](https://github.com/alexeiveselov92/detectkit/blob/main/docs/guides/alerting.md) - Setting up alerts
+- [CLI Reference](https://github.com/alexeiveselov92/detectkit/blob/main/docs/reference/cli.md) - Command-line documentation
+- [Examples](https://github.com/alexeiveselov92/detectkit/tree/main/docs/examples) - Real-world monitoring scenarios
 ## Requirements

{detectkit-0.1.0 → detectkit-0.1.2}/detectkit/cli/commands/run.py RENAMED Viewed

@@ -12,6 +12,7 @@ import click
 from detectkit.config.metric_config import MetricConfig
 from detectkit.config.profile import ProfilesConfig
+from detectkit.config.validator import validate_metric_uniqueness
 from detectkit.database.internal_tables import InternalTablesManager
 from detectkit.orchestration.task_manager import PipelineStep, TaskManager
@@ -65,16 +66,37 @@ def run_command(
     # project_config = load_project_config(project_root)
     # Select metrics based on selector
-    metrics = select_metrics(select, project_root)
+    # Returns list of (path, config) tuples with uniqueness validation
+    try:
+        metrics = select_metrics(select, project_root)
+    except ValueError as e:
+        click.echo(
+            click.style(
+                f"Error: {e}",
+                fg="red",
+                bold=True,
+            )
+        )
+        return
     # Exclude metrics if specified
     if exclude:
-        excluded_metrics = select_metrics(exclude, project_root)
-        excluded_names = {m.name for m in excluded_metrics}
-        metrics = [m for m in metrics if m.name not in excluded_names]
+        try:
+            excluded_metrics = select_metrics(exclude, project_root)
+            excluded_names = {config.name for _, config in excluded_metrics}
+            metrics = [(path, config) for path, config in metrics if config.name not in excluded_names]
-        if excluded_metrics:
-            click.echo(f"Excluded {len(excluded_metrics)} metric(s) matching: {exclude}")
+            if excluded_metrics:
+                click.echo(f"Excluded {len(excluded_metrics)} metric(s) matching: {exclude}")
+        except ValueError as e:
+            click.echo(
+                click.style(
+                    f"Error in exclusion selector: {e}",
+                    fg="red",
+                    bold=True,
+                )
+            )
+            return
     if not metrics:
         click.echo(
@@ -150,9 +172,10 @@ def run_command(
     )
     # Process each metric
-    for metric_path in metrics:
+    for metric_path, config in metrics:
         process_metric(
             metric_path=metric_path,
+            config=config,
             project_root=project_root,
             task_manager=task_manager,
             steps=step_list,
@@ -254,9 +277,9 @@ def find_project_root() -> Optional[Path]:
     return None
-def select_metrics(selector: str, project_root: Path) -> List[Path]:
+def select_metrics(selector: str, project_root: Path) -> List[tuple[Path, MetricConfig]]:
     """
-    Select metrics based on selector.
+    Select metrics based on selector and validate uniqueness.
     Selector types:
     - Metric name: "cpu_usage"
@@ -268,34 +291,44 @@ def select_metrics(selector: str, project_root: Path) -> List[Path]:
         project_root: Project root path
     Returns:
-        List of metric file paths
+        List of (path, config) tuples for selected metrics
+    Raises:
+        ValueError: If duplicate metric names found or configs invalid
     """
     metrics_dir = project_root / "metrics"
     if not metrics_dir.exists():
         return []
+    # Collect metric paths based on selector
+    metric_paths: List[Path] = []
     # Tag selector
     if selector.startswith("tag:"):
         tag = selector[4:]
-        return find_metrics_by_tag(metrics_dir, tag)
+        metric_paths = find_metrics_by_tag(metrics_dir, tag)
     # Path pattern selector
-    if "*" in selector or "/" in selector:
+    elif "*" in selector or "/" in selector:
         pattern = selector if selector.startswith("metrics/") else f"metrics/{selector}"
-        return list(project_root.glob(pattern))
-    # Metric name selector
-    metric_file = metrics_dir / f"{selector}.yml"
-    if metric_file.exists():
-        return [metric_file]
+        metric_paths = list(project_root.glob(pattern))
+    # Metric name selector (only searches root metrics/ directory)
+    else:
+        metric_file = metrics_dir / f"{selector}.yml"
+        if metric_file.exists():
+            metric_paths = [metric_file]
+        else:
+            # Try with .yaml extension
+            metric_file = metrics_dir / f"{selector}.yaml"
+            if metric_file.exists():
+                metric_paths = [metric_file]
-    # Try with .yaml extension
-    metric_file = metrics_dir / f"{selector}.yaml"
-    if metric_file.exists():
-        return [metric_file]
+    if not metric_paths:
+        return []
-    return []
+    # Validate uniqueness and load configs
+    # This will raise ValueError if duplicate metric names found
+    return validate_metric_uniqueness(metric_paths)
 def find_metrics_by_tag(metrics_dir: Path, tag: str) -> List[Path]:
@@ -330,6 +363,7 @@ def find_metrics_by_tag(metrics_dir: Path, tag: str) -> List[Path]:
 def process_metric(
     metric_path: Path,
+    config: MetricConfig,
     project_root: Path,
     task_manager: TaskManager,
     steps: List[PipelineStep],
@@ -343,6 +377,7 @@ def process_metric(
     Args:
         metric_path: Path to metric YAML file
+        config: Loaded and validated metric configuration
         project_root: Project root directory
         task_manager: Task manager instance
         steps: Pipeline steps to execute
@@ -351,10 +386,11 @@ def process_metric(
         full_refresh: Full refresh flag
         force: Force flag
     """
-    metric_name = metric_path.stem
+    # Use config.name (not metric_path.stem) for consistency
+    metric_name = config.name
-    click.echo(click.style(f"Processing: {metric_name}", fg="cyan", bold=True))
-    click.echo(f"  File: {metric_path}")
+    click.echo(click.style(f"Processing metric: {metric_name}", fg="cyan", bold=True))
+    click.echo(f"  Config file: {metric_path.relative_to(project_root)}")
     click.echo(f"  Steps: {', '.join(s.value for s in steps)}")
     if from_date:
@@ -368,19 +404,6 @@ def process_metric(
     click.echo()
-    # Load metric configuration
-    try:
-        config = MetricConfig.from_yaml_file(metric_path)
-    except Exception as e:
-        click.echo(
-            click.style(
-                f"  ✗ Error loading metric config: {e}",
-                fg="red",
-            )
-        )
-        click.echo()
-        return
     # Run pipeline
     try:
         result = task_manager.run_metric(

{detectkit-0.1.0 → detectkit-0.1.2}/detectkit/config/metric_config.py RENAMED Viewed

@@ -231,6 +231,7 @@ class MetricConfig(BaseModel):
     Attributes:
         name: Metric name (unique identifier)
+        tags: Optional list of tags for metric selection (e.g., ["critical", "api"])
         profile: Profile name to use (overrides default_profile from project config)
         query: Inline SQL query (mutually exclusive with query_file)
         query_file: Path to SQL file (mutually exclusive with query)
@@ -246,6 +247,7 @@ class MetricConfig(BaseModel):
     Example YAML:
         ```yaml
         name: cpu_usage
+        tags: ["critical", "infrastructure", "10min"]
         profile: clickhouse_prod
         query_file: sql/cpu_usage.sql
         query_columns:
@@ -275,6 +277,10 @@ class MetricConfig(BaseModel):
     """
     name: str = Field(..., description="Metric name")
+    tags: Optional[List[str]] = Field(
+        default=None,
+        description="Optional tags for metric selection (e.g., ['critical', 'api', '10min'])",
+    )
     profile: Optional[str] = Field(
         default=None, description="Profile name to use (overrides default_profile)"
     )
@@ -335,6 +341,32 @@ class MetricConfig(BaseModel):
             )
         return v
+    @field_validator("tags")
+    @classmethod
+    def validate_tags(cls, v: Optional[List[str]]) -> Optional[List[str]]:
+        """Validate tags field."""
+        if v is None:
+            return v
+        if not v:
+            raise ValueError("tags list cannot be empty (use null instead)")
+        # Check for duplicate tags
+        if len(v) != len(set(v)):
+            raise ValueError("Duplicate tags not allowed")
+        # Validate each tag format (alphanumeric + underscore + dash)
+        for tag in v:
+            if not tag:
+                raise ValueError("Empty tag not allowed")
+            if not all(c.isalnum() or c in ("_", "-") for c in tag):
+                raise ValueError(
+                    f"Invalid tag '{tag}': only alphanumeric characters, "
+                    f"underscores, and dashes allowed"
+                )
+        return v
     @field_validator("loading_batch_size")
     @classmethod
     def validate_batch_size(cls, v: int) -> int:

detectkit-0.1.2/detectkit/config/validator.py ADDED Viewed

@@ -0,0 +1,124 @@
+"""
+Metric configuration validation.
+This module provides validation functions for metric configurations,
+ensuring data integrity and preventing configuration errors.
+"""
+from pathlib import Path
+from typing import List, Tuple
+from detectkit.config.metric_config import MetricConfig
+def validate_metric_uniqueness(metric_paths: List[Path]) -> List[Tuple[Path, MetricConfig]]:
+    """
+    Load all metrics and validate that metric names are unique.
+    This validation is CRITICAL for data integrity because duplicate metric names
+    would cause:
+    - Data corruption (mixed data in _dtk_datapoints table)
+    - Task blocking (lock conflicts in _dtk_tasks table)
+    - Wrong anomaly detection (detectors receive mixed data from different sources)
+    - Data loss (ReplacingMergeTree ignores duplicate inserts)
+    Args:
+        metric_paths: List of paths to metric YAML files
+    Returns:
+        List of (path, config) tuples for all valid metrics
+    Raises:
+        ValueError: If duplicate metric names are found, with clear error message
+            showing which files have conflicting names, or if any metric config
+            fails to load or parse (the original exception is chained as the cause)
+    Example:
+        >>> paths = [Path("metrics/api/cpu.yml"), Path("metrics/system/cpu.yml")]
+        >>> validate_metric_uniqueness(paths)
+        ValueError: Duplicate metric name 'cpu_usage' found:
+          - metrics/api/cpu.yml
+          - metrics/system/cpu.yml
+        Metric names must be unique across the project.
+        Please rename one of the metrics to avoid data corruption.
+    """
+    configs: List[Tuple[Path, MetricConfig]] = []
+    seen_names: dict[str, Path] = {}
+    for metric_path in metric_paths:
+        # Load and parse config
+        try:
+            config = MetricConfig.from_yaml_file(metric_path)
+        except Exception as e:
+            raise ValueError(
+                f"Failed to parse metric config at {metric_path}:\n{e}"
+            ) from e
+        # Check for duplicate metric names
+        if config.name in seen_names:
+            conflicting_path = seen_names[config.name]
+            raise ValueError(
+                f"Duplicate metric name '{config.name}' found:\n"
+                f"  - {conflicting_path}\n"
+                f"  - {metric_path}\n\n"
+                f"Metric names must be unique across the project.\n"
+                f"Please rename one of the metrics to avoid data corruption."
+            )
+        seen_names[config.name] = metric_path
+        configs.append((metric_path, config))
+    return configs
+def validate_project_metrics(project_root: Path) -> List[Tuple[Path, MetricConfig]]:
+    """
+    Load and validate all metrics in the project.
+    This is a convenience function that:
+    1. Finds all *.yml and *.yaml files in the metrics/ directory (recursively)
+    2. Validates uniqueness of metric names
+    3. Returns validated list of (path, config) tuples
+    Args:
+        project_root: Path to project root directory (contains metrics/ folder)
+    Returns:
+        List of (path, config) tuples for all valid metrics
+    Raises:
+        ValueError: If no metric files are found, duplicate metric names exist, or a config fails validation
+        FileNotFoundError: If metrics/ directory doesn't exist
+    Example:
+        >>> from pathlib import Path
+        >>> project_root = Path("/path/to/project")
+        >>> metrics = validate_project_metrics(project_root)
+        >>> for path, config in metrics:
+        ...     print(f"{config.name}: {path}")
+    """
+    metrics_dir = project_root / "metrics"
+    if not metrics_dir.exists():
+        raise FileNotFoundError(
+            f"Metrics directory not found: {metrics_dir}\n"
+            f"Expected structure:\n"
+            f"  {project_root}/\n"
+            f"    metrics/\n"
+            f"      your_metric.yml\n"
+        )
+    # Find all metric files recursively
+    metric_paths = []
+    for pattern in ["**/*.yml", "**/*.yaml"]:
+        metric_paths.extend(metrics_dir.glob(pattern))
+    if not metric_paths:
+        raise ValueError(
+            f"No metric files found in {metrics_dir}\n"
+            f"Expected at least one *.yml or *.yaml file."
+        )
+    # Validate uniqueness
+    return validate_metric_uniqueness(metric_paths)

detectkit 0.1.0__tar.gz → 0.1.2__tar.gz

detectkit 0.1.0 (tar.gz) → 0.1.2 (tar.gz)