robotframework-testselection 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. TestSelection/__init__.py +3 -0
  2. TestSelection/cli.py +256 -0
  3. TestSelection/embedding/__init__.py +1 -0
  4. TestSelection/embedding/embedder.py +43 -0
  5. TestSelection/embedding/models.py +198 -0
  6. TestSelection/embedding/ports.py +24 -0
  7. TestSelection/execution/__init__.py +1 -0
  8. TestSelection/execution/listener.py +44 -0
  9. TestSelection/execution/prerun_modifier.py +43 -0
  10. TestSelection/execution/runner.py +75 -0
  11. TestSelection/parsing/__init__.py +1 -0
  12. TestSelection/parsing/datadriver_reader.py +54 -0
  13. TestSelection/parsing/keyword_resolver.py +51 -0
  14. TestSelection/parsing/suite_collector.py +85 -0
  15. TestSelection/parsing/text_builder.py +79 -0
  16. TestSelection/pipeline/__init__.py +1 -0
  17. TestSelection/pipeline/artifacts.py +110 -0
  18. TestSelection/pipeline/cache.py +74 -0
  19. TestSelection/pipeline/errors.py +18 -0
  20. TestSelection/pipeline/execute.py +52 -0
  21. TestSelection/pipeline/select.py +183 -0
  22. TestSelection/pipeline/vectorize.py +190 -0
  23. TestSelection/py.typed +0 -0
  24. TestSelection/selection/__init__.py +25 -0
  25. TestSelection/selection/dpp.py +31 -0
  26. TestSelection/selection/facility.py +25 -0
  27. TestSelection/selection/filtering.py +21 -0
  28. TestSelection/selection/fps.py +67 -0
  29. TestSelection/selection/kmedoids.py +32 -0
  30. TestSelection/selection/registry.py +70 -0
  31. TestSelection/selection/strategy.py +142 -0
  32. TestSelection/shared/__init__.py +1 -0
  33. TestSelection/shared/config.py +31 -0
  34. TestSelection/shared/types.py +117 -0
  35. robotframework_testselection-0.1.0.dist-info/METADATA +408 -0
  36. robotframework_testselection-0.1.0.dist-info/RECORD +39 -0
  37. robotframework_testselection-0.1.0.dist-info/WHEEL +4 -0
  38. robotframework_testselection-0.1.0.dist-info/entry_points.txt +2 -0
  39. robotframework_testselection-0.1.0.dist-info/licenses/LICENSE +191 -0
@@ -0,0 +1 @@
1
+ """Parsing context: Robot Framework suite parsing and text representation."""
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+
8
+ def read_datadriver_csv(
9
+ csv_path: Path | str,
10
+ template_name: str,
11
+ delimiter: str = ";",
12
+ ) -> list[dict[str, Any]]:
13
+ """Read a DataDriver CSV and return test case dicts.
14
+
15
+ The CSV follows DataDriver convention:
16
+ - First column header is '*** Test Cases ***'
17
+ - Subsequent columns are variable names (e.g., ${username})
18
+ - Rows starting with # are comments and are skipped
19
+
20
+ Args:
21
+ csv_path: Path to the DataDriver CSV file.
22
+ template_name: Name of the template keyword in the .robot file.
23
+ delimiter: CSV delimiter (DataDriver defaults to ';').
24
+
25
+ Returns:
26
+ List of dicts with name, description, source, is_datadriver keys.
27
+ """
28
+ csv_path = Path(csv_path)
29
+ tests: list[dict[str, Any]] = []
30
+ with open(csv_path, newline="", encoding="utf-8") as f:
31
+ reader = csv.DictReader(f, delimiter=delimiter)
32
+ for row in reader:
33
+ test_name = row.get("*** Test Cases ***", "").strip()
34
+ if not test_name or test_name.startswith("#"):
35
+ continue
36
+ args = {
37
+ k: v
38
+ for k, v in row.items()
39
+ if k is not None and k.startswith("${") and v
40
+ }
41
+ description = f"Template: {template_name}. Test: {test_name}."
42
+ if args:
43
+ description += " " + " ".join(
44
+ f"{k}={v}" for k, v in args.items()
45
+ )
46
+ tests.append(
47
+ {
48
+ "name": test_name,
49
+ "description": description,
50
+ "source": str(csv_path),
51
+ "is_datadriver": True,
52
+ }
53
+ )
54
+ return tests
@@ -0,0 +1,51 @@
1
+ from __future__ import annotations
2
+
3
+ from TestSelection.shared.types import KeywordTree, UserKeywordRef
4
+
5
+
6
class KeywordTreeResolver:
    """Resolves keyword names to their full sub-keyword trees.

    Wraps the manual resolution logic required because robot.api
    does not resolve keyword implementations at parse time.
    """

    def __init__(self, keyword_map: dict[str, UserKeywordRef]) -> None:
        # Keys are normalized keyword names (lowercase, underscores).
        self._kw_map = keyword_map

    def resolve(
        self,
        kw_name: str,
        kw_args: tuple[str, ...],
        max_depth: int = 10,
    ) -> KeywordTree:
        """Resolve *kw_name* into a tree of its sub-keywords, up to max_depth."""
        return self._resolve_recursive(
            kw_name, kw_args, depth=0, max_depth=max_depth
        )

    def _resolve_recursive(
        self,
        kw_name: str,
        kw_args: tuple[str, ...],
        depth: int,
        max_depth: int,
    ) -> KeywordTree:
        # Expand children only while the depth budget allows; unknown
        # keywords (library keywords) become leaves.
        children: list[KeywordTree] = []
        if depth < max_depth:
            lookup_key = kw_name.lower().replace(" ", "_")
            definition = self._kw_map.get(lookup_key)
            body = definition.body_items if definition is not None else ()
            children = [
                self._resolve_recursive(
                    step.name, tuple(step.args), depth + 1, max_depth
                )
                for step in body
                if getattr(step, "name", None)
            ]
        return KeywordTree(
            keyword_name=kw_name,
            args=kw_args,
            children=tuple(children),
        )
@@ -0,0 +1,85 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from robot.api import TestSuite as RobotTestSuite
8
+
9
+ from TestSelection.shared.types import FileHash, UserKeywordRef
10
+
11
+
12
class RobotApiAdapter:
    """ACL: Translates robot.api types into domain objects.

    Uses TestSuite.from_file_system() as the primary entry point,
    which is the recommended robot.api approach for building a
    complete suite model from .robot files or directories.
    """

    def parse_suite(
        self, suite_path: Path
    ) -> tuple[list[dict[str, Any]], dict[str, UserKeywordRef]]:
        """Parse a suite path and return (raw_tests, keyword_map).

        Returns raw test dicts and a domain keyword map.
        Callers use TextRepresentationBuilder to convert to TestCaseRecords.
        """
        robot_suite = RobotTestSuite.from_file_system(str(suite_path))
        keyword_map = self._build_keyword_map(robot_suite)
        return self._collect_tests(robot_suite), keyword_map

    def _build_keyword_map(
        self, suite: Any
    ) -> dict[str, UserKeywordRef]:
        """Map normalized keyword names to UserKeywordRef, suite-tree wide."""
        kw_map: dict[str, UserKeywordRef] = {}
        resource = getattr(suite, "resource", None)
        if resource:
            for uk in resource.keywords:
                normalized = uk.name.lower().replace(" ", "_")
                kw_map[normalized] = UserKeywordRef(
                    name=uk.name,
                    normalized_name=normalized,
                    body_items=tuple(uk.body),
                )
        for child in suite.suites:
            kw_map.update(self._build_keyword_map(child))
        return kw_map

    def _collect_tests(self, suite: Any) -> list[dict[str, Any]]:
        """Collect raw test data from suite hierarchy."""
        # Every test in one suite shares the same source path.
        origin = str(suite.source) if suite.source else suite.name
        gathered: list[dict[str, Any]] = [
            {
                "name": test.name,
                "tags": [str(tag) for tag in test.tags],
                "body": list(test.body),
                "source": origin,
                "suite_name": suite.name,
            }
            for test in suite.tests
        ]
        for child in suite.suites:
            gathered.extend(self._collect_tests(child))
        return gathered

    def compute_file_hashes(
        self, suite_path: Path
    ) -> dict[str, FileHash]:
        """Hash all .robot files for change detection."""
        hashes: dict[str, FileHash] = {}
        if suite_path.is_dir():
            for robot_file in suite_path.rglob("*.robot"):
                digest = hashlib.md5(robot_file.read_bytes()).hexdigest()
                hashes[str(robot_file)] = FileHash(
                    path=str(robot_file), md5=digest
                )
        elif suite_path.suffix == ".robot":
            digest = hashlib.md5(suite_path.read_bytes()).hexdigest()
            hashes[str(suite_path)] = FileHash(
                path=str(suite_path), md5=digest
            )
        # Non-directory, non-.robot paths yield an empty map.
        return hashes
@@ -0,0 +1,79 @@
1
+ from __future__ import annotations
2
+
3
+ from TestSelection.parsing.keyword_resolver import KeywordTreeResolver
4
+ from TestSelection.shared.config import TextBuilderConfig
5
+ from TestSelection.shared.types import (
6
+ Tag,
7
+ TextRepresentation,
8
+ UserKeywordRef,
9
+ )
10
+
11
+
12
class TextRepresentationBuilder:
    """Builds embeddable text from a test case and its keyword tree.

    Includes test name, tags, keyword names, and semantic arguments.
    Filters DOM locators, variable placeholders, and XPaths -- these
    are noise that dilutes embedding quality.
    """

    def __init__(
        self,
        resolver: KeywordTreeResolver,
        config: TextBuilderConfig | None = None,
    ) -> None:
        self._resolver = resolver
        self._config = config or TextBuilderConfig()

    def build(
        self,
        test_name: str,
        tags: frozenset[Tag],
        body_items: list,
    ) -> TextRepresentation:
        """Assemble the embeddable text for one test case."""
        fragments = [f"Test: {test_name}."]
        if self._config.include_tags and tags:
            # Sort on the normalized form for a stable tag order.
            ordered = sorted(tags, key=lambda tag: tag.normalized)
            fragments.append(
                f"Tags: {', '.join(tag.value for tag in ordered)}."
            )
        for step in body_items:
            if not (hasattr(step, "name") and step.name):
                continue
            if self._config.resolve_depth > 0:
                tree = self._resolver.resolve(
                    step.name,
                    tuple(step.args),
                    max_depth=self._config.resolve_depth,
                )
                fragments.append(tree.flatten())
            else:
                fragments.append(self._shallow_keyword_text(step))
        return TextRepresentation(
            text=" ".join(fragments),
            resolve_depth=self._config.resolve_depth,
            includes_tags=self._config.include_tags,
        )

    def _shallow_keyword_text(self, step) -> str:
        """Render one keyword call without resolving its sub-keywords."""
        text = step.name.replace("_", " ")
        # Drop arguments that look like locators/variables (noise prefixes).
        semantic = [
            str(arg)
            for arg in step.args
            if not any(
                str(arg).startswith(prefix)
                for prefix in self._config.noise_prefixes
            )
        ]
        if semantic:
            text += f" with {', '.join(semantic)}"
        return text

    def build_from_record(
        self,
        test_dict: dict,
        keyword_map: dict[str, UserKeywordRef],
    ) -> TextRepresentation:
        """Build a TextRepresentation from a raw test dict and keyword map."""
        tag_set = frozenset(Tag(value=t) for t in test_dict.get("tags", []))
        return self.build(
            test_name=test_dict["name"],
            tags=tag_set,
            body_items=test_dict.get("body", []),
        )
@@ -0,0 +1 @@
1
+ """Pipeline bounded context: stage orchestration, caching, and artifact management."""
@@ -0,0 +1,110 @@
1
+ """Artifact management for the pipeline stages."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import logging
6
+ from pathlib import Path
7
+
8
+ import numpy as np
9
+ from numpy.typing import NDArray
10
+
11
+ from TestSelection.embedding.models import ArtifactManifest, ManifestEntry
12
+ from TestSelection.pipeline.errors import ArtifactError
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class ArtifactManager:
    """Manages artifact storage and retrieval between pipeline stages.

    Persists the embedding matrix (embeddings.npz), the test manifest
    (test_manifest.json), the selection result (selected_tests.json),
    and the file-hash store used for cache invalidation, all under one
    artifact directory.
    """

    def __init__(self, artifact_dir: Path) -> None:
        self._artifact_dir = artifact_dir
        # Create eagerly so later artifact writes cannot fail on a
        # missing parent directory.
        self._artifact_dir.mkdir(parents=True, exist_ok=True)

    @property
    def embeddings_path(self) -> Path:
        return self._artifact_dir / "embeddings.npz"

    @property
    def manifest_path(self) -> Path:
        return self._artifact_dir / "test_manifest.json"

    @property
    def selection_path(self) -> Path:
        return self._artifact_dir / "selected_tests.json"

    @property
    def hash_store_path(self) -> Path:
        return self._artifact_dir / "file_hashes.json"

    def has_embedding_artifacts(self) -> bool:
        """Check if both embedding artifacts exist."""
        return self.embeddings_path.exists() and self.manifest_path.exists()

    def has_selection_artifact(self) -> bool:
        """Check if the selection artifact exists."""
        return self.selection_path.exists()

    def load_manifest(self) -> ArtifactManifest:
        """Load and parse test_manifest.json into ArtifactManifest.

        Raises:
            ArtifactError: If the manifest file does not exist.
        """
        if not self.manifest_path.exists():
            raise ArtifactError(
                f"Manifest not found: {self.manifest_path}"
            )
        raw = json.loads(self.manifest_path.read_text(encoding="utf-8"))
        return ArtifactManifest(
            model=raw["model"],
            embedding_dim=raw["embedding_dim"],
            test_count=raw["test_count"],
            resolve_depth=raw.get("resolve_depth", 0),
            tests=tuple(
                ManifestEntry(
                    id=t["id"],
                    name=t["name"],
                    tags=tuple(t.get("tags", [])),
                    suite=t.get("suite", ""),
                    suite_name=t.get("suite_name", ""),
                    is_datadriver=t.get("is_datadriver", False),
                )
                for t in raw["tests"]
            ),
        )

    def load_vectors(self) -> NDArray:
        """Load embedding vectors from embeddings.npz.

        Raises:
            ArtifactError: If the embeddings file does not exist.
        """
        if not self.embeddings_path.exists():
            raise ArtifactError(
                f"Embeddings not found: {self.embeddings_path}"
            )
        # The vectors are a plain numeric array, so the pickle loader is
        # not needed; allow_pickle=True would let a tampered .npz execute
        # arbitrary code on load.
        data = np.load(self.embeddings_path)
        return data["vectors"]

    def validate_artifacts(self) -> tuple[bool, str]:
        """Validate that embedding artifacts are consistent.

        Returns (valid, message) where message describes any issues.
        """
        if not self.has_embedding_artifacts():
            return False, "Missing embedding artifacts"

        try:
            manifest = self.load_manifest()
            vectors = self.load_vectors()
        except Exception as exc:
            return False, f"Failed to load artifacts: {exc}"

        # Guard the rank before indexing shape[1]: a corrupted or 1-D
        # array would otherwise raise IndexError instead of producing a
        # diagnostic message.
        if vectors.ndim != 2:
            return (
                False,
                f"Expected a 2-D vector matrix, got {vectors.ndim} dims",
            )

        if vectors.shape[0] != manifest.test_count:
            return (
                False,
                f"Shape mismatch: vectors has {vectors.shape[0]} rows "
                f"but manifest has {manifest.test_count} tests",
            )

        if vectors.shape[1] != manifest.embedding_dim:
            return (
                False,
                f"Dimension mismatch: vectors has {vectors.shape[1]} dims "
                f"but manifest expects {manifest.embedding_dim}",
            )

        return True, "Artifacts valid"
@@ -0,0 +1,74 @@
1
+ """Caching service for change detection via content hashing."""
2
+ from __future__ import annotations
3
+
4
+ import hashlib
5
+ import json
6
+ import logging
7
+ from pathlib import Path
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ class CacheInvalidator:
13
+ """Detects changes in .robot and .csv files via MD5 content hashing."""
14
+
15
+ def __init__(self, hash_store_path: Path) -> None:
16
+ self._hash_store_path = hash_store_path
17
+
18
+ def has_changes(self, suite_path: Path) -> bool:
19
+ """Compare current file hashes with stored hashes.
20
+
21
+ Returns True if there are changes or no stored hashes exist.
22
+ """
23
+ if not self._hash_store_path.exists():
24
+ logger.info(
25
+ "[DIVERSE-SELECT] stage=vectorize event=cache_miss "
26
+ "reason=no_stored_hashes"
27
+ )
28
+ return True
29
+
30
+ stored = json.loads(self._hash_store_path.read_text())
31
+ current = self._compute_hashes(suite_path)
32
+
33
+ if current != stored:
34
+ changed = set(current.keys()) ^ set(stored.keys())
35
+ for key in set(current.keys()) & set(stored.keys()):
36
+ if current[key] != stored[key]:
37
+ changed.add(key)
38
+ logger.info(
39
+ "[DIVERSE-SELECT] stage=vectorize event=cache_miss "
40
+ "changed_files=%d",
41
+ len(changed),
42
+ )
43
+ return True
44
+
45
+ logger.info(
46
+ "[DIVERSE-SELECT] stage=vectorize event=cache_hit "
47
+ "files=%d",
48
+ len(current),
49
+ )
50
+ return False
51
+
52
+ def save_hashes(self, suite_path: Path) -> None:
53
+ """Save current file hashes to the hash store."""
54
+ hashes = self._compute_hashes(suite_path)
55
+ self._hash_store_path.parent.mkdir(parents=True, exist_ok=True)
56
+ self._hash_store_path.write_text(json.dumps(hashes, indent=2))
57
+
58
+ def _compute_hashes(self, suite_path: Path) -> dict[str, str]:
59
+ """Compute MD5 hashes for all .robot and .csv files."""
60
+ hashes: dict[str, str] = {}
61
+ target = suite_path if suite_path.is_dir() else suite_path.parent
62
+
63
+ if suite_path.is_dir():
64
+ patterns = ["*.robot", "*.csv"]
65
+ for pattern in patterns:
66
+ for p in sorted(target.rglob(pattern)):
67
+ md5 = hashlib.md5(p.read_bytes()).hexdigest()
68
+ hashes[str(p)] = md5
69
+ else:
70
+ if suite_path.exists():
71
+ md5 = hashlib.md5(suite_path.read_bytes()).hexdigest()
72
+ hashes[str(suite_path)] = md5
73
+
74
+ return hashes
@@ -0,0 +1,18 @@
1
+ """Custom exception hierarchy for the diverse test selection pipeline."""
2
+ from __future__ import annotations
3
+
4
+
5
+ class DiverseSelectionError(Exception):
6
+ """Base exception for the diverse test selection pipeline."""
7
+
8
+
9
+ class VectorizationError(DiverseSelectionError):
10
+ """Raised when Stage 1 (vectorization) encounters an unrecoverable error."""
11
+
12
+
13
+ class SelectionError(DiverseSelectionError):
14
+ """Raised when Stage 2 (selection) encounters an unrecoverable error."""
15
+
16
+
17
+ class ArtifactError(DiverseSelectionError):
18
+ """Raised when artifact validation fails between stages."""
@@ -0,0 +1,52 @@
1
+ """Stage 3 orchestrator: execute selected tests via Robot Framework."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ from pathlib import Path
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
+ def run_execute(
11
+ suite_path: Path,
12
+ selection_file: Path,
13
+ output_dir: str = "./results",
14
+ extra_robot_args: list[str] | None = None,
15
+ ) -> int:
16
+ """Run the execution stage.
17
+
18
+ Returns the Robot Framework exit code (0=pass, 1=fail, 2=error).
19
+ """
20
+ try:
21
+ from TestSelection.execution.runner import ExecutionRunner
22
+
23
+ runner = ExecutionRunner(
24
+ suite_path=suite_path,
25
+ selection_file=selection_file,
26
+ output_dir=output_dir,
27
+ )
28
+
29
+ logger.info(
30
+ "[DIVERSE-SELECT] stage=execute event=start "
31
+ "suite=%s selection=%s",
32
+ suite_path,
33
+ selection_file,
34
+ )
35
+
36
+ return_code = runner.execute(extra_args=extra_robot_args)
37
+ runner.generate_report(return_code)
38
+
39
+ logger.info(
40
+ "[DIVERSE-SELECT] stage=execute event=complete "
41
+ "return_code=%d",
42
+ return_code,
43
+ )
44
+
45
+ return return_code
46
+
47
+ except Exception as exc:
48
+ logger.warning(
49
+ "[DIVERSE-SELECT] stage=execute event=error error=%s",
50
+ str(exc),
51
+ )
52
+ return 2