robotframework-testselection 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39)
  1. TestSelection/__init__.py +3 -0
  2. TestSelection/cli.py +256 -0
  3. TestSelection/embedding/__init__.py +1 -0
  4. TestSelection/embedding/embedder.py +43 -0
  5. TestSelection/embedding/models.py +198 -0
  6. TestSelection/embedding/ports.py +24 -0
  7. TestSelection/execution/__init__.py +1 -0
  8. TestSelection/execution/listener.py +44 -0
  9. TestSelection/execution/prerun_modifier.py +43 -0
  10. TestSelection/execution/runner.py +75 -0
  11. TestSelection/parsing/__init__.py +1 -0
  12. TestSelection/parsing/datadriver_reader.py +54 -0
  13. TestSelection/parsing/keyword_resolver.py +51 -0
  14. TestSelection/parsing/suite_collector.py +85 -0
  15. TestSelection/parsing/text_builder.py +79 -0
  16. TestSelection/pipeline/__init__.py +1 -0
  17. TestSelection/pipeline/artifacts.py +110 -0
  18. TestSelection/pipeline/cache.py +74 -0
  19. TestSelection/pipeline/errors.py +18 -0
  20. TestSelection/pipeline/execute.py +52 -0
  21. TestSelection/pipeline/select.py +183 -0
  22. TestSelection/pipeline/vectorize.py +190 -0
  23. TestSelection/py.typed +0 -0
  24. TestSelection/selection/__init__.py +25 -0
  25. TestSelection/selection/dpp.py +31 -0
  26. TestSelection/selection/facility.py +25 -0
  27. TestSelection/selection/filtering.py +21 -0
  28. TestSelection/selection/fps.py +67 -0
  29. TestSelection/selection/kmedoids.py +32 -0
  30. TestSelection/selection/registry.py +70 -0
  31. TestSelection/selection/strategy.py +142 -0
  32. TestSelection/shared/__init__.py +1 -0
  33. TestSelection/shared/config.py +31 -0
  34. TestSelection/shared/types.py +117 -0
  35. robotframework_testselection-0.1.0.dist-info/METADATA +408 -0
  36. robotframework_testselection-0.1.0.dist-info/RECORD +39 -0
  37. robotframework_testselection-0.1.0.dist-info/WHEEL +4 -0
  38. robotframework_testselection-0.1.0.dist-info/entry_points.txt +2 -0
  39. robotframework_testselection-0.1.0.dist-info/licenses/LICENSE +191 -0
@@ -0,0 +1 @@
1
+ """Parsing context: Robot Framework suite parsing and text representation."""
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ import csv
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+
8
+ def read_datadriver_csv(
9
+ csv_path: Path | str,
10
+ template_name: str,
11
+ delimiter: str = ";",
12
+ ) -> list[dict[str, Any]]:
13
+ """Read a DataDriver CSV and return test case dicts.
14
+
15
+ The CSV follows DataDriver convention:
16
+ - First column header is '*** Test Cases ***'
17
+ - Subsequent columns are variable names (e.g., ${username})
18
+ - Rows starting with # are comments and are skipped
19
+
20
+ Args:
21
+ csv_path: Path to the DataDriver CSV file.
22
+ template_name: Name of the template keyword in the .robot file.
23
+ delimiter: CSV delimiter (DataDriver defaults to ';').
24
+
25
+ Returns:
26
+ List of dicts with name, description, source, is_datadriver keys.
27
+ """
28
+ csv_path = Path(csv_path)
29
+ tests: list[dict[str, Any]] = []
30
+ with open(csv_path, newline="", encoding="utf-8") as f:
31
+ reader = csv.DictReader(f, delimiter=delimiter)
32
+ for row in reader:
33
+ test_name = row.get("*** Test Cases ***", "").strip()
34
+ if not test_name or test_name.startswith("#"):
35
+ continue
36
+ args = {
37
+ k: v
38
+ for k, v in row.items()
39
+ if k is not None and k.startswith("${") and v
40
+ }
41
+ description = f"Template: {template_name}. Test: {test_name}."
42
+ if args:
43
+ description += " " + " ".join(
44
+ f"{k}={v}" for k, v in args.items()
45
+ )
46
+ tests.append(
47
+ {
48
+ "name": test_name,
49
+ "description": description,
50
+ "source": str(csv_path),
51
+ "is_datadriver": True,
52
+ }
53
+ )
54
+ return tests
@@ -0,0 +1,51 @@
1
+ from __future__ import annotations
2
+
3
+ from TestSelection.shared.types import KeywordTree, UserKeywordRef
4
+
5
+
6
class KeywordTreeResolver:
    """Resolves keyword names to their full sub-keyword trees.

    Wraps the manual resolution logic required because robot.api
    does not resolve keyword implementations at parse time.
    """

    def __init__(self, keyword_map: dict[str, UserKeywordRef]) -> None:
        # Keys are normalized keyword names (lowercase, underscores).
        self._kw_map = keyword_map

    def resolve(
        self,
        kw_name: str,
        kw_args: tuple[str, ...],
        max_depth: int = 10,
    ) -> KeywordTree:
        """Resolve *kw_name* into a tree of its sub-keywords, up to max_depth."""
        return self._resolve_recursive(
            kw_name, kw_args, depth=0, max_depth=max_depth
        )

    def _resolve_recursive(
        self,
        kw_name: str,
        kw_args: tuple[str, ...],
        depth: int,
        max_depth: int,
    ) -> KeywordTree:
        # Expand children only while the depth budget allows; unknown
        # keywords (library keywords) become leaves.
        children: list[KeywordTree] = []
        if depth < max_depth:
            lookup_key = kw_name.lower().replace(" ", "_")
            definition = self._kw_map.get(lookup_key)
            body = definition.body_items if definition is not None else ()
            children = [
                self._resolve_recursive(
                    step.name, tuple(step.args), depth + 1, max_depth
                )
                for step in body
                if getattr(step, "name", None)
            ]
        return KeywordTree(
            keyword_name=kw_name,
            args=kw_args,
            children=tuple(children),
        )
@@ -0,0 +1,85 @@
1
+ from __future__ import annotations
2
+
3
+ import hashlib
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from robot.api import TestSuite as RobotTestSuite
8
+
9
+ from TestSelection.shared.types import FileHash, UserKeywordRef
10
+
11
+
12
class RobotApiAdapter:
    """ACL: Translates robot.api types into domain objects.

    Uses TestSuite.from_file_system() as the primary entry point,
    which is the recommended robot.api approach for building a
    complete suite model from .robot files or directories.
    """

    def parse_suite(
        self, suite_path: Path
    ) -> tuple[list[dict[str, Any]], dict[str, UserKeywordRef]]:
        """Parse a suite path and return (raw_tests, keyword_map).

        Returns raw test dicts and a domain keyword map.
        Callers use TextRepresentationBuilder to convert to TestCaseRecords.
        """
        robot_suite = RobotTestSuite.from_file_system(str(suite_path))
        keyword_map = self._build_keyword_map(robot_suite)
        return self._collect_tests(robot_suite), keyword_map

    def _build_keyword_map(
        self, suite: Any
    ) -> dict[str, UserKeywordRef]:
        """Map normalized keyword names to UserKeywordRef, suite-tree wide."""
        kw_map: dict[str, UserKeywordRef] = {}
        resource = getattr(suite, "resource", None)
        if resource:
            for uk in resource.keywords:
                normalized = uk.name.lower().replace(" ", "_")
                kw_map[normalized] = UserKeywordRef(
                    name=uk.name,
                    normalized_name=normalized,
                    body_items=tuple(uk.body),
                )
        for child in suite.suites:
            kw_map.update(self._build_keyword_map(child))
        return kw_map

    def _collect_tests(self, suite: Any) -> list[dict[str, Any]]:
        """Collect raw test data from suite hierarchy."""
        # Every test in one suite shares the same source path.
        origin = str(suite.source) if suite.source else suite.name
        gathered: list[dict[str, Any]] = [
            {
                "name": test.name,
                "tags": [str(tag) for tag in test.tags],
                "body": list(test.body),
                "source": origin,
                "suite_name": suite.name,
            }
            for test in suite.tests
        ]
        for child in suite.suites:
            gathered.extend(self._collect_tests(child))
        return gathered

    def compute_file_hashes(
        self, suite_path: Path
    ) -> dict[str, FileHash]:
        """Hash all .robot files for change detection."""
        hashes: dict[str, FileHash] = {}
        if suite_path.is_dir():
            for robot_file in suite_path.rglob("*.robot"):
                digest = hashlib.md5(robot_file.read_bytes()).hexdigest()
                hashes[str(robot_file)] = FileHash(
                    path=str(robot_file), md5=digest
                )
        elif suite_path.suffix == ".robot":
            digest = hashlib.md5(suite_path.read_bytes()).hexdigest()
            hashes[str(suite_path)] = FileHash(
                path=str(suite_path), md5=digest
            )
        # Non-directory, non-.robot paths yield an empty map.
        return hashes
@@ -0,0 +1,79 @@
1
+ from __future__ import annotations
2
+
3
+ from TestSelection.parsing.keyword_resolver import KeywordTreeResolver
4
+ from TestSelection.shared.config import TextBuilderConfig
5
+ from TestSelection.shared.types import (
6
+ Tag,
7
+ TextRepresentation,
8
+ UserKeywordRef,
9
+ )
10
+
11
+
12
class TextRepresentationBuilder:
    """Builds embeddable text from a test case and its keyword tree.

    Includes test name, tags, keyword names, and semantic arguments.
    Filters DOM locators, variable placeholders, and XPaths -- these
    are noise that dilutes embedding quality.
    """

    def __init__(
        self,
        resolver: KeywordTreeResolver,
        config: TextBuilderConfig | None = None,
    ) -> None:
        self._resolver = resolver
        self._config = config or TextBuilderConfig()

    def build(
        self,
        test_name: str,
        tags: frozenset[Tag],
        body_items: list,
    ) -> TextRepresentation:
        """Assemble the embeddable text for one test case."""
        fragments = [f"Test: {test_name}."]
        if self._config.include_tags and tags:
            # Sort on the normalized form for a stable tag order.
            ordered = sorted(tags, key=lambda tag: tag.normalized)
            fragments.append(
                f"Tags: {', '.join(tag.value for tag in ordered)}."
            )
        for step in body_items:
            if not (hasattr(step, "name") and step.name):
                continue
            if self._config.resolve_depth > 0:
                tree = self._resolver.resolve(
                    step.name,
                    tuple(step.args),
                    max_depth=self._config.resolve_depth,
                )
                fragments.append(tree.flatten())
            else:
                fragments.append(self._shallow_keyword_text(step))
        return TextRepresentation(
            text=" ".join(fragments),
            resolve_depth=self._config.resolve_depth,
            includes_tags=self._config.include_tags,
        )

    def _shallow_keyword_text(self, step) -> str:
        """Render one keyword call without resolving its sub-keywords."""
        text = step.name.replace("_", " ")
        # Drop arguments that look like locators/variables (noise prefixes).
        semantic = [
            str(arg)
            for arg in step.args
            if not any(
                str(arg).startswith(prefix)
                for prefix in self._config.noise_prefixes
            )
        ]
        if semantic:
            text += f" with {', '.join(semantic)}"
        return text

    def build_from_record(
        self,
        test_dict: dict,
        keyword_map: dict[str, UserKeywordRef],
    ) -> TextRepresentation:
        """Build a TextRepresentation from a raw test dict and keyword map."""
        tag_set = frozenset(Tag(value=t) for t in test_dict.get("tags", []))
        return self.build(
            test_name=test_dict["name"],
            tags=tag_set,
            body_items=test_dict.get("body", []),
        )
@@ -0,0 +1 @@
1
+ """Pipeline bounded context: stage orchestration, caching, and artifact management."""
@@ -0,0 +1,110 @@
1
+ """Artifact management for the pipeline stages."""
2
+ from __future__ import annotations
3
+
4
+ import json
5
+ import logging
6
+ from pathlib import Path
7
+
8
+ import numpy as np
9
+ from numpy.typing import NDArray
10
+
11
+ from TestSelection.embedding.models import ArtifactManifest, ManifestEntry
12
+ from TestSelection.pipeline.errors import ArtifactError
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
class ArtifactManager:
    """Manages artifact storage and retrieval between pipeline stages.

    Persists the embedding matrix (embeddings.npz), the test manifest
    (test_manifest.json), the selection result (selected_tests.json),
    and the file-hash store used for cache invalidation, all under one
    artifact directory.
    """

    def __init__(self, artifact_dir: Path) -> None:
        self._artifact_dir = artifact_dir
        # Create eagerly so later artifact writes cannot fail on a
        # missing parent directory.
        self._artifact_dir.mkdir(parents=True, exist_ok=True)

    @property
    def embeddings_path(self) -> Path:
        return self._artifact_dir / "embeddings.npz"

    @property
    def manifest_path(self) -> Path:
        return self._artifact_dir / "test_manifest.json"

    @property
    def selection_path(self) -> Path:
        return self._artifact_dir / "selected_tests.json"

    @property
    def hash_store_path(self) -> Path:
        return self._artifact_dir / "file_hashes.json"

    def has_embedding_artifacts(self) -> bool:
        """Check if both embedding artifacts exist."""
        return self.embeddings_path.exists() and self.manifest_path.exists()

    def has_selection_artifact(self) -> bool:
        """Check if the selection artifact exists."""
        return self.selection_path.exists()

    def load_manifest(self) -> ArtifactManifest:
        """Load and parse test_manifest.json into ArtifactManifest.

        Raises:
            ArtifactError: If the manifest file does not exist.
        """
        if not self.manifest_path.exists():
            raise ArtifactError(
                f"Manifest not found: {self.manifest_path}"
            )
        raw = json.loads(self.manifest_path.read_text(encoding="utf-8"))
        return ArtifactManifest(
            model=raw["model"],
            embedding_dim=raw["embedding_dim"],
            test_count=raw["test_count"],
            resolve_depth=raw.get("resolve_depth", 0),
            tests=tuple(
                ManifestEntry(
                    id=t["id"],
                    name=t["name"],
                    tags=tuple(t.get("tags", [])),
                    suite=t.get("suite", ""),
                    suite_name=t.get("suite_name", ""),
                    is_datadriver=t.get("is_datadriver", False),
                )
                for t in raw["tests"]
            ),
        )

    def load_vectors(self) -> NDArray:
        """Load embedding vectors from embeddings.npz.

        Raises:
            ArtifactError: If the embeddings file does not exist.
        """
        if not self.embeddings_path.exists():
            raise ArtifactError(
                f"Embeddings not found: {self.embeddings_path}"
            )
        # The vectors are a plain numeric array, so the pickle loader is
        # not needed; allow_pickle=True would let a tampered .npz execute
        # arbitrary code on load.
        data = np.load(self.embeddings_path)
        return data["vectors"]

    def validate_artifacts(self) -> tuple[bool, str]:
        """Validate that embedding artifacts are consistent.

        Returns (valid, message) where message describes any issues.
        """
        if not self.has_embedding_artifacts():
            return False, "Missing embedding artifacts"

        try:
            manifest = self.load_manifest()
            vectors = self.load_vectors()
        except Exception as exc:
            return False, f"Failed to load artifacts: {exc}"

        # Guard the rank before indexing shape[1]: a corrupted or 1-D
        # array would otherwise raise IndexError instead of producing a
        # diagnostic message.
        if vectors.ndim != 2:
            return (
                False,
                f"Expected a 2-D vector matrix, got {vectors.ndim} dims",
            )

        if vectors.shape[0] != manifest.test_count:
            return (
                False,
                f"Shape mismatch: vectors has {vectors.shape[0]} rows "
                f"but manifest has {manifest.test_count} tests",
            )

        if vectors.shape[1] != manifest.embedding_dim:
            return (
                False,
                f"Dimension mismatch: vectors has {vectors.shape[1]} dims "
                f"but manifest expects {manifest.embedding_dim}",
            )

        return True, "Artifacts valid"
@@ -0,0 +1,74 @@
1
+ """Caching service for change detection via content hashing."""
2
+ from __future__ import annotations
3
+
4
+ import hashlib
5
+ import json
6
+ import logging
7
+ from pathlib import Path
8
+
9
+ logger = logging.getLogger(__name__)
10
+
11
+
12
+ class CacheInvalidator:
13
+ """Detects changes in .robot and .csv files via MD5 content hashing."""
14
+
15
+ def __init__(self, hash_store_path: Path) -> None:
16
+ self._hash_store_path = hash_store_path
17
+
18
+ def has_changes(self, suite_path: Path) -> bool:
19
+ """Compare current file hashes with stored hashes.
20
+
21
+ Returns True if there are changes or no stored hashes exist.
22
+ """
23
+ if not self._hash_store_path.exists():
24
+ logger.info(
25
+ "[DIVERSE-SELECT] stage=vectorize event=cache_miss "
26
+ "reason=no_stored_hashes"
27
+ )
28
+ return True
29
+
30
+ stored = json.loads(self._hash_store_path.read_text())
31
+ current = self._compute_hashes(suite_path)
32
+
33
+ if current != stored:
34
+ changed = set(current.keys()) ^ set(stored.keys())
35
+ for key in set(current.keys()) & set(stored.keys()):
36
+ if current[key] != stored[key]:
37
+ changed.add(key)
38
+ logger.info(
39
+ "[DIVERSE-SELECT] stage=vectorize event=cache_miss "
40
+ "changed_files=%d",
41
+ len(changed),
42
+ )
43
+ return True
44
+
45
+ logger.info(
46
+ "[DIVERSE-SELECT] stage=vectorize event=cache_hit "
47
+ "files=%d",
48
+ len(current),
49
+ )
50
+ return False
51
+
52
+ def save_hashes(self, suite_path: Path) -> None:
53
+ """Save current file hashes to the hash store."""
54
+ hashes = self._compute_hashes(suite_path)
55
+ self._hash_store_path.parent.mkdir(parents=True, exist_ok=True)
56
+ self._hash_store_path.write_text(json.dumps(hashes, indent=2))
57
+
58
+ def _compute_hashes(self, suite_path: Path) -> dict[str, str]:
59
+ """Compute MD5 hashes for all .robot and .csv files."""
60
+ hashes: dict[str, str] = {}
61
+ target = suite_path if suite_path.is_dir() else suite_path.parent
62
+
63
+ if suite_path.is_dir():
64
+ patterns = ["*.robot", "*.csv"]
65
+ for pattern in patterns:
66
+ for p in sorted(target.rglob(pattern)):
67
+ md5 = hashlib.md5(p.read_bytes()).hexdigest()
68
+ hashes[str(p)] = md5
69
+ else:
70
+ if suite_path.exists():
71
+ md5 = hashlib.md5(suite_path.read_bytes()).hexdigest()
72
+ hashes[str(suite_path)] = md5
73
+
74
+ return hashes
@@ -0,0 +1,18 @@
1
+ """Custom exception hierarchy for the diverse test selection pipeline."""
2
+ from __future__ import annotations
3
+
4
+
5
+ class DiverseSelectionError(Exception):
6
+ """Base exception for the diverse test selection pipeline."""
7
+
8
+
9
+ class VectorizationError(DiverseSelectionError):
10
+ """Raised when Stage 1 (vectorization) encounters an unrecoverable error."""
11
+
12
+
13
+ class SelectionError(DiverseSelectionError):
14
+ """Raised when Stage 2 (selection) encounters an unrecoverable error."""
15
+
16
+
17
+ class ArtifactError(DiverseSelectionError):
18
+ """Raised when artifact validation fails between stages."""
@@ -0,0 +1,52 @@
1
+ """Stage 3 orchestrator: execute selected tests via Robot Framework."""
2
+ from __future__ import annotations
3
+
4
+ import logging
5
+ from pathlib import Path
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
+ def run_execute(
11
+ suite_path: Path,
12
+ selection_file: Path,
13
+ output_dir: str = "./results",
14
+ extra_robot_args: list[str] | None = None,
15
+ ) -> int:
16
+ """Run the execution stage.
17
+
18
+ Returns the Robot Framework exit code (0=pass, 1=fail, 2=error).
19
+ """
20
+ try:
21
+ from TestSelection.execution.runner import ExecutionRunner
22
+
23
+ runner = ExecutionRunner(
24
+ suite_path=suite_path,
25
+ selection_file=selection_file,
26
+ output_dir=output_dir,
27
+ )
28
+
29
+ logger.info(
30
+ "[DIVERSE-SELECT] stage=execute event=start "
31
+ "suite=%s selection=%s",
32
+ suite_path,
33
+ selection_file,
34
+ )
35
+
36
+ return_code = runner.execute(extra_args=extra_robot_args)
37
+ runner.generate_report(return_code)
38
+
39
+ logger.info(
40
+ "[DIVERSE-SELECT] stage=execute event=complete "
41
+ "return_code=%d",
42
+ return_code,
43
+ )
44
+
45
+ return return_code
46
+
47
+ except Exception as exc:
48
+ logger.warning(
49
+ "[DIVERSE-SELECT] stage=execute event=error error=%s",
50
+ str(exc),
51
+ )
52
+ return 2