aspice-eval 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aspice_eval/__init__.py +71 -0
- aspice_eval/cli.py +343 -0
- aspice_eval/convenience.py +218 -0
- aspice_eval/evaluator.py +737 -0
- aspice_eval/exceptions.py +113 -0
- aspice_eval/kb_validator.py +239 -0
- aspice_eval/knowledge_base/aspice/_metadata.yaml +62 -0
- aspice_eval/knowledge_base/aspice/man.yaml +306 -0
- aspice_eval/knowledge_base/aspice/sup.yaml +483 -0
- aspice_eval/knowledge_base/aspice/swe.yaml +1197 -0
- aspice_eval/knowledge_base/aspice/sys.yaml +432 -0
- aspice_eval/knowledge_base/schema/criteria_schema.json +168 -0
- aspice_eval/knowledge_base.py +564 -0
- aspice_eval/level_calculator.py +231 -0
- aspice_eval/models.py +198 -0
- aspice_eval/providers/__init__.py +134 -0
- aspice_eval/providers/anthropic_provider.py +86 -0
- aspice_eval/providers/bedrock.py +96 -0
- aspice_eval/providers/openai_provider.py +87 -0
- aspice_eval/py.typed +1 -0
- aspice_eval/report_generator.py +547 -0
- aspice_eval/report_renderer.py +113 -0
- aspice_eval/sdp_ingester.py +72 -0
- aspice_eval-0.1.0.dist-info/METADATA +234 -0
- aspice_eval-0.1.0.dist-info/RECORD +28 -0
- aspice_eval-0.1.0.dist-info/WHEEL +5 -0
- aspice_eval-0.1.0.dist-info/entry_points.txt +2 -0
- aspice_eval-0.1.0.dist-info/top_level.txt +1 -0
aspice_eval/__init__.py
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
"""ASPICE evaluation engine — knowledge base, evaluator, and reports."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
__version__ = "0.1.0"
|
|
6
|
+
|
|
7
|
+
# --- Core classes ---
|
|
8
|
+
from aspice_eval.knowledge_base import KnowledgeBase
|
|
9
|
+
from aspice_eval.evaluator import GapAnalysisEvaluator
|
|
10
|
+
from aspice_eval.report_renderer import ReportRenderer
|
|
11
|
+
|
|
12
|
+
# --- Factory & registry functions ---
|
|
13
|
+
from aspice_eval.providers import create_evaluator, register_evaluator
|
|
14
|
+
from aspice_eval.report_renderer import register_renderer
|
|
15
|
+
from aspice_eval.knowledge_base import register_kb_loader
|
|
16
|
+
|
|
17
|
+
# --- Convenience functions ---
|
|
18
|
+
from aspice_eval.convenience import evaluate_sdp, validate_kb
|
|
19
|
+
|
|
20
|
+
# --- Models ---
|
|
21
|
+
from aspice_eval.models import (
|
|
22
|
+
ModelConfig,
|
|
23
|
+
EvaluationConfig,
|
|
24
|
+
EvaluationResult,
|
|
25
|
+
CriteriaEntry,
|
|
26
|
+
CriteriaRating,
|
|
27
|
+
SDPDocument,
|
|
28
|
+
CapabilityLevelResult,
|
|
29
|
+
ValidationResult,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
# --- Exceptions ---
|
|
33
|
+
from aspice_eval.exceptions import (
|
|
34
|
+
KBValidationError,
|
|
35
|
+
UnsupportedFormatError,
|
|
36
|
+
InvalidConfigError,
|
|
37
|
+
AIModelError,
|
|
38
|
+
AIResponseParseError,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
# Explicit public API of the package. Every name listed here is imported or
# defined earlier in this module; the list controls what
# ``from aspice_eval import *`` exposes.
__all__ = [
    # Version string of the installed package
    "__version__",
    # Core classes
    "KnowledgeBase",
    "GapAnalysisEvaluator",
    "ReportRenderer",
    # Factory & registry functions (evaluators, renderers, KB loaders)
    "create_evaluator",
    "register_evaluator",
    "register_renderer",
    "register_kb_loader",
    # Convenience functions (single-call entry points)
    "evaluate_sdp",
    "validate_kb",
    # Models re-exported from aspice_eval.models
    "ModelConfig",
    "EvaluationConfig",
    "EvaluationResult",
    "CriteriaEntry",
    "CriteriaRating",
    "SDPDocument",
    "CapabilityLevelResult",
    "ValidationResult",
    # Exceptions re-exported from aspice_eval.exceptions
    "KBValidationError",
    "UnsupportedFormatError",
    "InvalidConfigError",
    "AIModelError",
    "AIResponseParseError",
]
|
aspice_eval/cli.py
ADDED
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
"""Click-based CLI entry point for the ASPICE evaluation tool.
|
|
2
|
+
|
|
3
|
+
Provides three commands:
|
|
4
|
+
|
|
5
|
+
- ``evaluate`` — Run a gap analysis of an SDP document against the KB.
|
|
6
|
+
- ``validate-kb`` — Validate the knowledge base schema and completeness.
|
|
7
|
+
- ``version`` — Print the package version.
|
|
8
|
+
|
|
9
|
+
The entry point is ``aspice_eval.cli:main`` (configured in pyproject.toml).
|
|
10
|
+
|
|
11
|
+
Requirements: 3.3, 7.1, 7.2, 7.3, 9.3
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from __future__ import annotations
|
|
15
|
+
|
|
16
|
+
import os
|
|
17
|
+
import pathlib
|
|
18
|
+
import sys
|
|
19
|
+
|
|
20
|
+
import click
|
|
21
|
+
import yaml
|
|
22
|
+
|
|
23
|
+
from aspice_eval import __version__
|
|
24
|
+
from aspice_eval.exceptions import (
|
|
25
|
+
AIModelError,
|
|
26
|
+
AIResponseParseError,
|
|
27
|
+
InvalidConfigError,
|
|
28
|
+
KBValidationError,
|
|
29
|
+
UnsupportedFormatError,
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _validate_config(
    target_level: int,
    groups: list[str],
    kb_path: str,
) -> None:
    """Validate configuration parameters early (fail fast).

    Checks run in this order: capability level range, existence of the
    knowledge base path, and finally -- only when
    ``<kb_path>/aspice/_metadata.yaml`` exists -- membership of every
    requested group in the ``code`` values listed under the metadata's
    ``process_groups`` key. When the metadata file is absent the group
    codes are not checked here.

    Parameters
    ----------
    target_level:
        Requested capability level; must be within 1–5 inclusive.
    groups:
        Process group codes requested by the caller.
    kb_path:
        Path to the knowledge base root directory.

    Raises
    ------
    InvalidConfigError
        If *target_level* is outside 1–5 or *groups* contains unknown codes.
    FileNotFoundError
        If *kb_path* does not exist.
    """
    # Validate target level
    if not 1 <= target_level <= 5:
        raise InvalidConfigError(
            f"Target level {target_level} is out of range. Must be 1–5.",
            parameter="target_level",
            actual_value=target_level,
            expected_values=[1, 2, 3, 4, 5],
        )

    # Validate KB path exists
    kb = pathlib.Path(kb_path)
    if not kb.exists():
        raise FileNotFoundError(
            f"Knowledge base path does not exist: {kb_path}"
        )

    # Load metadata to get valid process group codes
    metadata_path = kb / "aspice" / "_metadata.yaml"
    if not metadata_path.exists():
        return

    # Explicit encoding so the result does not depend on the platform locale.
    with open(metadata_path, encoding="utf-8") as fh:
        # An empty YAML document parses to None; treat it as "no metadata"
        # instead of failing with AttributeError on ``None.get``.
        metadata = yaml.safe_load(fh) or {}

    # ``process_groups: null`` in the file yields None; fall back to an
    # empty list so iteration cannot raise TypeError.
    valid_codes = {
        pg.get("code", "")
        for pg in (metadata.get("process_groups") or [])
    }
    unknown = [g for g in groups if g not in valid_codes]
    if unknown:
        raise InvalidConfigError(
            f"Unknown process group(s): {', '.join(unknown)}. "
            f"Valid groups: {', '.join(sorted(valid_codes))}.",
            parameter="process_groups",
            actual_value=unknown,
            expected_values=sorted(valid_codes),
        )
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# Root click command group. The ``evaluate``, ``validate-kb`` and ``version``
# subcommands defined in this module attach to it via ``@main.command``.
# NOTE: click displays the docstring below as the group's help text, so it is
# user-facing output rather than internal documentation.
@click.group()
def main() -> None:
    """ASPICE evaluation tool for SDP gap analysis."""
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def _env_number(name: str, default: str, cast: type) -> float | int:
    """Read environment variable *name* and convert it with *cast*.

    Falls back to *default* (a string) when the variable is unset. A value
    that *cast* rejects is reported like every other CLI configuration
    problem: a one-line message on stderr and exit status 1.
    """
    raw = os.environ.get(name, default)
    try:
        return cast(raw)
    except ValueError:
        click.echo(
            f"Configuration error: environment variable {name}={raw!r} "
            f"is not a valid {cast.__name__}.",
            err=True,
        )
        sys.exit(1)


@main.command()
@click.option(
    "--sdp",
    required=True,
    type=click.Path(),
    help="Path to the SDP Markdown document.",
)
@click.option(
    "--target-level",
    default=3,
    type=int,
    show_default=True,
    help="Target ASPICE capability level (1–5).",
)
@click.option(
    "--groups",
    default="SWE,SYS,MAN,SUP",
    show_default=True,
    help="Comma-separated process group codes to evaluate.",
)
@click.option(
    "--output",
    default=None,
    type=click.Path(),
    help="Output file path for the report. Defaults to stdout.",
)
@click.option(
    "--kb-path",
    default="knowledge_base",
    show_default=True,
    type=click.Path(),
    help="Path to the knowledge base directory.",
)
@click.option(
    "--model",
    default=None,
    type=str,
    help="AI model name (provider-specific model identifier).",
)
@click.option(
    "--provider",
    default=None,
    type=str,
    help="AI provider name: bedrock, openai, anthropic, mock. "
    "Defaults to ASPICE_EVAL_PROVIDER env var, then 'mock'.",
)
@click.option(
    "--region",
    default=None,
    type=str,
    help="AWS region for Bedrock provider (default us-east-1).",
)
@click.option(
    "--output-format",
    "output_format",
    default="markdown",
    type=click.Choice(["markdown", "html"], case_sensitive=False),
    show_default=True,
    help="Report output format.",
)
def evaluate(
    sdp: str,
    target_level: int,
    groups: str,
    output: str | None,
    kb_path: str,
    model: str | None,
    provider: str | None,
    region: str | None,
    output_format: str,
) -> None:
    """Evaluate an SDP document against ASPICE criteria."""
    # NOTE: the docstring above is shown by click as the command help, so the
    # detailed description lives in comments.
    #
    # Pipeline: resolve settings (flag > environment > default), validate the
    # configuration, load the knowledge base, ingest the SDP, evaluate it with
    # the selected provider, compute capability levels, render the report and
    # write it to --output or stdout. Every handled error prints one line to
    # stderr and exits with status 1.

    # Split the comma-separated list, dropping blanks such as "SWE,,SYS".
    process_groups = [g.strip() for g in groups.split(",") if g.strip()]

    # Resolve provider: CLI flag > env var > default "mock"
    resolved_provider = (
        provider
        or os.environ.get("ASPICE_EVAL_PROVIDER")
        or "mock"
    )

    # Resolve model: CLI flag > env var > provider default
    _model_defaults: dict[str, str] = {
        "bedrock": "us.anthropic.claude-sonnet-4-20250514-v1:0",
        "openai": "gpt-4o",
        "anthropic": "claude-sonnet-4-20250514",
        "mock": "",
    }
    resolved_model = (
        model
        or os.environ.get("ASPICE_EVAL_MODEL")
        or _model_defaults.get(resolved_provider, "")
    )

    # Resolve temperature and max_tokens from the environment (the CLI does
    # not expose them as flags). A malformed value is reported as a
    # configuration error instead of an uncaught ValueError traceback.
    resolved_temperature = _env_number("ASPICE_EVAL_TEMPERATURE", "0.0", float)
    resolved_max_tokens = _env_number("ASPICE_EVAL_MAX_TOKENS", "4096", int)

    # Resolve region: CLI flag > env var > default ""
    # NOTE(review): the --region help text advertises us-east-1 as the
    # default, but an empty string is passed on when neither the flag nor
    # AWS_REGION is set; presumably the provider applies the default - confirm.
    resolved_region = (
        region
        or os.environ.get("AWS_REGION")
        or ""
    )

    try:
        # --- Fail-fast validation ---
        _validate_config(target_level, process_groups, kb_path)

        # Check SDP path exists before doing anything else
        sdp_path = pathlib.Path(sdp)
        if not sdp_path.exists():
            raise FileNotFoundError(f"SDP document not found: {sdp}")

        # --- Wire components ---
        # Imported here rather than at module level, so they load only when
        # the command actually runs.
        from aspice_eval.knowledge_base import KnowledgeBase
        from aspice_eval.level_calculator import CapabilityLevelCalculator
        from aspice_eval.models import EvaluationConfig, ModelConfig
        from aspice_eval.providers import create_evaluator
        from aspice_eval.report_generator import ReportGenerator
        from aspice_eval.sdp_ingester import SDPIngester

        # 1. Load KB
        kb = KnowledgeBase(kb_path)
        kb.load("aspice")
        kb_metadata = kb.get_metadata()

        # 2. Ingest SDP
        ingester = SDPIngester()
        sdp_doc = ingester.ingest(sdp)

        # 3. Get criteria for requested groups and level
        criteria = kb.get_criteria(process_groups, target_level)

        # 4. Build config
        config = EvaluationConfig(
            sdp_path=sdp,
            target_capability_level=target_level,
            process_groups=process_groups,
            kb_path=kb_path,
            output_path=output,
        )

        # 5. Evaluate using provider factory
        model_config = ModelConfig(
            provider=resolved_provider,
            model_name=resolved_model,
            temperature=resolved_temperature,
            max_tokens=resolved_max_tokens,
            region=resolved_region,
        )
        evaluator = create_evaluator(model_config)
        evaluation = evaluator.evaluate(sdp_doc, criteria, config)

        # 6. Calculate capability levels
        calculator = CapabilityLevelCalculator(target_level)
        levels = calculator.calculate(evaluation.ratings, process_groups)

        # 7. Generate report
        reporter = ReportGenerator()
        report = reporter.generate(
            evaluation,
            levels,
            config,
            kb_metadata,
            output_format=output_format,
        )

        # 8. Output
        if output:
            pathlib.Path(output).write_text(report, encoding="utf-8")
            click.echo(f"Report written to {output}")
        else:
            click.echo(report)

    except FileNotFoundError as exc:
        click.echo(f"Error: {exc}", err=True)
        sys.exit(1)
    except UnsupportedFormatError as exc:
        click.echo(
            f"Error: {exc}",
            err=True,
        )
        sys.exit(1)
    except KBValidationError as exc:
        click.echo(f"KB validation error: {exc}", err=True)
        sys.exit(1)
    except InvalidConfigError as exc:
        click.echo(f"Configuration error: {exc}", err=True)
        sys.exit(1)
    except AIModelError as exc:
        click.echo(f"AI model error: {exc}", err=True)
        sys.exit(1)
    except AIResponseParseError as exc:
        click.echo(f"AI response parse error: {exc}", err=True)
        sys.exit(1)
|
|
284
|
+
|
|
285
|
+
|
|
286
|
+
@main.command("validate-kb")
@click.option(
    "--kb-path",
    default="knowledge_base",
    show_default=True,
    type=click.Path(),
    help="Path to the knowledge base directory.",
)
def validate_kb(kb_path: str) -> None:
    """Validate the knowledge base schema and completeness."""
    # Loads the "aspice" standard from *kb_path*, runs the knowledge base's
    # own validation and prints the outcome. Failure details go to stderr,
    # warnings to stdout. Exits with status 1 when validation fails or when
    # the path is missing / the knowledge base raises KBValidationError.
    try:
        if not pathlib.Path(kb_path).exists():
            raise FileNotFoundError(
                f"Knowledge base path does not exist: {kb_path}"
            )

        from aspice_eval.knowledge_base import KnowledgeBase

        knowledge_base = KnowledgeBase(kb_path)
        knowledge_base.load("aspice")
        outcome = knowledge_base.validate()

        if outcome.is_valid:
            click.echo("Knowledge base validation passed.")
        else:
            click.echo("Knowledge base validation FAILED.", err=True)
            # Each non-empty failure category is printed under its heading.
            failure_sections = (
                ("\nSchema errors:", outcome.schema_errors),
                ("\nCompleteness gaps:", outcome.completeness_gaps),
            )
            for heading, entries in failure_sections:
                if entries:
                    click.echo(heading, err=True)
                    for entry in entries:
                        click.echo(f" - {entry}", err=True)

        # Warnings are reported whether or not validation passed.
        if outcome.warnings:
            click.echo("\nWarnings:")
            for warning in outcome.warnings:
                click.echo(f" - {warning}")

        if not outcome.is_valid:
            sys.exit(1)

    except FileNotFoundError as exc:
        click.echo(f"Error: {exc}", err=True)
        sys.exit(1)
    except KBValidationError as exc:
        click.echo(f"KB validation error: {exc}", err=True)
        sys.exit(1)
|
|
338
|
+
|
|
339
|
+
|
|
340
|
+
@main.command()
def version() -> None:
    """Print the package version."""
    # Single line of output: the distribution name followed by the version
    # string imported from the package root.
    banner = f"aspice-eval {__version__}"
    click.echo(banner)
|
|
aspice_eval/convenience.py
ADDED
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
"""High-level convenience functions for the ASPICE evaluation engine.
|
|
2
|
+
|
|
3
|
+
Provides :func:`evaluate_sdp` and :func:`validate_kb` — single-call
|
|
4
|
+
entry points that orchestrate the full evaluation and validation
|
|
5
|
+
pipelines without requiring users to wire together multiple classes.
|
|
6
|
+
|
|
7
|
+
Requirements: 11.1–11.6, 12.1–12.5, 22.6
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from __future__ import annotations
|
|
11
|
+
|
|
12
|
+
import pathlib
|
|
13
|
+
from typing import Any
|
|
14
|
+
|
|
15
|
+
from aspice_eval.knowledge_base import KnowledgeBase, get_kb_loader
|
|
16
|
+
from aspice_eval.level_calculator import CapabilityLevelCalculator
|
|
17
|
+
from aspice_eval.models import (
|
|
18
|
+
EvaluationConfig,
|
|
19
|
+
EvaluationResult,
|
|
20
|
+
ModelConfig,
|
|
21
|
+
ValidationResult,
|
|
22
|
+
)
|
|
23
|
+
from aspice_eval.providers import create_evaluator
|
|
24
|
+
from aspice_eval.sdp_ingester import SDPIngester
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _resolve_default_kb_path() -> str:
    """Resolve the bundled knowledge base path.

    Looks for a ``knowledge_base`` directory first inside the package
    directory (the directory containing this module) and then two levels
    above it (presumably the repository root of a source checkout). The
    first match wins.

    Returns
    -------
    str
        Absolute path to the bundled knowledge base directory.

    Raises
    ------
    FileNotFoundError
        If the bundled knowledge base cannot be located.
    """
    pkg_root = pathlib.Path(__file__).resolve().parent
    search_roots = (pkg_root, pkg_root.parent.parent)
    for root in search_roots:
        candidate = root / "knowledge_base"
        # is_dir() rather than exists(): a stray regular file that happens to
        # be named "knowledge_base" is not a usable knowledge base directory.
        if candidate.is_dir():
            return str(candidate)
    raise FileNotFoundError(
        "Bundled knowledge base not found. Provide an explicit kb_path."
    )
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def evaluate_sdp(
    sdp_path: str,
    model_config: ModelConfig,
    *,
    target_level: int = 3,
    process_groups: list[str] | None = None,
    kb_path: str | None = None,
    standard: str = "aspice",
) -> EvaluationResult:
    """Run the whole gap-analysis pipeline for one SDP document.

    Steps, in execution order: check that the SDP and knowledge base paths
    exist, load the knowledge base for *standard* (using a registered custom
    loader when one exists), select the criteria for the requested groups
    and level, ingest the SDP, evaluate it with the evaluator built from
    *model_config*, and compute capability levels per group.

    Parameters
    ----------
    sdp_path:
        Path to the SDP Markdown file.
    model_config:
        AI model configuration handed to ``create_evaluator``.
    target_level:
        Target ASPICE capability level (1–5). Defaults to 3.
    process_groups:
        Process groups to evaluate. ``None`` selects
        ``["SWE", "SYS", "MAN", "SUP"]``.
    kb_path:
        Knowledge base directory. ``None`` selects the bundled knowledge
        base.
    standard:
        Standard identifier passed to the knowledge base loader.
        Defaults to ``"aspice"``.

    Returns
    -------
    EvaluationResult
        The evaluator's result. Its ``sdp_metadata["capability_levels"]``
        entry is set to a mapping of group code to ``achieved_level``,
        ``target_level`` and ``blocking_attributes``.

    Raises
    ------
    FileNotFoundError
        If ``sdp_path`` or the resolved knowledge base path does not exist.

    Notes
    -----
    Errors raised by the knowledge base, the ingester or the evaluator are
    not caught here and propagate to the caller (presumably including
    ``UnsupportedFormatError``, ``InvalidConfigError`` and ``AIModelError``;
    confirm against those components).

    Examples
    --------
    >>> from aspice_eval.convenience import evaluate_sdp
    >>> from aspice_eval.models import ModelConfig
    >>> result = evaluate_sdp(
    ...     "docs/sdp.md",
    ...     ModelConfig(provider="mock", model_name=""),
    ...     process_groups=["SWE"],
    ... )
    >>> levels = result.sdp_metadata["capability_levels"]
    """
    # Guard: the SDP must exist before any heavier work starts.
    if not pathlib.Path(sdp_path).exists():
        raise FileNotFoundError(f"SDP file does not exist: {sdp_path}")

    # Guard: fall back to the bundled knowledge base when none was given.
    if kb_path is None:
        kb_root = _resolve_default_kb_path()
    else:
        kb_root = kb_path
    if not pathlib.Path(kb_root).exists():
        raise FileNotFoundError(
            f"Knowledge base path does not exist: {kb_root}"
        )

    selected_groups = (
        ["SWE", "SYS", "MAN", "SUP"] if process_groups is None else process_groups
    )

    # A loader registered for this standard takes precedence over the
    # built-in KnowledgeBase class.
    loader_cls = get_kb_loader(standard)
    if loader_cls is None:
        loader_cls = KnowledgeBase
    knowledge_base = loader_cls(kb_root)
    knowledge_base.load(standard)
    criteria = knowledge_base.get_criteria(selected_groups, target_level)

    document = SDPIngester().ingest(sdp_path)

    run_config = EvaluationConfig(
        sdp_path=sdp_path,
        target_capability_level=target_level,
        process_groups=selected_groups,
        kb_path=kb_root,
        standard=standard,
    )

    outcome = create_evaluator(model_config).evaluate(
        document, criteria, run_config
    )

    level_results = CapabilityLevelCalculator(target_level).calculate(
        outcome.ratings, selected_groups
    )

    # Flatten the per-group level results into plain dicts and attach them to
    # the evaluation metadata.
    level_summary = {}
    for group_code, level in level_results.items():
        level_summary[group_code] = {
            "achieved_level": level.achieved_level,
            "target_level": level.target_level,
            "blocking_attributes": level.blocking_attributes,
        }
    outcome.sdp_metadata["capability_levels"] = level_summary

    return outcome
|
|
172
|
+
|
|
173
|
+
|
|
174
|
+
def validate_kb(
    kb_path: str,
    *,
    standard: str = "aspice",
) -> ValidationResult:
    """Load one standard from a knowledge base directory and validate it.

    Parameters
    ----------
    kb_path:
        Path to the knowledge base root directory.
    standard:
        Standard identifier to load before validating. Defaults to
        ``"aspice"``.

    Returns
    -------
    ValidationResult
        Whatever ``KnowledgeBase.validate()`` returns for the loaded
        standard.

    Raises
    ------
    FileNotFoundError
        If ``kb_path`` does not exist.

    Examples
    --------
    >>> from aspice_eval.convenience import validate_kb
    >>> outcome = validate_kb("knowledge_base")
    >>> outcome.is_valid
    True
    """
    kb_root = pathlib.Path(kb_path)
    if kb_root.exists():
        knowledge_base = KnowledgeBase(kb_path)
        knowledge_base.load(standard)
        return knowledge_base.validate()

    raise FileNotFoundError(
        f"Knowledge base path does not exist: {kb_path}"
    )
|