probegen 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. probegen-0.1.0/PKG-INFO +96 -0
  2. probegen-0.1.0/README.md +71 -0
  3. probegen-0.1.0/probegen/__init__.py +5 -0
  4. probegen-0.1.0/probegen/__main__.py +7 -0
  5. probegen-0.1.0/probegen/cli/__init__.py +1 -0
  6. probegen-0.1.0/probegen/cli/doctor_cmd.py +146 -0
  7. probegen-0.1.0/probegen/cli/embed_batch.py +54 -0
  8. probegen-0.1.0/probegen/cli/find_similar.py +76 -0
  9. probegen-0.1.0/probegen/cli/get_behavior_diff.py +255 -0
  10. probegen-0.1.0/probegen/cli/init_cmd.py +542 -0
  11. probegen-0.1.0/probegen/cli/main.py +35 -0
  12. probegen-0.1.0/probegen/cli/post_comment.py +98 -0
  13. probegen-0.1.0/probegen/cli/resolve_run_id.py +64 -0
  14. probegen-0.1.0/probegen/cli/run_stage.py +113 -0
  15. probegen-0.1.0/probegen/cli/setup_mcp.py +62 -0
  16. probegen-0.1.0/probegen/cli/write_probes.py +200 -0
  17. probegen-0.1.0/probegen/config.py +158 -0
  18. probegen-0.1.0/probegen/context.py +196 -0
  19. probegen-0.1.0/probegen/errors.py +72 -0
  20. probegen-0.1.0/probegen/export.py +111 -0
  21. probegen-0.1.0/probegen/github.py +299 -0
  22. probegen-0.1.0/probegen/integrations/__init__.py +1 -0
  23. probegen-0.1.0/probegen/integrations/braintrust.py +110 -0
  24. probegen-0.1.0/probegen/integrations/langsmith.py +108 -0
  25. probegen-0.1.0/probegen/integrations/phoenix.py +115 -0
  26. probegen-0.1.0/probegen/integrations/promptfoo.py +128 -0
  27. probegen-0.1.0/probegen/models/__init__.py +34 -0
  28. probegen-0.1.0/probegen/models/_base.py +9 -0
  29. probegen-0.1.0/probegen/models/eval_case.py +117 -0
  30. probegen-0.1.0/probegen/models/manifests.py +122 -0
  31. probegen-0.1.0/probegen/models/probes.py +98 -0
  32. probegen-0.1.0/probegen/models/raw_change_data.py +64 -0
  33. probegen-0.1.0/probegen/prompts/__init__.py +1 -0
  34. probegen-0.1.0/probegen/prompts/stage1_template.py +82 -0
  35. probegen-0.1.0/probegen/prompts/stage2_template.py +38 -0
  36. probegen-0.1.0/probegen/prompts/stage3_template.py +156 -0
  37. probegen-0.1.0/probegen/stages/__init__.py +1 -0
  38. probegen-0.1.0/probegen/stages/_common.py +168 -0
  39. probegen-0.1.0/probegen/stages/stage1.py +39 -0
  40. probegen-0.1.0/probegen/stages/stage2.py +39 -0
  41. probegen-0.1.0/probegen/stages/stage3.py +57 -0
  42. probegen-0.1.0/probegen/tools/__init__.py +1 -0
  43. probegen-0.1.0/probegen/tools/embedding.py +216 -0
  44. probegen-0.1.0/probegen/tools/similarity.py +83 -0
  45. probegen-0.1.0/probegen/write_probes.py +7 -0
  46. probegen-0.1.0/probegen.egg-info/PKG-INFO +96 -0
  47. probegen-0.1.0/probegen.egg-info/SOURCES.txt +51 -0
  48. probegen-0.1.0/probegen.egg-info/dependency_links.txt +1 -0
  49. probegen-0.1.0/probegen.egg-info/entry_points.txt +2 -0
  50. probegen-0.1.0/probegen.egg-info/requires.txt +17 -0
  51. probegen-0.1.0/probegen.egg-info/top_level.txt +1 -0
  52. probegen-0.1.0/pyproject.toml +50 -0
  53. probegen-0.1.0/setup.cfg +4 -0
@@ -0,0 +1,96 @@
1
+ Metadata-Version: 2.4
2
+ Name: probegen
3
+ Version: 0.1.0
4
+ Summary: Change-coupled eval probe generation for LLM systems
5
+ Author: OpenAI Codex
6
+ License: MIT
7
+ Requires-Python: >=3.11
8
+ Description-Content-Type: text/markdown
9
+ Requires-Dist: arize-phoenix-client==2.0.0
10
+ Requires-Dist: braintrust==0.9.0
11
+ Requires-Dist: claude-agent-sdk==0.1.48
12
+ Requires-Dist: click==8.3.1
13
+ Requires-Dist: httpx==0.28.1
14
+ Requires-Dist: langsmith==0.7.17
15
+ Requires-Dist: numpy==2.4.3
16
+ Requires-Dist: openai==2.28.0
17
+ Requires-Dist: pydantic==2.12.5
18
+ Requires-Dist: PyYAML==6.0.3
19
+ Requires-Dist: rich==14.3.3
20
+ Requires-Dist: tiktoken==0.12.0
21
+ Provides-Extra: dev
22
+ Requires-Dist: pytest==9.0.2; extra == "dev"
23
+ Requires-Dist: pytest-cov==7.0.0; extra == "dev"
24
+ Requires-Dist: respx==0.22.0; extra == "dev"
25
+
26
+ # Probegen
27
+
28
+ Probegen detects behaviorally significant pull request changes in LLM systems and proposes targeted evaluation probes for review before writing them to an evaluation platform. Probegen is **non-blocking** — it runs as a parallel CI job and never prevents PR merges.
29
+
30
+ ## What it does
31
+
32
+ Probegen runs in CI on pull requests. It:
33
+
34
+ 1. Detects changes to prompts, instructions, guardrails, validators, tool descriptions, classifiers, retry policies, output schemas, and other agent harness artifacts that are likely to alter agent behavior.
35
+ 2. Retrieves nearby evaluation coverage from your existing eval stack when mappings exist.
36
+ 3. Falls back to starter probe generation when no eval corpus exists yet.
37
+ 4. Generates ranked probe proposals tailored to the specific change, including multi-turn conversational probes when the agent is conversational.
38
+ 5. Exports those probes as files and, after explicit approval, writes them to the configured platform.
39
+
40
+ Probegen is not an eval runner. It generates eval inputs that plug into LangSmith, Braintrust, Arize Phoenix, Promptfoo, or file-based workflows.
41
+
42
+ Probegen works out of the box even if you have no evals yet. In that case it generates plausible starter probes from the diff, system prompt or guardrails, and whatever product context you provide. The more eval coverage and product detail you give it, the sharper its novelty detection and boundary analysis become.
43
+
44
+ ## Prerequisites
45
+
46
+ - Python 3.11+
47
+ - Node.js 22+ — required in CI by the GitHub Action (installed automatically). Only needed locally if running `probegen run-stage` directly.
48
+ - An Anthropic API key
49
+ - An eval platform API key only if you want direct platform integration or automatic writeback
50
+
51
+ ## Quick Start (GitHub Action)
52
+
53
+ 1. Install the package: `pip install probegen`
54
+ 2. Run interactive setup: `probegen init` — generates `probegen.yaml`, workflow file, and `context/` stubs
55
+ 3. Fill in `context/product.md` and `context/bad_examples.md` (and other context files for best results)
56
+ 4. Add GitHub secrets:
57
+
58
+ | Secret | Purpose | Where to get it |
59
+ |---|---|---|
60
+ | `ANTHROPIC_API_KEY` | Required — powers all three stages | console.anthropic.com → API Keys |
61
+ | `OPENAI_API_KEY` | Required for coverage-aware mode | platform.openai.com → API Keys |
62
+ | `LANGSMITH_API_KEY` | If using LangSmith | smith.langchain.com → Settings |
63
+ | `BRAINTRUST_API_KEY` | If using Braintrust | braintrust.dev → Settings |
64
+ | `PHOENIX_API_KEY` | If using Arize Phoenix | app.phoenix.arize.com → Settings |
65
+
66
+ 5. Create the approval label in GitHub:
67
+ ```
68
+ gh label create "probegen:approve" --color 0075ca --description "Approve Probegen probe writeback"
69
+ ```
70
+ 6. Commit `probegen.yaml`, `.github/workflows/probegen.yml`, and `context/`.
71
+ 7. Open a PR that touches a prompt or guardrail.
72
+ 8. Run `probegen doctor` to verify your setup.
73
+
74
+ ## Cost control
75
+
76
+ Each stage has a configurable Anthropic API spend budget (see `budgets:` in `probegen.yaml`). Typical costs per PR:
77
+
78
+ - Stage 1 (change detection): $0.05–0.30
79
+ - Stage 2 (coverage analysis): $0.10–0.50
80
+ - Stage 3 (probe generation): $0.10–0.60
81
+
82
+ Increase budget limits if stages time out on large diffs or complex repos.
83
+
84
+ ## Advanced Configuration
85
+
86
+ The full configuration reference is available in [probegen.yaml.example](probegen.yaml.example).
87
+
88
+ ## Real example quickstart
89
+
90
+ If you want to test Probegen against a real LangGraph repo instead of wiring everything from scratch, use the in-repo demo under [examples/langgraph-agentic-rag](examples/langgraph-agentic-rag) and follow [examples/langgraph-agentic-rag/docs/quickstart.md](examples/langgraph-agentic-rag/docs/quickstart.md).
91
+
92
+ ## Context pack and trace safety
93
+
94
+ Probegen works without a context pack, but probe quality drops significantly. At minimum, fill in product context and known failure modes. This matters even more in starter mode, where Probegen has no existing eval corpus to compare against.
95
+
96
+ Probegen does not sanitize production traces. If you add files under `context/traces/`, anonymize them first. Remove names, emails, account IDs, and any other sensitive data before committing them.
@@ -0,0 +1,71 @@
1
+ # Probegen
2
+
3
+ Probegen detects behaviorally significant pull request changes in LLM systems and proposes targeted evaluation probes for review before writing them to an evaluation platform. Probegen is **non-blocking** — it runs as a parallel CI job and never prevents PR merges.
4
+
5
+ ## What it does
6
+
7
+ Probegen runs in CI on pull requests. It:
8
+
9
+ 1. Detects changes to prompts, instructions, guardrails, validators, tool descriptions, classifiers, retry policies, output schemas, and other agent harness artifacts that are likely to alter agent behavior.
10
+ 2. Retrieves nearby evaluation coverage from your existing eval stack when mappings exist.
11
+ 3. Falls back to starter probe generation when no eval corpus exists yet.
12
+ 4. Generates ranked probe proposals tailored to the specific change, including multi-turn conversational probes when the agent is conversational.
13
+ 5. Exports those probes as files and, after explicit approval, writes them to the configured platform.
14
+
15
+ Probegen is not an eval runner. It generates eval inputs that plug into LangSmith, Braintrust, Arize Phoenix, Promptfoo, or file-based workflows.
16
+
17
+ Probegen works out of the box even if you have no evals yet. In that case it generates plausible starter probes from the diff, system prompt or guardrails, and whatever product context you provide. The more eval coverage and product detail you give it, the sharper its novelty detection and boundary analysis become.
18
+
19
+ ## Prerequisites
20
+
21
+ - Python 3.11+
22
+ - Node.js 22+ — required in CI by the GitHub Action (installed automatically). Only needed locally if running `probegen run-stage` directly.
23
+ - An Anthropic API key
24
+ - An eval platform API key only if you want direct platform integration or automatic writeback
25
+
26
+ ## Quick Start (GitHub Action)
27
+
28
+ 1. Install the package: `pip install probegen`
29
+ 2. Run interactive setup: `probegen init` — generates `probegen.yaml`, workflow file, and `context/` stubs
30
+ 3. Fill in `context/product.md` and `context/bad_examples.md` (and other context files for best results)
31
+ 4. Add GitHub secrets:
32
+
33
+ | Secret | Purpose | Where to get it |
34
+ |---|---|---|
35
+ | `ANTHROPIC_API_KEY` | Required — powers all three stages | console.anthropic.com → API Keys |
36
+ | `OPENAI_API_KEY` | Required for coverage-aware mode | platform.openai.com → API Keys |
37
+ | `LANGSMITH_API_KEY` | If using LangSmith | smith.langchain.com → Settings |
38
+ | `BRAINTRUST_API_KEY` | If using Braintrust | braintrust.dev → Settings |
39
+ | `PHOENIX_API_KEY` | If using Arize Phoenix | app.phoenix.arize.com → Settings |
40
+
41
+ 5. Create the approval label in GitHub:
42
+ ```
43
+ gh label create "probegen:approve" --color 0075ca --description "Approve Probegen probe writeback"
44
+ ```
45
+ 6. Commit `probegen.yaml`, `.github/workflows/probegen.yml`, and `context/`.
46
+ 7. Open a PR that touches a prompt or guardrail.
47
+ 8. Run `probegen doctor` to verify your setup.
48
+
49
+ ## Cost control
50
+
51
+ Each stage has a configurable Anthropic API spend budget (see `budgets:` in `probegen.yaml`). Typical costs per PR:
52
+
53
+ - Stage 1 (change detection): $0.05–0.30
54
+ - Stage 2 (coverage analysis): $0.10–0.50
55
+ - Stage 3 (probe generation): $0.10–0.60
56
+
57
+ Increase budget limits if stages time out on large diffs or complex repos.
58
+
59
+ ## Advanced Configuration
60
+
61
+ The full configuration reference is available in [probegen.yaml.example](probegen.yaml.example).
62
+
63
+ ## Real example quickstart
64
+
65
+ If you want to test Probegen against a real LangGraph repo instead of wiring everything from scratch, use the in-repo demo under [examples/langgraph-agentic-rag](examples/langgraph-agentic-rag) and follow [examples/langgraph-agentic-rag/docs/quickstart.md](examples/langgraph-agentic-rag/docs/quickstart.md).
66
+
67
+ ## Context pack and trace safety
68
+
69
+ Probegen works without a context pack, but probe quality drops significantly. At minimum, fill in product context and known failure modes. This matters even more in starter mode, where Probegen has no existing eval corpus to compare against.
70
+
71
+ Probegen does not sanitize production traces. If you add files under `context/traces/`, anonymize them first. Remove names, emails, account IDs, and any other sensitive data before committing them.
@@ -0,0 +1,5 @@
1
from __future__ import annotations

# The package's public API is just the version marker.
__all__ = ["__version__"]

# Keep in sync with the `version` field in pyproject.toml.
__version__ = "0.1.0"
@@ -0,0 +1,7 @@
1
from __future__ import annotations

from probegen.cli.main import cli


# Entry point for `python -m probegen`: delegate straight to the Click CLI.
if __name__ == "__main__":
    cli()
@@ -0,0 +1 @@
1
+ from __future__ import annotations
@@ -0,0 +1,146 @@
1
+ from __future__ import annotations
2
+
3
+ import fnmatch
4
+ import os
5
+ import subprocess
6
+ from pathlib import Path
7
+
8
+ import click
9
+
10
+ from probegen.config import ProbegenConfig
11
+ from probegen.errors import ConfigError
12
+
13
+
14
+ def _git_ls_files(cwd: Path) -> list[str]:
15
+ try:
16
+ completed = subprocess.run(
17
+ ["git", "ls-files"],
18
+ cwd=cwd,
19
+ check=True,
20
+ capture_output=True,
21
+ text=True,
22
+ )
23
+ return completed.stdout.splitlines()
24
+ except (subprocess.CalledProcessError, FileNotFoundError):
25
+ return []
26
+
27
+
28
@click.command("doctor")
@click.option("--config", "config_path", default="probegen.yaml", show_default=True, type=click.Path(dir_okay=False, path_type=Path))
@click.option("--ci", is_flag=True, help="Run additional CI-specific checks (requires GITHUB_TOKEN).")
def doctor_command(config_path: Path, ci: bool) -> None:
    """Verify Probegen setup and report any issues.

    Runs a sequence of (passed, message) checks — config presence/validity,
    required API keys, hint-pattern coverage, context files, and (with --ci)
    the GitHub approval label — then prints a ✓/✗ summary.
    """
    checks: list[tuple[bool, str]] = []
    root = Path.cwd()

    # Check 1: probegen.yaml exists. Without it nothing else can be checked,
    # so short-circuit with a pointer to `probegen init`.
    config_exists = config_path.exists()
    checks.append((config_exists, f"probegen.yaml found at {config_path}"))
    if not config_exists:
        click.echo(_format_checks(checks))
        # Fix: this was an f-string with no placeholders (ruff F541).
        click.echo("\nRun `probegen init` to create probegen.yaml.")
        return

    # Check 2: config parses and validates.
    config: ProbegenConfig | None = None
    try:
        config = ProbegenConfig.load(config_path, allow_missing=False)
        checks.append((True, "probegen.yaml is valid"))
    except ConfigError as exc:
        checks.append((False, f"probegen.yaml has errors: {exc}"))

    if config is not None:
        # Check 3: the Anthropic key powers all stages and is always required.
        anthropic_key = os.environ.get("ANTHROPIC_API_KEY", "")
        checks.append((bool(anthropic_key), "ANTHROPIC_API_KEY is set"))

        # Check 4: platform keys, only for the platforms actually configured.
        if config.platforms.langsmith:
            key_name = config.platforms.langsmith.api_key_env
            checks.append((bool(os.environ.get(key_name)), f"{key_name} is set (langsmith)"))

        if config.platforms.braintrust:
            key_name = config.platforms.braintrust.api_key_env
            checks.append((bool(os.environ.get(key_name)), f"{key_name} is set (braintrust)"))

        if config.platforms.arize_phoenix:
            key_name = config.platforms.arize_phoenix.api_key_env
            checks.append((bool(os.environ.get(key_name)), f"{key_name} is set (arize_phoenix)"))

        # Check 5: coverage-aware mode (mappings configured) needs OpenAI
        # embeddings, hence OPENAI_API_KEY.
        if config.mappings:
            openai_key = os.environ.get("OPENAI_API_KEY", "")
            checks.append((bool(openai_key), "OPENAI_API_KEY is set (required for coverage-aware mode)"))

        # Check 6: every hint pattern should match at least one tracked file;
        # a zero-match pattern usually means a typo in probegen.yaml.
        tracked_files = _git_ls_files(root)
        if tracked_files:
            all_patterns = [*config.behavior_artifacts.paths, *config.guardrail_artifacts.paths]
            if all_patterns:
                for pattern in all_patterns:
                    # NOTE(review): only behavior_artifacts.exclude is applied,
                    # even for guardrail patterns — confirm whether
                    # guardrail_artifacts should carry its own exclude list.
                    matched = [
                        f for f in tracked_files
                        if fnmatch.fnmatch(f, pattern)
                        and not any(fnmatch.fnmatch(f, ex) for ex in config.behavior_artifacts.exclude)
                    ]
                    checks.append((
                        bool(matched),
                        f"Pattern '{pattern}' matches {len(matched)} tracked file(s)",
                    ))
            else:
                checks.append((False, "No hint patterns configured in behavior_artifacts or guardrail_artifacts"))

        # Check 7: key context files exist and are non-empty (empty stubs
        # degrade probe quality, so they are flagged).
        context_files = [
            config.context.product,
            config.context.bad_examples,
        ]
        for rel_path in context_files:
            full_path = root / rel_path
            non_empty = full_path.exists() and full_path.stat().st_size > 0
            checks.append((non_empty, f"Context file {rel_path} exists and is non-empty"))

        # Check 8: in CI, verify the approval label exists on the repository.
        if ci:
            token = os.environ.get("GITHUB_TOKEN", "")
            repo = os.environ.get("GITHUB_REPOSITORY", "")
            label_name = config.approval.label
            if token and repo:
                label_ok = _check_github_label(repo, token, label_name)
                checks.append((label_ok, f"GitHub label '{label_name}' exists in {repo}"))
            else:
                checks.append((False, "GITHUB_TOKEN or GITHUB_REPOSITORY not set — skipping label check"))

    click.echo(_format_checks(checks))
    passed = sum(1 for ok, _ in checks if ok)
    total = len(checks)
    click.echo(f"\n{passed}/{total} checks passed.")
118
+
119
+
120
def _check_github_label(repo: str, token: str, label_name: str) -> bool:
    """Return True when the label exists on *repo* (HTTP 200), else False."""
    url = f"https://api.github.com/repos/{repo}/labels/{label_name}"
    headers = {
        "Accept": "application/vnd.github+json",
        "Authorization": f"Bearer {token}",
        "X-GitHub-Api-Version": "2022-11-28",
    }
    try:
        # Imported lazily so the CLI loads even without httpx installed.
        import httpx

        return httpx.get(url, headers=headers, timeout=10.0).status_code == 200
    except Exception:
        # Missing dependency, network failure, or bad credentials — all
        # collapse to "label not confirmed".
        return False
135
+
136
+
137
+ def _format_checks(checks: list[tuple[bool, str]]) -> str:
138
+ lines = []
139
+ for ok, message in checks:
140
+ symbol = "✓" if ok else "✗"
141
+ lines.append(f" {symbol} {message}")
142
+ return "\n".join(lines)
143
+
144
+
145
# Allow running this module directly during development.
if __name__ == "__main__":
    doctor_command()
@@ -0,0 +1,54 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ import click
8
+
9
+ from probegen.errors import EmbeddingError
10
+ from probegen.tools.embedding import embed_batch
11
+
12
+
13
@click.command("embed-batch")
@click.option("--inputs", "inputs_path", required=True, type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--output", "output_path", required=True, type=click.Path(dir_okay=False, path_type=Path))
@click.option("--model", default="text-embedding-3-small", show_default=True)
@click.option("--cache", "cache_path", default=".probegen/embedding_cache.db", show_default=True, type=click.Path(path_type=Path))
@click.option("--dimensions", type=int, default=None)
def embed_batch_command(
    inputs_path: Path,
    output_path: Path,
    model: str,
    cache_path: Path,
    dimensions: int | None,
) -> None:
    """Embed the JSON payload at --inputs and write the vectors to --output."""
    payload = json.loads(inputs_path.read_text(encoding="utf-8"))

    try:
        embeddings, cache_warning = embed_batch(
            payload,
            model=model,
            cache_path=cache_path,
            dimensions=dimensions,
        )
    except EmbeddingError as exc:
        # Report to stderr, then exit non-zero.
        click.echo(str(exc), err=True)
        raise SystemExit(1) from exc

    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(embeddings, indent=2), encoding="utf-8")

    if cache_warning:
        # Cache trouble is non-fatal: the vectors were computed and saved.
        click.echo(
            "probegen embed-batch: embedding cache warning — some cache reads or writes failed; "
            "embeddings are still valid and have been written to the output file.",
            err=True,
        )
    raise SystemExit(0)
46
+
47
+
48
def _emit_error(message: str, code: int) -> int:
    """Write *message* to stderr and hand back *code* unchanged.

    Returning the code lets callers exit in one expression:
    ``raise SystemExit(_emit_error(msg, 1))``.
    """
    click.echo(message, err=True)
    return code


# Allow running this module directly during development.
if __name__ == "__main__":
    embed_batch_command()
@@ -0,0 +1,76 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ import click
8
+
9
+ from probegen.errors import EmbeddingError
10
+ from probegen.tools.embedding import embed_batch
11
+ from probegen.tools.similarity import classify_similarity, cosine_similarity
12
+
13
+
14
@click.command("find-similar")
@click.option("--candidate", "candidate_path", required=True, type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--corpus", "corpus_path", required=True, type=click.Path(exists=True, dir_okay=False, path_type=Path))
@click.option("--output", "output_path", required=True, type=click.Path(dir_okay=False, path_type=Path))
@click.option("--duplicate-threshold", default=0.88, show_default=True, type=float)
@click.option("--boundary-threshold", default=0.72, show_default=True, type=float)
@click.option("--model", default="text-embedding-3-small", show_default=True)
@click.option("--cache", "cache_path", default=".probegen/embedding_cache.db", show_default=True, type=click.Path(path_type=Path))
@click.option("--dimensions", type=int, default=None)
def find_similar_command(
    candidate_path: Path,
    corpus_path: Path,
    output_path: Path,
    duplicate_threshold: float,
    boundary_threshold: float,
    model: str,
    cache_path: Path,
    dimensions: int | None,
) -> None:
    """Rank corpus entries by similarity to the candidate and write a report."""
    candidate = json.loads(candidate_path.read_text(encoding="utf-8"))
    corpus = json.loads(corpus_path.read_text(encoding="utf-8"))

    try:
        embedded, _ = embed_batch(
            [candidate],
            model=model,
            cache_path=cache_path,
            dimensions=dimensions,
        )
    except EmbeddingError as exc:
        click.echo(str(exc), err=True)
        raise SystemExit(1) from exc

    query_vector = embedded[0]["embedding"]

    # Score every corpus entry against the candidate embedding.
    scored: list[dict[str, Any]] = []
    for entry in corpus:
        similarity = cosine_similarity(query_vector, entry["embedding"])
        label = classify_similarity(
            similarity,
            duplicate_threshold=duplicate_threshold,
            boundary_threshold=boundary_threshold,
        )
        scored.append(
            {
                "corpus_id": entry["id"],
                "similarity": similarity,
                "classification": label,
            }
        )

    scored.sort(key=lambda row: row["similarity"], reverse=True)
    best = scored[0] if scored else None

    # An empty corpus yields no match and an "novel" overall classification.
    report = {
        "candidate_id": candidate["id"],
        "results": scored,
        "top_match": best,
        "max_similarity": best["similarity"] if best else 0.0,
        "overall_classification": best["classification"] if best else "novel",
    }
    output_path.parent.mkdir(parents=True, exist_ok=True)
    output_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
73
+
74
+
75
# Allow running this module directly during development.
if __name__ == "__main__":
    find_similar_command()