@minhpnq1807/contextos 0.5.52 → 0.5.53
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/README.md +38 -9
- package/bin/ctx.js +1 -1
- package/eval/skill-routing/cases.yaml +1 -1
- package/package.json +1 -1
- package/plugins/ctx/.codex-plugin/plugin.json +1 -1
- package/plugins/ctx/lib/skill-discoverer.js +13 -6
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,12 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.5.53
|
|
4
|
+
|
|
5
|
+
- **Optional adapter positioning:** Clarified that ContextOS core works standalone and that `code-review-graph`, `codegraph`, and `agent-memory` are optional adapters. Skill Router scoring now exposes separate `importGraphScore`, `externalGraphScore`, and `memoryScore` fields so missing adapters degrade to zero score instead of becoming install/runtime requirements.
|
|
6
|
+
- **Adapter-aware benchmark update:** Updated the Skill Router formula to reserve explicit weights for local import graph, optional external graph, and optional memory adapters. The 52-case internal benchmark now reports Top-1 Accuracy 94.2%, Top-3 Recall 94.2%, False Positive Rate 0.0%, Confidence Calibration 100.0%, and Negative Gate Accuracy 100.0%.
|
|
7
|
+
- **Release safety docs:** Added a README safety model covering standalone install, optional adapters, fail-open hooks, local-only telemetry, no hook network calls, and no postinstall behavior.
|
|
8
|
+
- **Launch roadmap template:** Added a GitHub issue template for release hardening, README polish, benchmarks, optional adapters, setup, and telemetry roadmap work.
|
|
9
|
+
|
|
3
10
|
## 0.5.52
|
|
4
11
|
|
|
5
12
|
- **Release candidate polish:** Updated README positioning around ContextOS as a runtime context router, added npm/CI/license badges, a same-prompt/different-repo demo section, a benchmark table, a 30-second install callout, and an AGENTS.md vs RAG vs ContextOS comparison table.
|
package/README.md
CHANGED
|
@@ -46,7 +46,7 @@ Skill Router internal fixture benchmark:
|
|
|
46
46
|
| Metric | Result |
|
|
47
47
|
| --- | ---: |
|
|
48
48
|
| Cases | 52 |
|
|
49
|
-
| Top-1 Accuracy |
|
|
49
|
+
| Top-1 Accuracy | 94.2% |
|
|
50
50
|
| Top-3 Recall | 94.2% |
|
|
51
51
|
| False Positive Rate | 0.0% |
|
|
52
52
|
| Confidence Calibration | 100.0% |
|
|
@@ -145,6 +145,21 @@ The problem is not that agents cannot read `AGENTS.md`. The problem is that larg
|
|
|
145
145
|
| Generic RAG | Semantically related files or snippets. | It usually does not route skills/workflows or prove rule compliance. |
|
|
146
146
|
| ContextOS | Task-routed rules, files, skills, workflows, and evidence. | Requires local setup and warm indexes for best results. |
|
|
147
147
|
|
|
148
|
+
## Safety Model
|
|
149
|
+
|
|
150
|
+
ContextOS is designed to be OSS-friendly and low-friction:
|
|
151
|
+
|
|
152
|
+
| Guarantee | Behavior |
|
|
153
|
+
| --- | --- |
|
|
154
|
+
| Standalone by default | `ctx setup` works without `code-review-graph`, `codegraph`, or `agent-memory`. |
|
|
155
|
+
| Optional adapters | Graph and memory backends add signal when available; missing adapters contribute score `0`. |
|
|
156
|
+
| Fail-open hooks | Prompt hooks return local context or nothing instead of blocking the agent when MCP, embeddings, graph, or memory is unavailable. |
|
|
157
|
+
| Local-only telemetry | Reports, prompt history, evidence, and telemetry stay under `~/.ctx/contextos/`. |
|
|
158
|
+
| No hook network calls | Prompt and stop hooks do not call external services. Install/warm commands may download the local embedding model when explicitly run. |
|
|
159
|
+
| No postinstall surprise | `npm install` only installs the CLI. Setup runs only when you call `ctx setup`. |
|
|
160
|
+
|
|
161
|
+
Positioning: ContextOS works standalone and gets smarter when graph or memory adapters are available.
|
|
162
|
+
|
|
148
163
|
## Quick Commands
|
|
149
164
|
|
|
150
165
|
| Command | Use it for |
|
|
@@ -543,7 +558,17 @@ These files are local telemetry only. Hooks do not make network calls.
|
|
|
543
558
|
|
|
544
559
|
## Project Understanding
|
|
545
560
|
|
|
546
|
-
ContextOS
|
|
561
|
+
ContextOS works standalone. The core path is local rules, file embeddings, import graph expansion, skill routing, workflow routing, and evidence capture.
|
|
562
|
+
|
|
563
|
+
Project graph and memory backends are optional adapters:
|
|
564
|
+
|
|
565
|
+
| Adapter | What it adds | Required? |
|
|
566
|
+
| --- | --- | --- |
|
|
567
|
+
| `code-review-graph` | Blast radius, semantic node search, and test relationships. | No |
|
|
568
|
+
| `codegraph` | Symbol/call graph context once its MCP schema is stable. | No |
|
|
569
|
+
| `agent-memory` / `agentmemory` | Prior task history, decisions, and recurring bug-fix context. | No |
|
|
570
|
+
|
|
571
|
+
ContextOS does not require `code-review-graph`, `codegraph`, or `agent-memory` to install or run. It gets smarter when those backends are available; when they are missing, the adapter scores stay at zero and the hook continues with local context.
|
|
547
572
|
|
|
548
573
|
For file suggestions, ContextOS now runs a local RAG-style retrieval pass:
|
|
549
574
|
|
|
@@ -553,12 +578,12 @@ prompt
|
|
|
553
578
|
-> ctx-mcp reads AGENTS.md and scores rules with local MiniLM
|
|
554
579
|
-> query the persisted file-vector index in embeddings.db for semantic file candidates
|
|
555
580
|
-> expand candidates through relative import graph links
|
|
556
|
-
-> query code-review-graph semantic_search_nodes with seed entity names
|
|
557
|
-
-> merge and deduplicate semantic, import-graph, and
|
|
581
|
+
-> optionally query code-review-graph semantic_search_nodes with seed entity names
|
|
582
|
+
-> merge and deduplicate semantic, import-graph, and optional graph matches
|
|
558
583
|
-> inject top suggested files with graph evidence reasons
|
|
559
584
|
```
|
|
560
585
|
|
|
561
|
-
This keeps the hook fast and local while still using graph semantics when available. The graph search path is visible in runtime data through file reasons such as `graph:content-moderation.service`.
|
|
586
|
+
This keeps the hook fast and local while still using graph semantics when available. The graph search path is visible in runtime data through file reasons such as `graph:content-moderation.service`. When no graph adapter is available, file suggestions still use local file vectors and import graph expansion.
|
|
562
587
|
|
|
563
588
|
Prompt scoring does not walk the repository for file candidates or import expansion. `ctx install` and `ctx embeddings warm` rebuild the persisted file-vector index and one-hop import adjacency index by walking source paths once; prompt hooks query those indexes directly. Rules, files, skills, and workflows are scored concurrently with `Promise.all()`.
|
|
564
589
|
|
|
@@ -576,14 +601,18 @@ Skill ranking uses Skill Router v2. ContextOS still starts with semantic retriev
|
|
|
576
601
|
|
|
577
602
|
```text
|
|
578
603
|
final_score =
|
|
579
|
-
semantic_score * 0.
|
|
604
|
+
semantic_score * 0.30
|
|
580
605
|
+ prompt_trigger_score * 0.20
|
|
581
|
-
+ project_evidence_score * 0.
|
|
606
|
+
+ project_evidence_score * 0.20
|
|
582
607
|
+ file_config_score * 0.10
|
|
583
|
-
+
|
|
608
|
+
+ import_graph_score * 0.10
|
|
609
|
+
+ external_graph_score * 0.05
|
|
610
|
+
+ memory_score * 0.05
|
|
584
611
|
- negative_penalty * 0.20
|
|
585
612
|
```
|
|
586
613
|
|
|
614
|
+
`external_graph_score` is supplied by optional project graph adapters such as `code-review-graph` or `codegraph`. `memory_score` is reserved for optional memory adapters such as `agent-memory`. Without those adapters, both scores are `0`.
|
|
615
|
+
|
|
587
616
|
Skill metadata can live beside `SKILL.md` as `skill.yaml`:
|
|
588
617
|
|
|
589
618
|
```yaml
|
|
@@ -625,7 +654,7 @@ Current local benchmark:
|
|
|
625
654
|
|
|
626
655
|
```text
|
|
627
656
|
Cases: 52
|
|
628
|
-
Top-1 Accuracy:
|
|
657
|
+
Top-1 Accuracy: 94.2%
|
|
629
658
|
Top-3 Recall: 94.2%
|
|
630
659
|
False Positive Rate: 0.0%
|
|
631
660
|
Confidence Calibration: 100.0%
|
package/bin/ctx.js
CHANGED
|
@@ -679,7 +679,7 @@ async function skillsDoctor(task) {
|
|
|
679
679
|
}
|
|
680
680
|
for (const skill of result.skills) {
|
|
681
681
|
console.log(`${Number(skill.confidence || skill.score || 0).toFixed(2)} ${skill.confidenceBand || "low"} ${skill.name}`);
|
|
682
|
-
console.log(` semantic:${Number(skill.semanticScore || 0).toFixed(2)} prompt:${Number(skill.promptTriggerScore || 0).toFixed(2)} project:${Number(skill.projectEvidenceScore || 0).toFixed(2)} files:${Number(skill.fileConfigScore || 0).toFixed(2)} negative:${Number(skill.negativePenalty || 0).toFixed(2)}`);
|
|
682
|
+
console.log(` semantic:${Number(skill.semanticScore || 0).toFixed(2)} prompt:${Number(skill.promptTriggerScore || 0).toFixed(2)} project:${Number(skill.projectEvidenceScore || 0).toFixed(2)} files:${Number(skill.fileConfigScore || 0).toFixed(2)} import:${Number(skill.importGraphScore || 0).toFixed(2)} graph:${Number(skill.externalGraphScore || skill.graphScore || 0).toFixed(2)} memory:${Number(skill.memoryScore || 0).toFixed(2)} negative:${Number(skill.negativePenalty || 0).toFixed(2)}`);
|
|
683
683
|
if (skill.evidence?.length) console.log(` evidence: ${skill.evidence.join(", ")}`);
|
|
684
684
|
if (skill.negativeEvidence?.length) console.log(` rejected signals: ${skill.negativeEvidence.join(", ")}`);
|
|
685
685
|
}
|
package/plugins/ctx/lib/skill-discoverer.js
CHANGED
|
@@ -565,13 +565,17 @@ function hybridSkillScore(skill, { prompt, projectEvidence }) {
|
|
|
565
565
|
const negativePenalty = Math.max(negativeDependencies.score, negativeFiles.score, negativePrompts.score);
|
|
566
566
|
const projectEvidenceScore = dependencyEvidence.score;
|
|
567
567
|
const fileConfigScore = fileEvidence.score;
|
|
568
|
-
const
|
|
568
|
+
const importGraphScore = 0;
|
|
569
|
+
const externalGraphScore = 0;
|
|
570
|
+
const memoryScore = 0;
|
|
569
571
|
const hybridScore = Math.max(0, Math.min(1,
|
|
570
|
-
semanticScore * 0.
|
|
572
|
+
semanticScore * 0.30
|
|
571
573
|
+ promptMatch.score * 0.20
|
|
572
|
-
+ projectEvidenceScore * 0.
|
|
574
|
+
+ projectEvidenceScore * 0.20
|
|
573
575
|
+ fileConfigScore * 0.10
|
|
574
|
-
+
|
|
576
|
+
+ importGraphScore * 0.10
|
|
577
|
+
+ externalGraphScore * 0.05
|
|
578
|
+
+ memoryScore * 0.05
|
|
575
579
|
- negativePenalty * 0.20
|
|
576
580
|
));
|
|
577
581
|
const explicit = (skill.reasons || []).includes("explicit-skill");
|
|
@@ -609,7 +613,10 @@ function hybridSkillScore(skill, { prompt, projectEvidence }) {
|
|
|
609
613
|
promptTriggerScore: promptMatch.score,
|
|
610
614
|
projectEvidenceScore,
|
|
611
615
|
fileConfigScore,
|
|
612
|
-
|
|
616
|
+
importGraphScore,
|
|
617
|
+
externalGraphScore,
|
|
618
|
+
memoryScore,
|
|
619
|
+
graphScore: externalGraphScore,
|
|
613
620
|
negativePenalty,
|
|
614
621
|
rankScore,
|
|
615
622
|
explicit,
|
|
@@ -639,7 +646,7 @@ function calibrateSkillConfidence(score, {
|
|
|
639
646
|
if (isAmbiguousPrompt(prompt) && !(hasDependencyEvidence && hasFileEvidence) && !explicit) {
|
|
640
647
|
confidence = Math.min(confidence, 0.64);
|
|
641
648
|
}
|
|
642
|
-
if (hasPromptEvidence && hasProjectEvidence && confidence >= 0.
|
|
649
|
+
if (hasPromptEvidence && hasProjectEvidence && confidence >= 0.45) {
|
|
643
650
|
confidence = Math.max(confidence, 0.68);
|
|
644
651
|
}
|
|
645
652
|
if (hasDependencyEvidence && hasFileEvidence) {
|