@minhpnq1807/contextos 0.1.9 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +2 -0
- package/bin/ctx.js +7 -0
- package/package.json +1 -1
- package/plugins/ctx/lib/analyzer.js +13 -0
- package/plugins/ctx/lib/benchmark.js +72 -0
- package/plugins/ctx/lib/measure.js +12 -4
- package/plugins/ctx/lib/reporter.js +11 -3
- package/plugins/ctx/lib/stats.js +4 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,11 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.2.0
|
|
4
|
+
|
|
5
|
+
- Adds `ctx benchmark -- "task"` to compare baseline AGENTS.md ordering with ContextOS scheduling and estimate lost-in-the-middle risk.
|
|
6
|
+
- Improves AGENTS.md rule filtering for generic headings and non-actionable sections.
|
|
7
|
+
- Splits Stop reports into `followed`, `ignored`, `unknown`, and `unmeasurable` so efficiency only reflects rules with evidence.
|
|
8
|
+
|
|
3
9
|
## 0.1.9
|
|
4
10
|
|
|
5
11
|
- Proxies all configured MCP servers except ContextOS' own `ctx-mcp` server.
|
package/README.md
CHANGED
|
@@ -190,6 +190,7 @@ This warning comes from a transitive dependency in the local embedding/WASM stac
|
|
|
190
190
|
| `ctx report` | Shows the last Stop-hook compliance report for the current workspace. | A Codex task has finished and you want the summary again. | Reads `~/.ctx/contextos/workspaces/<workspace-id>/last-report.json`. |
|
|
191
191
|
| `ctx evidence` | Shows detailed evidence behind the last report for the current workspace. | You want to inspect why a rule was marked `followed`, `ignored`, or `unknown`. | Prints rule text, source file, score, status, and evidence reason. |
|
|
192
192
|
| `ctx stats` | Shows aggregate runtime metrics for the current workspace. | You want to know whether ContextOS is active and useful over time. | Prints prompt count, report count, injected/quiet ratio, average prompt analysis time, efficiency, rule outcomes, hook events, and last suggested files for the current workspace only. |
|
|
193
|
+
| `ctx benchmark -- "task"` | Compares baseline AGENTS.md ordering with ContextOS task-aware scheduling. | You want a before/after signal for lost-in-the-middle risk. | Prints parsed/actionable/filtered rule counts, relevant rules in the middle of the original file, scheduled high/mid rules, and top scored rules. |
|
|
193
194
|
| `ctx embeddings warm -- "task"` | Prepares local semantic embedding caches. | First install, CI smoke checks, or after changing AGENTS.md/project files. | Loads/downloads `Xenova/all-MiniLM-L6-v2` and writes vectors to `~/.ctx/contextos/embeddings.db`. |
|
|
194
195
|
| `ctx --version` | Prints the installed ContextOS CLI version. | You want to confirm which npm version is being executed. | Prints the version from package metadata. |
|
|
195
196
|
|
|
@@ -286,6 +287,7 @@ ContextOS uses heuristic evidence collection from git diff/status plus local run
|
|
|
286
287
|
followed = evidence in the diff suggests the rule was applied
|
|
287
288
|
ignored = evidence in the diff suggests the rule was violated
|
|
288
289
|
unknown = the rule was relevant, but the diff does not prove either way
|
|
290
|
+
unmeasurable = ContextOS lacks the required evidence source, such as git diff lines or runtime telemetry
|
|
289
291
|
```
|
|
290
292
|
|
|
291
293
|
For runtime-only rules, ContextOS also checks `telemetry.jsonl` for hook-visible tool names, MCP server names, and command metadata. A rule like "use code-review-graph before reading files" can be marked `followed` when telemetry contains a matching `code-review-graph` signal.
|
package/bin/ctx.js
CHANGED
|
@@ -15,6 +15,7 @@ import { warmFileEmbeddings } from "../plugins/ctx/lib/file-embedding-retriever.
|
|
|
15
15
|
import { scoreContext } from "../plugins/ctx/lib/score-context.js";
|
|
16
16
|
import { defaultDataRoot, workspaceDataDir, workspaceMarkerPath } from "../plugins/ctx/lib/workspace-data.js";
|
|
17
17
|
import { installMcpTelemetryProxies } from "../plugins/ctx/lib/mcp-proxy-install.js";
|
|
18
|
+
import { benchmarkWorkspace, formatBenchmark } from "../plugins/ctx/lib/benchmark.js";
|
|
18
19
|
|
|
19
20
|
const __dirname = path.dirname(fileURLToPath(import.meta.url));
|
|
20
21
|
const rootDir = path.resolve(__dirname, "..");
|
|
@@ -32,6 +33,7 @@ Usage:
|
|
|
32
33
|
ctx report
|
|
33
34
|
ctx evidence
|
|
34
35
|
ctx stats
|
|
36
|
+
ctx benchmark -- "task"
|
|
35
37
|
ctx embeddings warm -- "task"
|
|
36
38
|
ctx --version
|
|
37
39
|
`;
|
|
@@ -272,6 +274,11 @@ try {
|
|
|
272
274
|
console.log(formatEvidence(loadLastReport()));
|
|
273
275
|
} else if (command === "stats") {
|
|
274
276
|
console.log(formatStats(loadStats(contextOSWorkspaceDataDir())));
|
|
277
|
+
} else if (command === "benchmark") {
|
|
278
|
+
const marker = args.indexOf("--");
|
|
279
|
+
const task = marker >= 0 ? args.slice(marker + 1).join(" ") : args.slice(1).join(" ");
|
|
280
|
+
if (!task.trim()) throw new Error('Usage: ctx benchmark -- "task"');
|
|
281
|
+
console.log(formatBenchmark(benchmarkWorkspace({ cwd: process.cwd(), task })));
|
|
275
282
|
} else {
|
|
276
283
|
throw new Error(`Unknown command: ${command}\n\n${usage()}`);
|
|
277
284
|
}
|
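package/package.json
CHANGED
|
(hunk missing from this capture; the file list reports +1 -1, consistent with the version bump from 0.1.9 to 0.2.0)
|
package/plugins/ctx/lib/analyzer.js
CHANGED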
|
@@ -81,6 +81,11 @@ const TOOL_REFERENCE_TOKENS = new Set([
|
|
|
81
81
|
"list_communities"
|
|
82
82
|
]);
|
|
83
83
|
|
|
84
|
+
// Lowercase verbs and modal words that mark a short line as an instruction.
// isGenericHeading() looks tokens up in this set: a short line containing none
// of them is classified as a generic heading rather than a rule.
const ACTION_TOKENS = new Set([
|
|
85
|
+
"add", "avoid", "call", "check", "derive", "ensure", "filter", "follow", "prefer", "run",
|
|
86
|
+
"use", "validate", "verify", "write", "never", "always", "must", "should", "do"
|
|
87
|
+
]);
|
|
88
|
+
|
|
84
89
|
export function tokenize(value) {
|
|
85
90
|
const normalized = String(value || "")
|
|
86
91
|
.toLowerCase()
|
|
@@ -190,6 +195,7 @@ export function isDocumentationOnlyRule(rule) {
|
|
|
190
195
|
if (/^<!--.*-->$/.test(normalized)) return true;
|
|
191
196
|
if (DOCUMENTATION_HEADING_PATTERNS.some((pattern) => pattern.test(normalized))) return true;
|
|
192
197
|
if (isMarkdownTableRule(normalized)) return true;
|
|
198
|
+
if (isGenericHeading(normalized)) return true;
|
|
193
199
|
return false;
|
|
194
200
|
}
|
|
195
201
|
|
|
@@ -209,6 +215,13 @@ function isMarkdownTableRule(content) {
|
|
|
209
215
|
return /\btool\b/.test(lower) && /\buse\s+when\b/.test(lower) && toolReferenceCount >= 2;
|
|
210
216
|
}
|
|
211
217
|
|
|
218
|
+
/**
 * Heuristic: decides whether a rule's text looks like a bare section heading
 * instead of an instruction.
 *
 * Returns false (not a heading) when the text is longer than 80 characters,
 * contains a backtick, period, colon or semicolon, or tokenizes to more than
 * four tokens. Otherwise returns true exactly when none of its tokens is in
 * ACTION_TOKENS (so text that yields zero tokens is also treated as a heading).
 *
 * NOTE(review): short imperative rules whose verb is not listed in
 * ACTION_TOKENS would also be classified as headings — confirm this is
 * acceptable against tokenize()'s actual output.
 *
 * @param {string} content - Rule text; the visible caller passes a normalized string.
 * @returns {boolean} true when the text is considered a generic heading.
 */
function isGenericHeading(content) {
|
|
219
|
+
if (content.length > 80 || /[`.:;]/.test(content)) return false;
|
|
220
|
+
const tokens = tokenize(content);
|
|
221
|
+
if (tokens.length > 4) return false;
|
|
222
|
+
return !tokens.some((token) => ACTION_TOKENS.has(token));
|
|
223
|
+
}
|
|
224
|
+
|
|
212
225
|
function dedupeRules(rules) {
|
|
213
226
|
const seen = new Set();
|
|
214
227
|
return rules.filter((rule) => {
|
package/plugins/ctx/lib/benchmark.js
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
import { parseRules, filterActionableRules, scoreRules } from "./analyzer.js";
|
|
2
|
+
import { readAgentsChain } from "./reader.js";
|
|
3
|
+
import { scheduleContext } from "./scheduler.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Compares the original rule ordering of an AGENTS.md document with the
 * ContextOS schedule for a single task.
 *
 * The markdown is parsed into rules, non-actionable rules are filtered out,
 * and the remaining rules are scored against the task and open files. A rule
 * counts as "relevant" when its score is at least 0.1. The baseline risk is
 * the share of relevant rules whose position in the original (actionable)
 * ordering falls in the middle half of the list, i.e. the half-open index
 * window [floor(n * 0.25), ceil(n * 0.75)).
 *
 * @param {object} [options]
 * @param {string} [options.markdown] - Document text handed to parseRules.
 * @param {Array} [options.sources=[]] - Passed through unchanged into the result.
 * @param {string} [options.task=""] - Task description used for scoring.
 * @param {Array} [options.openFiles=[]] - Open files forwarded to scoreRules.
 * @param {number} [options.topK=8] - Number of leading scored rules to report.
 * @returns {object} Summary with rule counts, `baseline` middle-risk figures
 *   and `contextOS` scheduling figures.
 */
export function benchmarkContext({ markdown, sources = [], task = "", openFiles = [], topK = 8 } = {}) {
  const parsedRules = parseRules(markdown);
  const actionableRules = filterActionableRules(parsedRules);
  const scoredRules = scoreRules(actionableRules, task, openFiles);
  const relevantRules = scoredRules.filter((rule) => Number(rule.score || 0) >= 0.1);
  const scheduled = scheduleContext({ rules: scoredRules, relevantFiles: [] });

  // Position of each rule in the original ordering, keyed by its text.
  const originalPositions = new Map(actionableRules.map((rule, index) => [rule.content, index]));
  const middleStart = Math.floor(actionableRules.length * 0.25);
  const middleEnd = Math.ceil(actionableRules.length * 0.75);
  const lostMiddle = relevantRules.filter((rule) => {
    const index = originalPositions.get(rule.content);
    // middleEnd is an exclusive bound: an inclusive comparison would count the
    // rule just past the middle half (with 4 rules, the very last one).
    return typeof index === "number" && index >= middleStart && index < middleEnd;
  });

  return {
    task,
    sources,
    rulesParsed: parsedRules.length,
    actionableRules: actionableRules.length,
    filteredRules: parsedRules.length - actionableRules.length,
    relevantRules: relevantRules.length,
    baseline: {
      relevantRulesInMiddle: lostMiddle.length,
      // Guard the division so an empty relevant set reports 0% instead of NaN.
      middleRiskPercent: relevantRules.length ? Math.round((lostMiddle.length / relevantRules.length) * 100) : 0
    },
    contextOS: {
      highRules: scheduled.highRules.length,
      midRules: scheduled.midRules.length,
      // Takes the first topK entries in the order scoreRules returned them
      // (presumably highest score first — confirm against analyzer.js).
      topRules: scoredRules.slice(0, topK).map((rule) => ({
        score: rule.score,
        content: rule.content,
        reasons: rule.reasons || []
      })),
      repeatsHighRulesAtEnd: scheduled.highRules.length > 0
    }
  };
}
|
|
43
|
+
|
|
44
|
+
/**
 * Runs the benchmark for a workspace directory: loads the agents chain for
 * `cwd` through readAgentsChain and passes its `content` and `sources` to
 * benchmarkContext together with the task, open files and topK.
 *
 * @param {object} [options]
 * @param {string} [options.cwd=process.cwd()] - Directory handed to readAgentsChain.
 * @param {string} [options.task=""] - Task description forwarded to benchmarkContext.
 * @param {Array} [options.openFiles=[]] - Forwarded to benchmarkContext.
 * @param {number} [options.topK=8] - Forwarded to benchmarkContext.
 * @returns {object} Whatever benchmarkContext returns for the loaded content.
 */
export function benchmarkWorkspace({ cwd = process.cwd(), task = "", openFiles = [], topK = 8 } = {}) {
|
|
45
|
+
const merged = readAgentsChain({ cwd });
|
|
46
|
+
return benchmarkContext({
|
|
47
|
+
markdown: merged.content,
|
|
48
|
+
sources: merged.sources,
|
|
49
|
+
task,
|
|
50
|
+
openFiles,
|
|
51
|
+
topK
|
|
52
|
+
});
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
/**
 * Renders a benchmark result as a plain-text report, one entry per line.
 *
 * Reads `task`, the rule counters, `baseline` and `contextOS` from the result
 * and appends a "Top rules:" section only when `contextOS.topRules` is
 * non-empty. Each top rule is printed with its score fixed to two decimals
 * (a missing/falsy score prints as 0.00) followed by its comma-joined reasons,
 * if any.
 *
 * @param {object} result - Object shaped like the return value of benchmarkContext.
 * @returns {string} The report lines joined with "\n".
 */
export function formatBenchmark(result) {
|
|
56
|
+
const lines = [];
|
|
57
|
+
lines.push("ContextOS benchmark");
|
|
58
|
+
lines.push(`Task: ${result.task || "(empty)"}`);
|
|
59
|
+
lines.push(`Rules: ${result.rulesParsed} parsed, ${result.actionableRules} actionable, ${result.filteredRules} filtered`);
|
|
60
|
+
lines.push(`Relevant rules: ${result.relevantRules}`);
|
|
61
|
+
lines.push(`Baseline middle-risk: ${result.baseline.relevantRulesInMiddle}/${result.relevantRules} relevant rules (${result.baseline.middleRiskPercent}%)`);
|
|
62
|
+
lines.push(`ContextOS scheduled: ${result.contextOS.highRules} high, ${result.contextOS.midRules} mid`);
|
|
63
|
+
lines.push(`Recency reminder: ${result.contextOS.repeatsHighRulesAtEnd ? "enabled" : "not needed"}`);
|
|
64
|
+
if (result.contextOS.topRules.length) {
|
|
65
|
+
lines.push("Top rules:");
|
|
66
|
+
for (const rule of result.contextOS.topRules) {
|
|
67
|
+
const reasons = rule.reasons?.length ? ` reasons:${rule.reasons.join(",")}` : "";
|
|
68
|
+
lines.push(`- ${Number(rule.score || 0).toFixed(2)} ${rule.content}${reasons}`);
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
return lines.join("\n");
|
|
72
|
+
}
|
package/plugins/ctx/lib/measure.js
CHANGED
|
@@ -183,23 +183,31 @@ export function checkCompliance({ rules = [], addedLines = [], runtimeEvidence =
|
|
|
183
183
|
if (!keywords.length || !addedLines.length) {
|
|
184
184
|
results.push({
|
|
185
185
|
rule,
|
|
186
|
-
status: "unknown",
|
|
186
|
+
status: isRuntimeOnly || !addedLines.length || !keywords.length ? "unmeasurable" : "unknown",
|
|
187
187
|
kind: isRuntimeOnly ? "runtime" : kind,
|
|
188
188
|
keywords,
|
|
189
189
|
evidence: isRuntimeOnly
|
|
190
|
-
? "requires runtime/tool-call telemetry; no
|
|
190
|
+
? "requires runtime/tool-call telemetry; no runtime telemetry source observed"
|
|
191
191
|
: (!addedLines.length ? "no added lines in git diff" : "no concrete compliance keywords found")
|
|
192
192
|
});
|
|
193
193
|
continue;
|
|
194
194
|
}
|
|
195
195
|
|
|
196
196
|
if (isRuntimeOnly) {
|
|
197
|
+
const hasRuntimeSource = Boolean(
|
|
198
|
+
runtimeEvidence.sources?.length ||
|
|
199
|
+
runtimeEvidence.signals?.length ||
|
|
200
|
+
runtimeEvidence.toolSignals?.length ||
|
|
201
|
+
runtimeEvidence.commandSignals?.length
|
|
202
|
+
);
|
|
197
203
|
results.push({
|
|
198
204
|
rule,
|
|
199
|
-
status: "unknown",
|
|
205
|
+
status: hasRuntimeSource ? "unknown" : "unmeasurable",
|
|
200
206
|
kind: "runtime",
|
|
201
207
|
keywords,
|
|
202
|
-
evidence:
|
|
208
|
+
evidence: hasRuntimeSource
|
|
209
|
+
? "requires runtime/tool-call telemetry; no matching runtime signal observed"
|
|
210
|
+
: "requires runtime/tool-call telemetry; no runtime telemetry source observed"
|
|
203
211
|
});
|
|
204
212
|
continue;
|
|
205
213
|
}
|
package/plugins/ctx/lib/reporter.js
CHANGED
|
@@ -5,6 +5,7 @@ export function buildReport({ cwd, prompt, relevantFiles, scheduled, gitSnapshot
|
|
|
5
5
|
const followed = actionableCompliance.filter((item) => item.status === "followed");
|
|
6
6
|
const ignored = actionableCompliance.filter((item) => item.status === "ignored");
|
|
7
7
|
const unknown = actionableCompliance.filter((item) => item.status === "unknown");
|
|
8
|
+
const unmeasurable = actionableCompliance.filter((item) => item.status === "unmeasurable");
|
|
8
9
|
const measured = followed.length + ignored.length;
|
|
9
10
|
const efficiencyScore = measured ? Math.round((followed.length / measured) * 100) : null;
|
|
10
11
|
|
|
@@ -20,8 +21,10 @@ export function buildReport({ cwd, prompt, relevantFiles, scheduled, gitSnapshot
|
|
|
20
21
|
followed,
|
|
21
22
|
ignored,
|
|
22
23
|
unknown,
|
|
24
|
+
unmeasurable,
|
|
23
25
|
measuredRuleCount: measured,
|
|
24
26
|
unknownRuleCount: unknown.length,
|
|
27
|
+
unmeasurableRuleCount: unmeasurable.length,
|
|
25
28
|
efficiencyScore
|
|
26
29
|
};
|
|
27
30
|
}
|
|
@@ -32,7 +35,7 @@ export function formatReport(report) {
|
|
|
32
35
|
lines.push("ContextOS report");
|
|
33
36
|
lines.push(`Efficiency: ${report.efficiencyScore == null ? "unknown" : `${report.efficiencyScore}%`}`);
|
|
34
37
|
lines.push(`Injected rules: ${report.injectedRuleCount || 0}`);
|
|
35
|
-
lines.push(`Rule outcomes: ${report.followed?.length || 0} followed, ${report.ignored?.length || 0} ignored, ${report.unknown?.length || 0} unknown`);
|
|
38
|
+
lines.push(`Rule outcomes: ${report.followed?.length || 0} followed, ${report.ignored?.length || 0} ignored, ${report.unknown?.length || 0} unknown, ${report.unmeasurable?.length || 0} unmeasurable`);
|
|
36
39
|
lines.push(`Measured rules: ${report.measuredRuleCount ?? ((report.followed?.length || 0) + (report.ignored?.length || 0))}`);
|
|
37
40
|
lines.push(`Changed files: ${report.changedFiles?.length ? report.changedFiles.join(", ") : "none detected"}`);
|
|
38
41
|
|
|
@@ -48,6 +51,7 @@ export function formatReport(report) {
|
|
|
48
51
|
appendBucket(lines, "Followed", report.followed);
|
|
49
52
|
appendBucket(lines, "Ignored", report.ignored);
|
|
50
53
|
appendBucket(lines, "Unknown", report.unknown);
|
|
54
|
+
appendBucket(lines, "Unmeasurable", report.unmeasurable);
|
|
51
55
|
|
|
52
56
|
if (report.ignored?.length) {
|
|
53
57
|
lines.push(`Suggestion: fix ignored rule evidence first: ${truncate(report.ignored[0].rule?.content || "", 70)}`);
|
|
@@ -71,7 +75,8 @@ export function formatEvidence(report) {
|
|
|
71
75
|
const items = [
|
|
72
76
|
...(report.followed || []).map((item) => ({ ...item, status: "followed" })),
|
|
73
77
|
...(report.ignored || []).map((item) => ({ ...item, status: "ignored" })),
|
|
74
|
-
...(report.unknown || []).map((item) => ({ ...item, status: "unknown" }))
|
|
78
|
+
...(report.unknown || []).map((item) => ({ ...item, status: "unknown" })),
|
|
79
|
+
...(report.unmeasurable || []).map((item) => ({ ...item, status: "unmeasurable" }))
|
|
75
80
|
];
|
|
76
81
|
|
|
77
82
|
if (!items.length) {
|
|
@@ -129,15 +134,18 @@ function sanitizeReport(report = {}) {
|
|
|
129
134
|
const followed = (report.followed || []).filter((item) => !isSystemUserRule(item.rule));
|
|
130
135
|
const ignored = (report.ignored || []).filter((item) => !isSystemUserRule(item.rule));
|
|
131
136
|
const unknown = (report.unknown || []).filter((item) => !isSystemUserRule(item.rule));
|
|
137
|
+
const unmeasurable = (report.unmeasurable || []).filter((item) => !isSystemUserRule(item.rule));
|
|
132
138
|
const measured = followed.length + ignored.length;
|
|
133
139
|
return {
|
|
134
140
|
...report,
|
|
135
|
-
injectedRuleCount: followed.length + ignored.length + unknown.length,
|
|
141
|
+
injectedRuleCount: followed.length + ignored.length + unknown.length + unmeasurable.length,
|
|
136
142
|
followed,
|
|
137
143
|
ignored,
|
|
138
144
|
unknown,
|
|
145
|
+
unmeasurable,
|
|
139
146
|
measuredRuleCount: measured,
|
|
140
147
|
unknownRuleCount: unknown.length,
|
|
148
|
+
unmeasurableRuleCount: unmeasurable.length,
|
|
141
149
|
efficiencyScore: measured ? Math.round((followed.length / measured) * 100) : null
|
|
142
150
|
};
|
|
143
151
|
}
|
package/plugins/ctx/lib/stats.js
CHANGED
|
@@ -57,6 +57,7 @@ export function loadStats(dataDir) {
|
|
|
57
57
|
const followed = reports.reduce((sum, report) => sum + (report.followed?.length || 0), 0);
|
|
58
58
|
const ignored = reports.reduce((sum, report) => sum + (report.ignored?.length || 0), 0);
|
|
59
59
|
const unknown = reports.reduce((sum, report) => sum + (report.unknown?.length || 0), 0);
|
|
60
|
+
const unmeasurable = reports.reduce((sum, report) => sum + (report.unmeasurable?.length || 0), 0);
|
|
60
61
|
|
|
61
62
|
return {
|
|
62
63
|
dataDir,
|
|
@@ -71,6 +72,7 @@ export function loadStats(dataDir) {
|
|
|
71
72
|
followed,
|
|
72
73
|
ignored,
|
|
73
74
|
unknown,
|
|
75
|
+
unmeasurable,
|
|
74
76
|
lastPrompt: analyzedPrompts.at(-1) || null,
|
|
75
77
|
lastReport: reports.at(-1) || null
|
|
76
78
|
};
|
|
@@ -85,7 +87,7 @@ export function formatStats(stats) {
|
|
|
85
87
|
lines.push(`Prompt mode: ${stats.injectedCount} injected, ${stats.quietCount} quiet (${stats.injectionRate}% injected)`);
|
|
86
88
|
lines.push(`Average prompt analysis: ${stats.averagePromptMs == null ? "unknown" : `${stats.averagePromptMs}ms`}`);
|
|
87
89
|
lines.push(`Average efficiency: ${formatAverageEfficiency(stats)}`);
|
|
88
|
-
lines.push(`Rule outcomes: ${stats.followed} followed, ${stats.ignored} ignored, ${stats.unknown} unknown`);
|
|
90
|
+
lines.push(`Rule outcomes: ${stats.followed} followed, ${stats.ignored} ignored, ${stats.unknown} unknown, ${stats.unmeasurable || 0} unmeasurable`);
|
|
89
91
|
|
|
90
92
|
const eventSummary = Object.entries(stats.events)
|
|
91
93
|
.map(([event, count]) => `${event}:${count}`)
|
|
@@ -103,6 +105,7 @@ export function formatStats(stats) {
|
|
|
103
105
|
lines.push(`Last report efficiency: ${stats.lastReport.efficiencyScore == null ? "unknown" : `${stats.lastReport.efficiencyScore}%`}`);
|
|
104
106
|
lines.push(`Last report measured rules: ${stats.lastReport.measuredRuleCount ?? ((stats.lastReport.followed?.length || 0) + (stats.lastReport.ignored?.length || 0))}`);
|
|
105
107
|
lines.push(`Last report unknown rules: ${stats.lastReport.unknownRuleCount ?? (stats.lastReport.unknown?.length || 0)}`);
|
|
108
|
+
lines.push(`Last report unmeasurable rules: ${stats.lastReport.unmeasurableRuleCount ?? (stats.lastReport.unmeasurable?.length || 0)}`);
|
|
106
109
|
const changed = stats.lastReport.changedFiles?.join(", ");
|
|
107
110
|
if (changed) lines.push(`Last changed files: ${changed}`);
|
|
108
111
|
}
|