npm - @minhpnq1807/contextos - Versions diffs - 0.1.9 → 0.2.0 - Mend

@minhpnq1807/contextos 0.1.9 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/CHANGELOG.md +6 -0
package/README.md +2 -0
package/bin/ctx.js +7 -0
package/package.json +1 -1
package/plugins/ctx/lib/analyzer.js +13 -0
package/plugins/ctx/lib/benchmark.js +72 -0
package/plugins/ctx/lib/measure.js +12 -4
package/plugins/ctx/lib/reporter.js +11 -3
package/plugins/ctx/lib/stats.js +4 -1

package/CHANGELOG.md CHANGED

@@ -1,5 +1,11 @@
 # Changelog
+## 0.2.0
+- Adds `ctx benchmark -- "task"` to compare baseline AGENTS.md ordering with ContextOS scheduling and estimate lost-in-the-middle risk.
+- Improves AGENTS.md rule filtering for generic headings and non-actionable sections.
+- Splits Stop reports into `followed`, `ignored`, `unknown`, and `unmeasurable` so efficiency only reflects rules with evidence.
 ## 0.1.9
 - Proxies all configured MCP servers except ContextOS' own `ctx-mcp` server.

package/README.md CHANGED

@@ -190,6 +190,7 @@ This warning comes from a transitive dependency in the local embedding/WASM stac
 | `ctx report` | Shows the last Stop-hook compliance report for the current workspace. | A Codex task has finished and you want the summary again. | Reads `~/.ctx/contextos/workspaces/<workspace-id>/last-report.json`. |
 | `ctx evidence` | Shows detailed evidence behind the last report for the current workspace. | You want to inspect why a rule was marked `followed`, `ignored`, or `unknown`. | Prints rule text, source file, score, status, and evidence reason. |
 | `ctx stats` | Shows aggregate runtime metrics for the current workspace. | You want to know whether ContextOS is active and useful over time. | Prints prompt count, report count, injected/quiet ratio, average prompt analysis time, efficiency, rule outcomes, hook events, and last suggested files for the current workspace only. |
+| `ctx benchmark -- "task"` | Compares baseline AGENTS.md ordering with ContextOS task-aware scheduling. | You want a before/after signal for lost-in-the-middle risk. | Prints parsed/actionable/filtered rule counts, relevant rules in the middle of the original file, scheduled high/mid rules, and top scored rules. |
 | `ctx embeddings warm -- "task"` | Prepares local semantic embedding caches. | First install, CI smoke checks, or after changing AGENTS.md/project files. | Loads/downloads `Xenova/all-MiniLM-L6-v2` and writes vectors to `~/.ctx/contextos/embeddings.db`. |
 | `ctx --version` | Prints the installed ContextOS CLI version. | You want to confirm which npm version is being executed. | Prints the version from package metadata. |
@@ -286,6 +287,7 @@ ContextOS uses heuristic evidence collection from git diff/status plus local run
 followed = evidence in the diff suggests the rule was applied
 ignored  = evidence in the diff suggests the rule was violated
 unknown  = the rule was relevant, but the diff does not prove either way
+unmeasurable = ContextOS lacks the required evidence source, such as git diff lines or runtime telemetry
 ```
 For runtime-only rules, ContextOS also checks `telemetry.jsonl` for hook-visible tool names, MCP server names, and command metadata. A rule like "use code-review-graph before reading files" can be marked `followed` when telemetry contains a matching `code-review-graph` signal.

package/bin/ctx.js CHANGED

@@ -15,6 +15,7 @@ import { warmFileEmbeddings } from "../plugins/ctx/lib/file-embedding-retriever.
 import { scoreContext } from "../plugins/ctx/lib/score-context.js";
 import { defaultDataRoot, workspaceDataDir, workspaceMarkerPath } from "../plugins/ctx/lib/workspace-data.js";
 import { installMcpTelemetryProxies } from "../plugins/ctx/lib/mcp-proxy-install.js";
+import { benchmarkWorkspace, formatBenchmark } from "../plugins/ctx/lib/benchmark.js";
 const __dirname = path.dirname(fileURLToPath(import.meta.url));
 const rootDir = path.resolve(__dirname, "..");
@@ -32,6 +33,7 @@ Usage:
   ctx report
   ctx evidence
   ctx stats
+  ctx benchmark -- "task"
   ctx embeddings warm -- "task"
   ctx --version
 `;
@@ -272,6 +274,11 @@ try {
     console.log(formatEvidence(loadLastReport()));
   } else if (command === "stats") {
     console.log(formatStats(loadStats(contextOSWorkspaceDataDir())));
+  } else if (command === "benchmark") {
+    const marker = args.indexOf("--");
+    const task = marker >= 0 ? args.slice(marker + 1).join(" ") : args.slice(1).join(" ");
+    if (!task.trim()) throw new Error('Usage: ctx benchmark -- "task"');
+    console.log(formatBenchmark(benchmarkWorkspace({ cwd: process.cwd(), task })));
   } else {
     throw new Error(`Unknown command: ${command}\n\n${usage()}`);
   }

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@minhpnq1807/contextos",
-  "version": "0.1.9",
+  "version": "0.2.0",
   "description": "Task-aware AGENTS.md context injection and compliance reporting for Codex.",
   "type": "module",
   "bin": {

package/plugins/ctx/lib/analyzer.js CHANGED

@@ -81,6 +81,11 @@ const TOOL_REFERENCE_TOKENS = new Set([
   "list_communities"
 ]);
+const ACTION_TOKENS = new Set([
+  "add", "avoid", "call", "check", "derive", "ensure", "filter", "follow", "prefer", "run",
+  "use", "validate", "verify", "write", "never", "always", "must", "should", "do"
+]);
 export function tokenize(value) {
   const normalized = String(value || "")
     .toLowerCase()
@@ -190,6 +195,7 @@ export function isDocumentationOnlyRule(rule) {
   if (/^<!--.*-->$/.test(normalized)) return true;
   if (DOCUMENTATION_HEADING_PATTERNS.some((pattern) => pattern.test(normalized))) return true;
   if (isMarkdownTableRule(normalized)) return true;
+  if (isGenericHeading(normalized)) return true;
   return false;
 }
@@ -209,6 +215,13 @@ function isMarkdownTableRule(content) {
   return /\btool\b/.test(lower) && /\buse\s+when\b/.test(lower) && toolReferenceCount >= 2;
 }
+function isGenericHeading(content) {
+  if (content.length > 80 || /[`.:;]/.test(content)) return false;
+  const tokens = tokenize(content);
+  if (tokens.length > 4) return false;
+  return !tokens.some((token) => ACTION_TOKENS.has(token));
+}
 function dedupeRules(rules) {
   const seen = new Set();
   return rules.filter((rule) => {

package/plugins/ctx/lib/benchmark.js ADDED

@@ -0,0 +1,72 @@
+import { parseRules, filterActionableRules, scoreRules } from "./analyzer.js";
+import { readAgentsChain } from "./reader.js";
+import { scheduleContext } from "./scheduler.js";
+export function benchmarkContext({ markdown, sources = [], task = "", openFiles = [], topK = 8 } = {}) {
+  const parsedRules = parseRules(markdown);
+  const actionableRules = filterActionableRules(parsedRules);
+  const scoredRules = scoreRules(actionableRules, task, openFiles);
+  const relevantRules = scoredRules.filter((rule) => Number(rule.score || 0) >= 0.1);
+  const scheduled = scheduleContext({ rules: scoredRules, relevantFiles: [] });
+  const originalPositions = new Map(actionableRules.map((rule, index) => [rule.content, index]));
+  const middleStart = Math.floor(actionableRules.length * 0.25);
+  const middleEnd = Math.ceil(actionableRules.length * 0.75);
+  const lostMiddle = relevantRules.filter((rule) => {
+    const index = originalPositions.get(rule.content);
+    return typeof index === "number" && index >= middleStart && index <= middleEnd;
+  });
+  return {
+    task,
+    sources,
+    rulesParsed: parsedRules.length,
+    actionableRules: actionableRules.length,
+    filteredRules: parsedRules.length - actionableRules.length,
+    relevantRules: relevantRules.length,
+    baseline: {
+      relevantRulesInMiddle: lostMiddle.length,
+      middleRiskPercent: relevantRules.length ? Math.round((lostMiddle.length / relevantRules.length) * 100) : 0
+    },
+    contextOS: {
+      highRules: scheduled.highRules.length,
+      midRules: scheduled.midRules.length,
+      topRules: scoredRules.slice(0, topK).map((rule) => ({
+        score: rule.score,
+        content: rule.content,
+        reasons: rule.reasons || []
+      })),
+      repeatsHighRulesAtEnd: scheduled.highRules.length > 0
+    }
+  };
+}
+export function benchmarkWorkspace({ cwd = process.cwd(), task = "", openFiles = [], topK = 8 } = {}) {
+  const merged = readAgentsChain({ cwd });
+  return benchmarkContext({
+    markdown: merged.content,
+    sources: merged.sources,
+    task,
+    openFiles,
+    topK
+  });
+}
+export function formatBenchmark(result) {
+  const lines = [];
+  lines.push("ContextOS benchmark");
+  lines.push(`Task: ${result.task || "(empty)"}`);
+  lines.push(`Rules: ${result.rulesParsed} parsed, ${result.actionableRules} actionable, ${result.filteredRules} filtered`);
+  lines.push(`Relevant rules: ${result.relevantRules}`);
+  lines.push(`Baseline middle-risk: ${result.baseline.relevantRulesInMiddle}/${result.relevantRules} relevant rules (${result.baseline.middleRiskPercent}%)`);
+  lines.push(`ContextOS scheduled: ${result.contextOS.highRules} high, ${result.contextOS.midRules} mid`);
+  lines.push(`Recency reminder: ${result.contextOS.repeatsHighRulesAtEnd ? "enabled" : "not needed"}`);
+  if (result.contextOS.topRules.length) {
+    lines.push("Top rules:");
+    for (const rule of result.contextOS.topRules) {
+      const reasons = rule.reasons?.length ? ` reasons:${rule.reasons.join(",")}` : "";
+      lines.push(`- ${Number(rule.score || 0).toFixed(2)} ${rule.content}${reasons}`);
+    }
+  }
+  return lines.join("\n");
+}

package/plugins/ctx/lib/measure.js CHANGED

@@ -183,23 +183,31 @@ export function checkCompliance({ rules = [], addedLines = [], runtimeEvidence =
     if (!keywords.length || !addedLines.length) {
       results.push({
         rule,
-        status: "unknown",
+        status: isRuntimeOnly || !addedLines.length || !keywords.length ? "unmeasurable" : "unknown",
         kind: isRuntimeOnly ? "runtime" : kind,
         keywords,
         evidence: isRuntimeOnly
-          ? "requires runtime/tool-call telemetry; no matching runtime signal observed"
+          ? "requires runtime/tool-call telemetry; no runtime telemetry source observed"
           : (!addedLines.length ? "no added lines in git diff" : "no concrete compliance keywords found")
       });
       continue;
     }
     if (isRuntimeOnly) {
+      const hasRuntimeSource = Boolean(
+        runtimeEvidence.sources?.length ||
+        runtimeEvidence.signals?.length ||
+        runtimeEvidence.toolSignals?.length ||
+        runtimeEvidence.commandSignals?.length
+      );
       results.push({
         rule,
-        status: "unknown",
+        status: hasRuntimeSource ? "unknown" : "unmeasurable",
         kind: "runtime",
         keywords,
-        evidence: "requires runtime/tool-call telemetry; no matching runtime signal observed"
+        evidence: hasRuntimeSource
+          ? "requires runtime/tool-call telemetry; no matching runtime signal observed"
+          : "requires runtime/tool-call telemetry; no runtime telemetry source observed"
       });
       continue;
     }

package/plugins/ctx/lib/reporter.js CHANGED

@@ -5,6 +5,7 @@ export function buildReport({ cwd, prompt, relevantFiles, scheduled, gitSnapshot
   const followed = actionableCompliance.filter((item) => item.status === "followed");
   const ignored = actionableCompliance.filter((item) => item.status === "ignored");
   const unknown = actionableCompliance.filter((item) => item.status === "unknown");
+  const unmeasurable = actionableCompliance.filter((item) => item.status === "unmeasurable");
   const measured = followed.length + ignored.length;
   const efficiencyScore = measured ? Math.round((followed.length / measured) * 100) : null;
@@ -20,8 +21,10 @@ export function buildReport({ cwd, prompt, relevantFiles, scheduled, gitSnapshot
     followed,
     ignored,
     unknown,
+    unmeasurable,
     measuredRuleCount: measured,
     unknownRuleCount: unknown.length,
+    unmeasurableRuleCount: unmeasurable.length,
     efficiencyScore
   };
 }
@@ -32,7 +35,7 @@ export function formatReport(report) {
   lines.push("ContextOS report");
   lines.push(`Efficiency: ${report.efficiencyScore == null ? "unknown" : `${report.efficiencyScore}%`}`);
   lines.push(`Injected rules: ${report.injectedRuleCount || 0}`);
-  lines.push(`Rule outcomes: ${report.followed?.length || 0} followed, ${report.ignored?.length || 0} ignored, ${report.unknown?.length || 0} unknown`);
+  lines.push(`Rule outcomes: ${report.followed?.length || 0} followed, ${report.ignored?.length || 0} ignored, ${report.unknown?.length || 0} unknown, ${report.unmeasurable?.length || 0} unmeasurable`);
   lines.push(`Measured rules: ${report.measuredRuleCount ?? ((report.followed?.length || 0) + (report.ignored?.length || 0))}`);
   lines.push(`Changed files: ${report.changedFiles?.length ? report.changedFiles.join(", ") : "none detected"}`);
@@ -48,6 +51,7 @@ export function formatReport(report) {
   appendBucket(lines, "Followed", report.followed);
   appendBucket(lines, "Ignored", report.ignored);
   appendBucket(lines, "Unknown", report.unknown);
+  appendBucket(lines, "Unmeasurable", report.unmeasurable);
   if (report.ignored?.length) {
     lines.push(`Suggestion: fix ignored rule evidence first: ${truncate(report.ignored[0].rule?.content || "", 70)}`);
@@ -71,7 +75,8 @@ export function formatEvidence(report) {
   const items = [
     ...(report.followed || []).map((item) => ({ ...item, status: "followed" })),
     ...(report.ignored || []).map((item) => ({ ...item, status: "ignored" })),
-    ...(report.unknown || []).map((item) => ({ ...item, status: "unknown" }))
+    ...(report.unknown || []).map((item) => ({ ...item, status: "unknown" })),
+    ...(report.unmeasurable || []).map((item) => ({ ...item, status: "unmeasurable" }))
   ];
   if (!items.length) {
@@ -129,15 +134,18 @@ function sanitizeReport(report = {}) {
   const followed = (report.followed || []).filter((item) => !isSystemUserRule(item.rule));
   const ignored = (report.ignored || []).filter((item) => !isSystemUserRule(item.rule));
   const unknown = (report.unknown || []).filter((item) => !isSystemUserRule(item.rule));
+  const unmeasurable = (report.unmeasurable || []).filter((item) => !isSystemUserRule(item.rule));
   const measured = followed.length + ignored.length;
   return {
     ...report,
-    injectedRuleCount: followed.length + ignored.length + unknown.length,
+    injectedRuleCount: followed.length + ignored.length + unknown.length + unmeasurable.length,
     followed,
     ignored,
     unknown,
+    unmeasurable,
     measuredRuleCount: measured,
     unknownRuleCount: unknown.length,
+    unmeasurableRuleCount: unmeasurable.length,
     efficiencyScore: measured ? Math.round((followed.length / measured) * 100) : null
   };
 }

package/plugins/ctx/lib/stats.js CHANGED

@@ -57,6 +57,7 @@ export function loadStats(dataDir) {
   const followed = reports.reduce((sum, report) => sum + (report.followed?.length || 0), 0);
   const ignored = reports.reduce((sum, report) => sum + (report.ignored?.length || 0), 0);
   const unknown = reports.reduce((sum, report) => sum + (report.unknown?.length || 0), 0);
+  const unmeasurable = reports.reduce((sum, report) => sum + (report.unmeasurable?.length || 0), 0);
   return {
     dataDir,
@@ -71,6 +72,7 @@ export function loadStats(dataDir) {
     followed,
     ignored,
     unknown,
+    unmeasurable,
     lastPrompt: analyzedPrompts.at(-1) || null,
     lastReport: reports.at(-1) || null
   };
@@ -85,7 +87,7 @@ export function formatStats(stats) {
   lines.push(`Prompt mode: ${stats.injectedCount} injected, ${stats.quietCount} quiet (${stats.injectionRate}% injected)`);
   lines.push(`Average prompt analysis: ${stats.averagePromptMs == null ? "unknown" : `${stats.averagePromptMs}ms`}`);
   lines.push(`Average efficiency: ${formatAverageEfficiency(stats)}`);
-  lines.push(`Rule outcomes: ${stats.followed} followed, ${stats.ignored} ignored, ${stats.unknown} unknown`);
+  lines.push(`Rule outcomes: ${stats.followed} followed, ${stats.ignored} ignored, ${stats.unknown} unknown, ${stats.unmeasurable || 0} unmeasurable`);
   const eventSummary = Object.entries(stats.events)
     .map(([event, count]) => `${event}:${count}`)
@@ -103,6 +105,7 @@ export function formatStats(stats) {
     lines.push(`Last report efficiency: ${stats.lastReport.efficiencyScore == null ? "unknown" : `${stats.lastReport.efficiencyScore}%`}`);
     lines.push(`Last report measured rules: ${stats.lastReport.measuredRuleCount ?? ((stats.lastReport.followed?.length || 0) + (stats.lastReport.ignored?.length || 0))}`);
     lines.push(`Last report unknown rules: ${stats.lastReport.unknownRuleCount ?? (stats.lastReport.unknown?.length || 0)}`);
+    lines.push(`Last report unmeasurable rules: ${stats.lastReport.unmeasurableRuleCount ?? (stats.lastReport.unmeasurable?.length || 0)}`);
     const changed = stats.lastReport.changedFiles?.join(", ");
     if (changed) lines.push(`Last changed files: ${changed}`);
   }