npm - oscar64-mcp-docs - Versions diffs - 1.1.2 → 1.1.3 - Mend

oscar64-mcp-docs 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (3)

package/README.md (changed)

@@ -76,7 +76,7 @@ The executable entry is built to `dist/stdio.js`.
 Primary tools:
-- `search(query, limit, type, system)` -> unified manual/code search with strict hit fields (`result_type`, `uri`, `title`, `preview`, `classification_summary`), optional `referenced_files` as `code://...` URIs readable by `read_uri`; `classification_summary` includes only `track` and `track_status`; `type` defaults to `all`; `system` defaults to `c64` and supports `all` for cross-system results
+- `search(query, limit, type, system, strict, min_confidence, debug_scoring)` -> unified manual/code search with agent-facing hit fields (`result_type`, `uri`, `title`, `preview`, `confidence`, `match_reasons`, `classification_summary`), optional `referenced_files` as `code://...` URIs readable by `read_uri`; `strict` suppresses broad fallback-only results, `min_confidence` applies threshold filtering, and strict/threshold no-match responses return `quality_status`, `failure_reasons`, and `suggested_refinement`; `debug_scoring=true` adds per-hit scoring diagnostics
 - `read_uri(uri, binary_mode, max_base64_bytes)` -> returns `ok + data` where `data.content_type` is `text` or `binary` for `docs://...` and `code://...`
 - `list_indexes(type, system)` -> lists `topics`/`tutorials`/`samples`/`headers` entries; `type` defaults to `headers`, `system` defaults to `c64`, and `system=all` returns cross-system indexes
@@ -95,50 +95,3 @@ Index discovery:
 - `list_indexes(type)` returns only `docs://` and `code://` URIs that can be read directly with `read_uri`.
-## Versioning
-```bash
-npm run version:patch
-# or
-npm run version:minor
-# or
-npm run version:major
-```
-## Publish to npmjs
-Dry run:
-```bash
-npm run publish:npm -- --dry-run
-```
-Publish:
-```bash
-npm run publish:npm
-```
-`publish:npm` is interactive:
-- warns on dirty git working tree
-- asks for version bump (`patch/minor/major/custom/keep`)
-- runs `check` + `build`
-- asks for final publish confirmation
-- only mutates `package.json` version after confirmation
-Optional env vars:
-- `NPM_ACCESS` (`public` or `restricted`, default `public`)
-- `NPM_TAG` (default `latest`)
-- `NPM_OTP` (for 2FA)
-## Classification Config
-Classification policy and deterministic scoring are data-driven in:
-- `src/config/classification-v2.ts`
-This file defines:
-- weighted facet rules and primary-track inference
-- tutorial ID band seeds and precedence tie-breaks

package/dist/stdio.js (changed)

@@ -403,22 +403,49 @@ var searchPreviewSchema = z.object({
   signature: z.string().optional().describe("Declaration-like line extracted from content when available."),
   include_path: z.string().optional().describe("Header include path context when relevant.")
 });
+var matchReasonCodeSchema = z.enum([
+  "exact_symbol_match",
+  "exact_include_path_match",
+  "exact_manual_anchor_match",
+  "register_or_address_exact_match",
+  "api_name_token_match",
+  "topic_semantic_match",
+  "fallback_broad_match"
+]);
+var matchReasonSchema = z.object({
+  code: matchReasonCodeSchema.describe("Deterministic reason code describing why this hit matched."),
+  weight: z.number().describe("Per-reason contribution to confidence before normalization.")
+});
+var searchHitDebugScoringSchema = z.object({
+  reason_total_weight: z.number().describe("Sum of reason weights before confidence normalization."),
+  base_score: z.number().describe("Original backend search score for this hit."),
+  confidence_raw: z.number().describe("Unclamped confidence before range enforcement.")
+});
 var searchHitSchema = z.object({
   result_type: z.enum(["topics", "tutorials", "samples", "headers"]).describe("Artifact type for this result."),
   uri: z.string().describe("URI to pass into `read_uri` for full content."),
   title: z.string().describe("Short title for the matched result."),
   preview: searchPreviewSchema.describe("Structured preview fields for relevance evaluation."),
   referenced_files: z.array(z.string()).optional().describe("Referenced `code://...` URIs that can be read with `read_uri` (for example from #embed or #include)."),
+  confidence: z.number().min(0).max(1).describe("Normalized confidence score in range 0..1."),
+  match_reasons: z.array(matchReasonSchema).min(1).describe("Deterministic ordered reason list for this hit."),
+  debug_scoring: searchHitDebugScoringSchema.optional().describe("Per-hit scoring diagnostics, only present when debug_scoring=true."),
   classification_summary: classificationSummarySchema.describe("Compact classification metadata always returned.")
 });
 var searchInputSchema = z.object({
   query: z.string().min(1).describe("Query text, symbol, API name, or error phrase to search for."),
   limit: z.number().int().min(1).max(80).default(20).describe("Maximum number of results to return."),
   type: searchTypeSchema.default("all").describe("Filter results by artifact type. Defaults to `all`."),
-  system: systemFilterSchema.default("c64").describe("Target system filter. Defaults to `c64`; use `all` for cross-system search.")
+  system: systemFilterSchema.default("c64").describe("Target system filter. Defaults to `c64`; use `all` for cross-system search."),
+  strict: z.boolean().default(false).describe("When true, suppresses broad fallback-only results."),
+  min_confidence: z.number().min(0).max(1).optional().describe("Optional confidence threshold for result inclusion."),
+  debug_scoring: z.boolean().default(false).describe("When true, include per-hit scoring diagnostics.")
 }).strict();
 var searchDataSchema = z.object({
-  results: z.array(searchHitSchema).describe("Ranked search hits.")
+  results: z.array(searchHitSchema).describe("Ranked search hits."),
+  quality_status: z.enum(["insufficient"]).optional().describe("Set when strict filtering removes all useful matches."),
+  failure_reasons: z.array(matchReasonCodeSchema).optional().describe("High-signal reasons for insufficient quality."),
+  suggested_refinement: z.enum(["symbol", "include_path", "address"]).optional().describe("Suggested query refinement type when quality is insufficient.")
 });
 var searchSuccessEnvelopeSchema = z.object({
   ok: z.literal(true).describe("True when the request succeeds."),
@@ -1894,7 +1921,14 @@ function escapeRegExp(value) {
   return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
 }
 function tokenizeQuery(query) {
-  return query.toLowerCase().split(/[^a-z0-9_]+/).map((token) => token.trim()).filter((token) => token.length >= 3);
+  const seen = /* @__PURE__ */ new Set();
+  const out = [];
+  for (const token of query.toLowerCase().split(/[^a-z0-9_]+/).map((token2) => token2.trim()).filter((token2) => token2.length >= 2)) {
+    if (seen.has(token)) continue;
+    seen.add(token);
+    out.push(token);
+  }
+  return out;
 }
 function isExactSymbolQuery(query) {
   return /^[A-Za-z_][A-Za-z0-9_]*$/.test(query.trim());
@@ -1980,13 +2014,30 @@ function extractSystems(classification) {
   }
   return [...out];
 }
-function hasQuerySignal(result, query) {
-  const queryTokens = tokenizeQuery(query);
-  if (queryTokens.length === 0) return false;
+function hasQuerySignal(result, tokens) {
+  if (tokens.length === 0) return false;
   const title = String(result?.title ?? "").toLowerCase();
   const snippet = String(result?.snippet ?? "").toLowerCase();
   const body = String(result?.body ?? "").toLowerCase();
-  return queryTokens.some((token) => title.includes(token) || snippet.includes(token) || body.includes(token));
+  return tokens.some((token) => title.includes(token) || snippet.includes(token) || body.includes(token));
+}
+function extractQueryEntities(query) {
+  const trimmed = query.trim();
+  const lower = trimmed.toLowerCase();
+  const tokens = tokenizeQuery(trimmed);
+  const symbol = isExactSymbolQuery(trimmed) ? lower : void 0;
+  const includePathMatch = lower.match(/\b(?:[a-z0-9_]+\/)+[a-z0-9_.-]+\.[a-z0-9_]+\b/);
+  const includePath = includePathMatch ? includePathMatch[0] : void 0;
+  const dollarHex = lower.match(/\$[0-9a-f]{3,4}\b/);
+  const oxHex = lower.match(/0x[0-9a-f]{3,4}\b/);
+  const address = dollarHex ? dollarHex[0] : oxHex ? `$${oxHex[0].replace(/^0x/, "")}` : void 0;
+  return {
+    tokens,
+    symbol,
+    includePath,
+    address,
+    hasPragmaToken: tokens.includes("pragma")
+  };
 }
 function hasClearCodingValue(hit) {
   if (typeof hit.preview.signature === "string" && hit.preview.signature.trim().length > 0) return true;
@@ -1995,35 +2046,93 @@ function hasClearCodingValue(hit) {
   if ((hit.resultType === "tutorials" || hit.resultType === "samples" || hit.resultType === "topics") && String(hit.preview.summary ?? "").trim().length > 0) {
     return true;
   }
-  if (hit.hasQuerySignal && (hit.resultType === "tutorials" || hit.resultType === "samples" || hit.resultType === "topics")) {
-    return true;
-  }
-  return false;
+  return hit.hasQuerySignal && (hit.resultType === "tutorials" || hit.resultType === "samples" || hit.resultType === "topics");
 }
-function computeRankingBoost(hit) {
-  let boost = 0;
-  if (hit.resultType === "headers") boost += 20;
-  if (hit.resultType === "headers" && hit.classificationSummary.track_status === "asserted") boost += 12;
-  if (hit.resultType === "tutorials" || hit.resultType === "samples") boost += 6;
-  if (hit.preview.signature) boost += 16;
-  if ((hit.referencedUris?.length ?? 0) > 0) boost += 8;
-  if (hit.hasQuerySignal) boost += 5;
-  const symbol = hit.query.trim().toLowerCase();
-  if (isExactSymbolQuery(hit.query)) {
-    const signature = String(hit.preview.signature ?? "").toLowerCase();
-    if (signature.includes(symbol)) boost += 42;
-    if (hit.resultType === "headers" && hit.uri.toLowerCase().includes(`${symbol}.h`)) boost += 18;
-  }
-  if (hit.classificationSummary.track === "fundamentals" && hit.classificationSummary.track_status === "neutral_fallback") {
-    boost -= 8;
-  }
-  return boost;
+var REASON_WEIGHTS = {
+  exact_symbol_match: 0.52,
+  exact_include_path_match: 0.5,
+  exact_manual_anchor_match: 0.46,
+  register_or_address_exact_match: 0.48,
+  api_name_token_match: 0.24,
+  topic_semantic_match: 0.18,
+  fallback_broad_match: 0.08
+};
+function pushReason(out, code) {
+  if (out.some((item) => item.code === code)) return;
+  out.push({ code, weight: REASON_WEIGHTS[code] });
+}
+function toDeterministicConfidence(value) {
+  const clamped = Math.max(0.02, Math.min(0.99, value));
+  return Number(clamped.toFixed(3));
+}
+function computeScoredReasons(hit) {
+  const reasons = [];
+  const uriLower = hit.uri.toLowerCase();
+  const signature = String(hit.preview.signature ?? "").toLowerCase();
+  const includePath = String(hit.preview.include_path ?? "").toLowerCase();
+  const summary = String(hit.preview.summary ?? "").toLowerCase();
+  const snippet = hit.snippet.toLowerCase();
+  const body = hit.body.toLowerCase();
+  const title = hit.title.toLowerCase();
+  if (hit.entities.symbol) {
+    if (signature.includes(hit.entities.symbol) || body.includes(hit.entities.symbol) || title.includes(hit.entities.symbol)) {
+      pushReason(reasons, "exact_symbol_match");
+    }
+  }
+  if (hit.entities.includePath) {
+    const includeTarget = hit.entities.includePath.toLowerCase();
+    if (uriLower.includes(includeTarget) || includePath.includes(includeTarget) || snippet.includes(includeTarget) || body.includes(includeTarget)) {
+      pushReason(reasons, "exact_include_path_match");
+    }
+  }
+  if (hit.entities.address) {
+    const addr = hit.entities.address.toLowerCase();
+    const normalizedAddr = addr.replace("$", "0x");
+    if (uriLower.includes(addr) || snippet.includes(addr) || body.includes(addr) || snippet.includes(normalizedAddr) || body.includes(normalizedAddr)) {
+      pushReason(reasons, "register_or_address_exact_match");
+    }
+  }
+  if (hit.resultType === "topics" && hit.entities.tokens.length > 0) {
+    const anchor = hit.uri.includes("#") ? hit.uri.slice(hit.uri.indexOf("#") + 1).toLowerCase() : "";
+    if (anchor && hit.entities.tokens.some((token) => anchor.includes(token))) {
+      pushReason(reasons, "exact_manual_anchor_match");
+    }
+  }
+  const apiTokenMatches = hit.entities.tokens.filter(
+    (token) => signature.includes(token) || title.includes(token) || includePath.includes(token)
+  ).length;
+  if (apiTokenMatches > 0) {
+    pushReason(reasons, "api_name_token_match");
+  }
+  if (hit.entities.hasPragmaToken || hit.hasQuerySignal || hit.classificationSummary.track_status === "asserted") {
+    pushReason(reasons, "topic_semantic_match");
+  }
+  if (reasons.length === 0) {
+    pushReason(reasons, "fallback_broad_match");
+  }
+  reasons.sort((a, b) => b.weight - a.weight || a.code.localeCompare(b.code));
+  const reasonTotalWeight = reasons.reduce((sum, reason) => sum + reason.weight, 0);
+  const baseScoreBoost = Math.max(0, Math.min(0.22, hit.baseScore / 100));
+  const typeBoost = hit.resultType === "headers" ? 0.08 : hit.resultType === "topics" ? 0.03 : 0.05;
+  const classificationBoost = hit.classificationSummary.track_status === "asserted" ? 0.06 : hit.classificationSummary.track === "fundamentals" ? -0.02 : 0;
+  const confidenceRaw = reasonTotalWeight + baseScoreBoost + typeBoost + classificationBoost;
+  const confidence = toDeterministicConfidence(confidenceRaw);
+  return {
+    confidence,
+    reasons,
+    reasonTotalWeight: Number(reasonTotalWeight.toFixed(3)),
+    confidenceRaw: Number(confidenceRaw.toFixed(3))
+  };
 }
 async function executeSearch(context) {
   const { query, limit } = context;
   const requestedType = context.type ?? "all";
   const system = context.system ?? "c64";
+  const strict = context.strict ?? false;
+  const minConfidence = context.min_confidence;
+  const debugScoring = context.debug_scoring ?? false;
   const state = await getStateSnapshot();
+  const entities = extractQueryEntities(query);
   const rawResults = normalizeSearchResults(
     state.searchIndex.search(query, {
       combineWith: inferCombineMode(query),
@@ -2046,19 +2155,27 @@ async function executeSearch(context) {
       const referencedUris = await resolveReferencedUris(state, String(result.uri ?? ""), result.referencedFiles);
       const classificationSummary = toClassificationSummary(result.classification);
       const systems = extractSystems(result.classification);
-      const hasSignal = hasQuerySignal(result, query);
+      const hasSignal = hasQuerySignal(result, entities.tokens);
+      const uri = String(result.uri ?? "");
+      const title = String(result.title ?? "");
+      const snippet = String(result.snippet ?? "");
+      const body = String(result.body ?? "");
       const baseScore = Number(result?.score ?? 0);
-      const rankScore = baseScore + computeRankingBoost({
-        uri: String(result.uri ?? ""),
+      const scored = computeScoredReasons({
+        entities,
+        uri,
         resultType,
         preview,
-        query,
-        referencedUris,
+        snippet,
+        body,
+        title,
+        baseScore,
         classificationSummary,
         hasQuerySignal: hasSignal
       });
       return {
-        rankScore,
+        confidence: scored.confidence,
+        matchReasons: scored.reasons,
         systems,
         keep: hasClearCodingValue({
           resultType,
@@ -2068,24 +2185,56 @@ async function executeSearch(context) {
         }),
         hit: {
           result_type: resultType,
-          uri: String(result.uri ?? ""),
-          title: String(result.title ?? ""),
+          uri,
+          title,
           preview,
           ...referencedUris ? { referenced_files: referencedUris } : {},
+          confidence: scored.confidence,
+          match_reasons: scored.reasons,
+          ...debugScoring ? {
+            debug_scoring: {
+              reason_total_weight: scored.reasonTotalWeight,
+              base_score: baseScore,
+              confidence_raw: scored.confidenceRaw
+            }
+          } : {},
           classification_summary: classificationSummary
         }
       };
     })
   );
-  const hits = mapped.filter((entry) => entry.keep).filter((entry) => {
+  const candidateHits = mapped.filter((entry) => entry.keep).filter((entry) => {
     const uri = String(entry.hit.uri ?? "");
     if (uri.startsWith("code://oscar/include/") && !uri.toLowerCase().endsWith(".h")) return false;
     return true;
-  }).filter((entry) => requestedType === "all" || entry.hit.result_type === requestedType).filter((entry) => matchesSystemFilter(entry.systems, system)).sort((a, b) => b.rankScore - a.rankScore || a.hit.uri.localeCompare(b.hit.uri)).slice(0, limit);
+  }).filter((entry) => requestedType === "all" || entry.hit.result_type === requestedType).filter((entry) => matchesSystemFilter(entry.systems, system));
+  const filteredHits = candidateHits.filter((entry) => !(entry.matchReasons.length === 1 && entry.matchReasons[0]?.code === "fallback_broad_match")).filter((entry) => {
+    if (!strict) return true;
+    return !(entry.matchReasons.length === 1 && entry.matchReasons[0]?.code === "fallback_broad_match");
+  }).filter((entry) => typeof minConfidence === "number" ? entry.confidence >= minConfidence : true).sort((a, b) => b.confidence - a.confidence || a.hit.uri.localeCompare(b.hit.uri)).slice(0, limit);
+  if (filteredHits.length === 0) {
+    const reasonCounts = /* @__PURE__ */ new Map();
+    for (const entry of candidateHits) {
+      for (const reason of entry.matchReasons) {
+        reasonCounts.set(reason.code, (reasonCounts.get(reason.code) ?? 0) + 1);
+      }
+    }
+    const failureReasons = [...reasonCounts.entries()].sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0])).map(([code]) => code).slice(0, 3);
+    const suggestedRefinement = entities.address ? "address" : entities.includePath ? "include_path" : "symbol";
+    return {
+      ok: true,
+      data: {
+        results: [],
+        quality_status: "insufficient",
+        failure_reasons: failureReasons.length > 0 ? failureReasons : ["fallback_broad_match"],
+        suggested_refinement: suggestedRefinement
+      }
+    };
+  }
   return {
     ok: true,
     data: {
-      results: hits.map((entry) => entry.hit)
+      results: filteredHits.map((entry) => entry.hit)
     }
   };
 }

package/package.json (changed)

@@ -1,6 +1,6 @@
 {
   "name": "oscar64-mcp-docs",
-  "version": "1.1.2",
+  "version": "1.1.3",
   "private": false,
   "type": "module",
   "engines": {