oscar64-mcp-docs 1.1.2 → 1.1.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. package/README.md +1 -48
  2. package/dist/stdio.js +189 -40
  3. package/package.json +1 -1
package/README.md CHANGED
@@ -76,7 +76,7 @@ The executable entry is built to `dist/stdio.js`.
76
76
 
77
77
  Primary tools:
78
78
 
79
- - `search(query, limit, type, system)` -> unified manual/code search with strict hit fields (`result_type`, `uri`, `title`, `preview`, `classification_summary`), optional `referenced_files` as `code://...` URIs readable by `read_uri`; `classification_summary` includes only `track` and `track_status`; `type` defaults to `all`; `system` defaults to `c64` and supports `all` for cross-system results
79
+ - `search(query, limit, type, system, strict, min_confidence, debug_scoring)` -> unified manual/code search with agent-facing hit fields (`result_type`, `uri`, `title`, `preview`, `confidence`, `match_reasons`, `classification_summary`), optional `referenced_files` as `code://...` URIs readable by `read_uri`; `strict` suppresses broad fallback-only results, `min_confidence` applies threshold filtering, and strict/threshold no-match responses return `quality_status`, `failure_reasons`, and `suggested_refinement`; `debug_scoring=true` adds per-hit scoring diagnostics
80
80
  - `read_uri(uri, binary_mode, max_base64_bytes)` -> returns `ok + data` where `data.content_type` is `text` or `binary` for `docs://...` and `code://...`
81
81
  - `list_indexes(type, system)` -> lists `topics`/`tutorials`/`samples`/`headers` entries; `type` defaults to `headers`, `system` defaults to `c64`, and `system=all` returns cross-system indexes
82
82
 
@@ -95,50 +95,3 @@ Index discovery:
95
95
 
96
96
  - `list_indexes(type)` returns only `docs://` and `code://` URIs that can be read directly with `read_uri`.
97
97
 
98
- ## Versioning
99
-
100
- ```bash
101
- npm run version:patch
102
- # or
103
- npm run version:minor
104
- # or
105
- npm run version:major
106
- ```
107
-
108
- ## Publish to npmjs
109
-
110
- Dry run:
111
-
112
- ```bash
113
- npm run publish:npm -- --dry-run
114
- ```
115
-
116
- Publish:
117
-
118
- ```bash
119
- npm run publish:npm
120
- ```
121
-
122
- `publish:npm` is interactive:
123
- - warns on dirty git working tree
124
- - asks for version bump (`patch/minor/major/custom/keep`)
125
- - runs `check` + `build`
126
- - asks for final publish confirmation
127
- - only mutates `package.json` version after confirmation
128
-
129
- Optional env vars:
130
-
131
- - `NPM_ACCESS` (`public` or `restricted`, default `public`)
132
- - `NPM_TAG` (default `latest`)
133
- - `NPM_OTP` (for 2FA)
134
-
135
- ## Classification Config
136
-
137
- Classification policy and deterministic scoring are data-driven in:
138
-
139
- - `src/config/classification-v2.ts`
140
-
141
- This file defines:
142
-
143
- - weighted facet rules and primary-track inference
144
- - tutorial ID band seeds and precedence tie-breaks
package/dist/stdio.js CHANGED
@@ -403,22 +403,49 @@ var searchPreviewSchema = z.object({
403
403
  signature: z.string().optional().describe("Declaration-like line extracted from content when available."),
404
404
  include_path: z.string().optional().describe("Header include path context when relevant.")
405
405
  });
406
+ var matchReasonCodeSchema = z.enum([
407
+ "exact_symbol_match",
408
+ "exact_include_path_match",
409
+ "exact_manual_anchor_match",
410
+ "register_or_address_exact_match",
411
+ "api_name_token_match",
412
+ "topic_semantic_match",
413
+ "fallback_broad_match"
414
+ ]);
415
+ var matchReasonSchema = z.object({
416
+ code: matchReasonCodeSchema.describe("Deterministic reason code describing why this hit matched."),
417
+ weight: z.number().describe("Per-reason contribution to confidence before normalization.")
418
+ });
419
+ var searchHitDebugScoringSchema = z.object({
420
+ reason_total_weight: z.number().describe("Sum of reason weights before confidence normalization."),
421
+ base_score: z.number().describe("Original backend search score for this hit."),
422
+ confidence_raw: z.number().describe("Unclamped confidence before range enforcement.")
423
+ });
406
424
  var searchHitSchema = z.object({
407
425
  result_type: z.enum(["topics", "tutorials", "samples", "headers"]).describe("Artifact type for this result."),
408
426
  uri: z.string().describe("URI to pass into `read_uri` for full content."),
409
427
  title: z.string().describe("Short title for the matched result."),
410
428
  preview: searchPreviewSchema.describe("Structured preview fields for relevance evaluation."),
411
429
  referenced_files: z.array(z.string()).optional().describe("Referenced `code://...` URIs that can be read with `read_uri` (for example from #embed or #include)."),
430
+ confidence: z.number().min(0).max(1).describe("Normalized confidence score in range 0..1."),
431
+ match_reasons: z.array(matchReasonSchema).min(1).describe("Deterministic ordered reason list for this hit."),
432
+ debug_scoring: searchHitDebugScoringSchema.optional().describe("Per-hit scoring diagnostics, only present when debug_scoring=true."),
412
433
  classification_summary: classificationSummarySchema.describe("Compact classification metadata always returned.")
413
434
  });
414
435
  var searchInputSchema = z.object({
415
436
  query: z.string().min(1).describe("Query text, symbol, API name, or error phrase to search for."),
416
437
  limit: z.number().int().min(1).max(80).default(20).describe("Maximum number of results to return."),
417
438
  type: searchTypeSchema.default("all").describe("Filter results by artifact type. Defaults to `all`."),
418
- system: systemFilterSchema.default("c64").describe("Target system filter. Defaults to `c64`; use `all` for cross-system search.")
439
+ system: systemFilterSchema.default("c64").describe("Target system filter. Defaults to `c64`; use `all` for cross-system search."),
440
+ strict: z.boolean().default(false).describe("When true, suppresses broad fallback-only results."),
441
+ min_confidence: z.number().min(0).max(1).optional().describe("Optional confidence threshold for result inclusion."),
442
+ debug_scoring: z.boolean().default(false).describe("When true, include per-hit scoring diagnostics.")
419
443
  }).strict();
420
444
  var searchDataSchema = z.object({
421
- results: z.array(searchHitSchema).describe("Ranked search hits.")
445
+ results: z.array(searchHitSchema).describe("Ranked search hits."),
446
+ quality_status: z.enum(["insufficient"]).optional().describe("Set when strict filtering removes all useful matches."),
447
+ failure_reasons: z.array(matchReasonCodeSchema).optional().describe("High-signal reasons for insufficient quality."),
448
+ suggested_refinement: z.enum(["symbol", "include_path", "address"]).optional().describe("Suggested query refinement type when quality is insufficient.")
422
449
  });
423
450
  var searchSuccessEnvelopeSchema = z.object({
424
451
  ok: z.literal(true).describe("True when the request succeeds."),
@@ -1894,7 +1921,14 @@ function escapeRegExp(value) {
1894
1921
  return value.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
1895
1922
  }
1896
1923
  function tokenizeQuery(query) {
1897
- return query.toLowerCase().split(/[^a-z0-9_]+/).map((token) => token.trim()).filter((token) => token.length >= 3);
1924
+ const seen = /* @__PURE__ */ new Set();
1925
+ const out = [];
1926
+ for (const token of query.toLowerCase().split(/[^a-z0-9_]+/).map((token2) => token2.trim()).filter((token2) => token2.length >= 2)) {
1927
+ if (seen.has(token)) continue;
1928
+ seen.add(token);
1929
+ out.push(token);
1930
+ }
1931
+ return out;
1898
1932
  }
1899
1933
  function isExactSymbolQuery(query) {
1900
1934
  return /^[A-Za-z_][A-Za-z0-9_]*$/.test(query.trim());
@@ -1980,13 +2014,30 @@ function extractSystems(classification) {
1980
2014
  }
1981
2015
  return [...out];
1982
2016
  }
1983
- function hasQuerySignal(result, query) {
1984
- const queryTokens = tokenizeQuery(query);
1985
- if (queryTokens.length === 0) return false;
2017
+ function hasQuerySignal(result, tokens) {
2018
+ if (tokens.length === 0) return false;
1986
2019
  const title = String(result?.title ?? "").toLowerCase();
1987
2020
  const snippet = String(result?.snippet ?? "").toLowerCase();
1988
2021
  const body = String(result?.body ?? "").toLowerCase();
1989
- return queryTokens.some((token) => title.includes(token) || snippet.includes(token) || body.includes(token));
2022
+ return tokens.some((token) => title.includes(token) || snippet.includes(token) || body.includes(token));
2023
+ }
2024
+ function extractQueryEntities(query) {
2025
+ const trimmed = query.trim();
2026
+ const lower = trimmed.toLowerCase();
2027
+ const tokens = tokenizeQuery(trimmed);
2028
+ const symbol = isExactSymbolQuery(trimmed) ? lower : void 0;
2029
+ const includePathMatch = lower.match(/\b(?:[a-z0-9_]+\/)+[a-z0-9_.-]+\.[a-z0-9_]+\b/);
2030
+ const includePath = includePathMatch ? includePathMatch[0] : void 0;
2031
+ const dollarHex = lower.match(/\$[0-9a-f]{3,4}\b/);
2032
+ const oxHex = lower.match(/0x[0-9a-f]{3,4}\b/);
2033
+ const address = dollarHex ? dollarHex[0] : oxHex ? `$${oxHex[0].replace(/^0x/, "")}` : void 0;
2034
+ return {
2035
+ tokens,
2036
+ symbol,
2037
+ includePath,
2038
+ address,
2039
+ hasPragmaToken: tokens.includes("pragma")
2040
+ };
1990
2041
  }
1991
2042
  function hasClearCodingValue(hit) {
1992
2043
  if (typeof hit.preview.signature === "string" && hit.preview.signature.trim().length > 0) return true;
@@ -1995,35 +2046,93 @@ function hasClearCodingValue(hit) {
1995
2046
  if ((hit.resultType === "tutorials" || hit.resultType === "samples" || hit.resultType === "topics") && String(hit.preview.summary ?? "").trim().length > 0) {
1996
2047
  return true;
1997
2048
  }
1998
- if (hit.hasQuerySignal && (hit.resultType === "tutorials" || hit.resultType === "samples" || hit.resultType === "topics")) {
1999
- return true;
2000
- }
2001
- return false;
2049
+ return hit.hasQuerySignal && (hit.resultType === "tutorials" || hit.resultType === "samples" || hit.resultType === "topics");
2002
2050
  }
2003
- function computeRankingBoost(hit) {
2004
- let boost = 0;
2005
- if (hit.resultType === "headers") boost += 20;
2006
- if (hit.resultType === "headers" && hit.classificationSummary.track_status === "asserted") boost += 12;
2007
- if (hit.resultType === "tutorials" || hit.resultType === "samples") boost += 6;
2008
- if (hit.preview.signature) boost += 16;
2009
- if ((hit.referencedUris?.length ?? 0) > 0) boost += 8;
2010
- if (hit.hasQuerySignal) boost += 5;
2011
- const symbol = hit.query.trim().toLowerCase();
2012
- if (isExactSymbolQuery(hit.query)) {
2013
- const signature = String(hit.preview.signature ?? "").toLowerCase();
2014
- if (signature.includes(symbol)) boost += 42;
2015
- if (hit.resultType === "headers" && hit.uri.toLowerCase().includes(`${symbol}.h`)) boost += 18;
2016
- }
2017
- if (hit.classificationSummary.track === "fundamentals" && hit.classificationSummary.track_status === "neutral_fallback") {
2018
- boost -= 8;
2019
- }
2020
- return boost;
2051
+ var REASON_WEIGHTS = {
2052
+ exact_symbol_match: 0.52,
2053
+ exact_include_path_match: 0.5,
2054
+ exact_manual_anchor_match: 0.46,
2055
+ register_or_address_exact_match: 0.48,
2056
+ api_name_token_match: 0.24,
2057
+ topic_semantic_match: 0.18,
2058
+ fallback_broad_match: 0.08
2059
+ };
2060
+ function pushReason(out, code) {
2061
+ if (out.some((item) => item.code === code)) return;
2062
+ out.push({ code, weight: REASON_WEIGHTS[code] });
2063
+ }
2064
+ function toDeterministicConfidence(value) {
2065
+ const clamped = Math.max(0.02, Math.min(0.99, value));
2066
+ return Number(clamped.toFixed(3));
2067
+ }
2068
+ function computeScoredReasons(hit) {
2069
+ const reasons = [];
2070
+ const uriLower = hit.uri.toLowerCase();
2071
+ const signature = String(hit.preview.signature ?? "").toLowerCase();
2072
+ const includePath = String(hit.preview.include_path ?? "").toLowerCase();
2073
+ const summary = String(hit.preview.summary ?? "").toLowerCase();
2074
+ const snippet = hit.snippet.toLowerCase();
2075
+ const body = hit.body.toLowerCase();
2076
+ const title = hit.title.toLowerCase();
2077
+ if (hit.entities.symbol) {
2078
+ if (signature.includes(hit.entities.symbol) || body.includes(hit.entities.symbol) || title.includes(hit.entities.symbol)) {
2079
+ pushReason(reasons, "exact_symbol_match");
2080
+ }
2081
+ }
2082
+ if (hit.entities.includePath) {
2083
+ const includeTarget = hit.entities.includePath.toLowerCase();
2084
+ if (uriLower.includes(includeTarget) || includePath.includes(includeTarget) || snippet.includes(includeTarget) || body.includes(includeTarget)) {
2085
+ pushReason(reasons, "exact_include_path_match");
2086
+ }
2087
+ }
2088
+ if (hit.entities.address) {
2089
+ const addr = hit.entities.address.toLowerCase();
2090
+ const normalizedAddr = addr.replace("$", "0x");
2091
+ if (uriLower.includes(addr) || snippet.includes(addr) || body.includes(addr) || snippet.includes(normalizedAddr) || body.includes(normalizedAddr)) {
2092
+ pushReason(reasons, "register_or_address_exact_match");
2093
+ }
2094
+ }
2095
+ if (hit.resultType === "topics" && hit.entities.tokens.length > 0) {
2096
+ const anchor = hit.uri.includes("#") ? hit.uri.slice(hit.uri.indexOf("#") + 1).toLowerCase() : "";
2097
+ if (anchor && hit.entities.tokens.some((token) => anchor.includes(token))) {
2098
+ pushReason(reasons, "exact_manual_anchor_match");
2099
+ }
2100
+ }
2101
+ const apiTokenMatches = hit.entities.tokens.filter(
2102
+ (token) => signature.includes(token) || title.includes(token) || includePath.includes(token)
2103
+ ).length;
2104
+ if (apiTokenMatches > 0) {
2105
+ pushReason(reasons, "api_name_token_match");
2106
+ }
2107
+ if (hit.entities.hasPragmaToken || hit.hasQuerySignal || hit.classificationSummary.track_status === "asserted") {
2108
+ pushReason(reasons, "topic_semantic_match");
2109
+ }
2110
+ if (reasons.length === 0) {
2111
+ pushReason(reasons, "fallback_broad_match");
2112
+ }
2113
+ reasons.sort((a, b) => b.weight - a.weight || a.code.localeCompare(b.code));
2114
+ const reasonTotalWeight = reasons.reduce((sum, reason) => sum + reason.weight, 0);
2115
+ const baseScoreBoost = Math.max(0, Math.min(0.22, hit.baseScore / 100));
2116
+ const typeBoost = hit.resultType === "headers" ? 0.08 : hit.resultType === "topics" ? 0.03 : 0.05;
2117
+ const classificationBoost = hit.classificationSummary.track_status === "asserted" ? 0.06 : hit.classificationSummary.track === "fundamentals" ? -0.02 : 0;
2118
+ const confidenceRaw = reasonTotalWeight + baseScoreBoost + typeBoost + classificationBoost;
2119
+ const confidence = toDeterministicConfidence(confidenceRaw);
2120
+ return {
2121
+ confidence,
2122
+ reasons,
2123
+ reasonTotalWeight: Number(reasonTotalWeight.toFixed(3)),
2124
+ confidenceRaw: Number(confidenceRaw.toFixed(3))
2125
+ };
2021
2126
  }
2022
2127
  async function executeSearch(context) {
2023
2128
  const { query, limit } = context;
2024
2129
  const requestedType = context.type ?? "all";
2025
2130
  const system = context.system ?? "c64";
2131
+ const strict = context.strict ?? false;
2132
+ const minConfidence = context.min_confidence;
2133
+ const debugScoring = context.debug_scoring ?? false;
2026
2134
  const state = await getStateSnapshot();
2135
+ const entities = extractQueryEntities(query);
2027
2136
  const rawResults = normalizeSearchResults(
2028
2137
  state.searchIndex.search(query, {
2029
2138
  combineWith: inferCombineMode(query),
@@ -2046,19 +2155,27 @@ async function executeSearch(context) {
2046
2155
  const referencedUris = await resolveReferencedUris(state, String(result.uri ?? ""), result.referencedFiles);
2047
2156
  const classificationSummary = toClassificationSummary(result.classification);
2048
2157
  const systems = extractSystems(result.classification);
2049
- const hasSignal = hasQuerySignal(result, query);
2158
+ const hasSignal = hasQuerySignal(result, entities.tokens);
2159
+ const uri = String(result.uri ?? "");
2160
+ const title = String(result.title ?? "");
2161
+ const snippet = String(result.snippet ?? "");
2162
+ const body = String(result.body ?? "");
2050
2163
  const baseScore = Number(result?.score ?? 0);
2051
- const rankScore = baseScore + computeRankingBoost({
2052
- uri: String(result.uri ?? ""),
2164
+ const scored = computeScoredReasons({
2165
+ entities,
2166
+ uri,
2053
2167
  resultType,
2054
2168
  preview,
2055
- query,
2056
- referencedUris,
2169
+ snippet,
2170
+ body,
2171
+ title,
2172
+ baseScore,
2057
2173
  classificationSummary,
2058
2174
  hasQuerySignal: hasSignal
2059
2175
  });
2060
2176
  return {
2061
- rankScore,
2177
+ confidence: scored.confidence,
2178
+ matchReasons: scored.reasons,
2062
2179
  systems,
2063
2180
  keep: hasClearCodingValue({
2064
2181
  resultType,
@@ -2068,24 +2185,56 @@ async function executeSearch(context) {
2068
2185
  }),
2069
2186
  hit: {
2070
2187
  result_type: resultType,
2071
- uri: String(result.uri ?? ""),
2072
- title: String(result.title ?? ""),
2188
+ uri,
2189
+ title,
2073
2190
  preview,
2074
2191
  ...referencedUris ? { referenced_files: referencedUris } : {},
2192
+ confidence: scored.confidence,
2193
+ match_reasons: scored.reasons,
2194
+ ...debugScoring ? {
2195
+ debug_scoring: {
2196
+ reason_total_weight: scored.reasonTotalWeight,
2197
+ base_score: baseScore,
2198
+ confidence_raw: scored.confidenceRaw
2199
+ }
2200
+ } : {},
2075
2201
  classification_summary: classificationSummary
2076
2202
  }
2077
2203
  };
2078
2204
  })
2079
2205
  );
2080
- const hits = mapped.filter((entry) => entry.keep).filter((entry) => {
2206
+ const candidateHits = mapped.filter((entry) => entry.keep).filter((entry) => {
2081
2207
  const uri = String(entry.hit.uri ?? "");
2082
2208
  if (uri.startsWith("code://oscar/include/") && !uri.toLowerCase().endsWith(".h")) return false;
2083
2209
  return true;
2084
- }).filter((entry) => requestedType === "all" || entry.hit.result_type === requestedType).filter((entry) => matchesSystemFilter(entry.systems, system)).sort((a, b) => b.rankScore - a.rankScore || a.hit.uri.localeCompare(b.hit.uri)).slice(0, limit);
2210
+ }).filter((entry) => requestedType === "all" || entry.hit.result_type === requestedType).filter((entry) => matchesSystemFilter(entry.systems, system));
2211
+ const filteredHits = candidateHits.filter((entry) => !(entry.matchReasons.length === 1 && entry.matchReasons[0]?.code === "fallback_broad_match")).filter((entry) => {
2212
+ if (!strict) return true;
2213
+ return !(entry.matchReasons.length === 1 && entry.matchReasons[0]?.code === "fallback_broad_match");
2214
+ }).filter((entry) => typeof minConfidence === "number" ? entry.confidence >= minConfidence : true).sort((a, b) => b.confidence - a.confidence || a.hit.uri.localeCompare(b.hit.uri)).slice(0, limit);
2215
+ if (filteredHits.length === 0) {
2216
+ const reasonCounts = /* @__PURE__ */ new Map();
2217
+ for (const entry of candidateHits) {
2218
+ for (const reason of entry.matchReasons) {
2219
+ reasonCounts.set(reason.code, (reasonCounts.get(reason.code) ?? 0) + 1);
2220
+ }
2221
+ }
2222
+ const failureReasons = [...reasonCounts.entries()].sort((a, b) => b[1] - a[1] || a[0].localeCompare(b[0])).map(([code]) => code).slice(0, 3);
2223
+ const suggestedRefinement = entities.address ? "address" : entities.includePath ? "include_path" : "symbol";
2224
+ return {
2225
+ ok: true,
2226
+ data: {
2227
+ results: [],
2228
+ quality_status: "insufficient",
2229
+ failure_reasons: failureReasons.length > 0 ? failureReasons : ["fallback_broad_match"],
2230
+ suggested_refinement: suggestedRefinement
2231
+ }
2232
+ };
2233
+ }
2085
2234
  return {
2086
2235
  ok: true,
2087
2236
  data: {
2088
- results: hits.map((entry) => entry.hit)
2237
+ results: filteredHits.map((entry) => entry.hit)
2089
2238
  }
2090
2239
  };
2091
2240
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "oscar64-mcp-docs",
3
- "version": "1.1.2",
3
+ "version": "1.1.3",
4
4
  "private": false,
5
5
  "type": "module",
6
6
  "engines": {