npm - pi-research - Versions diffs - 1.3.1 → 1.4.1 - Mend

pi-research 1.3.1 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

package/README.md +67 -250
package/lib/page-fetch-adapter.js +311 -64
package/lib/research-policy.js +36 -15
package/lib/research-profiles.json +4 -0
package/lib/research.js +15 -6
package/lib/router-annotation.js +192 -0
package/lib/router-structured-features.js +134 -0
package/lib/tiny-router.js +338 -0
package/lib/web-research.js +171 -10
package/ml/models/conflict-structured/feature-names.json +22 -0
package/ml/models/conflict-structured/meta.json +5 -0
package/ml/models/conflict-structured/model.joblib +0 -0
package/ml/models/domain/metrics.json +16 -0
package/ml/models/domain/model.joblib +0 -0
package/ml/models/domain-lr/metrics.json +16 -0
package/ml/models/domain-lr/model.joblib +0 -0
package/ml/models/followup/meta.json +3 -0
package/ml/models/followup/model.joblib +0 -0
package/ml/models/sufficiency-structured/feature-names.json +22 -0
package/ml/models/sufficiency-structured/meta.json +5 -0
package/ml/models/sufficiency-structured/model.joblib +0 -0
package/ml/router/README.md +106 -0
package/ml/router/__pycache__/features.cpython-314.pyc +0 -0
package/ml/router/benchmark_latency.py +81 -0
package/ml/router/daemon.py +140 -0
package/ml/router/embed_model2vec.py +48 -0
package/ml/router/evaluate_domain.py +67 -0
package/ml/router/features.py +60 -0
package/ml/router/requirements.txt +5 -0
package/ml/router/train_classifier.py +57 -0
package/ml/router/train_domain_classifier.py +209 -0
package/ml/router/train_structured_baseline.py +174 -0
package/package.json +5 -4

package/lib/web-research.js CHANGED Viewed

@@ -13,6 +13,7 @@ import {
   buildFallbackQueries,
   buildFastQueries,
   buildFollowUpQuery,
+  buildActionBasedFollowUpQuery,
   buildJinaReaderUrl,
   classifySourceType,
   compactResearchPayload,
@@ -35,8 +36,8 @@ import {
   scoreSourceEntry,
   selectRelevantChunks,
 } from "./research.js";
-import { pageFetchAdapter } from "./page-fetch-adapter.js";
-import { pageQualitySignals } from "./research-policy.js";
+import { getScraplingRuntimeStatus, pageFetchAdapter } from "./page-fetch-adapter.js";
+import { isUsableContent, pageQualitySignals } from "./research-policy.js";
 import { resolveOutputFormat, shouldRequireAuthoritativeSources } from "./research-output.js";
 import { planResearch } from "./planner.js";
 import {
@@ -53,6 +54,7 @@ const USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/
 const MIN_PAGE_TEXT = 300;
 const SEARCH_CACHE_TTL_MS = 5 * 60 * 1000;
 const PAGE_CACHE_TTL_MS = 30 * 60 * 1000;
+const EXPENSIVE_PAGE_CACHE_TTL_MS = 7 * 24 * 60 * 60 * 1000;
 const searchCache = new Map();
 const pageCache = new Map();
@@ -71,6 +73,10 @@ function setCacheValue(cache, key, value, ttlMs) {
   return value;
 }
+/**
+ * Choose the cache lifetime for a fetched page.
+ *
+ * @param {object} [page] - Fetched page; only its `expensive` property is read.
+ * @returns {number} EXPENSIVE_PAGE_CACHE_TTL_MS when `page.expensive` is truthy,
+ *   otherwise PAGE_CACHE_TTL_MS (also used when `page` is null or undefined).
+ */
+function pageCacheTtl(page) {
+  if (page?.expensive) {
+    return EXPENSIVE_PAGE_CACHE_TTL_MS;
+  }
+  return PAGE_CACHE_TTL_MS;
+}
 function hashText(text) {
   return createHash("sha1").update(String(text || "")).digest("hex");
 }
@@ -165,6 +171,7 @@ export async function buildQueries(query, mode = "fast", ctx, signal) {
   const hintedQueries = Array.isArray(config.queryHints) && config.queryHints.length
     ? config.queryHints.map((hint) => `${query} ${hint}`)
     : [];
   if (config.mode === "code") {
     return [...new Set([...planResearch(query, "code").subqueries, ...hintedQueries])].slice(0, config.maxQueries);
   }
@@ -208,6 +215,7 @@ async function fetchTextWithRetry(url, signal, attempts = 2, headers = {
       return response;
     } catch (error) {
       lastError = error;
+      if (signal?.aborted || error?.name === "AbortError" || error?.name === "TimeoutError") throw error;
       if (attempt + 1 < attempts) await new Promise((resolve) => setTimeout(resolve, 100 * (attempt + 1)));
     }
   }
@@ -428,7 +436,7 @@ export async function fetchPageSource(url, signal, config = getResearchConfig())
   if (shouldUseJinaFirst(url)) {
     const first = finalizeFetchedPage(await fetchJinaPageSource(url, signal, config), config, { url, contentType: "text/plain" });
     if (first && withinTimeframe(first, config)) {
-      const page = config.isolate ? first : setCacheValue(pageCache, cacheKey, first, PAGE_CACHE_TTL_MS);
+      const page = config.isolate ? first : setCacheValue(pageCache, cacheKey, first, pageCacheTtl(first));
       await logResearchEvent("fetch_end", { url, via: "jina_first", success: Boolean(page), page: page ? { title: page.title, sourceType: page.sourceType, publishDate: page.publishDate, textLength: page.text?.length || 0 } : null });
       return page;
     }
@@ -444,7 +452,7 @@ export async function fetchPageSource(url, signal, config = getResearchConfig())
     if (!contentType.includes("text/html") && !contentType.includes("text/plain")) {
       const fallback = finalizeFetchedPage(await fetchJinaPageSource(url, signal, config), config, { url, contentType });
       if (fallback && withinTimeframe(fallback, config)) {
-        const page = config.isolate ? fallback : setCacheValue(pageCache, cacheKey, fallback, PAGE_CACHE_TTL_MS);
+        const page = config.isolate ? fallback : setCacheValue(pageCache, cacheKey, fallback, pageCacheTtl(fallback));
         await logResearchEvent("fetch_end", { url, via: "unsupported_content_type_fallback", success: Boolean(page), contentType, page: page ? { title: page.title, sourceType: page.sourceType, publishDate: page.publishDate, textLength: page.text?.length || 0 } : null });
         return page;
       }
@@ -479,27 +487,60 @@ export async function fetchPageSource(url, signal, config = getResearchConfig())
           codeBlocks: scraplingSnapshot.codeBlocks,
           fetchStatus: scrapling.status ?? 200,
           contentType: scrapling.contentType || "text/html",
+          expensive: true,
         });
+      } else if (assessment?.blocked || assessment?.dynamic) {
+        await logResearchEvent("fetch_scrapling_unavailable", { url, mode: assessment.mode, runtime: getScraplingRuntimeStatus?.() || null });
       }
     }
     const resolved = page || await fetchJinaPageSource(url, signal, config);
     const finalPage = finalizeFetchedPage(resolved, config, { url: response.url || url, status: response.status ?? 200, contentType });
     const stored = finalPage && withinTimeframe(finalPage, config)
-      ? (config.isolate ? finalPage : setCacheValue(pageCache, cacheKey, finalPage, PAGE_CACHE_TTL_MS))
+      ? (config.isolate ? finalPage : setCacheValue(pageCache, cacheKey, finalPage, pageCacheTtl(finalPage)))
       : null;
     await logResearchEvent("fetch_end", { url, success: Boolean(stored), page: stored ? { title: stored.title, sourceType: stored.sourceType, publishDate: stored.publishDate, textLength: stored.text?.length || 0 } : null });
     return stored;
   } catch (error) {
+    if (signal?.aborted || error?.name === "AbortError") {
+      await logResearchEvent("fetch_abort", { url });
+      return null;
+    }
     const fallback = finalizeFetchedPage(await fetchJinaPageSource(url, signal, config), config, { url, contentType: "text/plain" });
     const stored = fallback && withinTimeframe(fallback, config)
-      ? (config.isolate ? fallback : setCacheValue(pageCache, cacheKey, fallback, PAGE_CACHE_TTL_MS))
+      ? (config.isolate ? fallback : setCacheValue(pageCache, cacheKey, fallback, pageCacheTtl(fallback)))
       : null;
     await logResearchEvent("fetch_error", { url, error, fallback: stored ? { title: stored.title, sourceType: stored.sourceType, publishDate: stored.publishDate, textLength: stored.text?.length || 0 } : null });
     return stored;
   }
 }
+/**
+ * Fetch every search result concurrently and cancel the fetches that are still
+ * running once enough usable pages have arrived.
+ *
+ * @param {Array<{url: string}>} results - Search results; each `url` is fetched.
+ * @param {AbortSignal} [signal] - Caller's signal; aborting it cancels every fetch.
+ * @param {object} config - Research config; `minSources` sets the usable-page target (at least 1).
+ * @param {string} query - Query forwarded to the fetcher and to the usability check.
+ * @returns {Promise<object[]>} Pages that completed without being cancelled, in result order.
+ */
+async function speculativeFetch(results, signal, config, query) {
+  const target = Math.max(1, config.minSources || 1);
+  const controllers = results.map(() => new AbortController());
+  const abortAll = () => controllers.forEach((controller) => controller.abort());
+  if (signal) signal.addEventListener("abort", abortAll, { once: true });
+  let usableCount = 0;
+  try {
+    const pages = await Promise.all(results.map(async (result, index) => {
+      const scopedSignal = signal ? AbortSignal.any([signal, controllers[index].signal]) : controllers[index].signal;
+      const page = await fetchPageSource(result.url, scopedSignal, { ...config, query });
+      // A fetch that lost the race (or returned nothing) contributes no page.
+      if (scopedSignal.aborted || !page) return null;
+      if (isUsableContent(page, { ...config, query })) {
+        usableCount += 1;
+        if (usableCount >= target) {
+          // Target reached: cancel every other fetch that is still in flight.
+          controllers.forEach((controller, controllerIndex) => {
+            if (controllerIndex !== index && !controller.signal.aborted) controller.abort();
+          });
+        }
+      }
+      return page;
+    }));
+    return pages.filter(Boolean);
+  } catch (error) {
+    // One fetch rejected: stop the siblings instead of letting them run unobserved.
+    abortAll();
+    throw error;
+  } finally {
+    // Detach on every path; otherwise a rejection leaves the listener (and the
+    // controllers it closes over) attached to the caller's signal.
+    if (signal) signal.removeEventListener("abort", abortAll);
+  }
+}
 async function readLocalFiles(paths, config) {
   const pages = [];
   for (const path of paths) {
@@ -620,8 +661,72 @@ function modeCacheKey(query, config) {
   }))}`;
 }
+import {
+  applyConflictTinyRouterDecision,
+  applySufficiencyTinyRouterDecision,
+  chooseTinyRouterDomain,
+  classifyConflictWithTinyRouter,
+  classifyDomainWithTinyRouter,
+  classifyFollowupWithTinyRouter,
+  classifySufficiencyWithTinyRouter,
+} from "./tiny-router.js";
+/**
+ * Translate a structured sufficiency decision into the label of the aspect
+ * that is still missing.
+ *
+ * @param {string} decision - Decision label produced by the structured router.
+ * @returns {string|null} Missing-aspect label, or null for any other value.
+ */
+function missingAspectFromStructuredDecision(decision) {
+  switch (decision) {
+    case "need_authority":
+      return "authoritative sources";
+    case "need_more_sources":
+      return "readable sources";
+    case "need_recency":
+      return "recent sources";
+    case "need_version_context":
+      return "version context";
+    case "need_conflict_resolution":
+      return "conflict resolution";
+    default:
+      return null;
+  }
+}
+/**
+ * Fold a structured-router decision into a heuristic sufficiency result.
+ *
+ * A missing decision or "sufficient" returns the input untouched. Any other
+ * decision yields a copy marked insufficient, with the matching missing aspect
+ * and an action-based follow-up query appended (de-duplicated) when available.
+ *
+ * @param {object} sufficiency - Heuristic result; `missingAspects` and `openSubQuestions` are read.
+ * @param {string} decision - Structured router decision label.
+ * @param {string} query - Original research query.
+ * @param {string[]} [seenUrls=[]] - URLs already read, passed to the follow-up builder.
+ * @returns {object} The original object, or an updated copy.
+ */
+function withStructuredSufficiencyDecision(sufficiency, decision, query, seenUrls = []) {
+  if (!decision || decision === "sufficient") return sufficiency;
+  // Append one entry to a possibly-missing list, dropping duplicates.
+  const appendUnique = (existing, extra) => [...new Set([...(existing || []), extra])];
+  const aspect = missingAspectFromStructuredDecision(decision);
+  const nextQuery = buildActionBasedFollowUpQuery(query, decision, { seenUrls });
+  const updated = { ...sufficiency, sufficient: false };
+  updated.missingAspects = aspect
+    ? appendUnique(sufficiency.missingAspects, aspect)
+    : sufficiency.missingAspects;
+  updated.openSubQuestions = nextQuery
+    ? appendUnique(sufficiency.openSubQuestions, nextQuery)
+    : sufficiency.openSubQuestions;
+  return updated;
+}
+/**
+ * Resolve the question domain, letting the tiny router override the heuristic
+ * classifier only when `chooseTinyRouterDomain` accepts its prediction.
+ *
+ * Exactly one `tiny_router_domain` or `tiny_router_fallback` event is logged
+ * per call, so the two can be counted against each other.
+ *
+ * @param {string} query - Research query to classify.
+ * @param {string|object} mode - Mode name, or an options object whose `mode` is used ("fast" when absent).
+ * @param {AbortSignal} [signal] - Forwarded to the tiny router call.
+ * @returns {Promise<string>} The accepted domain; the heuristic domain on any router error.
+ */
+async function resolveQuestionDomain(query, mode, signal) {
+  const fallback = classifyQuestionDomain(query);
+  const normalizedMode = typeof mode === "object" ? mode?.mode || "fast" : mode;
+  try {
+    const tinyStartedAt = Date.now();
+    const tinyDomain = await classifyDomainWithTinyRouter(query, normalizedMode, signal);
+    const tinyLatencyMs = Date.now() - tinyStartedAt;
+    await logResearchEvent("tiny_router_latency", { task: "domain", latencyMs: tinyLatencyMs, accepted: Boolean(tinyDomain) });
+    const domain = chooseTinyRouterDomain(fallback, tinyDomain);
+    if (tinyDomain && domain !== fallback) {
+      await logResearchEvent("tiny_router_domain", { query, mode: normalizedMode, heuristicDomain: fallback, predictedDomain: tinyDomain, acceptedDomain: domain });
+      return domain;
+    }
+    // From here on the heuristic domain is kept; the branches are mutually
+    // exclusive so a single decision is never logged twice.
+    if (tinyDomain && tinyDomain !== fallback) {
+      // The router disagreed, but chooseTinyRouterDomain kept the heuristic domain.
+      await logResearchEvent("tiny_router_fallback", { task: "domain", query, mode: normalizedMode, heuristicDomain: fallback, predictedDomain: tinyDomain, reason: "high_risk_not_downgraded" });
+    } else {
+      await logResearchEvent("tiny_router_fallback", { task: "domain", query, mode: normalizedMode, heuristicDomain: fallback, reason: tinyDomain ? "heuristic_kept" : "tiny_router_unavailable_or_low_confidence" });
+    }
+    return fallback;
+  } catch (error) {
+    await logResearchEvent("tiny_router_fallback", { task: "domain", query, mode: normalizedMode, heuristicDomain: fallback, reason: "error", error });
+    return fallback;
+  }
+}
 export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast") {
-  const domain = classifyQuestionDomain(query);
+  const domain = await resolveQuestionDomain(query, mode, signal);
   const config = getResearchConfig(typeof mode === "object" ? { ...mode, domain } : { mode, domain });
   const cacheKey = modeCacheKey(query, config);
@@ -699,6 +804,9 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
     } else {
       lastEmptySearchSignature = null;
     }
+    const fetchWindow = config.mode === "fast"
+      ? Math.max(config.maxPages, Math.min(config.maxPages * 2, (config.minSources || 3) + 2))
+      : config.maxPages;
     const results = rankSearchResults(flatResults, query, config.maxPages * 2, config)
       .filter((result) => {
         const key = normalizeUrl(result.url);
@@ -706,10 +814,12 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
         seenUrls.add(key);
         return true;
       })
-      .slice(0, config.maxPages);
+      .slice(0, fetchWindow);
     emit("fetch", `Reading ${results.length} sources...`);
-    const pageCandidates = await Promise.all(results.map((result) => fetchPageSource(result.url, signal, { ...config, query })));
+    const pageCandidates = config.mode === "fast"
+      ? await speculativeFetch(results, signal, { ...config, minSources: config.minSources || 3 }, query)
+      : await Promise.all(results.map((result) => fetchPageSource(result.url, signal, { ...config, query })));
     await logResearchEvent("page_fetch_results", {
       query,
       urls: results.map((result) => result.url),
@@ -742,6 +852,22 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
     conflictSummary = conflict.conflictSummary || "";
     conflictingSourcePairs = conflict.conflictingSourcePairs || [];
+    const structuredConflictStartedAt = Date.now();
+    const structuredConflictDecision = await classifyConflictWithTinyRouter(query, mergedPages, signal);
+    if (structuredConflictDecision) {
+      await logResearchEvent("tiny_router_latency", { task: "conflict", latencyMs: Date.now() - structuredConflictStartedAt, accepted: true });
+      const nextConflictDetected = applyConflictTinyRouterDecision(
+        conflictDetected,
+        structuredConflictDecision,
+        { allowClear: process.env.PI_RESEARCH_TINY_ROUTER_CONFLICT_ALLOW_CLEAR === "1" || process.env.PI_RESEARCH_TINY_ROUTER_CONFLICT_ALLOW_CLEAR === "true" },
+      );
+      if (nextConflictDetected !== conflictDetected) {
+        conflictDetected = nextConflictDetected;
+        if (conflictDetected && !conflictSummary) conflictSummary = `Structured router flagged ${query} for conflict review.`;
+      }
+      await logResearchEvent("tiny_router_structured_decision", { task: "conflict", query, decision: structuredConflictDecision, heuristicConflictDetected: conflict.detected, finalConflictDetected: conflictDetected });
+    }
     const minSources = config.mode === "fast"
       ? (mergedPages.some((page) => page.authoritative) ? 1 : Math.max(3, config.minSources || 3))
       : (config.minSources || 3);
@@ -757,10 +883,45 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
       sufficiency = { ...sufficiency, sufficient: true };
     }
+    const structuredSufficiencyStartedAt = Date.now();
+    const structuredSufficiencyDecision = await classifySufficiencyWithTinyRouter(query, mergedPages, signal);
+    if (structuredSufficiencyDecision) {
+      const heuristicSufficient = sufficiency.sufficient;
+      await logResearchEvent("tiny_router_latency", { task: "sufficiency", latencyMs: Date.now() - structuredSufficiencyStartedAt, accepted: true });
+      const finalSufficient = applySufficiencyTinyRouterDecision(heuristicSufficient, structuredSufficiencyDecision);
+      if (finalSufficient !== heuristicSufficient) {
+        sufficiency = withStructuredSufficiencyDecision(sufficiency, structuredSufficiencyDecision, query, mergedPages.map((page) => page.url));
+      }
+      await logResearchEvent("tiny_router_structured_decision", { task: "sufficiency", query, decision: structuredSufficiencyDecision, heuristicSufficient, finalSufficient });
+      sufficiency = { ...sufficiency, sufficient: finalSufficient };
+    }
     if (sufficiency.sufficient || turn === (config.maxTurns - 1)) break;
     followupRounds += 1;
-    followupQuery = buildFollowUpQuery(query, mergedPages);
+    const conflictState = conflictDetected ? (mergedPages.some(p => p.authoritative) ? "minor" : "severe") : "none";
+    const sourcesMeta = {
+      has_authority: mergedPages.some(p => p.authoritative),
+      has_forum: mergedPages.some(p => p.sourceType === "forum" || /forum|reddit|stack/i.test(p.url)),
+      has_news: mergedPages.some(p => p.sourceType === "news" || /news|blog|article/i.test(p.url)),
+      has_recent: mergedPages.some(p => p.freshness === "recent" || p.freshness === "current_year"),
+      source_count: mergedPages.length
+    };
+    const action = await classifyFollowupWithTinyRouter(query, config.mode, conflictState, sourcesMeta, signal);
+    if (action === "stop") {
+      await logResearchEvent("tiny_router_stop", { query, reason: "router_suggested_stop" });
+      break;
+    }
+    if (!action) {
+      followupQuery = buildFollowUpQuery(query, mergedPages, { seenUrls: mergedPages.map((page) => page.url) });
+    } else {
+      followupQuery = buildActionBasedFollowUpQuery(query, action, { seenUrls: mergedPages.map((page) => page.url) });
+    }
     currentQueries = planSubqueries(query, followupQuery, config, sufficiency);
     subqueries = [...new Set([...subqueries, ...currentQueries])];
   }

package/ml/models/conflict-structured/feature-names.json ADDED Viewed

@@ -0,0 +1,22 @@
+[
+  "authoritative_source_count",
+  "blocked_source_count",
+  "blog_count",
+  "candidate_conflict",
+  "file_count",
+  "forum_count",
+  "github_readme_count",
+  "github_repo_count",
+  "has_authority_resolution_path",
+  "negative_signal_sources",
+  "official_doc_count",
+  "other_count",
+  "paper_count",
+  "positive_signal_sources",
+  "query_academic",
+  "query_comparison",
+  "query_procedural",
+  "query_temporal",
+  "query_versioned",
+  "source_count"
+]

package/ml/models/conflict-structured/meta.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "task": "conflict",
+  "bestModel": "lr",
+  "rows": 80
+}

package/ml/models/conflict-structured/model.joblib ADDED Viewed

Binary file

package/ml/models/domain/metrics.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "macro_f1": 0.5773809523809524,
+  "train_size": 122,
+  "val_size": 34,
+  "high_risk_downgrades": 1,
+  "classes": [
+    "changelog",
+    "github",
+    "package-registry",
+    "papers",
+    "security",
+    "specs",
+    "vendor-status",
+    "web"
+  ]
+}

package/ml/models/domain/model.joblib ADDED Viewed

Binary file

package/ml/models/domain-lr/metrics.json ADDED Viewed

@@ -0,0 +1,16 @@
+{
+  "macro_f1": 0.41485507246376807,
+  "train_size": 122,
+  "val_size": 34,
+  "high_risk_downgrades": 1,
+  "classes": [
+    "changelog",
+    "github",
+    "package-registry",
+    "papers",
+    "security",
+    "specs",
+    "vendor-status",
+    "web"
+  ]
+}

package/ml/models/domain-lr/model.joblib ADDED Viewed

Binary file

package/ml/models/followup/meta.json ADDED Viewed

@@ -0,0 +1,3 @@
+{
+  "confidenceThreshold": 0.75
+}

package/ml/models/followup/model.joblib ADDED Viewed

Binary file

package/ml/models/sufficiency-structured/feature-names.json ADDED Viewed

@@ -0,0 +1,22 @@
+[
+  "authoritative_source_count",
+  "blocked_source_count",
+  "blog_count",
+  "file_count",
+  "forum_count",
+  "github_readme_count",
+  "github_repo_count",
+  "has_authority",
+  "has_only_one_good_source",
+  "negative_signal_sources",
+  "official_doc_count",
+  "other_count",
+  "paper_count",
+  "positive_signal_sources",
+  "query_academic",
+  "query_comparison",
+  "query_procedural",
+  "query_temporal",
+  "query_versioned",
+  "source_count"
+]

package/ml/models/sufficiency-structured/meta.json ADDED Viewed

@@ -0,0 +1,5 @@
+{
+  "task": "sufficiency",
+  "bestModel": "lr",
+  "rows": 78
+}

package/ml/models/sufficiency-structured/model.joblib ADDED Viewed

Binary file

package/ml/router/README.md ADDED Viewed

@@ -0,0 +1,106 @@
+# Tiny Router Training Runbook
+Target budget:
+- GPU RAM: 2 GB
+- CPU RAM: 20 GB
+- Default path: CPU-first, frozen embeddings, small models
+## Environment
+```bash
+python3 -m venv .venv-router
+. .venv-router/bin/activate
+pip install -r ml/router/requirements.txt
+```
+## Phase 1 — domain router
+```bash
+node scripts/router/audit-cache.mjs
+node scripts/router/export-examples.mjs
+node scripts/router/split-examples.mjs
+python ml/router/embed_model2vec.py \
+  --input data/router/examples.jsonl \
+  --gold data/router/gold-domain.jsonl \
+  --synthetic data/router/synthetic-train.jsonl
+python ml/router/train_domain_classifier.py \
+  --embeddings data/router/domain-model2vec.npz data/router/synthetic-model2vec.npz \
+  --gold-embeddings data/router/gold-model2vec.npz \
+  --out .cache/models/pi-research-router/domain \
+  --model-type auto
+python ml/router/evaluate_domain.py \
+  --model .cache/models/pi-research-router/domain/model.joblib \
+  --embeddings data/router/gold-model2vec.npz \
+  --out metrics/router/domain-model2vec-lr.json
+python ml/router/benchmark_latency.py \
+  --model-dir .cache/models/pi-research-router/domain \
+  --examples data/router/gold-domain.jsonl \
+  --out metrics/router/latency.json
+python scripts/router/eval_domain_unknown.py \
+  --model-dir .cache/models/pi-research-router/domain \
+  --input data/router/unknown-domain-smoke.jsonl
+```
+## Phase 2 — structured baselines
+Build provisional structured rows:
+```bash
+node scripts/router/export_structured_provisional.mjs
+node scripts/router/eval_structured_baselines.mjs
+```
+Train conservative structured classifiers:
+```bash
+python ml/router/train_structured_baseline.py --task conflict
+python ml/router/train_structured_baseline.py --task sufficiency
+```
+Outputs:
+- `.cache/models/pi-research-router/conflict-structured/`
+- `.cache/models/pi-research-router/sufficiency-structured/`
+- `metrics/router/conflict-structured-models.json`
+- `metrics/router/sufficiency-structured-models.json`
+## Runtime flags
+```bash
+PI_RESEARCH_TINY_ROUTER=1
+PI_RESEARCH_TINY_ROUTER_MODEL=.cache/models/pi-research-router
+PI_RESEARCH_TINY_ROUTER_TIMEOUT_MS=50
+PI_RESEARCH_TINY_ROUTER_DOMAIN=1
+PI_RESEARCH_TINY_ROUTER_FOLLOWUP=1
+PI_RESEARCH_TINY_ROUTER_CONFLICT=0
+PI_RESEARCH_TINY_ROUTER_SUFFICIENCY=0
+```
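+Keep `PI_RESEARCH_TINY_ROUTER_CONFLICT` and `PI_RESEARCH_TINY_ROUTER_SUFFICIENCY` set to `0` until the conflict and sufficiency model metrics have been reviewed.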
+## Server deploy
+Safe MCP runtime deploy:
+```bash
+scripts/router/deploy-server-runtime.sh \
+  blackknight@100.98.190.19 \
+  ~/work/pi-research-runtime
+```
+This syncs the repo, installs user-local Node if needed, copies trained router models, runs `npm install`, and writes:
+- `start-mcp-tiny-router-safe.sh`
+- `start-mcp-tiny-router-experimental.sh`
+Recommended start command:
+```bash
+ssh blackknight@100.98.190.19 'cd ~/work/pi-research-runtime && ./start-mcp-tiny-router-safe.sh'
+```

package/ml/router/__pycache__/features.cpython-314.pyc ADDED Viewed

Binary file

package/ml/router/benchmark_latency.py ADDED Viewed

@@ -0,0 +1,81 @@
+import json
+import argparse
+import time
+import numpy as np
+import joblib
+import os
+sys_path_added = False
+if not sys_path_added:
+    import sys
+    sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
+    sys_path_added = True
+from features import load_embedding_model, extract_domain_features
+def main():
+    """Benchmark sequential single-query latency of the domain classifier.
+
+    Reads queries from the JSONL file given by --examples (one object per line
+    with a "query" key), times feature extraction plus ``clf.predict`` for each
+    query, prints p50/p95/mean in milliseconds, and writes them as JSON to --out.
+
+    Raises:
+        SystemExit: if the examples file contains no queries.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--model-dir", required=True)
+    parser.add_argument("--examples", required=True)
+    parser.add_argument("--out", required=True)
+    args = parser.parse_args()
+    print("Loading Model2Vec...")
+    emb_model = load_embedding_model()
+    print("Loading Classifier...")
+    clf = joblib.load(os.path.join(args.model_dir, "model.joblib"))
+    # Load the queries to benchmark, skipping blank lines.
+    queries = []
+    with open(args.examples, "r", encoding="utf-8") as f:
+        for line in f:
+            if not line.strip():
+                continue
+            queries.append(json.loads(line)["query"])
+    if not queries:
+        # Percentiles of zero samples are undefined; fail with a clear message instead.
+        raise SystemExit(f"No examples found in {args.examples}; nothing to benchmark.")
+    # Warmup so one-time initialisation cost is not counted in the measurements.
+    print("Warming up...")
+    for q in queries[:10]:
+        feats = extract_domain_features([q], ["fast"], emb_model=emb_model, show_progress_bar=False)
+        clf.predict(feats)
+    # Benchmark
+    print(f"Benchmarking {len(queries)} queries sequentially...")
+    latencies = []
+    for q in queries:
+        t0 = time.perf_counter()
+        feats = extract_domain_features([q], ["fast"], emb_model=emb_model, show_progress_bar=False)
+        clf.predict(feats)
+        t1 = time.perf_counter()
+        latencies.append((t1 - t0) * 1000)  # ms
+    latencies = np.array(latencies)
+    # Convert NumPy scalars to plain floats before serialising.
+    p50 = float(np.percentile(latencies, 50))
+    p95 = float(np.percentile(latencies, 95))
+    mean = float(np.mean(latencies))
+    print(f"p50: {p50:.2f} ms")
+    print(f"p95: {p95:.2f} ms")
+    print(f"Mean: {mean:.2f} ms")
+    # dirname is "" for a bare filename, and os.makedirs("") raises FileNotFoundError.
+    out_dir = os.path.dirname(args.out)
+    if out_dir:
+        os.makedirs(out_dir, exist_ok=True)
+    metrics = {
+        "task": "domain",
+        "latency_ms": {
+            "p50": p50,
+            "p95": p95,
+            "mean": mean,
+            "samples": len(latencies)
+        }
+    }
+    with open(args.out, "w") as f:
+        json.dump(metrics, f, indent=2)
+if __name__ == "__main__":
+    main()