pi-research 1.3.1 → 1.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. package/README.md +67 -250
  2. package/lib/page-fetch-adapter.js +311 -64
  3. package/lib/research-policy.js +36 -15
  4. package/lib/research-profiles.json +4 -0
  5. package/lib/research.js +15 -6
  6. package/lib/router-annotation.js +192 -0
  7. package/lib/router-structured-features.js +134 -0
  8. package/lib/tiny-router.js +338 -0
  9. package/lib/web-research.js +171 -10
  10. package/ml/models/conflict-structured/feature-names.json +22 -0
  11. package/ml/models/conflict-structured/meta.json +5 -0
  12. package/ml/models/conflict-structured/model.joblib +0 -0
  13. package/ml/models/domain/metrics.json +16 -0
  14. package/ml/models/domain/model.joblib +0 -0
  15. package/ml/models/domain-lr/metrics.json +16 -0
  16. package/ml/models/domain-lr/model.joblib +0 -0
  17. package/ml/models/followup/meta.json +3 -0
  18. package/ml/models/followup/model.joblib +0 -0
  19. package/ml/models/sufficiency-structured/feature-names.json +22 -0
  20. package/ml/models/sufficiency-structured/meta.json +5 -0
  21. package/ml/models/sufficiency-structured/model.joblib +0 -0
  22. package/ml/router/README.md +106 -0
  23. package/ml/router/__pycache__/features.cpython-314.pyc +0 -0
  24. package/ml/router/benchmark_latency.py +81 -0
  25. package/ml/router/daemon.py +140 -0
  26. package/ml/router/embed_model2vec.py +48 -0
  27. package/ml/router/evaluate_domain.py +67 -0
  28. package/ml/router/features.py +60 -0
  29. package/ml/router/requirements.txt +5 -0
  30. package/ml/router/train_classifier.py +57 -0
  31. package/ml/router/train_domain_classifier.py +209 -0
  32. package/ml/router/train_structured_baseline.py +174 -0
  33. package/package.json +5 -4
@@ -13,6 +13,7 @@ import {
13
13
  buildFallbackQueries,
14
14
  buildFastQueries,
15
15
  buildFollowUpQuery,
16
+ buildActionBasedFollowUpQuery,
16
17
  buildJinaReaderUrl,
17
18
  classifySourceType,
18
19
  compactResearchPayload,
@@ -35,8 +36,8 @@ import {
35
36
  scoreSourceEntry,
36
37
  selectRelevantChunks,
37
38
  } from "./research.js";
38
- import { pageFetchAdapter } from "./page-fetch-adapter.js";
39
- import { pageQualitySignals } from "./research-policy.js";
39
+ import { getScraplingRuntimeStatus, pageFetchAdapter } from "./page-fetch-adapter.js";
40
+ import { isUsableContent, pageQualitySignals } from "./research-policy.js";
40
41
  import { resolveOutputFormat, shouldRequireAuthoritativeSources } from "./research-output.js";
41
42
  import { planResearch } from "./planner.js";
42
43
  import {
@@ -53,6 +54,7 @@ const USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/
53
54
  const MIN_PAGE_TEXT = 300;
54
55
  const SEARCH_CACHE_TTL_MS = 5 * 60 * 1000;
55
56
  const PAGE_CACHE_TTL_MS = 30 * 60 * 1000;
57
+ const EXPENSIVE_PAGE_CACHE_TTL_MS = 7 * 24 * 60 * 60 * 1000;
56
58
  const searchCache = new Map();
57
59
  const pageCache = new Map();
58
60
 
@@ -71,6 +73,10 @@ function setCacheValue(cache, key, value, ttlMs) {
71
73
  return value;
72
74
  }
73
75
 
76
/**
 * Choose how long a fetched page stays in the page cache.
 *
 * @param {object|null|undefined} page - Fetched page; only its `expensive` flag is read.
 * @returns {number} EXPENSIVE_PAGE_CACHE_TTL_MS when `page.expensive` is truthy,
 *   otherwise PAGE_CACHE_TTL_MS (this also covers a missing page).
 */
function pageCacheTtl(page) {
  if (page?.expensive) {
    return EXPENSIVE_PAGE_CACHE_TTL_MS;
  }
  return PAGE_CACHE_TTL_MS;
}
79
+
74
80
  function hashText(text) {
75
81
  return createHash("sha1").update(String(text || "")).digest("hex");
76
82
  }
@@ -165,6 +171,7 @@ export async function buildQueries(query, mode = "fast", ctx, signal) {
165
171
  const hintedQueries = Array.isArray(config.queryHints) && config.queryHints.length
166
172
  ? config.queryHints.map((hint) => `${query} ${hint}`)
167
173
  : [];
174
+
168
175
  if (config.mode === "code") {
169
176
  return [...new Set([...planResearch(query, "code").subqueries, ...hintedQueries])].slice(0, config.maxQueries);
170
177
  }
@@ -208,6 +215,7 @@ async function fetchTextWithRetry(url, signal, attempts = 2, headers = {
208
215
  return response;
209
216
  } catch (error) {
210
217
  lastError = error;
218
+ if (signal?.aborted || error?.name === "AbortError" || error?.name === "TimeoutError") throw error;
211
219
  if (attempt + 1 < attempts) await new Promise((resolve) => setTimeout(resolve, 100 * (attempt + 1)));
212
220
  }
213
221
  }
@@ -428,7 +436,7 @@ export async function fetchPageSource(url, signal, config = getResearchConfig())
428
436
  if (shouldUseJinaFirst(url)) {
429
437
  const first = finalizeFetchedPage(await fetchJinaPageSource(url, signal, config), config, { url, contentType: "text/plain" });
430
438
  if (first && withinTimeframe(first, config)) {
431
- const page = config.isolate ? first : setCacheValue(pageCache, cacheKey, first, PAGE_CACHE_TTL_MS);
439
+ const page = config.isolate ? first : setCacheValue(pageCache, cacheKey, first, pageCacheTtl(first));
432
440
  await logResearchEvent("fetch_end", { url, via: "jina_first", success: Boolean(page), page: page ? { title: page.title, sourceType: page.sourceType, publishDate: page.publishDate, textLength: page.text?.length || 0 } : null });
433
441
  return page;
434
442
  }
@@ -444,7 +452,7 @@ export async function fetchPageSource(url, signal, config = getResearchConfig())
444
452
  if (!contentType.includes("text/html") && !contentType.includes("text/plain")) {
445
453
  const fallback = finalizeFetchedPage(await fetchJinaPageSource(url, signal, config), config, { url, contentType });
446
454
  if (fallback && withinTimeframe(fallback, config)) {
447
- const page = config.isolate ? fallback : setCacheValue(pageCache, cacheKey, fallback, PAGE_CACHE_TTL_MS);
455
+ const page = config.isolate ? fallback : setCacheValue(pageCache, cacheKey, fallback, pageCacheTtl(fallback));
448
456
  await logResearchEvent("fetch_end", { url, via: "unsupported_content_type_fallback", success: Boolean(page), contentType, page: page ? { title: page.title, sourceType: page.sourceType, publishDate: page.publishDate, textLength: page.text?.length || 0 } : null });
449
457
  return page;
450
458
  }
@@ -479,27 +487,60 @@ export async function fetchPageSource(url, signal, config = getResearchConfig())
479
487
  codeBlocks: scraplingSnapshot.codeBlocks,
480
488
  fetchStatus: scrapling.status ?? 200,
481
489
  contentType: scrapling.contentType || "text/html",
490
+ expensive: true,
482
491
  });
492
+ } else if (assessment?.blocked || assessment?.dynamic) {
493
+ await logResearchEvent("fetch_scrapling_unavailable", { url, mode: assessment.mode, runtime: getScraplingRuntimeStatus?.() || null });
483
494
  }
484
495
  }
485
496
 
486
497
  const resolved = page || await fetchJinaPageSource(url, signal, config);
487
498
  const finalPage = finalizeFetchedPage(resolved, config, { url: response.url || url, status: response.status ?? 200, contentType });
488
499
  const stored = finalPage && withinTimeframe(finalPage, config)
489
- ? (config.isolate ? finalPage : setCacheValue(pageCache, cacheKey, finalPage, PAGE_CACHE_TTL_MS))
500
+ ? (config.isolate ? finalPage : setCacheValue(pageCache, cacheKey, finalPage, pageCacheTtl(finalPage)))
490
501
  : null;
491
502
  await logResearchEvent("fetch_end", { url, success: Boolean(stored), page: stored ? { title: stored.title, sourceType: stored.sourceType, publishDate: stored.publishDate, textLength: stored.text?.length || 0 } : null });
492
503
  return stored;
493
504
  } catch (error) {
505
+ if (signal?.aborted || error?.name === "AbortError") {
506
+ await logResearchEvent("fetch_abort", { url });
507
+ return null;
508
+ }
494
509
  const fallback = finalizeFetchedPage(await fetchJinaPageSource(url, signal, config), config, { url, contentType: "text/plain" });
495
510
  const stored = fallback && withinTimeframe(fallback, config)
496
- ? (config.isolate ? fallback : setCacheValue(pageCache, cacheKey, fallback, PAGE_CACHE_TTL_MS))
511
+ ? (config.isolate ? fallback : setCacheValue(pageCache, cacheKey, fallback, pageCacheTtl(fallback)))
497
512
  : null;
498
513
  await logResearchEvent("fetch_error", { url, error, fallback: stored ? { title: stored.title, sourceType: stored.sourceType, publishDate: stored.publishDate, textLength: stored.text?.length || 0 } : null });
499
514
  return stored;
500
515
  }
501
516
  }
502
517
 
518
/**
 * Fetch every search result concurrently and cancel the stragglers once
 * enough usable pages have arrived.
 *
 * Each result gets its own AbortController. As soon as the number of pages
 * accepted by `isUsableContent` reaches `config.minSources` (minimum 1), all
 * other controllers are aborted. A fetch whose signal is already aborted when
 * it settles contributes nothing to the result.
 *
 * @param {Array<{url: string}>} results - Search results to fetch.
 * @param {AbortSignal} [signal] - Optional caller signal; aborting it aborts every fetch.
 * @param {object} config - Research config; `minSources` sets the usable-page target.
 * @param {string} query - Query merged into the config handed to the fetcher and the usability check.
 * @returns {Promise<object[]>} Fetched pages in `results` order with dropped entries removed.
 * @throws Re-throws the first fetch/usability error after aborting the remaining fetches.
 */
async function speculativeFetch(results, signal, config, query) {
  const target = Math.max(1, config.minSources || 1);
  const controllers = results.map(() => new AbortController());
  const abortAll = () => controllers.forEach((controller) => controller.abort());
  if (signal) signal.addEventListener("abort", abortAll, { once: true });

  let usableCount = 0;
  try {
    const pages = await Promise.all(results.map(async (result, index) => {
      const ownSignal = controllers[index].signal;
      const scopedSignal = signal ? AbortSignal.any([signal, ownSignal]) : ownSignal;
      const page = await fetchPageSource(result.url, scopedSignal, { ...config, query });
      // A fetch that was cancelled while in flight must not count, even if it produced a page.
      if (scopedSignal.aborted || !page) return null;
      if (isUsableContent(page, { ...config, query })) {
        usableCount += 1;
        if (usableCount >= target) {
          // Target reached: cancel every other fetch but keep this one's result.
          controllers.forEach((controller, controllerIndex) => {
            if (controllerIndex !== index && !controller.signal.aborted) controller.abort();
          });
        }
      }
      return page;
    }));
    return pages.filter(Boolean);
  } catch (error) {
    // Promise.all rejects on the first failure; stop the sibling fetches instead of leaking them.
    abortAll();
    throw error;
  } finally {
    // Always detach from the caller's signal, including on the failure path.
    if (signal) signal.removeEventListener("abort", abortAll);
  }
}
543
+
503
544
  async function readLocalFiles(paths, config) {
504
545
  const pages = [];
505
546
  for (const path of paths) {
@@ -620,8 +661,72 @@ function modeCacheKey(query, config) {
620
661
  }))}`;
621
662
  }
622
663
 
664
+ import {
665
+ applyConflictTinyRouterDecision,
666
+ applySufficiencyTinyRouterDecision,
667
+ chooseTinyRouterDomain,
668
+ classifyConflictWithTinyRouter,
669
+ classifyDomainWithTinyRouter,
670
+ classifyFollowupWithTinyRouter,
671
+ classifySufficiencyWithTinyRouter,
672
+ } from "./tiny-router.js";
673
+
674
/**
 * Translate a structured sufficiency decision label into the human-readable
 * "missing aspect" phrase recorded on the sufficiency result.
 *
 * @param {string} decision - Decision label from the structured router.
 * @returns {string|null} The matching phrase, or null for any other value.
 */
function missingAspectFromStructuredDecision(decision) {
  switch (decision) {
    case "need_authority":
      return "authoritative sources";
    case "need_more_sources":
      return "readable sources";
    case "need_recency":
      return "recent sources";
    case "need_version_context":
      return "version context";
    case "need_conflict_resolution":
      return "conflict resolution";
    default:
      return null;
  }
}
682
+
683
/**
 * Fold a structured router decision into a sufficiency result.
 *
 * A missing decision or the literal "sufficient" returns the input object
 * untouched. Any other decision yields a copy marked insufficient, with the
 * decision's missing-aspect phrase and an action-based follow-up query appended
 * (de-duplicated) when those helpers return a truthy value.
 *
 * @param {object} sufficiency - Current sufficiency result.
 * @param {string|null|undefined} decision - Structured router decision label.
 * @param {string} query - Original research query.
 * @param {string[]} [seenUrls=[]] - URLs forwarded to the follow-up query builder.
 * @returns {object} The original object, or an updated copy with `sufficient: false`.
 */
function withStructuredSufficiencyDecision(sufficiency, decision, query, seenUrls = []) {
  const overridesHeuristic = Boolean(decision) && decision !== "sufficient";
  if (!overridesHeuristic) return sufficiency;

  const aspect = missingAspectFromStructuredDecision(decision);
  const nextQuestion = buildActionBasedFollowUpQuery(query, decision, { seenUrls });

  // Append `extra` to a possibly-missing list, dropping duplicates.
  const appendUnique = (existing, extra) => [...new Set([...(existing || []), extra])];

  let missingAspects = sufficiency.missingAspects;
  if (aspect) {
    missingAspects = appendUnique(missingAspects, aspect);
  }

  let openSubQuestions = sufficiency.openSubQuestions;
  if (nextQuestion) {
    openSubQuestions = appendUnique(openSubQuestions, nextQuestion);
  }

  return {
    ...sufficiency,
    sufficient: false,
    missingAspects,
    openSubQuestions,
  };
}
701
+
702
/**
 * Decide the question domain, letting the tiny router override the heuristic
 * classifier only when `chooseTinyRouterDomain` accepts its prediction.
 *
 * Exactly one outcome event is logged per call:
 *  - `tiny_router_domain` when the router's prediction replaces the heuristic;
 *  - `tiny_router_fallback` otherwise, with a `reason` describing why the
 *    heuristic domain was kept (prediction rejected, prediction agreed or
 *    absent, or an error was thrown).
 * A `tiny_router_latency` event is logged whenever the router call returns.
 *
 * @param {string} query - Research query to classify.
 * @param {string|object} mode - Mode name, or an options object whose `mode` field is used ("fast" if unset).
 * @param {AbortSignal} [signal] - Forwarded to the tiny router call.
 * @returns {Promise<string>} The accepted domain; the heuristic domain on any failure.
 */
async function resolveQuestionDomain(query, mode, signal) {
  const fallback = classifyQuestionDomain(query);
  const normalizedMode = typeof mode === "object" ? mode?.mode || "fast" : mode;
  try {
    const tinyStartedAt = Date.now();
    const tinyDomain = await classifyDomainWithTinyRouter(query, normalizedMode, signal);
    const tinyLatencyMs = Date.now() - tinyStartedAt;
    await logResearchEvent("tiny_router_latency", { task: "domain", latencyMs: tinyLatencyMs, accepted: Boolean(tinyDomain) });

    const domain = chooseTinyRouterDomain(fallback, tinyDomain);
    if (tinyDomain && domain !== fallback) {
      await logResearchEvent("tiny_router_domain", { query, mode: normalizedMode, heuristicDomain: fallback, predictedDomain: tinyDomain, acceptedDomain: domain });
      return domain;
    }

    if (tinyDomain && tinyDomain !== fallback) {
      // The router disagreed but its prediction was not accepted. Log this
      // specific reason only, so one decision is not counted as two fallbacks.
      await logResearchEvent("tiny_router_fallback", { task: "domain", query, mode: normalizedMode, heuristicDomain: fallback, predictedDomain: tinyDomain, reason: "high_risk_not_downgraded" });
    } else {
      await logResearchEvent("tiny_router_fallback", { task: "domain", query, mode: normalizedMode, heuristicDomain: fallback, reason: tinyDomain ? "heuristic_kept" : "tiny_router_unavailable_or_low_confidence" });
    }
    return fallback;
  } catch (error) {
    // Any router or logging failure degrades to the heuristic domain.
    await logResearchEvent("tiny_router_fallback", { task: "domain", query, mode: normalizedMode, heuristicDomain: fallback, reason: "error", error });
    return fallback;
  }
}
727
+
623
728
  export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast") {
624
- const domain = classifyQuestionDomain(query);
729
+ const domain = await resolveQuestionDomain(query, mode, signal);
625
730
  const config = getResearchConfig(typeof mode === "object" ? { ...mode, domain } : { mode, domain });
626
731
  const cacheKey = modeCacheKey(query, config);
627
732
 
@@ -699,6 +804,9 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
699
804
  } else {
700
805
  lastEmptySearchSignature = null;
701
806
  }
807
+ const fetchWindow = config.mode === "fast"
808
+ ? Math.max(config.maxPages, Math.min(config.maxPages * 2, (config.minSources || 3) + 2))
809
+ : config.maxPages;
702
810
  const results = rankSearchResults(flatResults, query, config.maxPages * 2, config)
703
811
  .filter((result) => {
704
812
  const key = normalizeUrl(result.url);
@@ -706,10 +814,12 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
706
814
  seenUrls.add(key);
707
815
  return true;
708
816
  })
709
- .slice(0, config.maxPages);
817
+ .slice(0, fetchWindow);
710
818
 
711
819
  emit("fetch", `Reading ${results.length} sources...`);
712
- const pageCandidates = await Promise.all(results.map((result) => fetchPageSource(result.url, signal, { ...config, query })));
820
+ const pageCandidates = config.mode === "fast"
821
+ ? await speculativeFetch(results, signal, { ...config, minSources: config.minSources || 3 }, query)
822
+ : await Promise.all(results.map((result) => fetchPageSource(result.url, signal, { ...config, query })));
713
823
  await logResearchEvent("page_fetch_results", {
714
824
  query,
715
825
  urls: results.map((result) => result.url),
@@ -742,6 +852,22 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
742
852
  conflictSummary = conflict.conflictSummary || "";
743
853
  conflictingSourcePairs = conflict.conflictingSourcePairs || [];
744
854
 
855
+ const structuredConflictStartedAt = Date.now();
856
+ const structuredConflictDecision = await classifyConflictWithTinyRouter(query, mergedPages, signal);
857
+ if (structuredConflictDecision) {
858
+ await logResearchEvent("tiny_router_latency", { task: "conflict", latencyMs: Date.now() - structuredConflictStartedAt, accepted: true });
859
+ const nextConflictDetected = applyConflictTinyRouterDecision(
860
+ conflictDetected,
861
+ structuredConflictDecision,
862
+ { allowClear: process.env.PI_RESEARCH_TINY_ROUTER_CONFLICT_ALLOW_CLEAR === "1" || process.env.PI_RESEARCH_TINY_ROUTER_CONFLICT_ALLOW_CLEAR === "true" },
863
+ );
864
+ if (nextConflictDetected !== conflictDetected) {
865
+ conflictDetected = nextConflictDetected;
866
+ if (conflictDetected && !conflictSummary) conflictSummary = `Structured router flagged ${query} for conflict review.`;
867
+ }
868
+ await logResearchEvent("tiny_router_structured_decision", { task: "conflict", query, decision: structuredConflictDecision, heuristicConflictDetected: conflict.detected, finalConflictDetected: conflictDetected });
869
+ }
870
+
745
871
  const minSources = config.mode === "fast"
746
872
  ? (mergedPages.some((page) => page.authoritative) ? 1 : Math.max(3, config.minSources || 3))
747
873
  : (config.minSources || 3);
@@ -757,10 +883,45 @@ export async function runWebResearch(query, ctx, signal, onUpdate, mode = "fast"
757
883
  sufficiency = { ...sufficiency, sufficient: true };
758
884
  }
759
885
 
886
+ const structuredSufficiencyStartedAt = Date.now();
887
+ const structuredSufficiencyDecision = await classifySufficiencyWithTinyRouter(query, mergedPages, signal);
888
+ if (structuredSufficiencyDecision) {
889
+ const heuristicSufficient = sufficiency.sufficient;
890
+ await logResearchEvent("tiny_router_latency", { task: "sufficiency", latencyMs: Date.now() - structuredSufficiencyStartedAt, accepted: true });
891
+ const finalSufficient = applySufficiencyTinyRouterDecision(heuristicSufficient, structuredSufficiencyDecision);
892
+ if (finalSufficient !== heuristicSufficient) {
893
+ sufficiency = withStructuredSufficiencyDecision(sufficiency, structuredSufficiencyDecision, query, mergedPages.map((page) => page.url));
894
+ }
895
+ await logResearchEvent("tiny_router_structured_decision", { task: "sufficiency", query, decision: structuredSufficiencyDecision, heuristicSufficient, finalSufficient });
896
+ sufficiency = { ...sufficiency, sufficient: finalSufficient };
897
+ }
898
+
760
899
  if (sufficiency.sufficient || turn === (config.maxTurns - 1)) break;
761
900
 
762
901
  followupRounds += 1;
763
- followupQuery = buildFollowUpQuery(query, mergedPages);
902
+
903
+ const conflictState = conflictDetected ? (mergedPages.some(p => p.authoritative) ? "minor" : "severe") : "none";
904
+ const sourcesMeta = {
905
+ has_authority: mergedPages.some(p => p.authoritative),
906
+ has_forum: mergedPages.some(p => p.sourceType === "forum" || /forum|reddit|stack/i.test(p.url)),
907
+ has_news: mergedPages.some(p => p.sourceType === "news" || /news|blog|article/i.test(p.url)),
908
+ has_recent: mergedPages.some(p => p.freshness === "recent" || p.freshness === "current_year"),
909
+ source_count: mergedPages.length
910
+ };
911
+
912
+ const action = await classifyFollowupWithTinyRouter(query, config.mode, conflictState, sourcesMeta, signal);
913
+
914
+ if (action === "stop") {
915
+ await logResearchEvent("tiny_router_stop", { query, reason: "router_suggested_stop" });
916
+ break;
917
+ }
918
+
919
+ if (!action) {
920
+ followupQuery = buildFollowUpQuery(query, mergedPages, { seenUrls: mergedPages.map((page) => page.url) });
921
+ } else {
922
+ followupQuery = buildActionBasedFollowUpQuery(query, action, { seenUrls: mergedPages.map((page) => page.url) });
923
+ }
924
+
764
925
  currentQueries = planSubqueries(query, followupQuery, config, sufficiency);
765
926
  subqueries = [...new Set([...subqueries, ...currentQueries])];
766
927
  }
@@ -0,0 +1,22 @@
1
+ [
2
+ "authoritative_source_count",
3
+ "blocked_source_count",
4
+ "blog_count",
5
+ "candidate_conflict",
6
+ "file_count",
7
+ "forum_count",
8
+ "github_readme_count",
9
+ "github_repo_count",
10
+ "has_authority_resolution_path",
11
+ "negative_signal_sources",
12
+ "official_doc_count",
13
+ "other_count",
14
+ "paper_count",
15
+ "positive_signal_sources",
16
+ "query_academic",
17
+ "query_comparison",
18
+ "query_procedural",
19
+ "query_temporal",
20
+ "query_versioned",
21
+ "source_count"
22
+ ]
@@ -0,0 +1,5 @@
1
+ {
2
+ "task": "conflict",
3
+ "bestModel": "lr",
4
+ "rows": 80
5
+ }
@@ -0,0 +1,16 @@
1
+ {
2
+ "macro_f1": 0.5773809523809524,
3
+ "train_size": 122,
4
+ "val_size": 34,
5
+ "high_risk_downgrades": 1,
6
+ "classes": [
7
+ "changelog",
8
+ "github",
9
+ "package-registry",
10
+ "papers",
11
+ "security",
12
+ "specs",
13
+ "vendor-status",
14
+ "web"
15
+ ]
16
+ }
Binary file
@@ -0,0 +1,16 @@
1
+ {
2
+ "macro_f1": 0.41485507246376807,
3
+ "train_size": 122,
4
+ "val_size": 34,
5
+ "high_risk_downgrades": 1,
6
+ "classes": [
7
+ "changelog",
8
+ "github",
9
+ "package-registry",
10
+ "papers",
11
+ "security",
12
+ "specs",
13
+ "vendor-status",
14
+ "web"
15
+ ]
16
+ }
Binary file
@@ -0,0 +1,3 @@
1
+ {
2
+ "confidenceThreshold": 0.75
3
+ }
Binary file
@@ -0,0 +1,22 @@
1
+ [
2
+ "authoritative_source_count",
3
+ "blocked_source_count",
4
+ "blog_count",
5
+ "file_count",
6
+ "forum_count",
7
+ "github_readme_count",
8
+ "github_repo_count",
9
+ "has_authority",
10
+ "has_only_one_good_source",
11
+ "negative_signal_sources",
12
+ "official_doc_count",
13
+ "other_count",
14
+ "paper_count",
15
+ "positive_signal_sources",
16
+ "query_academic",
17
+ "query_comparison",
18
+ "query_procedural",
19
+ "query_temporal",
20
+ "query_versioned",
21
+ "source_count"
22
+ ]
@@ -0,0 +1,5 @@
1
+ {
2
+ "task": "sufficiency",
3
+ "bestModel": "lr",
4
+ "rows": 78
5
+ }
@@ -0,0 +1,106 @@
1
+ # Tiny Router Training Runbook
2
+
3
+ Target budget:
4
+
5
+ - GPU RAM: 2 GB
6
+ - CPU RAM: 20 GB
7
+ - Default path: CPU-first, frozen embeddings, small models
8
+
9
+ ## Environment
10
+
11
+ ```bash
12
+ python3 -m venv .venv-router
13
+ . .venv-router/bin/activate
14
+ pip install -r ml/router/requirements.txt
15
+ ```
16
+
17
+ ## Phase 1 — domain router
18
+
19
+ ```bash
20
+ node scripts/router/audit-cache.mjs
21
+ node scripts/router/export-examples.mjs
22
+ node scripts/router/split-examples.mjs
23
+
24
+ python ml/router/embed_model2vec.py \
25
+ --input data/router/examples.jsonl \
26
+ --gold data/router/gold-domain.jsonl \
27
+ --synthetic data/router/synthetic-train.jsonl
28
+
29
+ python ml/router/train_domain_classifier.py \
30
+ --embeddings data/router/domain-model2vec.npz data/router/synthetic-model2vec.npz \
31
+ --gold-embeddings data/router/gold-model2vec.npz \
32
+ --out .cache/models/pi-research-router/domain \
33
+ --model-type auto
34
+
35
+ python ml/router/evaluate_domain.py \
36
+ --model .cache/models/pi-research-router/domain/model.joblib \
37
+ --embeddings data/router/gold-model2vec.npz \
38
+ --out metrics/router/domain-model2vec-lr.json
39
+
40
+ python ml/router/benchmark_latency.py \
41
+ --model-dir .cache/models/pi-research-router/domain \
42
+ --examples data/router/gold-domain.jsonl \
43
+ --out metrics/router/latency.json
44
+
45
+ python scripts/router/eval_domain_unknown.py \
46
+ --model-dir .cache/models/pi-research-router/domain \
47
+ --input data/router/unknown-domain-smoke.jsonl
48
+ ```
49
+
50
+ ## Phase 2 — structured baselines
51
+
52
+ Build provisional structured rows:
53
+
54
+ ```bash
55
+ node scripts/router/export_structured_provisional.mjs
56
+ node scripts/router/eval_structured_baselines.mjs
57
+ ```
58
+
59
+ Train conservative structured classifiers:
60
+
61
+ ```bash
62
+ python ml/router/train_structured_baseline.py --task conflict
63
+ python ml/router/train_structured_baseline.py --task sufficiency
64
+ ```
65
+
66
+ Outputs:
67
+
68
+ - `.cache/models/pi-research-router/conflict-structured/`
69
+ - `.cache/models/pi-research-router/sufficiency-structured/`
70
+ - `metrics/router/conflict-structured-models.json`
71
+ - `metrics/router/sufficiency-structured-models.json`
72
+
73
+ ## Runtime flags
74
+
75
+ ```bash
76
+ PI_RESEARCH_TINY_ROUTER=1
77
+ PI_RESEARCH_TINY_ROUTER_MODEL=.cache/models/pi-research-router
78
+ PI_RESEARCH_TINY_ROUTER_TIMEOUT_MS=50
79
+ PI_RESEARCH_TINY_ROUTER_DOMAIN=1
80
+ PI_RESEARCH_TINY_ROUTER_FOLLOWUP=1
81
+ PI_RESEARCH_TINY_ROUTER_CONFLICT=0
82
+ PI_RESEARCH_TINY_ROUTER_SUFFICIENCY=0
83
+ ```
84
+
85
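+ Keep the conflict and sufficiency routers disabled (`PI_RESEARCH_TINY_ROUTER_CONFLICT=0`, `PI_RESEARCH_TINY_ROUTER_SUFFICIENCY=0`) until their metrics have been reviewed.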
86
+
87
+ ## Server deploy
88
+
89
+ Safe MCP runtime deploy:
90
+
91
+ ```bash
92
+ scripts/router/deploy-server-runtime.sh \
93
+ blackknight@100.98.190.19 \
94
+ ~/work/pi-research-runtime
95
+ ```
96
+
97
+ This syncs the repo, installs user-local Node if needed, copies trained router models, runs `npm install`, and writes:
98
+
99
+ - `start-mcp-tiny-router-safe.sh`
100
+ - `start-mcp-tiny-router-experimental.sh`
101
+
102
+ Recommended start command:
103
+
104
+ ```bash
105
+ ssh blackknight@100.98.190.19 'cd ~/work/pi-research-runtime && ./start-mcp-tiny-router-safe.sh'
106
+ ```
@@ -0,0 +1,81 @@
1
+ import json
2
+ import argparse
3
+ import time
4
+ import numpy as np
5
+ import joblib
6
+ import os
7
+
8
+ sys_path_added = False
9
+ if not sys_path_added:
10
+ import sys
11
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
12
+ sys_path_added = True
13
+
14
+ from features import load_embedding_model, extract_domain_features
15
+
16
def main():
    """Benchmark per-query latency of the domain router and write a JSON report.

    Command-line arguments:
        --model-dir: directory containing ``model.joblib``.
        --examples:  JSONL file; each non-blank line is an object with a ``query`` key.
        --out:       path of the JSON metrics file to write.

    Every query is featurised with mode ``"fast"`` and classified one at a
    time; the p50, p95 and mean latency in milliseconds are printed and saved.
    Exits through ``parser.error`` when the examples file contains no queries.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--model-dir", required=True)
    parser.add_argument("--examples", required=True)
    parser.add_argument("--out", required=True)
    args = parser.parse_args()

    print("Loading Model2Vec...")
    emb_model = load_embedding_model()

    print("Loading Classifier...")
    clf = joblib.load(os.path.join(args.model_dir, "model.joblib"))

    # Load the queries to benchmark, skipping blank lines.
    queries = []
    with open(args.examples, "r", encoding="utf-8") as f:
        for line in f:
            if not line.strip():
                continue
            queries.append(json.loads(line)["query"])

    if not queries:
        # Percentiles of an empty sample are undefined; fail loudly instead of writing NaN.
        parser.error(f"no queries found in {args.examples}")

    # Warm up so one-time initialisation cost is not counted in the measurements.
    print("Warming up...")
    for q in queries[:10]:
        feats = extract_domain_features([q], ["fast"], emb_model=emb_model, show_progress_bar=False)
        clf.predict(feats)

    print(f"Benchmarking {len(queries)} queries sequentially...")
    latencies = []
    for q in queries:
        t0 = time.perf_counter()
        feats = extract_domain_features([q], ["fast"], emb_model=emb_model, show_progress_bar=False)
        clf.predict(feats)
        t1 = time.perf_counter()
        latencies.append((t1 - t0) * 1000)  # ms

    latencies = np.array(latencies)
    # Convert numpy scalars to plain floats so the JSON output never depends on numpy types.
    p50 = float(np.percentile(latencies, 50))
    p95 = float(np.percentile(latencies, 95))
    mean = float(np.mean(latencies))

    print(f"p50: {p50:.2f} ms")
    print(f"p95: {p95:.2f} ms")
    print(f"Mean: {mean:.2f} ms")

    # os.makedirs("") raises, so only create the directory when --out actually has one.
    out_dir = os.path.dirname(args.out)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    metrics = {
        "task": "domain",
        "latency_ms": {
            "p50": p50,
            "p95": p95,
            "mean": mean,
            "samples": len(latencies),
        },
    }

    with open(args.out, "w", encoding="utf-8") as f:
        json.dump(metrics, f, indent=2)
79
+
80
+ if __name__ == "__main__":
81
+ main()