pi-research 1.3.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,192 @@
1
/**
 * Labels a reviewer may assign, grouped by annotation task name.
 */
export const ANNOTATION_LABELS = {
  conflict: [
    "no_conflict",
    "resolved_by_authority",
    "resolved_by_recency",
    "needs_review",
  ],
  sufficiency: [
    "sufficient",
    "need_authority",
    "need_more_sources",
    "need_recency",
    "need_version_context",
  ],
};
5
+
6
/**
 * Parse JSON Lines text into an array of values.
 *
 * Each line is trimmed (which also removes a trailing "\r"), blank lines are
 * skipped, and every remaining line is parsed as one JSON document.
 *
 * @param {string|null|undefined} text - JSONL content; null/undefined are treated as empty.
 * @returns {Array<*>} Parsed values in line order.
 * @throws {SyntaxError} When a line is not valid JSON; the message names the
 *   1-based line number and the original error is attached as `cause`.
 */
export function parseJsonl(text = "") {
  const records = [];
  // `text ?? ""` keeps an explicit null from being stringified to "null" and parsed as a record.
  const lines = String(text ?? "").split("\n");

  lines.forEach((rawLine, index) => {
    const line = rawLine.trim();
    if (!line) return;
    try {
      records.push(JSON.parse(line));
    } catch (error) {
      throw new SyntaxError(`Invalid JSONL at line ${index + 1}: ${error.message}`, { cause: error });
    }
  });

  return records;
}
13
+
14
/**
 * Hash a value's string form with 32-bit FNV-1a over its UTF-16 code units.
 *
 * @param {*} input - Value to hash; coerced with String().
 * @returns {string} Eight lowercase hexadecimal digits.
 */
function stableHash(input = "") {
  const FNV_OFFSET_BASIS = 2166136261;
  const FNV_PRIME = 16777619;
  const source = String(input);

  let state = FNV_OFFSET_BASIS;
  let position = 0;
  while (position < source.length) {
    // XOR the code unit in, then multiply with 32-bit wraparound.
    state = Math.imul(state ^ source.charCodeAt(position), FNV_PRIME);
    position += 1;
  }

  // `>>> 0` reinterprets the signed 32-bit state as unsigned before formatting.
  const hex = (state >>> 0).toString(16);
  return hex.padStart(8, "0");
}
23
+
24
/**
 * Derive a deterministic id for an annotation row.
 *
 * The id is the stableHash of the JSON-encoded tuple
 * [task, query, inputText, meta.mode], with falsy parts replaced by
 * "unknown", "", "" and null respectively.
 *
 * @param {string} task - Annotation task name.
 * @param {object} [row] - Row providing `query`, `inputText` and `meta.mode`.
 * @returns {string} Hash string produced by stableHash.
 */
export function deriveAnnotationId(task, row = {}) {
  const identityParts = [
    task || "unknown",
    row.query || "",
    row.inputText || "",
    row.meta?.mode || null,
  ];
  return stableHash(JSON.stringify(identityParts));
}
27
+
28
/**
 * Report whether the query contains a time-sensitivity keyword
 * (whole-word, case-insensitive).
 *
 * @param {string} [query] - Query text.
 * @returns {boolean} True when any keyword matches.
 */
function isTemporalQuery(query = "") {
  const temporalTerms = /\b(current|latest|today|status|support|supported|lts|2024|2025|2026|release)\b/i;
  return temporalTerms.test(query);
}
31
+
32
/**
 * Report whether the query contains a version/migration keyword
 * (whole-word, case-insensitive), including tokens such as "v18".
 *
 * @param {string} [query] - Query text.
 * @returns {boolean} True when any keyword matches.
 */
function isVersionQuery(query = "") {
  const versionTerms = /\b(version|v\d+|migration|upgrade|compatibility|compatible|build flag)\b/i;
  return versionTerms.test(query);
}
35
+
36
/**
 * Report whether the query contains a repository/documentation/how-to keyword
 * (whole-word, case-insensitive).
 *
 * @param {string} [query] - Query text.
 * @returns {boolean} True when any keyword matches.
 */
function isProceduralDocQuery(query = "") {
  const proceduralTerms = /\b(readme|issue|repo|repository|docs|documentation|file|csv|json|run|how to|how evaluation works|spreadsheet|columns|api|abortcontroller|promise\.all|browsecomp_eval\.py)\b/i;
  return proceduralTerms.test(query);
}
39
+
40
/**
 * Report whether the text contains one of the block-page phrases
 * (case-insensitive substring match).
 *
 * @param {string} [text] - Text to inspect.
 * @returns {boolean} True when a phrase is present.
 */
function hasBlockedMarker(text = "") {
  const blockedPhrases = /attention required!|cloudflare|access denied|temporarily unavailable/i;
  return blockedPhrases.test(text);
}
43
+
44
/**
 * Count occurrences of each `[source_type]` tag in the text.
 *
 * Tags are matched case-insensitively; every known tag appears in the result,
 * with 0 when absent.
 *
 * @param {string} [text] - Text containing bracketed source-type tags.
 * @returns {Object<string, number>} Occurrence count per tag.
 */
function sourceTypeCounts(text = "") {
  const knownTags = ["official_doc", "paper", "github_readme", "github_repo", "forum", "blog", "other"];
  const tally = {};
  knownTags.forEach((tag) => {
    const tagPattern = new RegExp(`\\[${tag}\\]`, "gi");
    const hits = text.match(tagPattern);
    tally[tag] = hits ? hits.length : 0;
  });
  return tally;
}
53
+
54
/**
 * Report whether the text carries at least one official_doc, paper,
 * github_readme or github_repo tag.
 *
 * @param {string} [text] - Text containing bracketed source-type tags.
 * @returns {boolean} True when any such tag is present.
 */
function hasAuthorityMarkers(text = "") {
  const tally = sourceTypeCounts(text);
  const authorityTotal = tally.official_doc + tally.paper + tally.github_readme + tally.github_repo;
  return authorityTotal > 0;
}
58
+
59
/**
 * Report whether the text contains a freshness keyword
 * (whole-word, case-insensitive).
 *
 * @param {string} [text] - Text to inspect.
 * @returns {boolean} True when any keyword matches.
 */
function hasFreshnessMarkers(text = "") {
  const freshnessTerms = /\b(current|latest|today|2024|2025|2026|lts|release|support status)\b/i;
  return freshnessTerms.test(text);
}
62
+
63
/**
 * Sum the official_doc, paper and github_readme counts
 * (github_repo is not included).
 *
 * @param {Object<string, number>} counts - Per-tag counts.
 * @returns {number} Combined count of the three types.
 */
function countStrongAuthority(counts) {
  const strongTypes = ["official_doc", "paper", "github_readme"];
  const [officialDocs, papers, readmes] = strongTypes.map((type) => counts[type]);
  return officialDocs + papers + readmes;
}
66
+
67
/**
 * Suggest a label and rationale for an annotation row using keyword and
 * source-tag heuristics.
 *
 * For task "conflict" the result is one of the conflict labels; for any other
 * task value the sufficiency rules are applied. Rules are checked in order and
 * the first match wins.
 *
 * @param {string} task - "conflict" selects the conflict rules; anything else
 *   selects the sufficiency rules.
 * @param {object} [row] - Row providing `query`, `candidateLabel` (or `label`),
 *   `inputText` and `meta` (`sourceCount`, `authoritativeSourcesFound`).
 * @returns {{label: string, rationale: string}} Suggested label with its reason.
 */
export function suggestAnnotation(task, row = {}) {
  const verdict = (label, rationale) => ({ label, rationale });

  const query = String(row.query || "");
  const candidateLabel = String(row.candidateLabel || row.label || "");
  const inputText = String(row.inputText || "");
  const meta = row.meta && typeof row.meta === "object" ? row.meta : {};
  const sourceCount = Number(meta.sourceCount || 0);

  // A block-page marker anywhere in the text disqualifies authority entirely.
  const isBlocked = hasBlockedMarker(inputText);
  const isAuthoritative = !isBlocked
    && (Boolean(meta.authoritativeSourcesFound) || hasAuthorityMarkers(inputText));
  const strongAuthorityCount = countStrongAuthority(sourceTypeCounts(inputText));
  const isTemporal = isTemporalQuery(query);
  const isVersioned = isVersionQuery(query);
  const isProcedural = isProceduralDocQuery(query);
  const hasFreshness = hasFreshnessMarkers(inputText);

  if (task === "conflict") {
    if (candidateLabel === "no_conflict") {
      return verdict("no_conflict", "The snippets do not show a clear factual contradiction on the same claim.");
    }
    if (!isAuthoritative) {
      return verdict("needs_review", "The snippets suggest disagreement, but there is no clear authoritative or fresher winner.");
    }
    if (isProcedural) {
      return verdict("no_conflict", "Repository, README, and documentation sources are more likely complementary than contradictory here.");
    }
    if (isTemporal) {
      return verdict("resolved_by_recency", "This looks time-sensitive, so the more current authoritative source should decide the conflict.");
    }
    return verdict("resolved_by_authority", "Prefer the authoritative source over weaker secondary evidence.");
  }

  if (!isAuthoritative) {
    return verdict("need_authority", "The available sources are not authoritative enough to treat this as fully answered.");
  }
  if (isTemporal && !hasFreshness) {
    return verdict("need_recency", "This query is time-sensitive, but the snippets do not clearly establish current status.");
  }
  if (isVersioned) {
    return verdict("need_version_context", "This answer depends on version-specific behavior that should be confirmed explicitly.");
  }

  if (candidateLabel === "insufficient") {
    if (sourceCount <= 1) {
      return verdict("need_more_sources", "There is some evidence, but a single source is not enough for a robust answer here.");
    }
    if (strongAuthorityCount >= 2 && sourceCount >= 3 && !isTemporal && !isVersioned) {
      return verdict("sufficient", "The sources appear authoritative enough and broad enough to answer the query reliably.");
    }
  }

  if (sourceCount <= 2 && !isProcedural) {
    return verdict("need_more_sources", "There is some evidence, but not enough independent coverage yet.");
  }
  return verdict("sufficient", "The sources appear authoritative enough and broadly cover the query.");
}
118
+
119
/**
 * Combine draft rows with previously reviewed rows into annotation items.
 *
 * Reviewed rows are matched to drafts by deriveAnnotationId; when several
 * reviewed rows share an id, the last one wins. Each item also carries the
 * heuristic suggestion from suggestAnnotation.
 *
 * @param {string} task - Annotation task name.
 * @param {object[]} [draftRows] - Rows awaiting review.
 * @param {object[]} [reviewedRows] - Rows that already have a `label`/`rationale`.
 * @returns {object[]} One item per draft row; `status` is "reviewed" when a
 *   non-empty reviewed label was found, otherwise "pending".
 */
export function buildAnnotationItems(task, draftRows = [], reviewedRows = []) {
  const reviewedById = new Map();
  reviewedRows.forEach((reviewedRow) => {
    reviewedById.set(deriveAnnotationId(task, reviewedRow), reviewedRow);
  });

  const toItem = (draft) => {
    const id = deriveAnnotationId(task, draft);
    const priorReview = reviewedById.get(id);
    const finalLabel = priorReview?.label || "";
    const { label: suggestedLabel, rationale: suggestedRationale } = suggestAnnotation(task, draft);

    return {
      id,
      task,
      query: draft.query || "",
      candidateLabel: draft.candidateLabel || draft.label || "",
      finalLabel,
      rationale: priorReview?.rationale || draft.rationale || "",
      suggestedLabel,
      suggestedRationale,
      inputText: draft.inputText || "",
      meta: draft.meta && typeof draft.meta === "object" ? draft.meta : {},
      status: finalLabel ? "reviewed" : "pending",
    };
  };

  return draftRows.map((draft) => toItem(draft));
}
146
+
147
/**
 * Return a new item list with a review patch applied to the item with `id`.
 *
 * Only `finalLabel` and `rationale` are taken from the patch; a null/undefined
 * patch value keeps the item's current value. Items with other ids are
 * returned as the same object references.
 *
 * @param {object[]} [items] - Current annotation items.
 * @param {string} id - Id of the item to update.
 * @param {{finalLabel?: string, rationale?: string}} [patch] - Fields to set.
 * @returns {object[]} New array; `status` of the patched item is recomputed
 *   from its final label.
 */
export function upsertAnnotationReview(items = [], id, patch = {}) {
  const applyPatch = (item) => {
    const finalLabel = patch.finalLabel ?? item.finalLabel ?? "";
    const rationale = patch.rationale ?? item.rationale ?? "";
    const status = finalLabel ? "reviewed" : "pending";
    return { ...item, finalLabel, rationale, status };
  };

  return items.map((item) => (item.id === id ? applyPatch(item) : item));
}
160
+
161
/**
 * Summarize review progress for a list of annotation items.
 *
 * An item counts as reviewed when its `status` is "reviewed" and it has a
 * truthy `finalLabel`. Null/undefined entries count towards `total` only.
 *
 * @param {object[]} [items] - Annotation items.
 * @returns {{total: number, reviewed: number, pending: number, byLabel: Object<string, number>}}
 *   Totals plus the number of reviewed items per final label.
 */
export function summarizeAnnotationProgress(items = []) {
  // Count in a Map so labels that collide with Object.prototype members
  // ("constructor", "toString", "__proto__", ...) are tallied like any other.
  const labelCounts = new Map();
  let reviewed = 0;

  for (const item of items) {
    if (item?.status !== "reviewed" || !item.finalLabel) continue;
    reviewed += 1;
    const label = String(item.finalLabel);
    labelCounts.set(label, (labelCounts.get(label) || 0) + 1);
  }

  return {
    total: items.length,
    reviewed,
    pending: Math.max(0, items.length - reviewed),
    // Object.fromEntries defines own data properties, so no inherited key leaks in.
    byLabel: Object.fromEntries(labelCounts),
  };
}
179
+
180
/**
 * Serialize reviewed items as JSON Lines.
 *
 * Only items with `status` "reviewed" and a truthy `finalLabel` are exported;
 * the final label is written under the `label` key.
 *
 * @param {object[]} [items] - Annotation items.
 * @returns {string} One JSON object per line, joined with "\n" (no trailing newline).
 */
export function exportReviewedJsonl(items = []) {
  const isExportable = (item) => item?.status === "reviewed" && item.finalLabel;

  const toLine = (item) => {
    const record = {
      query: item.query,
      label: item.finalLabel,
      rationale: item.rationale || "",
      inputText: item.inputText,
      candidateLabel: item.candidateLabel,
      meta: item.meta && typeof item.meta === "object" ? item.meta : {},
    };
    return JSON.stringify(record);
  };

  return items.filter(isExportable).map(toLine).join("\n");
}
@@ -0,0 +1,134 @@
1
+ import { PLACEHOLDER_PATTERNS } from "./research-policy.js";
2
+
3
// Source types that count as authoritative evidence.
const AUTHORITATIVE_TYPES = new Set([
  "official_doc",
  "paper",
  "github_readme",
  "github_repo",
  "file",
]);

// Whole-word cues used to flag a source as carrying a positive signal.
const POSITIVE_PATTERN = /\b(supported|works|available|recommended|stable|benchmark|comprehensive|practical)\b/i;

// Whole-word cues used to flag a source as carrying a negative signal.
const NEGATIVE_PATTERN = /\b(not supported|unsupported|does not|no support|broken|incompatible|removed|blocked|denied)\b/i;

// Every source type accepted from page objects; anything else is mapped to "other".
const KNOWN_SOURCE_TYPES = new Set([
  "official_doc",
  "paper",
  "github_readme",
  "github_repo",
  "forum",
  "blog",
  "other",
  "file",
]);
7
+
8
/**
 * Parse the section after the first "Sources:" marker into source records.
 *
 * The section is split on blank lines; each chunk may start with a
 * `[source_type]` tag. Chunks without a tag get type "other". The tag is used
 * as written (no case normalization or validation against known types).
 *
 * @param {string} [inputText] - Text that may contain a "Sources:" section.
 * @returns {object[]} Records with `index`, `sourceType`, `title`, `text`
 *   (whitespace-collapsed) and the boolean flags `authoritative`, `blocked`,
 *   `positive`, `negative`. Empty when there is no marker or no content.
 */
export function parseStructuredSources(inputText = "") {
  const SOURCES_MARKER = "Sources:";
  const raw = String(inputText);

  const markerAt = raw.indexOf(SOURCES_MARKER);
  if (markerAt === -1) return [];

  const body = raw.slice(markerAt + SOURCES_MARKER.length).trim();
  if (!body) return [];

  const chunks = body
    .split(/\n\s*\n/)
    .map((chunk) => chunk.trim())
    .filter(Boolean);

  return chunks.map((chunk, idx) => {
    const tagged = /^\[([^\]]+)\]\s*(.*)$/s.exec(chunk);
    const sourceType = tagged?.[1] || "other";
    // A tag with nothing after it falls back to the whole chunk as text.
    const rawText = tagged?.[2] || chunk;
    const text = rawText.replace(/\s+/g, " ").trim();

    return {
      index: idx,
      sourceType,
      title: text,
      text,
      authoritative: AUTHORITATIVE_TYPES.has(sourceType),
      blocked: PLACEHOLDER_PATTERNS.some((p) => p.test(text)),
      positive: POSITIVE_PATTERN.test(text),
      negative: NEGATIVE_PATTERN.test(text),
    };
  });
}
35
+
36
/**
 * Convert a fetched page object into a source record.
 *
 * Unknown source types become "other". The text is the title followed by the
 * snippet (or the page text when there is no snippet), whitespace-collapsed.
 * A blocked page is never authoritative.
 *
 * @param {object} [page] - Page with optional `sourceType`, `title`, `snippet`,
 *   `text`, `authoritative` and `quality.blocked`.
 * @param {number} [index] - Position to record on the result.
 * @returns {object} Record with `index`, `sourceType`, `title`, `text` and the
 *   boolean flags `authoritative`, `blocked`, `positive`, `negative`.
 */
export function structuredSourceFromPage(page = {}, index = 0) {
  const sourceType = KNOWN_SOURCE_TYPES.has(page.sourceType) ? page.sourceType : "other";

  const headline = page.title || "";
  const excerpt = page.snippet || page.text || "";
  const text = `${headline} ${excerpt}`.replace(/\s+/g, " ").trim();

  const looksLikePlaceholder = () => PLACEHOLDER_PATTERNS.some((p) => p.test(text));
  const blocked = Boolean(page.quality?.blocked) || looksLikePlaceholder();

  const claimsAuthority = () => Boolean(page.authoritative) || AUTHORITATIVE_TYPES.has(sourceType);
  const authoritative = !blocked && claimsAuthority();

  return {
    index,
    sourceType,
    title: page.title || text,
    text,
    authoritative,
    blocked,
    positive: POSITIVE_PATTERN.test(text),
    negative: NEGATIVE_PATTERN.test(text),
  };
}
53
+
54
/**
 * Convert a list of pages into source records, keeping their positions.
 *
 * @param {object[]} [pages] - Page objects; a non-array yields an empty list.
 * @returns {object[]} One record per page, built by structuredSourceFromPage.
 */
export function structuredSourcesFromPages(pages = []) {
  if (!Array.isArray(pages)) return [];
  return pages.map((page, index) => structuredSourceFromPage(page, index));
}
57
+
58
/**
 * Derive 0/1 aspect flags from keywords found in the query.
 *
 * @param {string} [query] - Query text; falsy values are treated as empty.
 * @returns {{temporal: number, versioned: number, comparison: number, academic: number, procedural: number}}
 *   Each flag is 1 when its whole-word, case-insensitive pattern matches.
 */
export function extractQueryAspectFlags(query = "") {
  const text = String(query || "");
  const flagFor = (pattern) => (pattern.test(text) ? 1 : 0);

  const temporal = flagFor(/\b(current|latest|today|status|support|supported|lts|2024|2025|2026|release)\b/i);
  const versioned = flagFor(/\b(version|v\d+|migration|upgrade|compatibility|compatible|build flag)\b/i);
  const comparison = flagFor(/\b(vs\.?|versus|compare|comparison|compared to)\b/i);
  const academic = flagFor(/\b(paper|papers|study|studies|arxiv|doi|research|benchmark)\b/i);
  const procedural = flagFor(/\b(readme|issue|repo|repository|docs|documentation|file|csv|json|run|how to|api)\b/i);

  return { temporal, versioned, comparison, academic, procedural };
}
68
+
69
/**
 * Count sources per `sourceType`.
 *
 * The eight known types are always present (0 when unseen); any other type
 * encountered is added as an extra key.
 *
 * @param {object[]} [sources] - Records with a `sourceType` property.
 * @returns {Object<string, number>} Count per source type.
 */
function countBySourceType(sources = []) {
  const tally = {
    official_doc: 0,
    paper: 0,
    github_readme: 0,
    github_repo: 0,
    forum: 0,
    blog: 0,
    other: 0,
    file: 0,
  };

  for (const { sourceType } of sources) {
    const seenSoFar = tally[sourceType] || 0;
    tally[sourceType] = seenSoFar + 1;
  }

  return tally;
}
75
+
76
/**
 * Build the feature set shared by the conflict and sufficiency extractors.
 *
 * @param {string} query - Query text, used for the `query_*` aspect flags.
 * @param {object[]} sources - Source records carrying `sourceType` and the
 *   `authoritative`, `blocked`, `positive`, `negative` flags.
 * @returns {Object<string, number>} Query flags, overall and per-signal source
 *   counts, and one `<type>_count` per known source type.
 */
function baseStructuredFeatures(query, sources) {
  const aspect = extractQueryAspectFlags(query);
  const perType = countBySourceType(sources);
  const countWhere = (predicate) => sources.filter(predicate).length;

  return {
    query_temporal: aspect.temporal,
    query_versioned: aspect.versioned,
    query_comparison: aspect.comparison,
    query_academic: aspect.academic,
    query_procedural: aspect.procedural,
    source_count: sources.length,
    // Blocked sources never count as authoritative here.
    authoritative_source_count: countWhere((source) => source.authoritative && !source.blocked),
    blocked_source_count: countWhere((source) => source.blocked),
    positive_signal_sources: countWhere((source) => source.positive),
    negative_signal_sources: countWhere((source) => source.negative),
    official_doc_count: perType.official_doc,
    paper_count: perType.paper,
    github_readme_count: perType.github_readme,
    github_repo_count: perType.github_repo,
    forum_count: perType.forum,
    blog_count: perType.blog,
    other_count: perType.other,
    file_count: perType.file,
  };
}
101
+
102
/**
 * Build conflict-task features from already-structured sources.
 *
 * @param {string} [query] - Query text.
 * @param {object[]} [sources] - Structured source records.
 * @param {string} [candidateLabel] - Candidate label from an earlier stage.
 * @returns {Object<string, number>} Base features plus `candidate_conflict`
 *   and `has_authority_resolution_path` (both 0/1).
 */
export function extractConflictStructuredFeaturesFromSources(query = "", sources = [], candidateLabel = "") {
  const shared = baseStructuredFeatures(query, sources);

  // Substring test: any label containing "conflict" sets the flag.
  // NOTE(review): this includes "no_conflict" — confirm that is what the model expects.
  const mentionsConflict = String(candidateLabel || "").includes("conflict");
  const hasUsableAuthority = sources.some((source) => source.authoritative && !source.blocked);

  return {
    ...shared,
    candidate_conflict: mentionsConflict ? 1 : 0,
    has_authority_resolution_path: hasUsableAuthority ? 1 : 0,
  };
}
109
+
110
/**
 * Build conflict-task features from a dataset row.
 *
 * Sources are parsed from the row's `inputText`; the candidate label is taken
 * from `candidateLabel`, falling back to `label`.
 *
 * @param {object} [row] - Row with `query`, `inputText`, `candidateLabel`/`label`.
 * @returns {Object<string, number>} Conflict feature set.
 */
export function extractConflictStructuredFeatures(row = {}) {
  const query = row.query || "";
  const candidateLabel = row.candidateLabel || row.label || "";
  const sources = parseStructuredSources(row.inputText || "");
  return extractConflictStructuredFeaturesFromSources(query, sources, candidateLabel);
}
114
+
115
/**
 * Build conflict-task features from fetched page objects.
 *
 * @param {string} [query] - Query text.
 * @param {object[]} [pages] - Page objects, converted with structuredSourcesFromPages.
 * @param {string} [candidateLabel] - Candidate label from an earlier stage.
 * @returns {Object<string, number>} Conflict feature set.
 */
export function extractConflictStructuredFeaturesFromPages(query = "", pages = [], candidateLabel = "") {
  const sources = structuredSourcesFromPages(pages);
  return extractConflictStructuredFeaturesFromSources(query, sources, candidateLabel);
}
118
+
119
/**
 * Build sufficiency-task features from already-structured sources.
 *
 * @param {string} [query] - Query text.
 * @param {object[]} [sources] - Structured source records.
 * @returns {Object<string, number>} Base features plus `has_authority` and
 *   `has_only_one_good_source` (both 0/1), where a "good" source is
 *   authoritative and not blocked.
 */
export function extractSufficiencyStructuredFeaturesFromSources(query = "", sources = []) {
  const shared = baseStructuredFeatures(query, sources);
  const goodSources = sources.filter((source) => source.authoritative && !source.blocked);

  return {
    ...shared,
    has_authority: goodSources.length > 0 ? 1 : 0,
    has_only_one_good_source: goodSources.length === 1 ? 1 : 0,
  };
}
126
+
127
/**
 * Build sufficiency-task features from a dataset row.
 *
 * @param {object} [row] - Row with `query` and `inputText`; sources are parsed
 *   from `inputText`.
 * @returns {Object<string, number>} Sufficiency feature set.
 */
export function extractSufficiencyStructuredFeatures(row = {}) {
  const query = row.query || "";
  const sources = parseStructuredSources(row.inputText || "");
  return extractSufficiencyStructuredFeaturesFromSources(query, sources);
}
131
+
132
/**
 * Build sufficiency-task features from fetched page objects.
 *
 * @param {string} [query] - Query text.
 * @param {object[]} [pages] - Page objects, converted with structuredSourcesFromPages.
 * @returns {Object<string, number>} Sufficiency feature set.
 */
export function extractSufficiencyStructuredFeaturesFromPages(query = "", pages = []) {
  const sources = structuredSourcesFromPages(pages);
  return extractSufficiencyStructuredFeaturesFromSources(query, sources);
}
@@ -0,0 +1,338 @@
1
+ import { spawn } from "node:child_process";
2
+ import { existsSync, readFileSync } from "node:fs";
3
+ import { dirname, join } from "node:path";
4
+ import { fileURLToPath } from "node:url";
5
+
6
+ import {
7
+ extractConflictStructuredFeaturesFromPages,
8
+ extractSufficiencyStructuredFeaturesFromPages,
9
+ } from "./router-structured-features.js";
10
+
11
// Domains that get a stricter minimum confidence and are not downgraded to "web".
const HIGH_RISK_DOMAINS = new Set([
  "security",
  "papers",
  "specs",
]);

// Lower bounds applied to any calibrated domain threshold.
const MIN_DEFAULT_DOMAIN_THRESHOLD = 0.35;
const MIN_HIGH_RISK_DOMAIN_THRESHOLD = 0.55;

// Structured sufficiency decisions that veto an otherwise "sufficient" result.
const SUFFICIENCY_VETO_DECISIONS = new Set([
  "need_authority",
  "need_more_sources",
  "need_recency",
  "need_version_context",
  "need_conflict_resolution",
]);
21
+
22
// Module-level state for the single router daemon child process.

// Handle of the running daemon, or null when none is running.
let daemonProcess = null;
// Set once the daemon has printed its READY line.
let isReady = false;
// Requests created before READY; each entry holds the payload and its timer starter.
let messageQueue = [];
// In-flight requests keyed by request id.
const pendingRequests = new Map();
// Next request id to hand out.
let requestIdCounter = 1;
// Calibration objects already loaded, keyed by model directory.
const domainCalibrationCache = new Map();
28
+
29
/**
 * Read a boolean flag from an environment-like object.
 *
 * @param {Object<string, string|undefined>} env - Environment variables.
 * @param {string} name - Variable name.
 * @param {boolean} [defaultValue] - Returned when the variable is undefined.
 * @returns {boolean} True only for the exact values "1" or "true"; any other
 *   defined value is false.
 */
function envFlag(env, name, defaultValue = false) {
  const raw = env[name];
  if (raw === undefined) return defaultValue;
  return ["1", "true"].includes(raw);
}
34
+
35
/**
 * Check whether `<modelDir>/<name>/model.joblib` exists on disk.
 *
 * @param {string} modelDir - Directory containing per-task model folders.
 * @param {string} name - Model folder name.
 * @returns {boolean} True when the model file exists.
 */
function modelExists(modelDir, name) {
  const artifactPath = join(modelDir, name, "model.joblib");
  return existsSync(artifactPath);
}
38
+
39
/**
 * Resolve the tiny-router configuration from environment variables.
 *
 * The router is enabled only when PI_RESEARCH_TINY_ROUTER is set and the
 * Python interpreter path exists. Each task is enabled only when the router
 * is enabled, its own flag is on (the domain task defaults to on, the others
 * to off) and its model file exists.
 *
 * @param {Object<string, string|undefined>} [env] - Environment variables.
 * @returns {{enabled: boolean, modelDir: string, pythonPath: string, timeoutMs: number,
 *   tasks: {domain: boolean, followup: boolean, conflict: boolean, sufficiency: boolean}}}
 *   Resolved configuration.
 */
export function resolveTinyRouterConfig(env = process.env) {
  const DEFAULT_TIMEOUT_MS = 50;

  const enabled = envFlag(env, "PI_RESEARCH_TINY_ROUTER");
  const modelDir = env.PI_RESEARCH_TINY_ROUTER_MODEL || join(dirname(fileURLToPath(import.meta.url)), "..", "ml", "models");
  const pythonPath = env.PI_RESEARCH_TINY_ROUTER_PYTHON || join(process.cwd(), ".venv-router", "bin", "python");
  const daemonAvailable = enabled && existsSync(pythonPath);

  // A non-numeric or negative value would reach setTimeout as NaN/negative,
  // which Node coerces to 1 ms; fall back to the default instead.
  const requestedTimeout = Number(env.PI_RESEARCH_TINY_ROUTER_TIMEOUT_MS || DEFAULT_TIMEOUT_MS);
  const timeoutMs = Number.isFinite(requestedTimeout) && requestedTimeout >= 0
    ? requestedTimeout
    : DEFAULT_TIMEOUT_MS;

  const taskEnabled = (flagName, modelName, defaultOn = false) => (
    daemonAvailable && envFlag(env, flagName, defaultOn) && modelExists(modelDir, modelName)
  );

  return {
    enabled: daemonAvailable,
    modelDir,
    pythonPath,
    timeoutMs,
    tasks: {
      domain: taskEnabled("PI_RESEARCH_TINY_ROUTER_DOMAIN", "domain", true),
      followup: taskEnabled("PI_RESEARCH_TINY_ROUTER_FOLLOWUP", "followup"),
      conflict: taskEnabled("PI_RESEARCH_TINY_ROUTER_CONFLICT", "conflict-structured"),
      sufficiency: taskEnabled("PI_RESEARCH_TINY_ROUTER_SUFFICIENCY", "sufficiency-structured"),
    },
  };
}
58
+
59
/**
 * Spawn the Python router daemon if none is running and wire up its streams.
 *
 * The daemon is expected to print a line "READY" once, then one JSON object
 * per line carrying the `id` of the request it answers. Requests queued
 * before READY are flushed (and their timers started) when READY arrives.
 * When the process exits or fails, all pending requests are resolved with an
 * `{ error }` result and the queue is dropped.
 *
 * @param {{pythonPath: string, modelDir: string}} config - Resolved router configuration.
 * @returns {void}
 */
function startDaemon(config) {
  if (daemonProcess) return;

  const daemonScript = join(dirname(fileURLToPath(import.meta.url)), "..", "ml", "router", "daemon.py");
  const currentProcess = spawn(config.pythonPath, [daemonScript, config.modelDir], {
    stdio: ["pipe", "pipe", "pipe"],
  });
  daemonProcess = currentProcess;

  // Shared cleanup for "exit" and "error"; a no-op once this process is no
  // longer the active daemon (e.g. after stopTinyRouterDaemon).
  const teardown = (reason) => {
    if (daemonProcess !== currentProcess) return;
    daemonProcess = null;
    isReady = false;
    // Queued payloads belong to requests that are resolved below; do not
    // replay them to a future daemon.
    messageQueue = [];
    for (const { resolve } of pendingRequests.values()) {
      resolve({ error: reason });
    }
    pendingRequests.clear();
  };

  let buffer = "";

  // Decode as UTF-8 in the stream so multi-byte characters split across
  // chunks are not corrupted.
  currentProcess.stdout.setEncoding("utf8");
  currentProcess.stdout.on("data", (chunk) => {
    // Ignore late output from a process that has been replaced or stopped.
    if (daemonProcess !== currentProcess) return;

    buffer += chunk.toString();
    const lines = buffer.split("\n");
    // The last element is an incomplete line (or ""); keep it for the next chunk.
    buffer = lines.pop();

    for (const line of lines) {
      if (line.trim() === "READY") {
        isReady = true;
        for (const msg of messageQueue) {
          msg.startInferenceTimer();
          currentProcess.stdin.write(`${msg.payload}\n`);
        }
        messageQueue = [];
        continue;
      }

      try {
        const parsed = JSON.parse(line);
        const pending = pendingRequests.get(parsed.id);
        if (pending) {
          pendingRequests.delete(parsed.id);
          pending.resolve(parsed);
        }
      } catch {
        // ignore malformed JSON
      }
    }
  });

  currentProcess.stderr.on("data", () => {
    // ignore stderr warnings for now
  });

  // Without a listener, a write to a closed pipe (EPIPE) would surface as an
  // uncaught exception; the request itself still resolves via timeout or exit.
  currentProcess.stdin.on("error", () => {});

  // Emitted when the process cannot be spawned or signalled; "exit" is not
  // guaranteed to follow, so clean up here as well.
  currentProcess.on("error", (error) => {
    teardown(`Daemon error: ${error?.message || "unknown"}`);
  });

  currentProcess.on("exit", () => {
    teardown("Daemon exited");
  });
}
115
+
116
/**
 * Stop the router daemon and reset all module state.
 *
 * Kills the child process if one is running, drops queued messages, and
 * resolves every pending request with `{ error: "Daemon stopped manually" }`.
 *
 * @returns {void}
 */
export function stopTinyRouterDaemon() {
  const running = daemonProcess;
  if (running) {
    running.kill();
    daemonProcess = null;
  }

  isReady = false;
  messageQueue = [];

  pendingRequests.forEach((entry) => {
    entry.resolve({ error: "Daemon stopped manually" });
  });
  pendingRequests.clear();
}
128
+
129
/**
 * Send one request to the router daemon and resolve with the finalized result.
 *
 * The daemon is started on demand. If it is not ready yet the request is
 * queued; the timeout only starts once the payload is actually written.
 * The returned promise never rejects: on timeout or abort `finalize(null)`
 * is used, otherwise `finalize(result)` with the daemon's reply (or an
 * `{ error }` object supplied by the daemon lifecycle handlers).
 *
 * @param {{timeoutMs: number}} config - Resolved router configuration.
 * @param {object} taskPayload - Fields merged into the JSON request next to `id`.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @param {function(object|null): *} finalize - Maps the raw result to the resolved value.
 * @returns {Promise<*>} Resolves with the return value of `finalize`.
 */
function requestTinyRouter(config, taskPayload, signal, finalize) {
  startDaemon(config);

  const id = requestIdCounter++;
  const payload = JSON.stringify({ id, ...taskPayload });

  return new Promise((resolve) => {
    let settled = false;
    let timer;
    let queuedEntry = null;

    const finish = (result) => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      signal?.removeEventListener?.("abort", abort);
      pendingRequests.delete(id);
      if (queuedEntry) {
        // Settled while still waiting for READY: drop the entry so the
        // payload is not sent to the daemon for a request nobody awaits.
        messageQueue = messageQueue.filter((entry) => entry !== queuedEntry);
        queuedEntry = null;
      }
      resolve(finalize(result));
    };

    const abort = () => finish(null);

    pendingRequests.set(id, { resolve: finish });

    const startInferenceTimer = () => {
      timer = setTimeout(abort, config.timeoutMs);
      // Do not keep the event loop alive just for this timeout.
      timer.unref?.();
    };

    if (signal?.aborted) {
      abort();
    } else {
      signal?.addEventListener?.("abort", abort, { once: true });
      if (isReady) {
        startInferenceTimer();
        daemonProcess.stdin.write(`${payload}\n`);
      } else {
        queuedEntry = { payload, startInferenceTimer };
        messageQueue.push(queuedEntry);
      }
    }
  });
}
170
+
171
/**
 * Pick between the heuristic domain and the router's domain prediction.
 *
 * The heuristic wins when the router gave no prediction, or when the
 * heuristic domain is high-risk and the router predicted "web".
 *
 * @param {string} heuristicDomain - Domain chosen by the heuristics.
 * @param {string|null|undefined} tinyDomain - Domain predicted by the router.
 * @returns {string} The domain to use.
 */
export function chooseTinyRouterDomain(heuristicDomain, tinyDomain) {
  const routerWouldDowngrade = HIGH_RISK_DOMAINS.has(heuristicDomain) && tinyDomain === "web";
  const keepHeuristic = !tinyDomain || routerWouldDowngrade;
  return keepHeuristic ? heuristicDomain : tinyDomain;
}
176
+
177
/**
 * Load (and cache per model directory) the domain calibration thresholds.
 *
 * Reads `<modelDir>/domain/calibration.json` when it exists. A missing file,
 * unreadable file or invalid JSON results in the built-in defaults. The
 * outcome, defaults included, is cached for later calls.
 *
 * @param {string} modelDir - Directory containing the model folders.
 * @returns {{defaultThreshold: number, highRiskThreshold: number, domainThresholds: object}}
 *   Calibration values.
 */
function loadDomainCalibration(modelDir) {
  if (domainCalibrationCache.has(modelDir)) return domainCalibrationCache.get(modelDir);

  const calibrationPath = join(modelDir, "domain", "calibration.json");

  const readFromDisk = () => {
    if (!existsSync(calibrationPath)) return null;
    const parsed = JSON.parse(readFileSync(calibrationPath, "utf8"));
    const hasOverrides = parsed.domainThresholds && typeof parsed.domainThresholds === "object";
    return {
      defaultThreshold: Number(parsed.defaultThreshold || 0.80),
      highRiskThreshold: Number(parsed.highRiskThreshold || 0.75),
      domainThresholds: hasOverrides ? parsed.domainThresholds : {},
    };
  };

  let calibration = null;
  try {
    calibration = readFromDisk();
  } catch {
    // keep safe defaults
    calibration = null;
  }

  if (!calibration) {
    calibration = {
      defaultThreshold: 0.80,
      highRiskThreshold: 0.75,
      domainThresholds: {},
    };
  }

  domainCalibrationCache.set(modelDir, calibration);
  return calibration;
}
202
+
203
/**
 * Resolve the minimum confidence required to accept a domain prediction.
 *
 * Precedence: a per-domain override from `calibration.domainThresholds`, then
 * `highRiskThreshold` (for high-risk domains) or `defaultThreshold`. The
 * result is never below the built-in floor for the domain's risk class.
 *
 * @param {string} domain - Predicted domain.
 * @param {{defaultThreshold?: number, highRiskThreshold?: number, domainThresholds?: object}} [calibration]
 *   Calibration values; missing or falsy thresholds fall back to 0.80 / 0.75.
 * @returns {number} Confidence threshold for the domain.
 */
export function resolveTinyRouterDomainThreshold(domain, calibration = {}) {
  const isHighRisk = HIGH_RISK_DOMAINS.has(domain);
  const floor = isHighRisk ? MIN_HIGH_RISK_DOMAIN_THRESHOLD : MIN_DEFAULT_DOMAIN_THRESHOLD;

  const overrides = calibration.domainThresholds;
  if (domain && overrides && Object.prototype.hasOwnProperty.call(overrides, domain)) {
    const raw = overrides[domain];
    // Only real numbers or non-blank numeric strings count as overrides:
    // Number(null), Number(""), Number(true) and Number([]) are all finite and
    // would otherwise silently lower the threshold to the floor.
    const isCandidate = typeof raw === "number" || (typeof raw === "string" && raw.trim() !== "");
    const override = isCandidate ? Number(raw) : Number.NaN;
    if (Number.isFinite(override)) {
      return Math.max(override, floor);
    }
  }

  if (isHighRisk) return Math.max(Number(calibration.highRiskThreshold || 0.75), floor);
  return Math.max(Number(calibration.defaultThreshold || 0.80), floor);
}
211
+
212
/**
 * Accept or reject a router domain prediction based on its confidence.
 *
 * @param {{domain?: string, confidence?: number, error?: *}|null} result - Raw router reply.
 * @param {object} [calibration] - Calibration passed to resolveTinyRouterDomainThreshold.
 * @returns {string|null} The predicted domain when the reply is usable and its
 *   confidence meets the domain's threshold; otherwise null.
 */
export function acceptTinyRouterDomainPrediction(result, calibration = {}) {
  const usable = Boolean(result) && !result.error && Boolean(result.domain);
  if (!usable) return null;

  const threshold = resolveTinyRouterDomainThreshold(result.domain, calibration);
  if (result.confidence >= threshold) return result.domain;
  return null;
}
216
+
217
/**
 * Ask the router daemon to classify the query's domain.
 *
 * @param {string} query - Query text.
 * @param {string} [mode] - Research mode sent along with the query.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @param {Object<string, string|undefined>} [env] - Environment variables.
 * @returns {Promise<string|null>} Accepted domain, or null when the domain
 *   task is disabled or the prediction is rejected.
 */
export async function classifyDomainWithTinyRouter(query, mode = "fast", signal, env = process.env) {
  const config = resolveTinyRouterConfig(env);
  if (!config.tasks.domain) return null;

  const calibration = loadDomainCalibration(config.modelDir);
  const request = { task: "domain", query, mode };
  const acceptOrReject = (result) => acceptTinyRouterDomainPrediction(result, calibration);

  return requestTinyRouter(config, request, signal, acceptOrReject);
}
229
+
230
/**
 * Apply the hard follow-up rules that take precedence over any model output.
 *
 * @param {string} query - Query text.
 * @param {string} [mode] - Research mode.
 * @param {string} [conflict] - Conflict level: "severe", "minor" or anything else.
 * @param {{source_count?: number, has_authority?: boolean, has_recent?: boolean}} [sources]
 *   Summary of the sources gathered so far.
 * @returns {string|null} "need_conflict_resolution", "need_recency" or
 *   "need_more_sources" when a rule fires; null otherwise.
 */
export function classifyFollowupWithStrongRules(query, mode = "fast", conflict = "none", sources = {}) {
  const normalizedQuery = String(query || "").toLowerCase();
  const sourceCount = Number(sources.source_count || 0);
  const hasAuthority = Boolean(sources.has_authority);
  const hasRecent = Boolean(sources.has_recent);
  const asksForRecency = /\b(latest|current|today|release|changelog|new)\b/.test(normalizedQuery);

  if (conflict === "severe") return "need_conflict_resolution";

  if (conflict === "minor") {
    // A minor conflict is tolerated only in fast mode with authority and at least four sources.
    const tolerable = mode === "fast" && hasAuthority && sourceCount >= 4;
    if (!tolerable) return "need_conflict_resolution";
  }

  if (asksForRecency && !hasRecent) return "need_recency";

  const nothingFound = !hasAuthority && sourceCount === 0;
  return nothingFound ? "need_more_sources" : null;
}
243
+
244
/**
 * Combine the heuristic conflict flag with the router's structured decision.
 *
 * The router can always raise a conflict ("open_conflict", "needs_review").
 * It can clear a heuristic conflict only when `options.allowClear` is exactly
 * true and its decision is one of the resolved/no-conflict labels.
 *
 * @param {*} heuristicConflictDetected - Truthy when heuristics found a conflict.
 * @param {string|null|undefined} structuredDecision - Router decision, if any.
 * @param {{allowClear?: boolean}} [options] - Behavior switches.
 * @returns {boolean} Whether a conflict should be reported.
 */
export function applyConflictTinyRouterDecision(heuristicConflictDetected, structuredDecision, options = {}) {
  const mayClear = options.allowClear === true;
  const raisesConflict = ["open_conflict", "needs_review"].includes(structuredDecision);
  const clearsConflict = ["resolved_by_authority", "resolved_by_recency", "no_conflict"].includes(structuredDecision);

  if (raisesConflict) return true;
  if (!heuristicConflictDetected) return false;
  if (!mayClear) return true;
  return !clearsConflict;
}
254
+
255
/**
 * Let the router veto, but never grant, a "sufficient" verdict.
 *
 * @param {*} currentSufficient - Truthy when the sources are currently judged sufficient.
 * @param {string|null|undefined} structuredDecision - Router decision, if any.
 * @returns {boolean} True only when the current verdict is sufficient and the
 *   router decision is not one of the veto decisions.
 */
export function applySufficiencyTinyRouterDecision(currentSufficient, structuredDecision) {
  const vetoed = SUFFICIENCY_VETO_DECISIONS.has(structuredDecision);
  return Boolean(currentSufficient) && !vetoed;
}
260
+
261
/**
 * Decide the next follow-up action using rules only.
 *
 * The strong rules are consulted first; otherwise the decision depends on
 * whether the query looks academic and on the research mode.
 *
 * @param {string} query - Query text.
 * @param {string} [mode] - Research mode ("fast", "code", "deep", "academic", ...).
 * @param {string} [conflict] - Conflict level passed to the strong rules.
 * @param {{source_count?: number, has_authority?: boolean, has_recent?: boolean}} [sources]
 *   Summary of the sources gathered so far.
 * @returns {string|null} A follow-up action, "stop", or null when fast/code
 *   mode has no opinion.
 */
export function classifyFollowupHeuristically(query, mode = "fast", conflict = "none", sources = {}) {
  const strongRule = classifyFollowupWithStrongRules(query, mode, conflict, sources);
  if (strongRule) return strongRule;

  const normalizedQuery = String(query || "").toLowerCase();
  const sourceCount = Number(sources.source_count || 0);
  const hasAuthority = Boolean(sources.has_authority);
  const isAcademicQuery = /\b(paper|papers|arxiv|doi|publisher|survey|review|research)\b/.test(normalizedQuery);

  // NOTE(review): an academic-looking query returns this in every mode,
  // regardless of the sources gathered — confirm that is intended.
  if (isAcademicQuery) return "need_primary_source";

  if (mode === "academic") {
    if (!hasAuthority) return "need_authority";
    return sourceCount < 4 ? "need_more_sources" : "stop";
  }

  if (mode === "deep") {
    if (!hasAuthority) return "need_authority";
    return sourceCount < 3 ? "need_more_sources" : "stop";
  }

  if (mode === "fast" || mode === "code") {
    const enoughEvidence = (hasAuthority && sourceCount >= 1) || sourceCount >= 3;
    return enoughEvidence ? "stop" : null;
  }

  if (!hasAuthority) return "need_authority";
  return sourceCount === 0 ? "need_more_sources" : "stop";
}
294
+
295
/**
 * Decide the next follow-up action, preferring the router model when usable.
 *
 * Returns null when PI_RESEARCH_TINY_ROUTER_FOLLOWUP is off. Strong rules win
 * over everything. When the follow-up task is unavailable, or the router's
 * answer is missing, erroneous or below 0.75 confidence, the heuristic
 * classifier decides.
 *
 * @param {string} query - Query text.
 * @param {string} mode - Research mode.
 * @param {string} conflict - Conflict level.
 * @param {object} sources - Summary of the sources gathered so far.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @param {Object<string, string|undefined>} [env] - Environment variables.
 * @returns {Promise<string|null>} Follow-up action, "stop" or null.
 */
export async function classifyFollowupWithTinyRouter(query, mode, conflict, sources, signal, env = process.env) {
  if (!envFlag(env, "PI_RESEARCH_TINY_ROUTER_FOLLOWUP")) return null;

  const strongRule = classifyFollowupWithStrongRules(query, mode, conflict, sources);
  if (strongRule) return strongRule;

  const heuristicFallback = () => classifyFollowupHeuristically(query, mode, conflict, sources);

  const config = resolveTinyRouterConfig(env);
  if (!config.tasks.followup) return heuristicFallback();

  const pickAction = (result) => {
    const trusted = result && !result.error && result.action && result.confidence >= 0.75;
    return trusted ? result.action : heuristicFallback();
  };

  return requestTinyRouter(
    config,
    { task: "followup", query, mode, conflict, sources },
    signal,
    pickAction,
  );
}
313
+
314
/**
 * Ask the router daemon for a conflict decision based on structured features.
 *
 * @param {string} query - Query text.
 * @param {object[]} [pages] - Fetched pages used to build the features.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @param {Object<string, string|undefined>} [env] - Environment variables.
 * @returns {Promise<string|null>} The router's decision when its confidence is
 *   at least 0.60; null when the task is disabled or the reply is unusable.
 */
export async function classifyConflictWithTinyRouter(query, pages = [], signal, env = process.env) {
  const config = resolveTinyRouterConfig(env);
  if (!config.tasks.conflict) return null;

  const features = extractConflictStructuredFeaturesFromPages(query, pages);
  const pickDecision = (result) => {
    const trusted = result && !result.error && result.decision && result.confidence >= 0.60;
    return trusted ? result.decision : null;
  };

  return requestTinyRouter(config, { task: "conflict", features }, signal, pickDecision);
}
326
+
327
/**
 * Ask the router daemon for a sufficiency decision based on structured features.
 *
 * @param {string} query - Query text.
 * @param {object[]} [pages] - Fetched pages used to build the features.
 * @param {AbortSignal} [signal] - Optional cancellation signal.
 * @param {Object<string, string|undefined>} [env] - Environment variables.
 * @returns {Promise<string|null>} The router's decision when its confidence is
 *   at least 0.75; null when the task is disabled or the reply is unusable.
 */
export async function classifySufficiencyWithTinyRouter(query, pages = [], signal, env = process.env) {
  const config = resolveTinyRouterConfig(env);
  if (!config.tasks.sufficiency) return null;

  const features = extractSufficiencyStructuredFeaturesFromPages(query, pages);
  const pickDecision = (result) => {
    const trusted = result && !result.error && result.decision && result.confidence >= 0.75;
    return trusted ? result.decision : null;
  };

  return requestTinyRouter(config, { task: "sufficiency", features }, signal, pickDecision);
}