@gajae-code/coding-agent 0.7.1 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135) hide show
  1. package/CHANGELOG.md +57 -0
  2. package/dist/types/cli/mcp-cli.d.ts +25 -0
  3. package/dist/types/cli/notify-cli.d.ts +2 -0
  4. package/dist/types/cli.d.ts +6 -0
  5. package/dist/types/commands/mcp.d.ts +70 -0
  6. package/dist/types/config/keybindings.d.ts +2 -2
  7. package/dist/types/config/settings-schema.d.ts +39 -2
  8. package/dist/types/deep-interview/plaintext-gate-guard.d.ts +11 -0
  9. package/dist/types/extensibility/shared-events.d.ts +1 -0
  10. package/dist/types/gjc-runtime/ralplan-runtime.d.ts +1 -1
  11. package/dist/types/lsp/types.d.ts +2 -0
  12. package/dist/types/modes/components/custom-editor.d.ts +1 -1
  13. package/dist/types/modes/components/model-selector.d.ts +2 -0
  14. package/dist/types/modes/components/status-line/git-utils.d.ts +6 -0
  15. package/dist/types/modes/theme/defaults/index.d.ts +99 -0
  16. package/dist/types/notifications/attachment-registry.d.ts +17 -0
  17. package/dist/types/notifications/chat-adapters.d.ts +9 -0
  18. package/dist/types/notifications/config.d.ts +9 -1
  19. package/dist/types/notifications/engine.d.ts +59 -0
  20. package/dist/types/notifications/managed-daemon.d.ts +48 -0
  21. package/dist/types/notifications/operator-runtime.d.ts +52 -0
  22. package/dist/types/notifications/telegram-daemon.d.ts +73 -16
  23. package/dist/types/notifications/threaded-inbound.d.ts +19 -0
  24. package/dist/types/notifications/threaded-render.d.ts +6 -1
  25. package/dist/types/notifications/topic-registry.d.ts +2 -0
  26. package/dist/types/session/agent-session.d.ts +2 -0
  27. package/dist/types/tools/composer-bash-policy.d.ts +14 -0
  28. package/dist/types/tools/fetch.d.ts +23 -0
  29. package/dist/types/tools/index.d.ts +1 -0
  30. package/dist/types/tools/telegram-send.d.ts +32 -0
  31. package/dist/types/web/insane/bridge.d.ts +103 -0
  32. package/dist/types/web/insane/url-guard.d.ts +25 -0
  33. package/dist/types/web/scrapers/types.d.ts +5 -0
  34. package/dist/types/web/scrapers/utils.d.ts +7 -1
  35. package/dist/types/web/search/provider.d.ts +18 -1
  36. package/dist/types/web/search/providers/insane.d.ts +53 -0
  37. package/dist/types/web/search/providers/text-citations.d.ts +23 -0
  38. package/dist/types/web/search/types.d.ts +12 -4
  39. package/package.json +10 -8
  40. package/scripts/verify-insane-vendor.ts +132 -0
  41. package/src/cli/args.ts +1 -1
  42. package/src/cli/fast-help.ts +1 -1
  43. package/src/cli/mcp-cli.ts +272 -0
  44. package/src/cli/notify-cli.ts +152 -5
  45. package/src/cli.ts +6 -2
  46. package/src/commands/mcp.ts +117 -0
  47. package/src/commands/team.ts +1 -1
  48. package/src/config/keybindings.ts +2 -2
  49. package/src/config/settings-schema.ts +30 -1
  50. package/src/deep-interview/plaintext-gate-guard.ts +94 -0
  51. package/src/defaults/gjc/skills/deep-interview/SKILL.md +4 -3
  52. package/src/defaults/gjc/skills/ralplan/SKILL.md +11 -4
  53. package/src/defaults/gjc/skills/team/SKILL.md +3 -2
  54. package/src/extensibility/extensions/runner.ts +1 -0
  55. package/src/extensibility/shared-events.ts +1 -0
  56. package/src/gjc-runtime/launch-tmux.ts +17 -3
  57. package/src/gjc-runtime/ledger-event-renderer.ts +1 -0
  58. package/src/gjc-runtime/ralplan-runtime.ts +2 -2
  59. package/src/gjc-runtime/tmux-common.ts +3 -1
  60. package/src/gjc-runtime/ultragoal-guard.ts +25 -8
  61. package/src/gjc-runtime/workflow-manifest.generated.json +29 -0
  62. package/src/gjc-runtime/workflow-manifest.ts +7 -2
  63. package/src/hooks/skill-state.ts +57 -0
  64. package/src/internal-urls/docs-index.generated.ts +14 -11
  65. package/src/lsp/config.ts +16 -3
  66. package/src/lsp/defaults.json +7 -0
  67. package/src/lsp/types.ts +2 -0
  68. package/src/modes/bridge/bridge-mode.ts +11 -0
  69. package/src/modes/components/custom-editor.ts +2 -0
  70. package/src/modes/components/footer.ts +2 -3
  71. package/src/modes/components/model-selector.ts +12 -0
  72. package/src/modes/components/status-line/git-utils.ts +25 -0
  73. package/src/modes/components/status-line.ts +10 -11
  74. package/src/modes/components/welcome.ts +2 -3
  75. package/src/modes/controllers/event-controller.ts +15 -0
  76. package/src/modes/controllers/selector-controller.ts +3 -0
  77. package/src/modes/interactive-mode.ts +48 -3
  78. package/src/modes/shared/agent-wire/scopes.ts +1 -1
  79. package/src/modes/theme/defaults/gruvbox-dark.json +99 -0
  80. package/src/modes/theme/defaults/index.ts +2 -0
  81. package/src/modes/utils/context-usage.ts +2 -2
  82. package/src/notifications/attachment-registry.ts +23 -0
  83. package/src/notifications/chat-adapters.ts +147 -0
  84. package/src/notifications/config.ts +23 -2
  85. package/src/notifications/engine.ts +100 -0
  86. package/src/notifications/index.ts +180 -38
  87. package/src/notifications/managed-daemon.ts +163 -0
  88. package/src/notifications/operator-runtime.ts +171 -0
  89. package/src/notifications/telegram-daemon.ts +553 -236
  90. package/src/notifications/threaded-inbound.ts +60 -4
  91. package/src/notifications/threaded-render.ts +20 -2
  92. package/src/notifications/topic-registry.ts +5 -0
  93. package/src/session/agent-session.ts +82 -51
  94. package/src/slash-commands/helpers/parse.ts +2 -1
  95. package/src/tools/bash.ts +9 -0
  96. package/src/tools/composer-bash-policy.ts +96 -0
  97. package/src/tools/fetch.ts +94 -1
  98. package/src/tools/index.ts +3 -0
  99. package/src/tools/telegram-send.ts +137 -0
  100. package/src/web/insane/bridge.ts +350 -0
  101. package/src/web/insane/url-guard.ts +159 -0
  102. package/src/web/scrapers/types.ts +143 -45
  103. package/src/web/scrapers/utils.ts +70 -19
  104. package/src/web/search/provider.ts +77 -18
  105. package/src/web/search/providers/anthropic.ts +70 -3
  106. package/src/web/search/providers/codex.ts +1 -119
  107. package/src/web/search/providers/gemini.ts +99 -0
  108. package/src/web/search/providers/insane.ts +551 -0
  109. package/src/web/search/providers/openai-compatible.ts +66 -32
  110. package/src/web/search/providers/text-citations.ts +111 -0
  111. package/src/web/search/types.ts +13 -2
  112. package/vendor/insane-search/LICENSE +21 -0
  113. package/vendor/insane-search/MANIFEST.json +24 -0
  114. package/vendor/insane-search/engine/__init__.py +23 -0
  115. package/vendor/insane-search/engine/__main__.py +128 -0
  116. package/vendor/insane-search/engine/bias_check.py +183 -0
  117. package/vendor/insane-search/engine/executor.py +254 -0
  118. package/vendor/insane-search/engine/fetch_chain.py +725 -0
  119. package/vendor/insane-search/engine/learning.py +175 -0
  120. package/vendor/insane-search/engine/phase0.py +214 -0
  121. package/vendor/insane-search/engine/safety.py +91 -0
  122. package/vendor/insane-search/engine/templates/package.json +11 -0
  123. package/vendor/insane-search/engine/templates/playwright_mobile_chrome.js +188 -0
  124. package/vendor/insane-search/engine/templates/playwright_real_chrome.js +243 -0
  125. package/vendor/insane-search/engine/tests/test_hardening.py +57 -0
  126. package/vendor/insane-search/engine/tests/test_smoke.py +152 -0
  127. package/vendor/insane-search/engine/tests/test_u1.py +200 -0
  128. package/vendor/insane-search/engine/tests/test_u4.py +131 -0
  129. package/vendor/insane-search/engine/tests/test_u5.py +163 -0
  130. package/vendor/insane-search/engine/tests/test_u7.py +124 -0
  131. package/vendor/insane-search/engine/transport.py +211 -0
  132. package/vendor/insane-search/engine/url_transforms.py +98 -0
  133. package/vendor/insane-search/engine/validators.py +331 -0
  134. package/vendor/insane-search/engine/waf_detector.py +214 -0
  135. package/vendor/insane-search/engine/waf_profiles.yaml +162 -0
@@ -16,6 +16,8 @@ import { renderStatusLine } from "../tui";
16
16
  import { CachedOutputBlock } from "../tui/output-block";
17
17
  import { formatDimensionNote, resizeImage } from "../utils/image-resize";
18
18
  import { ensureTool } from "../utils/tools-manager";
19
+ import { INSANE_NOTES, tryInsaneFetch } from "../web/insane/bridge";
20
+ import { validatePublicHttpUrl, validatePublicHttpUrlForInsane } from "../web/insane/url-guard";
19
21
  import { extractWithParallel, findParallelApiKey, getParallelExtractContent } from "../web/parallel";
20
22
  import { specialHandlers } from "../web/scrapers";
21
23
  import type { RenderResult } from "../web/scrapers/types";
@@ -705,6 +707,55 @@ async function handleSpecialUrls(
705
707
  // Main Render Function
706
708
  // =============================================================================
707
709
 
/**
 * Opt-in insane-search fallback for public URL reads that were blocked or came back degraded.
 *
 * Resolves to a finished `method: "insane"` result when the engine produced content, and to
 * `null` otherwise so the caller can carry on with its usual degraded result. Nothing is
 * attempted (no note, no URL guard, no engine call) when raw mode is on or the
 * `web.insaneFallback` setting is not exactly `true`. The public-URL guard is awaited before
 * the engine bridge is invoked.
 *
 * On guard rejection or engine failure the explanatory notes are appended to `args.notes`
 * in place; on success `args.notes` is left untouched and a merged copy is returned.
 *
 * `args.timeout` is in seconds (it is multiplied by 1000 for the bridge).
 */
export async function tryInsaneFallback(args: {
	url: string;
	finalUrl: string;
	timeout: number;
	raw: boolean;
	settings: Settings;
	signal: AbortSignal | undefined;
	fetchedAt: string;
	notes: string[];
}): Promise<FetchRenderResult | null> {
	if (args.raw) return null;
	const optedIn = args.settings.get("web.insaneFallback") === true;
	if (!optedIn) return null;

	const { url, notes } = args;
	// Prefer the post-redirect URL when the caller has one.
	const target = args.finalUrl || url;
	const guard = await validatePublicHttpUrlForInsane(target);
	if (!guard.ok) {
		notes.push(INSANE_NOTES.guardBlocked(guard.reason));
		return null;
	}

	const engineResult = await tryInsaneFetch(guard.url.toString(), {
		timeoutMs: args.timeout * 1000,
		signal: args.signal,
	});
	if (!engineResult.ok) {
		// Surface why the fallback did not help, then let the caller degrade normally.
		for (const note of engineResult.notes) notes.push(note);
		return null;
	}

	const finalized = finalizeOutput(engineResult.content);
	return {
		url,
		finalUrl: target,
		contentType: "text/markdown",
		method: "insane",
		content: finalized.content,
		fetchedAt: args.fetchedAt,
		truncated: finalized.truncated,
		notes: [...notes, ...engineResult.notes],
	};
}
758
+
708
759
  /**
709
760
  * Main render function implementing the full pipeline
710
761
  */
@@ -738,6 +789,21 @@ async function renderUrl(
738
789
 
739
790
  // Step 0: Normalize URL (ensure scheme for special handlers)
740
791
  url = normalizeUrl(url);
792
+ const publicUrl = await validatePublicHttpUrl(url);
793
+ if (!publicUrl.ok) {
794
+ notes.push(`Blocked URL fetch: target URL is not public HTTP(S): ${publicUrl.reason}`);
795
+ return {
796
+ url,
797
+ finalUrl: url,
798
+ contentType: "unknown",
799
+ method: "failed",
800
+ content: "",
801
+ fetchedAt,
802
+ truncated: false,
803
+ notes,
804
+ };
805
+ }
806
+ url = publicUrl.url.toString();
741
807
 
742
808
  // Step 1: Try special handlers for known sites (unless raw mode)
743
809
  if (!raw) {
@@ -751,6 +817,20 @@ async function renderUrl(
751
817
  throw new ToolAbortError();
752
818
  }
753
819
  if (!response.ok) {
820
+ const failureNote =
821
+ response.error ?? (response.status ? `Failed to fetch URL (HTTP ${response.status})` : "Failed to fetch URL");
822
+ notes.push(failureNote);
823
+ const insane = await tryInsaneFallback({
824
+ url,
825
+ finalUrl: response.finalUrl || url,
826
+ timeout,
827
+ raw,
828
+ settings,
829
+ signal,
830
+ fetchedAt,
831
+ notes,
832
+ });
833
+ if (insane) return insane;
754
834
  return {
755
835
  url,
756
836
  finalUrl: response.finalUrl || url,
@@ -759,7 +839,7 @@ async function renderUrl(
759
839
  content: "",
760
840
  fetchedAt,
761
841
  truncated: false,
762
- notes: [response.status ? `Failed to fetch URL (HTTP ${response.status})` : "Failed to fetch URL"],
842
+ notes,
763
843
  };
764
844
  }
765
845
 
@@ -1062,6 +1142,8 @@ async function renderUrl(
1062
1142
  const htmlResult = await renderHtmlToText(finalUrl, rawContent, timeout, settings, signal, storage);
1063
1143
  if (!htmlResult.ok) {
1064
1144
  notes.push("html rendering failed (lynx/html2text unavailable)");
1145
+ const insane = await tryInsaneFallback({ url, finalUrl, timeout, raw, settings, signal, fetchedAt, notes });
1146
+ if (insane) return insane;
1065
1147
  const output = finalizeOutput(rawContent);
1066
1148
  return {
1067
1149
  url,
@@ -1122,6 +1204,17 @@ async function renderUrl(
1122
1204
  };
1123
1205
  }
1124
1206
 
1207
+ const insaneLowQuality = await tryInsaneFallback({
1208
+ url,
1209
+ finalUrl,
1210
+ timeout,
1211
+ raw,
1212
+ settings,
1213
+ signal,
1214
+ fetchedAt,
1215
+ notes,
1216
+ });
1217
+ if (insaneLowQuality) return insaneLowQuality;
1125
1218
  notes.push("Page appears to require JavaScript or is mostly navigation");
1126
1219
  }
1127
1220
 
@@ -58,6 +58,7 @@ import { SearchToolBm25Tool } from "./search-tool-bm25";
58
58
  import { SkillTool } from "./skill";
59
59
  import { loadSshTool } from "./ssh";
60
60
  import { SubagentTool } from "./subagent";
61
+ import { TelegramSendTool } from "./telegram-send";
61
62
  import { type TodoPhase, TodoWriteTool } from "./todo-write";
62
63
  import { WriteTool } from "./write";
63
64
  import { YieldTool } from "./yield";
@@ -96,6 +97,7 @@ export * from "./search-tool-bm25";
96
97
  export * from "./skill";
97
98
  export * from "./ssh";
98
99
  export * from "./subagent";
100
+ export * from "./telegram-send";
99
101
  export * from "./todo-write";
100
102
  export * from "./vim";
101
103
  export * from "./write";
@@ -402,6 +404,7 @@ export const BUILTIN_TOOLS: Record<string, ToolFactory> = {
402
404
  todo_write: s => new TodoWriteTool(s),
403
405
  web_search: s => new WebSearchTool(s),
404
406
  search_tool_bm25: SearchToolBm25Tool.createIf,
407
+ telegram_send: TelegramSendTool.createIf,
405
408
  write: s => new WriteTool(s),
406
409
  skill: SkillTool.createIf,
407
410
  goal: s => new GoalTool(s),
@@ -0,0 +1,137 @@
1
+ import * as fs from "node:fs";
2
+ import * as path from "node:path";
3
+ import type { AgentTool, AgentToolContext, AgentToolResult, AgentToolUpdateCallback } from "@gajae-code/agent-core";
4
+ import { z } from "zod/v4";
5
+ import { getTelegramFileSink } from "../notifications/attachment-registry";
6
+ import { getNotificationConfig, isGloballyConfigured } from "../notifications/config";
7
+ import type { ToolSession } from "./index";
8
+
/** Argument schema for the `telegram_send` tool. */
const telegramSendSchema = z.object({
	path: z
		.string()
		.describe("file path (absolute or relative to cwd) to send to Telegram; must resolve inside the workspace"),
	caption: z.string().optional().describe("optional caption"),
});

/** Validated tool arguments, inferred from {@link telegramSendSchema}. */
type TelegramSendParams = z.infer<typeof telegramSendSchema>;

/** Structured details attached to every `telegram_send` result. */
interface TelegramSendDetails {
	/** The requested path on early failures; the resolved real path once containment has passed. */
	path: string;
	/** Caption exactly as supplied by the caller, if any. */
	caption?: string;
	/** Whether the file was handed off successfully. */
	ok: boolean;
	/** Failure reason; set only when `ok` is false. */
	error?: string;
}
24
+
/**
 * `telegram_send` tool: hands a file from the session workspace to the Telegram file sink
 * registered for the current session.
 *
 * Every failure path returns an `isError` result with a `telegram_send`-prefixed message
 * instead of throwing.
 */
export class TelegramSendTool implements AgentTool<typeof telegramSendSchema, TelegramSendDetails> {
	readonly name = "telegram_send";
	readonly label = "TelegramSend";
	readonly summary = "Send a workspace file to Telegram";
	readonly loadMode = "discoverable";
	readonly description =
		"Send a file from the current workspace to the connected Telegram chat as a document. The path must resolve " +
		"(after following symlinks) to a regular file inside the project root; paths outside the workspace are rejected.";
	readonly parameters = telegramSendSchema;
	readonly strict = true;

	constructor(private readonly session: ToolSession) {}

	/** Factory: yields the tool only when notifications are globally configured, otherwise `null`. */
	static createIf(session: ToolSession): TelegramSendTool | null {
		const config = getNotificationConfig(session.settings);
		if (!isGloballyConfigured(config)) return null;
		return new TelegramSendTool(session);
	}

	/**
	 * Egress safety boundary. Resolves `requested` against the real path of the session cwd
	 * and accepts it only if the fully resolved target (symlinks followed) sits strictly
	 * inside that root and is a regular file. Absolute paths outside the root, `..`
	 * traversal, symlinks that leave the root, and the root itself are all rejected.
	 *
	 * @returns the resolved real path on success, or a human-readable error message.
	 */
	private async resolveContainedFile(
		requested: string,
	): Promise<{ ok: true; path: string } | { ok: false; error: string }> {
		let workspaceRoot: string;
		try {
			workspaceRoot = await fs.promises.realpath(this.session.cwd);
		} catch {
			return { ok: false, error: "workspace root is unavailable" };
		}

		const candidate = path.isAbsolute(requested) ? requested : path.resolve(workspaceRoot, requested);
		let resolved: string;
		try {
			resolved = await fs.promises.realpath(candidate);
		} catch {
			return { ok: false, error: `file not found: ${requested}` };
		}

		// A contained path has a non-empty relative form that neither climbs out via `..`
		// nor comes back absolute.
		const relative = path.relative(workspaceRoot, resolved);
		const contained =
			relative !== "" &&
			relative !== ".." &&
			!relative.startsWith(`..${path.sep}`) &&
			!path.isAbsolute(relative);
		if (!contained) {
			return { ok: false, error: "path escapes the workspace root; only files inside the project can be sent" };
		}

		let info: fs.Stats;
		try {
			info = await fs.promises.stat(resolved);
		} catch {
			return { ok: false, error: `file not found: ${requested}` };
		}
		if (info.isFile()) {
			return { ok: true, path: resolved };
		}
		return { ok: false, error: "not a regular file" };
	}

	/** Build the uniform error-shaped tool result. */
	private failure(
		shownPath: string,
		caption: string | undefined,
		error: string,
		text: string,
	): AgentToolResult<TelegramSendDetails> {
		return {
			content: [{ type: "text", text }],
			details: { path: shownPath, caption, ok: false, error },
			isError: true,
		};
	}

	/**
	 * Send `params.path` (with optional caption) to Telegram.
	 *
	 * Checks run in this order: an active session id exists, the path is contained in the
	 * workspace, a file sink is registered for the session. Only then is the sink invoked.
	 * The abort signal, update callback, and context are accepted but unused.
	 */
	async execute(
		_toolCallId: string,
		params: TelegramSendParams,
		_signal?: AbortSignal,
		_onUpdate?: AgentToolUpdateCallback<TelegramSendDetails>,
		_context?: AgentToolContext,
	): Promise<AgentToolResult<TelegramSendDetails>> {
		const { caption } = params;

		const sessionId = this.session.getSessionId?.();
		if (!sessionId) {
			return this.failure(params.path, caption, "no active session id", "telegram_send: no active session id");
		}

		const contained = await this.resolveContainedFile(params.path);
		if (!contained.ok) {
			return this.failure(params.path, caption, contained.error, `telegram_send: ${contained.error}`);
		}
		const realFile = contained.path;

		const sink = getTelegramFileSink(sessionId);
		if (!sink) {
			return this.failure(
				realFile,
				caption,
				"Telegram notifications are not connected",
				"telegram_send: Telegram notifications are not connected for this session",
			);
		}

		const outcome = await sink({ path: realFile, caption });
		if (!outcome.ok) {
			return this.failure(realFile, caption, outcome.error, `telegram_send failed: ${outcome.error}`);
		}
		return {
			content: [{ type: "text", text: `Sent ${path.basename(realFile)} to Telegram.` }],
			details: { path: realFile, caption, ok: true },
		};
	}
}
@@ -0,0 +1,350 @@
1
+ /**
2
+ * Bridge from TypeScript to the vendored insane-search Python engine.
3
+ *
4
+ * Invokes `python3 -m engine "<url>" --json` per fallback attempt (cwd + PYTHONPATH
5
+ * pointed at the vendored engine), validates the JSON envelope, and maps it onto a
6
+ * discriminated result. Hardened: clamped timeout, AbortSignal propagation that
7
+ * kills+reaps the child, bounded stdout/stderr capture, and a per-process
8
+ * concurrency cap so blocked reads cannot fork-storm.
9
+ *
10
+ * Fail-closed: missing dependencies / bad output / auth-required never throw past
11
+ * the caller and never auto-install anything; they return ok:false with a stable,
12
+ * bounded note so `read` can continue with its normal degraded result.
13
+ */
14
+ import { type ChildProcess, spawn as nodeSpawn } from "node:child_process";
15
+ import * as path from "node:path";
16
+ import { fileURLToPath } from "node:url";
17
+ import { $which } from "@gajae-code/utils";
18
+
/** Directory that contains this module, derived from `import.meta.url`. */
const HERE = path.dirname(fileURLToPath(import.meta.url));
/**
 * Root of the vendored engine: three directories above this module, then
 * `vendor/insane-search` (i.e. packages/coding-agent/vendor/insane-search).
 */
export const INSANE_VENDOR_DIR = path.resolve(HERE, "..", "..", "..", "vendor", "insane-search");
/** Working directory used when probing for the node/playwright dependencies. */
const TEMPLATES_DIR = path.join(INSANE_VENDOR_DIR, "engine", "templates");
23
+
/** Cap on captured engine stdout (8 MiB); enforced against the decoded string length. */
const MAX_STDOUT_BYTES = 8 * 1024 ** 2;
/** Cap on captured engine stderr (64 KiB); enforced against the decoded string length. */
const MAX_STDERR_BYTES = 64 * 1024;
/** Engine timeout used when the caller supplies none or a non-finite value. */
const DEFAULT_TIMEOUT_MS = 25_000;
/** Lower clamp for the engine timeout. */
const MIN_TIMEOUT_MS = 1_000;
/** Upper clamp for the engine timeout. */
const MAX_TIMEOUT_MS = 120_000;
/** Default number of engine attempts allowed in flight at once in this process. */
const DEFAULT_CONCURRENCY = 2;
/** Delay between SIGTERM and the follow-up SIGKILL when stopping a child. */
const KILL_GRACE_MS = 2_000;
31
+
/**
 * Catalogue of user-facing fallback notes. The wording is stable on purpose: tests match
 * on these strings rather than on raw engine stderr.
 */
export const INSANE_NOTES = {
	/** The URL guard refused the target. */
	guardBlocked: (reason: string) => `insane fallback blocked: target URL is not public HTTP(S): ${reason}`,
	vendorMissing: "insane fallback unavailable: vendor engine missing at packages/coding-agent/vendor/insane-search",
	noPython:
		"insane fallback unavailable: python3 not found; install python3 and curl_cffi, then retry with web.insaneFallback=true",
	noCurlCffi: "insane fallback unavailable: python3 cannot import curl_cffi; install curl_cffi for Phase 0-2",
	noBrowser:
		"insane fallback unavailable: node/playwright/stealth dependencies missing for Phase 3; install dependencies under packages/coding-agent/vendor/insane-search/engine/templates",
	/** The engine subprocess exceeded its (clamped) timeout. */
	timeout: (seconds: number) => `insane fallback timed out after ${seconds}s; normal read fallback preserved`,
	invalidJson: "insane fallback failed: engine returned invalid JSON",
	authRequired: "insane fallback stopped: authentication required",
	/** The engine finished without success and reported this verdict. */
	verdict: (verdict: string) => `insane fallback failed: engine returned verdict=${verdict}`,
	/** Routes the engine listed as not attempted (already joined by the caller). */
	untried: (routes: string) => `insane fallback routes not tried: ${routes}`,
	mustBrowserMcp: "insane fallback requires browser MCP/manual phase: must_invoke_playwright_mcp=true",
	concurrency: "insane fallback skipped: max concurrent engine attempts reached",
	emptyContent: "insane fallback failed: engine reported ok but returned no content",
} as const;
48
+
/**
 * Shape of the JSON envelope printed by `python3 -m engine --json`. Every field is
 * optional because the envelope comes from an external process.
 */
export interface InsaneFetchResultRaw {
	/** Engine-reported success flag. */
	ok?: boolean;
	/** Engine verdict string (for example `auth_required`). */
	verdict?: string;
	/** Extracted page content. */
	content?: string;
	/** Name of the profile the engine used. */
	profile_used?: string;
	/** Opaque engine trace; not interpreted by the bridge. */
	trace?: unknown;
	/** Routes the engine reports it did not attempt. */
	untried_routes?: string[];
	/** Set when the engine asks for a browser MCP / manual phase. */
	must_invoke_playwright_mcp?: boolean;
}

/** Successful bridge outcome. */
export interface InsaneSuccess {
	ok: true;
	/** Non-empty content returned by the engine. */
	content: string;
	profileUsed?: string;
	/** Informational notes gathered while mapping the envelope. */
	notes: string[];
}

/** Failed bridge outcome. */
export interface InsaneFailure {
	ok: false;
	/** Short machine-readable failure tag (for example "timeout", "no-python"). */
	reason: string;
	verdict?: string;
	/** Notes explaining the failure. */
	notes: string[];
}

/** Discriminated on `ok`. */
export type InsaneBridgeResult = InsaneSuccess | InsaneFailure;

/** Everything an engine runner needs for one attempt. */
export interface EngineInvocation {
	url: string;
	timeoutMs: number;
	signal?: AbortSignal;
}

/** Raw outcome of one engine subprocess run, before JSON parsing. */
export interface EngineRawOutput {
	/** Exit code, or `null` when none is available. */
	code: number | null;
	stdout: string;
	stderr: string;
	/** True when the run was stopped by the timeout timer. */
	timedOut: boolean;
	/** True when the run was stopped through the abort signal. */
	aborted: boolean;
}

/** Seam: executes the engine for one invocation. The default implementation spawns python3. */
export type EngineRunner = (inv: EngineInvocation) => Promise<EngineRawOutput>;

/** Result of probing the runtime pieces the engine depends on. */
export interface InsaneDependencyStatus {
	vendorPresent: boolean;
	python: boolean;
	curlCffi: boolean;
	browser: boolean;
}

/** Seam: reports dependency availability. The default probes the real environment and caches. */
export type DependencyProber = () => Promise<InsaneDependencyStatus>;
102
+
103
+ // ---------------------------------------------------------------------------
104
+ // Subprocess runner
105
+ // ---------------------------------------------------------------------------
106
+
/** Signature of `child_process.spawn`, so a substitute spawner can be injected. */
type SpawnImpl = typeof nodeSpawn;
108
+
/**
 * Normalize a caller-supplied timeout: missing or non-finite values fall back to
 * DEFAULT_TIMEOUT_MS; anything else is floored to an integer and clamped into
 * [MIN_TIMEOUT_MS, MAX_TIMEOUT_MS].
 */
function clampTimeoutMs(timeoutMs: number | undefined): number {
	const requested = timeoutMs ?? DEFAULT_TIMEOUT_MS;
	if (!Number.isFinite(requested)) return DEFAULT_TIMEOUT_MS;

	const whole = Math.floor(requested);
	if (whole < MIN_TIMEOUT_MS) return MIN_TIMEOUT_MS;
	if (whole > MAX_TIMEOUT_MS) return MAX_TIMEOUT_MS;
	return whole;
}
114
+
/**
 * Append `chunk` to `buffer` without letting the result exceed `cap` characters
 * (string length, i.e. UTF-16 code units). Once the buffer is full it is returned as is.
 */
function appendCapped(buffer: string, chunk: string, cap: number): string {
	const room = cap - buffer.length;
	if (buffer.length >= cap) return buffer;
	const accepted = chunk.length > room ? chunk.slice(0, room) : chunk;
	return buffer + accepted;
}
120
+
/**
 * Stop a child process: send SIGTERM right away, then SIGKILL after KILL_GRACE_MS unless
 * the child has emitted `exit` in the meantime. Errors from `kill` are ignored (the process
 * may already be gone). Only the direct child is signalled via `child.kill`.
 */
function killChild(child: ChildProcess): void {
	const signalQuietly = (signal: NodeJS.Signals): void => {
		try {
			child.kill(signal);
		} catch {
			// already gone
		}
	};

	signalQuietly("SIGTERM");
	const escalation = setTimeout(() => signalQuietly("SIGKILL"), KILL_GRACE_MS);
	// Do not keep the event loop alive just for the escalation timer.
	escalation.unref?.();
	child.once("exit", () => clearTimeout(escalation));
}
138
+
/**
 * Real engine runner: `python3 -m engine "<url>" --json`, with cwd and PYTHONPATH pointed
 * at the vendored engine directory.
 *
 * The returned promise always resolves (never rejects):
 * - stdout/stderr are captured up to MAX_STDOUT_BYTES / MAX_STDERR_BYTES;
 * - when `inv.timeoutMs` elapses the child is killed and `timedOut` is set;
 * - when `inv.signal` aborts (or is already aborted) the child is killed and `aborted` is set;
 * - a spawn failure, whether reported through the `error` event or thrown synchronously
 *   by the spawner, resolves with `code: null`.
 *
 * Output is decoded with streaming UTF-8 decoders, so a multi-byte character split across
 * two `data` chunks is reassembled instead of being turned into replacement characters.
 *
 * @param inv      URL, timeout (already clamped by the caller) and optional abort signal.
 * @param options  `spawnImpl` lets tests substitute the process spawner.
 */
export function runEngineSubprocess(
	inv: EngineInvocation,
	options: { spawnImpl?: SpawnImpl } = {},
): Promise<EngineRawOutput> {
	const spawnImpl = options.spawnImpl ?? nodeSpawn;
	return new Promise<EngineRawOutput>(resolve => {
		let stdout = "";
		let stderr = "";
		let settled = false;
		let timedOut = false;
		let aborted = false;

		// One decoder per stream; `ignoreBOM: true` keeps a leading BOM in the text, matching
		// what `Buffer#toString("utf8")` produced before.
		const stdoutDecoder = new TextDecoder("utf-8", { ignoreBOM: true });
		const stderrDecoder = new TextDecoder("utf-8", { ignoreBOM: true });
		const decodeChunk = (decoder: TextDecoder, chunk: Buffer | string): string =>
			typeof chunk === "string" ? chunk : decoder.decode(chunk, { stream: true });

		let child: ChildProcess;
		try {
			child = spawnImpl("python3", ["-m", "engine", inv.url, "--json"], {
				cwd: INSANE_VENDOR_DIR,
				env: { ...process.env, PYTHONPATH: INSANE_VENDOR_DIR },
				stdio: ["ignore", "pipe", "pipe"],
			});
		} catch {
			// spawn() validates its arguments synchronously; treat that like any other launch failure.
			resolve({ code: null, stdout, stderr, timedOut, aborted });
			return;
		}

		const finish = (code: number | null): void => {
			if (settled) return;
			settled = true;
			clearTimeout(timer);
			inv.signal?.removeEventListener("abort", onAbort);
			// Flush bytes of an incomplete trailing sequence still held by the decoders.
			stdout = appendCapped(stdout, stdoutDecoder.decode(), MAX_STDOUT_BYTES);
			stderr = appendCapped(stderr, stderrDecoder.decode(), MAX_STDERR_BYTES);
			resolve({ code, stdout, stderr, timedOut, aborted });
		};

		const timer = setTimeout(() => {
			timedOut = true;
			killChild(child);
		}, inv.timeoutMs);
		timer.unref?.();

		const onAbort = (): void => {
			aborted = true;
			killChild(child);
		};
		if (inv.signal) {
			if (inv.signal.aborted) onAbort();
			else inv.signal.addEventListener("abort", onAbort, { once: true });
		}

		child.stdout?.on("data", (chunk: Buffer | string) => {
			stdout = appendCapped(stdout, decodeChunk(stdoutDecoder, chunk), MAX_STDOUT_BYTES);
		});
		child.stderr?.on("data", (chunk: Buffer | string) => {
			stderr = appendCapped(stderr, decodeChunk(stderrDecoder, chunk), MAX_STDERR_BYTES);
		});
		child.on("error", () => finish(null));
		child.on("close", code => finish(code));
	});
}
191
+
192
+ // ---------------------------------------------------------------------------
193
+ // Dependency probes (cached)
194
+ // ---------------------------------------------------------------------------
195
+
/** Memoized dependency probe; `null` until the first probe is started. */
let probeCache: Promise<InsaneDependencyStatus> | null = null;

/** Test hook: forget the memoized probe so the next call probes again. */
export function resetInsaneProbeCacheForTest(): void {
	probeCache = null;
}
202
+
/**
 * Run `cmd args...` (optionally in `cwd`) with all stdio ignored and report whether it
 * exited with code 0. Resolves `false` on spawn error, on a non-zero or missing exit code,
 * or when the command is still running after 10 seconds (it is then sent SIGKILL).
 */
function runProbeCommand(cmd: string, args: string[], cwd?: string): Promise<boolean> {
	return new Promise<boolean>(resolve => {
		let reported = false;
		const child = nodeSpawn(cmd, args, { cwd, stdio: "ignore" });

		const watchdog = setTimeout(() => {
			try {
				child.kill("SIGKILL");
			} catch {
				// gone
			}
			report(false);
		}, 10_000);
		watchdog.unref?.();

		// First outcome wins; later events are ignored.
		function report(ok: boolean): void {
			if (reported) return;
			reported = true;
			clearTimeout(watchdog);
			resolve(ok);
		}

		child.on("error", () => report(false));
		child.on("close", code => report(code === 0));
	});
}
226
+
/**
 * Inspect the real environment for everything the engine needs:
 * - the vendored engine entry point (`engine/__main__.py`) exists;
 * - `python3` is found by `$which`;
 * - python3 can `import curl_cffi` (checked only when python3 was found);
 * - node can resolve playwright, playwright-extra and the stealth plugin from the
 *   templates directory (checked only when `node` was found).
 *
 * When the vendored engine is missing, no further probing is done and every flag is false.
 */
async function probeRealDependencies(): Promise<InsaneDependencyStatus> {
	const fsModule = await import("node:fs");
	const entryPoint = path.join(INSANE_VENDOR_DIR, "engine", "__main__.py");
	if (!fsModule.existsSync(entryPoint)) {
		return { vendorPresent: false, python: false, curlCffi: false, browser: false };
	}

	const python = Boolean($which("python3"));
	let curlCffi = false;
	if (python) {
		curlCffi = await runProbeCommand("python3", ["-c", "import curl_cffi"]);
	}

	const hasNode = Boolean($which("node"));
	let browser = false;
	if (hasNode) {
		const resolveScript =
			"require.resolve('playwright');require.resolve('playwright-extra');require.resolve('puppeteer-extra-plugin-stealth')";
		browser = await runProbeCommand("node", ["-e", resolveScript], TEMPLATES_DIR);
	}

	return { vendorPresent: true, python, curlCffi, browser };
}
248
+
/**
 * Return the insane-search dependency status, starting the real probe on first use and
 * handing back the same promise on every later call (until the cache is reset).
 */
export function probeInsaneDependencies(): Promise<InsaneDependencyStatus> {
	if (probeCache) return probeCache;
	probeCache = probeRealDependencies();
	return probeCache;
}
254
+
255
+ // ---------------------------------------------------------------------------
256
+ // Concurrency gate
257
+ // ---------------------------------------------------------------------------
258
+
/** Number of engine attempts currently running in this process. */
let inFlight = 0;

/** Test hook: zero the in-flight counter. */
export function resetInsaneConcurrencyForTest(): void {
	inFlight = 0;
}
264
+
265
+ // ---------------------------------------------------------------------------
266
+ // High-level bridge
267
+ // ---------------------------------------------------------------------------
268
+
/** Options accepted by `tryInsaneFetch`; every field is optional. */
export interface TryInsaneFetchOptions {
	/** Requested engine timeout in milliseconds; clamped before use. */
	timeoutMs?: number;
	/** Abort signal forwarded to the engine runner. */
	signal?: AbortSignal;
	/** Maximum engine attempts allowed in flight; defaults to DEFAULT_CONCURRENCY. */
	concurrencyLimit?: number;
	/** Seam: dependency prober (default: the real, cached probe). */
	prober?: DependencyProber;
	/** Seam: engine runner (default: the real subprocess runner). */
	runner?: EngineRunner;
}
278
+
/**
 * Translate the raw subprocess outcome into the bridge's discriminated result.
 *
 * Order of checks: abort, timeout, JSON parse failure, auth-required verdict, then the
 * engine's own `ok`/`content` fields. Notes about untried routes and a required browser
 * MCP phase are attached to both success and failure results that reach that point.
 *
 * The envelope comes from an external process, so it is validated field by field rather
 * than trusted: a JSON value that is not an object (for example `null`) is treated as an
 * empty envelope, a non-string `verdict`/`content`/`profile_used` is ignored, and
 * `untried_routes` is used only when it is an array (non-string entries are dropped).
 * This function therefore never throws on malformed engine output.
 *
 * @param raw        Captured subprocess outcome.
 * @param timeoutMs  Timeout that was applied, used only for the timeout note (in seconds).
 */
function mapEngineOutput(raw: EngineRawOutput, timeoutMs: number): InsaneBridgeResult {
	const notes: string[] = [];
	if (raw.aborted) {
		return { ok: false, reason: "aborted", notes };
	}
	if (raw.timedOut) {
		notes.push(INSANE_NOTES.timeout(Math.round(timeoutMs / 1000)));
		return { ok: false, reason: "timeout", notes };
	}

	let decoded: unknown;
	try {
		decoded = JSON.parse(raw.stdout);
	} catch {
		notes.push(INSANE_NOTES.invalidJson);
		return { ok: false, reason: "invalid-json", notes };
	}
	// JSON.parse accepts any JSON value; only an object can carry envelope fields.
	const envelope: Record<string, unknown> =
		typeof decoded === "object" && decoded !== null ? (decoded as Record<string, unknown>) : {};

	const verdict = typeof envelope.verdict === "string" ? envelope.verdict.trim() : undefined;
	// The engine emits the Verdict enum value `auth_required` (401/407); also tolerate
	// the human-readable phrase defensively. Either is a terminal public-content boundary.
	if (verdict && /^(?:auth_required|authentication required)$/i.test(verdict)) {
		notes.push(INSANE_NOTES.authRequired);
		return { ok: false, reason: "auth-required", verdict, notes };
	}

	const untriedRoutes = Array.isArray(envelope.untried_routes)
		? envelope.untried_routes.filter((route): route is string => typeof route === "string")
		: [];
	if (untriedRoutes.length > 0) {
		// Keep the note bounded: list at most eight routes.
		notes.push(INSANE_NOTES.untried(untriedRoutes.slice(0, 8).join(", ")));
	}
	if (envelope.must_invoke_playwright_mcp) {
		notes.push(INSANE_NOTES.mustBrowserMcp);
	}

	const content = typeof envelope.content === "string" ? envelope.content : undefined;
	if (envelope.ok && content !== undefined && content.trim().length > 0) {
		const profileUsed = typeof envelope.profile_used === "string" ? envelope.profile_used : undefined;
		return { ok: true, content, profileUsed, notes };
	}
	if (envelope.ok) {
		notes.push(INSANE_NOTES.emptyContent);
		return { ok: false, reason: "empty-content", notes };
	}
	notes.push(INSANE_NOTES.verdict(verdict || "unknown"));
	return { ok: false, reason: "engine-failed", verdict, notes };
}
321
+
/**
 * Try to read `url` through the insane-search engine.
 *
 * The opt-in gate, the raw-mode skip and the public-URL guard are the caller's job and
 * must have run before this is called. Steps here: probe dependencies (returning the
 * matching "unavailable" failure for the first missing one), enforce the per-process
 * concurrency limit, then run the engine with a clamped timeout and map its output.
 *
 * Designed to report problems as `ok: false` results rather than by throwing. Note that
 * a rejection from the prober or the runner is not caught here.
 */
export async function tryInsaneFetch(url: string, options: TryInsaneFetchOptions = {}): Promise<InsaneBridgeResult> {
	const probe = options.prober ?? probeInsaneDependencies;
	const run: EngineRunner = options.runner ?? (invocation => runEngineSubprocess(invocation));
	const maxConcurrent = options.concurrencyLimit ?? DEFAULT_CONCURRENCY;

	const skipped = (reason: string, note: string): InsaneFailure => ({ ok: false, reason, notes: [note] });

	const status = await probe();
	if (!status.vendorPresent) return skipped("vendor-missing", INSANE_NOTES.vendorMissing);
	if (!status.python) return skipped("no-python", INSANE_NOTES.noPython);
	if (!status.curlCffi) return skipped("no-curl-cffi", INSANE_NOTES.noCurlCffi);
	if (!status.browser) return skipped("no-browser", INSANE_NOTES.noBrowser);

	if (inFlight >= maxConcurrent) return skipped("concurrency", INSANE_NOTES.concurrency);

	// The slot is released in `finally`, whatever the runner does.
	inFlight += 1;
	try {
		const timeoutMs = clampTimeoutMs(options.timeoutMs);
		const rawOutput = await run({ url, timeoutMs, signal: options.signal });
		return mapEngineOutput(rawOutput, timeoutMs);
	} finally {
		inFlight -= 1;
	}
}