@undefineds.co/linx 0.3.4 → 0.3.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (172) hide show
  1. package/README.md +58 -23
  2. package/dist/generated/version.js +1 -1
  3. package/dist/generated/version.js.map +1 -1
  4. package/dist/index.js +334 -162
  5. package/dist/index.js.map +1 -1
  6. package/dist/lib/account-session.js +4 -8
  7. package/dist/lib/account-session.js.map +1 -1
  8. package/dist/lib/ai-command.js +228 -178
  9. package/dist/lib/ai-command.js.map +1 -1
  10. package/dist/lib/auto-mode/archive.js +38 -7
  11. package/dist/lib/auto-mode/archive.js.map +1 -1
  12. package/dist/lib/auto-mode/auth.js.map +1 -1
  13. package/dist/lib/auto-mode/display.js +71 -45
  14. package/dist/lib/auto-mode/display.js.map +1 -1
  15. package/dist/lib/auto-mode/format.js +9 -7
  16. package/dist/lib/auto-mode/format.js.map +1 -1
  17. package/dist/lib/auto-mode/hooks/claude.js +12 -2
  18. package/dist/lib/auto-mode/hooks/claude.js.map +1 -1
  19. package/dist/lib/auto-mode/hooks/codex.js +17 -7
  20. package/dist/lib/auto-mode/hooks/codex.js.map +1 -1
  21. package/dist/lib/auto-mode/hooks/index.js +28 -8
  22. package/dist/lib/auto-mode/hooks/index.js.map +1 -1
  23. package/dist/lib/auto-mode/pod-ai.js +20 -37
  24. package/dist/lib/auto-mode/pod-ai.js.map +1 -1
  25. package/dist/lib/auto-mode/pod-approval.js +124 -195
  26. package/dist/lib/auto-mode/pod-approval.js.map +1 -1
  27. package/dist/lib/auto-mode/pod-persistence.js +169 -90
  28. package/dist/lib/auto-mode/pod-persistence.js.map +1 -1
  29. package/dist/lib/auto-mode/runner.js +683 -81
  30. package/dist/lib/auto-mode/runner.js.map +1 -1
  31. package/dist/lib/auto-mode/secretary.js +186 -41
  32. package/dist/lib/auto-mode/secretary.js.map +1 -1
  33. package/dist/lib/auto-mode-command.js +32 -32
  34. package/dist/lib/auto-mode-command.js.map +1 -1
  35. package/dist/lib/chat-api.js +242 -50
  36. package/dist/lib/chat-api.js.map +1 -1
  37. package/dist/lib/codex-plugin/bridge.js +164 -17
  38. package/dist/lib/codex-plugin/bridge.js.map +1 -1
  39. package/dist/lib/codex-plugin/codex-native-proxy.js +370 -34
  40. package/dist/lib/codex-plugin/codex-native-proxy.js.map +1 -1
  41. package/dist/lib/credentials-store.js +33 -42
  42. package/dist/lib/credentials-store.js.map +1 -1
  43. package/dist/lib/linx-cloud-errors.js +61 -0
  44. package/dist/lib/linx-cloud-errors.js.map +1 -0
  45. package/dist/lib/linx-tui-contract.js +8 -5
  46. package/dist/lib/linx-tui-contract.js.map +1 -1
  47. package/dist/lib/login-command.js +9 -2
  48. package/dist/lib/login-command.js.map +1 -1
  49. package/dist/lib/models.js +3 -20
  50. package/dist/lib/models.js.map +1 -1
  51. package/dist/lib/oidc-auth.js +143 -17
  52. package/dist/lib/oidc-auth.js.map +1 -1
  53. package/dist/lib/oidc-session-storage.js +2 -6
  54. package/dist/lib/oidc-session-storage.js.map +1 -1
  55. package/dist/lib/pi-adapter/auto-input-controller.js +988 -0
  56. package/dist/lib/pi-adapter/auto-input-controller.js.map +1 -0
  57. package/dist/lib/pi-adapter/backend-command.js +2 -0
  58. package/dist/lib/pi-adapter/backend-command.js.map +1 -0
  59. package/dist/lib/pi-adapter/backend-credentials.js +80 -0
  60. package/dist/lib/pi-adapter/backend-credentials.js.map +1 -0
  61. package/dist/lib/pi-adapter/branding.js +246 -108
  62. package/dist/lib/pi-adapter/branding.js.map +1 -1
  63. package/dist/lib/pi-adapter/control-state.js +72 -0
  64. package/dist/lib/pi-adapter/control-state.js.map +1 -0
  65. package/dist/lib/pi-adapter/interactive.js +2634 -30
  66. package/dist/lib/pi-adapter/interactive.js.map +1 -1
  67. package/dist/lib/pi-adapter/pod-approval.js +382 -210
  68. package/dist/lib/pi-adapter/pod-approval.js.map +1 -1
  69. package/dist/lib/pi-adapter/pod-mirror-mapping.js +71 -17
  70. package/dist/lib/pi-adapter/pod-mirror-mapping.js.map +1 -1
  71. package/dist/lib/pi-adapter/pod-mirror.js +531 -64
  72. package/dist/lib/pi-adapter/pod-mirror.js.map +1 -1
  73. package/dist/lib/pi-adapter/pod-native.js +81 -85
  74. package/dist/lib/pi-adapter/pod-native.js.map +1 -1
  75. package/dist/lib/pi-adapter/pod-status-output.js +54 -0
  76. package/dist/lib/pi-adapter/pod-status-output.js.map +1 -0
  77. package/dist/lib/pi-adapter/runtime.js +458 -228
  78. package/dist/lib/pi-adapter/runtime.js.map +1 -1
  79. package/dist/lib/pi-adapter/session-control.js +509 -0
  80. package/dist/lib/pi-adapter/session-control.js.map +1 -0
  81. package/dist/lib/pi-adapter/session.js +35 -22
  82. package/dist/lib/pi-adapter/session.js.map +1 -1
  83. package/dist/lib/pi-adapter/stream.js +89 -32
  84. package/dist/lib/pi-adapter/stream.js.map +1 -1
  85. package/dist/lib/pi-adapter/sync-recovery.js +89 -0
  86. package/dist/lib/pi-adapter/sync-recovery.js.map +1 -0
  87. package/dist/lib/pi-adapter/web-fetch.js +13 -14
  88. package/dist/lib/pi-adapter/web-fetch.js.map +1 -1
  89. package/dist/lib/pod-chat-store.js +254 -78
  90. package/dist/lib/pod-chat-store.js.map +1 -1
  91. package/dist/lib/pod-data-session.js +156 -35
  92. package/dist/lib/pod-data-session.js.map +1 -1
  93. package/dist/lib/solid-auth-store.js +27 -0
  94. package/dist/lib/solid-auth-store.js.map +1 -0
  95. package/dist/lib/solid-auth.js +2 -4
  96. package/dist/lib/solid-auth.js.map +1 -1
  97. package/dist/lib/solid-client-credentials-login.js +100 -0
  98. package/dist/lib/solid-client-credentials-login.js.map +1 -0
  99. package/dist/lib/solid-local-store.js +31 -0
  100. package/dist/lib/solid-local-store.js.map +1 -0
  101. package/dist/lib/symphony/archive.js +328 -18
  102. package/dist/lib/symphony/archive.js.map +1 -1
  103. package/dist/lib/symphony/pod-projection.js +2222 -0
  104. package/dist/lib/symphony/pod-projection.js.map +1 -0
  105. package/dist/lib/symphony-command.js +602 -178
  106. package/dist/lib/symphony-command.js.map +1 -1
  107. package/dist/lib/sync-checkpoint-store.js +74 -0
  108. package/dist/lib/sync-checkpoint-store.js.map +1 -0
  109. package/dist/skills/symphony/SKILL.md +665 -0
  110. package/package.json +15 -9
  111. package/vendor/agent-runtime/dist/agent-runtime.d.ts +137 -0
  112. package/vendor/agent-runtime/dist/agent-runtime.js +211 -0
  113. package/vendor/agent-runtime/dist/auto-mode.d.ts +78 -13
  114. package/vendor/agent-runtime/dist/auto-mode.js +288 -31
  115. package/vendor/agent-runtime/dist/control-plane.d.ts +28 -0
  116. package/vendor/agent-runtime/dist/control-plane.js +79 -0
  117. package/vendor/agent-runtime/dist/file-sync.d.ts +157 -0
  118. package/vendor/agent-runtime/dist/file-sync.js +314 -0
  119. package/vendor/agent-runtime/dist/index.d.ts +7 -0
  120. package/vendor/agent-runtime/dist/index.js +7 -0
  121. package/vendor/agent-runtime/dist/reconciler.d.ts +117 -0
  122. package/vendor/agent-runtime/dist/reconciler.js +361 -0
  123. package/vendor/agent-runtime/dist/symphony.d.ts +128 -8
  124. package/vendor/agent-runtime/dist/symphony.js +362 -57
  125. package/vendor/agent-runtime/dist/sync.d.ts +271 -0
  126. package/vendor/agent-runtime/dist/sync.js +550 -0
  127. package/vendor/agent-runtime/dist/thread-reconciler-controller.d.ts +58 -0
  128. package/vendor/agent-runtime/dist/thread-reconciler-controller.js +137 -0
  129. package/vendor/agent-runtime/dist/turn-controller.js +2 -2
  130. package/vendor/agent-runtime/dist/wake-scheduler.d.ts +67 -0
  131. package/vendor/agent-runtime/dist/wake-scheduler.js +194 -0
  132. package/vendor/agent-runtime/package.json +8 -1
  133. package/vendor/pi-web-access/CHANGELOG.md +387 -0
  134. package/vendor/pi-web-access/LICENSE +21 -0
  135. package/vendor/pi-web-access/README.md +352 -0
  136. package/vendor/pi-web-access/activity.ts +101 -0
  137. package/vendor/pi-web-access/banner.png +0 -0
  138. package/vendor/pi-web-access/chrome-cookies.ts +322 -0
  139. package/vendor/pi-web-access/code-search.ts +107 -0
  140. package/vendor/pi-web-access/curator-page.ts +3359 -0
  141. package/vendor/pi-web-access/curator-server.ts +605 -0
  142. package/vendor/pi-web-access/exa.ts +520 -0
  143. package/vendor/pi-web-access/extract.ts +641 -0
  144. package/vendor/pi-web-access/gemini-api.ts +112 -0
  145. package/vendor/pi-web-access/gemini-search.ts +361 -0
  146. package/vendor/pi-web-access/gemini-url-context.ts +126 -0
  147. package/vendor/pi-web-access/gemini-web-config.ts +52 -0
  148. package/vendor/pi-web-access/gemini-web.ts +396 -0
  149. package/vendor/pi-web-access/github-api.ts +196 -0
  150. package/vendor/pi-web-access/github-extract.ts +634 -0
  151. package/vendor/pi-web-access/index.ts +2346 -0
  152. package/vendor/pi-web-access/package.json +45 -0
  153. package/vendor/pi-web-access/pdf-extract.ts +192 -0
  154. package/vendor/pi-web-access/perplexity.ts +195 -0
  155. package/vendor/pi-web-access/pi-web-fetch-demo.mp4 +0 -0
  156. package/vendor/pi-web-access/rsc-extract.ts +338 -0
  157. package/vendor/pi-web-access/skills/librarian/SKILL.md +195 -0
  158. package/vendor/pi-web-access/storage.ts +72 -0
  159. package/vendor/pi-web-access/summary-review.ts +276 -0
  160. package/vendor/pi-web-access/test/gemini-web-cookie-opt-in.test.mjs +41 -0
  161. package/vendor/pi-web-access/test/pdf-extract.test.mjs +95 -0
  162. package/vendor/pi-web-access/utils.ts +44 -0
  163. package/vendor/pi-web-access/video-extract.ts +378 -0
  164. package/vendor/pi-web-access/youtube-extract.ts +310 -0
  165. package/dist/lib/pi-adapter/auth.js +0 -68
  166. package/dist/lib/pi-adapter/auth.js.map +0 -1
  167. package/dist/lib/pi-adapter/pod-tools.js +0 -140
  168. package/dist/lib/pi-adapter/pod-tools.js.map +0 -1
  169. package/dist/skills/drizzle-solid/SKILL.md +0 -340
  170. package/dist/skills/pod-storage/SKILL.md +0 -100
  171. package/dist/skills/solid-modeling/SKILL.md +0 -274
  172. package/dist/skills/xpod-componentsjs/SKILL.md +0 -284
@@ -0,0 +1,641 @@
1
+ import { Readability } from "@mozilla/readability";
2
+ import { parseHTML } from "linkedom";
3
+ import TurndownService from "turndown";
4
+ import pLimit from "p-limit";
5
+ import { activityMonitor } from "./activity.js";
6
+ import { extractRSCContent } from "./rsc-extract.js";
7
+ import { extractPDFToMarkdown, isPDF } from "./pdf-extract.js";
8
+ import { extractGitHub } from "./github-extract.js";
9
+ import { isYouTubeURL, isYouTubeEnabled, extractYouTube, extractYouTubeFrame, extractYouTubeFrames, getYouTubeStreamInfo } from "./youtube-extract.js";
10
+ import { extractWithUrlContext, extractWithGeminiWeb } from "./gemini-url-context.js";
11
+ import { isVideoFile, extractVideo, extractVideoFrame, getLocalVideoDuration } from "./video-extract.js";
12
+ import { formatSeconds } from "./utils.js";
13
+
14
/** Fallback timeout (milliseconds) for a direct HTTP fetch when the caller supplies no `timeoutMs`. */
const DEFAULT_TIMEOUT_MS = 30 * 1000;
/** Upper bound on the number of URL extractions scheduled concurrently. */
const CONCURRENT_LIMIT = 3;

/** Error-message prefixes of HTTP extraction failures that are returned as-is instead of trying fallbacks. */
const NON_RECOVERABLE_ERRORS = ["Unsupported content type", "Response too large"];
/** Converted markdown shorter than this many characters is reported as incomplete. */
const MIN_USEFUL_CONTENT = 500;
19
+
20
/**
 * Reduces an arbitrary thrown value to a message string.
 *
 * @param err - Whatever was caught.
 * @returns `err.message` for `Error` instances, otherwise `String(err)`.
 */
function errorMessage(err: unknown): string {
  if (err instanceof Error) {
    return err.message;
  }
  return String(err);
}
23
+
24
/**
 * Tells whether a caught value's message begins with "Failed to parse ".
 * Callers in this file surface such errors to the user instead of falling
 * through to the next extractor.
 */
function isConfigParseError(err: unknown): boolean {
  const message = errorMessage(err);
  return message.startsWith("Failed to parse ");
}
27
+
28
/**
 * Tells whether a caught value looks like an abort: its message contains
 * "abort" in any letter case.
 */
function isAbortError(err: unknown): boolean {
  const lowered = errorMessage(err).toLowerCase();
  return lowered.includes("abort");
}
31
+
32
/**
 * Builds the result reported when extraction of `url` was cancelled:
 * empty title and content with the error set to "Aborted".
 */
function abortedResult(url: string): ExtractedContent {
  const aborted: ExtractedContent = { url, title: "", content: "", error: "Aborted" };
  return aborted;
}
35
+
36
// HTML-to-markdown converter shared by every HTML extraction in this module:
// ATX ("#") headings and fenced code blocks.
const turndownOptions = { headingStyle: "atx", codeBlockStyle: "fenced" } as const;
const turndown = new TurndownService(turndownOptions);

// Limiter that caps how many URL extractions run at the same time.
const fetchLimit = pLimit(CONCURRENT_LIMIT);
42
+
43
/** Image payload of one extracted frame together with its MIME type. */
export type FrameData = { data: string; mimeType: string };

/** Outcome of extracting one frame: the image payload, or a description of the failure. */
export type FrameResult = FrameData | { error: string };

/** A frame image labelled with the formatted timestamp it was captured at. */
export interface VideoFrame extends FrameData {
  timestamp: string;
}

/** Result of extracting a single URL or local file. */
export interface ExtractedContent {
  /** The URL (or path) exactly as it was requested. */
  url: string;
  /** Title of the extracted resource; empty when none could be determined. */
  title: string;
  /** Extracted text or markdown; some failure results carry the error text here as well. */
  content: string;
  /** `null` on success, otherwise a human-readable failure description. */
  error: string | null;
  /** Single frame image, set for single-timestamp frame requests. */
  thumbnail?: FrameData;
  /** Frames captured for range or multi-frame requests. */
  frames?: VideoFrame[];
  /** Duration of the source video when known (presumably seconds; confirm against the video extractors). */
  duration?: number;
}

/** Optional knobs accepted by the extraction entry points. */
export interface ExtractOptions {
  /** Timeout for the direct HTTP fetch, in milliseconds. */
  timeoutMs?: number;
  /** Forwarded to the GitHub extractor. */
  forceClone?: boolean;
  /** Forwarded to the YouTube extractor. */
  prompt?: string;
  /** Frame position: "H:MM:SS", "MM:SS", plain seconds, or a "start-end" range. */
  timestamp?: string;
  /** Number of frames to capture. */
  frames?: number;
  /** Forwarded to the YouTube extractor. */
  model?: string;
}
70
+
71
/** Base of the Jina Reader endpoint; the target URL is appended verbatim. */
const JINA_READER_BASE = "https://r.jina.ai/";
/** Timeout for a single Jina Reader request, in milliseconds. */
const JINA_TIMEOUT_MS = 30000;

/**
 * Fetches `url` through the Jina Reader endpoint and returns the markdown it produces.
 *
 * @param url - Page to extract.
 * @param signal - Optional caller cancellation, combined with the request timeout.
 * @returns The extracted content, or `null` when the request fails, the response
 *   has no "Markdown Content:" section, or the markdown looks unusable.
 */
async function extractWithJinaReader(
  url: string,
  signal?: AbortSignal,
): Promise<ExtractedContent | null> {
  const endpoint = JINA_READER_BASE + url;
  const activityId = activityMonitor.logStart({ type: "api", query: `jina: ${url}` });

  try {
    const abortSources = [AbortSignal.timeout(JINA_TIMEOUT_MS)];
    if (signal) {
      abortSources.push(signal);
    }

    const res = await fetch(endpoint, {
      headers: {
        "Accept": "text/markdown",
        "X-No-Cache": "true",
      },
      signal: AbortSignal.any(abortSources),
    });

    if (!res.ok) {
      activityMonitor.logComplete(activityId, res.status);
      return null;
    }

    const body = await res.text();
    activityMonitor.logComplete(activityId, res.status);

    // Only the text following the marker is page content.
    const marker = "Markdown Content:";
    const markerAt = body.indexOf(marker);
    if (markerAt < 0) {
      return null;
    }
    const markdown = body.slice(markerAt + marker.length).trim();

    // Very short output or a loading/JS placeholder means rendering failed upstream.
    const placeholders = ["Loading...", "Please enable JavaScript"];
    const unusable = markdown.length < 100 || placeholders.some((prefix) => markdown.startsWith(prefix));
    if (unusable) {
      return null;
    }

    // Title: first level-1/2 heading, else the last URL path segment, else the URL.
    return { url, title: extractTextTitle(markdown, url), content: markdown, error: null };
  } catch (err) {
    const message = errorMessage(err);
    if (isAbortError(err)) {
      // Cancellations and timeouts are logged as a completion with status 0, not as errors.
      activityMonitor.logComplete(activityId, 0);
    } else {
      activityMonitor.logError(activityId, message);
    }
    return null;
  }
}
128
+
129
/**
 * Parses one timestamp into whole seconds.
 *
 * Accepted forms: a non-negative number of seconds ("85", "85.9"), "MM:SS",
 * or "H:MM:SS". Fractions are floored.
 *
 * @param ts - Raw timestamp text.
 * @returns Whole seconds, or `null` when the text is blank, negative,
 *   non-finite, has an empty segment, or has the wrong number of segments.
 */
function parseTimestamp(ts: string): number | null {
  // Number("") and Number("  ") are 0, so blank input must be rejected explicitly.
  if (ts.trim() === "") return null;

  const num = Number(ts);
  // isFinite keeps "Infinity" / "1e999" from producing a non-finite second count.
  if (Number.isFinite(num) && num >= 0) return Math.floor(num);

  const segments = ts.split(":");
  if (segments.length !== 2 && segments.length !== 3) return null;

  // An empty segment ("1::30", ":30") would silently coerce to 0; treat it as malformed.
  const parts = segments.map((segment) => (segment.trim() === "" ? NaN : Number(segment)));
  if (parts.some((p) => !Number.isFinite(p) || p < 0)) return null;

  if (parts.length === 3) return Math.floor(parts[0] * 3600 + parts[1] * 60 + parts[2]);
  return Math.floor(parts[0] * 60 + parts[1]);
}
138
+
139
/** A parsed timestamp option: one point in time, or a start/end range, both in whole seconds. */
type TimestampSpec =
  | { type: "single"; seconds: number }
  | { type: "range"; start: number; end: number };

/**
 * Parses a timestamp option that is either a single timestamp or a "start-end" range.
 *
 * A range is recognised only when both sides parse and `end` is strictly greater
 * than `start`; otherwise the whole text is tried as a single timestamp.
 *
 * @returns The parsed spec, or `null` when the text is neither form.
 */
function parseTimestampSpec(ts: string): TimestampSpec | null {
  // Search from index 1 so a leading "-" is never taken as the range separator.
  const separatorAt = ts.indexOf("-", 1);
  if (separatorAt > 0) {
    const start = parseTimestamp(ts.slice(0, separatorAt));
    const end = parseTimestamp(ts.slice(separatorAt + 1));
    if (start !== null && end !== null) {
      if (end > start) {
        return { type: "range", start, end };
      }
    }
  }

  const seconds = parseTimestamp(ts);
  if (seconds === null) {
    return null;
  }
  return { type: "single", seconds };
}
151
+
152
/** Number of frames sampled from a range when the caller does not ask for a specific count. */
const DEFAULT_RANGE_FRAMES = 6;
/** Smallest spacing, in seconds, between two sampled frames. */
const MIN_FRAME_INTERVAL = 5;

/**
 * Chooses the timestamps (in seconds) at which frames are sampled between `start` and `end`.
 *
 * When `maxFrames` evenly spaced frames would sit closer together than
 * `MIN_FRAME_INTERVAL`, frames are instead placed every `MIN_FRAME_INTERVAL`
 * seconds from `start` (never past `end`, never more than `maxFrames`).
 * Otherwise `maxFrames` evenly spaced, rounded timestamps are returned.
 *
 * @param start - First timestamp.
 * @param end - Last timestamp.
 * @param maxFrames - Maximum number of frames; values of 1 or less yield just `[start]`.
 */
function computeRangeTimestamps(start: number, end: number, maxFrames: number = DEFAULT_RANGE_FRAMES): number[] {
  if (maxFrames <= 1) {
    return [start];
  }

  const spacing = (end - start) / (maxFrames - 1);

  if (spacing < MIN_FRAME_INTERVAL) {
    // Too dense: step by the minimum interval instead.
    const sparse: number[] = [];
    let cursor = start;
    while (cursor <= end && sparse.length < maxFrames) {
      sparse.push(cursor);
      cursor += MIN_FRAME_INTERVAL;
    }
    return sparse;
  }

  const evenlySpaced = (_: unknown, index: number): number => Math.round(start + index * spacing);
  return Array.from({ length: maxFrames }, evenlySpaced);
}
168
+
169
/**
 * Wraps the outcome of a multi-frame extraction in an `ExtractedContent`.
 *
 * @param url - Source that was requested.
 * @param label - Human-readable time range used in the title and content.
 * @param requestedCount - Number of frames that were attempted.
 * @param frames - Frames that were actually extracted.
 * @param error - Failure reason, used only when no frame was extracted.
 * @param duration - Optional video duration to attach to a successful result.
 * @returns A success result carrying the frames, or — when `frames` is empty —
 *   a failure whose content and error are `error` (or "Frame extraction failed").
 */
function buildFrameResult(
  url: string, label: string, requestedCount: number,
  frames: VideoFrame[], error: string | null, duration?: number,
): ExtractedContent {
  const extracted = frames.length;
  const title = `Frames ${label} (${extracted}/${requestedCount})`;

  if (extracted > 0) {
    return {
      url,
      title,
      content: `${extracted} frames extracted from ${label}`,
      error: null,
      frames,
      duration,
    };
  }

  const msg = error ?? "Frame extraction failed";
  return { url, title, content: msg, error: msg };
}
186
+
187
/**
 * Extracts one frame per timestamp from a local video file, all requests issued concurrently.
 *
 * @param filePath - Path of the local video.
 * @param timestamps - Positions, in seconds, to capture.
 * @returns The frames that succeeded (each labelled with its formatted timestamp).
 *   `error` is the first failure message, but only when no frame succeeded.
 */
async function extractLocalFrames(
  filePath: string, timestamps: number[],
): Promise<{ frames: VideoFrame[]; error: string | null }> {
  const outcomes = await Promise.all(
    timestamps.map(async (seconds) => {
      const frame = await extractVideoFrame(filePath, seconds);
      return "error" in frame
        ? { error: frame.error }
        : { ...frame, timestamp: formatSeconds(seconds) };
    }),
  );

  const frames: VideoFrame[] = [];
  let firstError: string | null = null;
  for (const outcome of outcomes) {
    if ("data" in outcome) {
      frames.push(outcome);
    } else if ("error" in outcome && firstError === null) {
      firstError = outcome.error;
    }
  }

  // Partial success is still success: the error is reported only if nothing was captured.
  const error = frames.length === 0 && firstError !== null ? firstError : null;
  return { frames, error };
}
199
+
200
/**
 * Calls `isVideoFile(url)` without letting it throw.
 *
 * @returns `{ info }` with whatever `isVideoFile` returned, or
 *   `{ info: null, error }` carrying the message of the exception it raised.
 */
function safeVideoInfo(url: string): { info: ReturnType<typeof isVideoFile>; error?: string } {
  let info: ReturnType<typeof isVideoFile>;
  try {
    info = isVideoFile(url);
  } catch (err) {
    return { info: null, error: errorMessage(err) };
  }
  return { info };
}
207
+
208
/** What a timestamp request captures: one frame, or several frames spread over a window. */
type FramePlan =
  | { kind: "single"; seconds: number }
  | { kind: "window"; start: number; end: number };

/**
 * Turns a parsed timestamp plus an optional frame count into a capture plan.
 * A range is always a window; a single timestamp with a frame count becomes a
 * window of `frameCount` frames spaced `MIN_FRAME_INTERVAL` seconds apart.
 */
function planFrames(spec: TimestampSpec, frameCount: number | undefined): FramePlan {
  if (spec.type === "range") {
    return { kind: "window", start: spec.start, end: spec.end };
  }
  if (frameCount) {
    const end = spec.seconds + (frameCount - 1) * MIN_FRAME_INTERVAL;
    return { kind: "window", start: spec.seconds, end };
  }
  return { kind: "single", seconds: spec.seconds };
}

/** Formats a "start-end" label from two second counts. */
function formatRangeLabel(start: number, end: number): string {
  return `${formatSeconds(start)}-${formatSeconds(end)}`;
}

/** Returns the "exceeds video duration" message when `seconds` lies past a known duration, else `null`. */
function durationExceededMessage(seconds: number, duration: number | null): string | null {
  if (duration !== null && seconds > duration) {
    return `Timestamp ${formatSeconds(seconds)} exceeds video duration (${formatSeconds(Math.floor(duration))})`;
  }
  return null;
}

/** Samples `frameCount` frames across the whole of a YouTube or local video (no timestamp given). */
async function extractFramesAcrossVideo(url: string, frameCount: number): Promise<ExtractedContent> {
  const yt = isYouTubeURL(url);
  if (yt.isYouTube && yt.videoId) {
    const stream = await getYouTubeStreamInfo(yt.videoId);
    if ("error" in stream) {
      return { url, title: "Frames", content: stream.error, error: stream.error };
    }
    if (stream.duration === null) {
      const error = "Cannot determine video duration. Use a timestamp range instead.";
      return { url, title: "Frames", content: error, error };
    }
    const wholeSeconds = Math.floor(stream.duration);
    const timestamps = computeRangeTimestamps(0, wholeSeconds, frameCount);
    const result = await extractYouTubeFrames(yt.videoId, timestamps, stream);
    const label = formatRangeLabel(0, wholeSeconds);
    return buildFrameResult(url, label, timestamps.length, result.frames, result.error, stream.duration);
  }

  const local = safeVideoInfo(url);
  if (local.error) {
    return { url, title: "", content: "", error: local.error };
  }
  if (local.info) {
    const measured = await getLocalVideoDuration(local.info.absolutePath);
    if (typeof measured !== "number") {
      return { url, title: "Frames", content: measured.error, error: measured.error };
    }
    const wholeSeconds = Math.floor(measured);
    const timestamps = computeRangeTimestamps(0, wholeSeconds, frameCount);
    const result = await extractLocalFrames(local.info.absolutePath, timestamps);
    const label = formatRangeLabel(0, wholeSeconds);
    return buildFrameResult(url, label, timestamps.length, result.frames, result.error, measured);
  }

  return { url, title: "", content: "", error: "Frame extraction only works with YouTube and local video files" };
}

/** Captures the frame(s) described by a timestamp option from a YouTube or local video. */
async function extractFramesAtTimestamp(
  url: string,
  timestamp: string,
  frameCount: number | undefined,
): Promise<ExtractedContent> {
  const spec = parseTimestampSpec(timestamp);
  if (!spec) {
    return {
      url,
      title: "",
      content: "",
      error: `Invalid timestamp format: "${timestamp}". Use "H:MM:SS", "MM:SS", "85", or "start-end".`,
    };
  }

  const plan = planFrames(spec, frameCount);
  const singleTitle = `Frame at ${timestamp}`;

  const yt = isYouTubeURL(url);
  if (yt.isYouTube && yt.videoId) {
    const stream = await getYouTubeStreamInfo(yt.videoId);
    if ("error" in stream) {
      const title = plan.kind === "window"
        ? `Frames ${formatRangeLabel(plan.start, plan.end)}`
        : singleTitle;
      return { url, title, content: stream.error, error: stream.error };
    }

    if (plan.kind === "window") {
      const label = formatRangeLabel(plan.start, plan.end);
      const tooLate = durationExceededMessage(plan.end, stream.duration);
      if (tooLate !== null) {
        return { url, title: `Frames ${label}`, content: tooLate, error: tooLate };
      }
      // A missing/zero frame count falls back to the default number of frames.
      const timestamps = computeRangeTimestamps(plan.start, plan.end, frameCount || undefined);
      const result = await extractYouTubeFrames(yt.videoId, timestamps, stream);
      return buildFrameResult(url, label, timestamps.length, result.frames, result.error, result.duration ?? undefined);
    }

    const tooLate = durationExceededMessage(plan.seconds, stream.duration);
    if (tooLate !== null) {
      return { url, title: singleTitle, content: tooLate, error: tooLate };
    }
    const frame = await extractYouTubeFrame(yt.videoId, plan.seconds, stream);
    if ("error" in frame) {
      return { url, title: singleTitle, content: frame.error, error: frame.error };
    }
    return { url, title: singleTitle, content: `Video frame at ${timestamp}`, error: null, thumbnail: frame };
  }

  const local = safeVideoInfo(url);
  if (local.error) {
    return { url, title: "", content: "", error: local.error };
  }
  if (local.info) {
    const videoPath = local.info.absolutePath;

    if (plan.kind === "window") {
      const timestamps = computeRangeTimestamps(plan.start, plan.end, frameCount || undefined);
      const result = await extractLocalFrames(videoPath, timestamps);
      const label = formatRangeLabel(plan.start, plan.end);
      return buildFrameResult(url, label, timestamps.length, result.frames, result.error);
    }

    const frame = await extractVideoFrame(videoPath, plan.seconds);
    if ("error" in frame) {
      return { url, title: singleTitle, content: frame.error, error: frame.error };
    }
    return { url, title: singleTitle, content: `Video frame at ${timestamp}`, error: null, thumbnail: frame };
  }

  return { url, title: "", content: "", error: "Timestamp extraction only works with YouTube and local video files" };
}

/**
 * Extracts readable content (or video frames) from a URL or local video path.
 *
 * Strategies are tried in this order, returning at the first that applies:
 *  1. Frame sampling across the whole video (`options.frames` without `options.timestamp`).
 *  2. Frame(s) at a timestamp or range (`options.timestamp`).
 *  3. Local video analysis.
 *  4. GitHub extraction.
 *  5. YouTube extraction (only when YouTube support is enabled).
 *  6. Direct HTTP fetch, then Jina Reader, then the two Gemini extractors as fallbacks.
 *
 * Failures are reported through the `error` field of the result rather than thrown
 * by this function's own logic; cancellation yields an "Aborted" result.
 *
 * @param url - URL or local video path to extract.
 * @param signal - Optional cancellation signal, checked between strategies.
 * @param options - Optional extraction settings.
 */
export async function extractContent(
  url: string,
  signal?: AbortSignal,
  options?: ExtractOptions,
): Promise<ExtractedContent> {
  const fail = (error: string): ExtractedContent => ({ url, title: "", content: "", error });

  if (signal?.aborted) {
    return abortedResult(url);
  }

  // Frame requests are handled entirely by the frame helpers.
  const frameCount = options?.frames;
  const timestamp = options?.timestamp;
  if (frameCount && !timestamp) {
    return extractFramesAcrossVideo(url, frameCount);
  }
  if (timestamp) {
    return extractFramesAtTimestamp(url, timestamp, frameCount);
  }

  // Local video file: full analysis.
  const localVideo = safeVideoInfo(url);
  if (localVideo.error) {
    return fail(localVideo.error);
  }
  if (localVideo.info) {
    try {
      const analysis = await extractVideo(localVideo.info, signal, options);
      if (signal?.aborted) return abortedResult(url);
      return analysis ?? fail("Video analysis requires Gemini access. Either:\n 1. Sign into gemini.google.com in Chrome (free, uses cookies)\n 2. Set GEMINI_API_KEY in $LINX_HOME/pi-web-access.json");
    } catch (err) {
      return isAbortError(err) ? abortedResult(url) : fail(errorMessage(err));
    }
  }

  // Everything below needs a syntactically valid URL.
  try {
    new URL(url);
  } catch {
    return fail("Invalid URL");
  }

  // GitHub: only aborts and config parse errors stop the chain; other failures fall through.
  try {
    const ghResult = await extractGitHub(url, signal, options?.forceClone);
    if (ghResult) return ghResult;
    if (signal?.aborted) return abortedResult(url);
  } catch (err) {
    const message = errorMessage(err);
    if (isAbortError(err)) return abortedResult(url);
    if (isConfigParseError(err)) return fail(message);
  }

  // YouTube: once the URL is recognised (and support is enabled) no other strategy is tried.
  const ytInfo = isYouTubeURL(url);
  let youtubeEnabled = false;
  try {
    youtubeEnabled = isYouTubeEnabled();
  } catch (err) {
    return fail(errorMessage(err));
  }
  if (ytInfo.isYouTube && youtubeEnabled) {
    try {
      const ytResult = await extractYouTube(url, signal, options?.prompt, options?.model);
      if (ytResult) return ytResult;
      if (signal?.aborted) return abortedResult(url);
    } catch (err) {
      const message = errorMessage(err);
      if (isAbortError(err)) return abortedResult(url);
      if (isConfigParseError(err)) return fail(message);
    }
    return fail("Could not extract YouTube video content. Sign into Google in Chrome for automatic access, or set GEMINI_API_KEY.");
  }

  if (signal?.aborted) return abortedResult(url);

  // Direct HTTP fetch; a clean result or a non-recoverable error ends the chain.
  const httpResult = await extractViaHttp(url, signal, options);
  if (signal?.aborted) return abortedResult(url);

  const httpError = httpResult.error;
  if (!httpError) return httpResult;
  if (NON_RECOVERABLE_ERRORS.some((prefix) => httpError.startsWith(prefix))) return httpResult;

  // Fallback 1: Jina Reader.
  const jinaResult = await extractWithJinaReader(url, signal);
  if (jinaResult) return jinaResult;
  if (signal?.aborted) return abortedResult(url);

  // Fallback 2: Gemini URL context, then Gemini Web.
  let geminiResult: ExtractedContent | null = null;
  try {
    geminiResult = await extractWithUrlContext(url, signal)
      ?? await extractWithGeminiWeb(url, signal);
  } catch (err) {
    if (isAbortError(err)) return abortedResult(url);
    if (isConfigParseError(err)) {
      return { ...httpResult, error: errorMessage(err) };
    }
  }

  if (geminiResult) return geminiResult;
  if (signal?.aborted) return abortedResult(url);

  // Nothing worked: return the HTTP result with the original error plus guidance.
  const guidance = [
    httpResult.error,
    "",
    "Fallback options:",
    " \u2022 Set GEMINI_API_KEY in $LINX_HOME/pi-web-access.json",
    " \u2022 Sign into gemini.google.com in Chrome",
    " \u2022 Use web_search to find content about this topic",
  ].join("\n");
  return { ...httpResult, error: guidance };
}
447
+
448
/**
 * Heuristic for pages whose real content is rendered client-side.
 *
 * @param html - Raw HTML of the page.
 * @returns `true` when the `<body>` holds fewer than 500 characters of visible
 *   text while the document contains more than three `<script` tags; `false`
 *   otherwise, including when no `<body>…</body>` pair is found.
 */
function isLikelyJSRendered(html: string): boolean {
  const body = /<body[^>]*>([\s\S]*?)<\/body>/i.exec(html);
  if (body === null) {
    return false;
  }

  // Drop script and style blocks, then remaining tags, then collapse whitespace.
  const cleanupSteps: Array<[RegExp, string]> = [
    [/<script[\s\S]*?<\/script>/gi, ""],
    [/<style[\s\S]*?<\/style>/gi, ""],
    [/<[^>]+>/g, ""],
    [/\s+/g, " "],
  ];
  let visibleText = body[1];
  for (const [pattern, replacement] of cleanupSteps) {
    visibleText = visibleText.replace(pattern, replacement);
  }
  visibleText = visibleText.trim();

  // Scripts are counted over the whole document, not just the body.
  const scriptTags = html.match(/<script/gi)?.length ?? 0;

  return scriptTags > 3 && visibleText.length < 500;
}
469
+
470
/**
 * Fetches `url` directly and converts the response into extracted content.
 *
 * - PDFs are handed to the PDF extractor; the result describes where the
 *   extracted file was saved.
 * - Binary, image, audio, video and zip content types are rejected.
 * - Non-HTML text is returned verbatim.
 * - HTML goes through Readability and is converted to markdown; if Readability
 *   finds no article, RSC extraction is tried.
 *
 * Every failure (HTTP status, oversize response, unsupported type, unreadable
 * HTML, network error, timeout, cancellation) is reported through the `error`
 * field; this function's own logic does not throw.
 *
 * @param url - Absolute URL to fetch.
 * @param signal - Optional caller cancellation signal.
 * @param options - `timeoutMs` overrides the default request timeout.
 */
async function extractViaHttp(
  url: string,
  signal?: AbortSignal,
  options?: ExtractOptions,
): Promise<ExtractedContent> {
  const timeoutMs = options?.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const activityId = activityMonitor.logStart({ type: "fetch", url });

  // One controller carries both the timeout and the caller's cancellation.
  const controller = new AbortController();
  const timeoutId = setTimeout(() => controller.abort(), timeoutMs);

  const onAbort = () => controller.abort();
  if (signal?.aborted) {
    // The "abort" event never fires for a signal that is already aborted, so
    // mirror that state immediately instead of waiting for the timeout.
    controller.abort();
  } else {
    signal?.addEventListener("abort", onAbort, { once: true });
  }

  try {
    const response = await fetch(url, {
      signal: controller.signal,
      headers: {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
        "Accept-Language": "en-US,en;q=0.9",
        "Cache-Control": "no-cache",
        "Sec-Fetch-Dest": "document",
        "Sec-Fetch-Mode": "navigate",
        "Sec-Fetch-Site": "none",
        "Sec-Fetch-User": "?1",
        "Upgrade-Insecure-Requests": "1",
      },
    });

    if (!response.ok) {
      activityMonitor.logComplete(activityId, response.status);
      return {
        url,
        title: "",
        content: "",
        error: `HTTP ${response.status}: ${response.statusText}`,
      };
    }

    const contentLengthHeader = response.headers.get("content-length");
    const contentType = response.headers.get("content-type") || "";
    const isPDFContent = isPDF(url, contentType);
    // PDFs get a larger budget (20 MB) than other responses (5 MB).
    const maxResponseSize = isPDFContent ? 20 * 1024 * 1024 : 5 * 1024 * 1024;
    if (contentLengthHeader) {
      const contentLength = parseInt(contentLengthHeader, 10);
      if (contentLength > maxResponseSize) {
        activityMonitor.logComplete(activityId, response.status);
        return {
          url,
          title: "",
          content: "",
          error: `Response too large (${Math.round(contentLength / 1024 / 1024)}MB)`,
        };
      }
    }

    if (isPDFContent) {
      try {
        const buffer = await response.arrayBuffer();
        const result = await extractPDFToMarkdown(buffer, url);
        activityMonitor.logComplete(activityId, response.status);
        return {
          url,
          title: result.title,
          content: `PDF extracted and saved to: ${result.outputPath}\n\nPages: ${result.pages}\nCharacters: ${result.chars}`,
          error: null,
        };
      } catch (err) {
        const message = errorMessage(err);
        activityMonitor.logError(activityId, message);
        return { url, title: "", content: "", error: `PDF extraction failed: ${message}` };
      }
    }

    if (contentType.includes("application/octet-stream") ||
        contentType.includes("image/") ||
        contentType.includes("audio/") ||
        contentType.includes("video/") ||
        contentType.includes("application/zip")) {
      activityMonitor.logComplete(activityId, response.status);
      return {
        url,
        title: "",
        content: "",
        error: `Unsupported content type: ${contentType.split(";")[0]}`,
      };
    }

    const text = await response.text();
    const isHTML = contentType.includes("text/html") || contentType.includes("application/xhtml+xml");

    if (!isHTML) {
      // Plain text, JSON, markdown, etc. are passed through untouched.
      activityMonitor.logComplete(activityId, response.status);
      const title = extractTextTitle(text, url);
      return { url, title, content: text, error: null };
    }

    const { document } = parseHTML(text);
    const reader = new Readability(document as unknown as Document);
    const article = reader.parse();

    if (!article) {
      const rscResult = extractRSCContent(text);
      if (rscResult) {
        activityMonitor.logComplete(activityId, response.status);
        return { url, title: rscResult.title, content: rscResult.content, error: null };
      }

      activityMonitor.logComplete(activityId, response.status);

      // Distinguish client-rendered pages from HTML that simply has no article.
      const errorMsg = isLikelyJSRendered(text)
        ? "Page appears to be JavaScript-rendered (content loads dynamically)"
        : "Could not extract readable content from HTML structure";

      return {
        url,
        title: "",
        content: "",
        error: errorMsg,
      };
    }

    const markdown = turndown.turndown(article.content);
    activityMonitor.logComplete(activityId, response.status);

    if (markdown.length < MIN_USEFUL_CONTENT) {
      // Keep the short content, but flag it with an error so the caller can fall back.
      return {
        url,
        title: article.title || "",
        content: markdown,
        error: isLikelyJSRendered(text)
          ? "Page appears to be JavaScript-rendered (content loads dynamically)"
          : "Extracted content appears incomplete",
      };
    }

    return { url, title: article.title || "", content: markdown, error: null };
  } catch (err) {
    const message = errorMessage(err);
    if (isAbortError(err)) {
      // Timeouts and cancellations are logged as a completion with status 0, not as errors.
      activityMonitor.logComplete(activityId, 0);
    } else {
      activityMonitor.logError(activityId, message);
    }
    return { url, title: "", content: "", error: message };
  } finally {
    clearTimeout(timeoutId);
    signal?.removeEventListener("abort", onAbort);
  }
}
623
+
624
/**
 * Finds the first level-1 or level-2 markdown heading in `text` and returns
 * its text with asterisks removed and surrounding whitespace trimmed.
 *
 * @returns The cleaned heading, or `null` when there is no such heading or it
 *   is empty after cleaning.
 */
export function extractHeadingTitle(text: string): string | null {
  const heading = /^#{1,2}\s+(.+)/m.exec(text);
  if (heading === null) {
    return null;
  }
  const title = heading[1].replace(/\*+/g, "").trim();
  return title === "" ? null : title;
}
630
+
631
/**
 * Picks a title for plain-text content: the first level-1/2 markdown heading,
 * else the last path segment of `url`, else `url` itself.
 */
function extractTextTitle(text: string, url: string): string {
  const heading = extractHeadingTitle(text);
  if (heading !== null) {
    return heading;
  }
  const lastSegment = new URL(url).pathname.split("/").pop();
  return lastSegment || url;
}
634
+
635
/**
 * Extracts every URL in `urls`, scheduling the work through the shared
 * concurrency limiter.
 *
 * @param urls - URLs (or local video paths) to extract.
 * @param signal - Optional cancellation signal passed to each extraction.
 * @param options - Options applied to every extraction.
 * @returns One result per input, in the same order as `urls`.
 */
export async function fetchAllContent(
  urls: string[],
  signal?: AbortSignal,
  options?: ExtractOptions,
): Promise<ExtractedContent[]> {
  const pending = urls.map((url) => {
    const task = () => extractContent(url, signal, options);
    return fetchLimit(task);
  });
  return Promise.all(pending);
}