@oh-my-pi/pi-coding-agent 14.5.12 → 14.5.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +45 -0
- package/package.json +18 -10
- package/src/cli/jupyter-cli.ts +1 -1
- package/src/commit/pipeline.ts +4 -3
- package/src/config/model-equivalence.ts +49 -16
- package/src/config/model-registry.ts +100 -25
- package/src/config/model-resolver.ts +29 -15
- package/src/config/settings-schema.ts +20 -6
- package/src/config/settings.ts +9 -8
- package/src/config.ts +18 -6
- package/src/eval/backend.ts +43 -0
- package/src/eval/eval.lark +43 -0
- package/src/eval/index.ts +5 -0
- package/src/eval/js/context-manager.ts +717 -0
- package/src/eval/js/executor.ts +131 -0
- package/src/eval/js/index.ts +46 -0
- package/src/eval/js/prelude.ts +2 -0
- package/src/eval/js/prelude.txt +84 -0
- package/src/eval/js/tool-bridge.ts +124 -0
- package/src/eval/parse.ts +337 -0
- package/src/{ipy → eval/py}/executor.ts +2 -180
- package/src/{ipy → eval/py}/gateway-coordinator.ts +2 -2
- package/src/eval/py/index.ts +58 -0
- package/src/{ipy → eval/py}/kernel.ts +9 -45
- package/src/{ipy → eval/py}/prelude.py +39 -227
- package/src/eval/types.ts +48 -0
- package/src/export/html/template.generated.ts +1 -1
- package/src/export/html/template.js +8 -10
- package/src/extensibility/extensions/types.ts +2 -3
- package/src/internal-urls/docs-index.generated.ts +5 -5
- package/src/lsp/client.ts +9 -0
- package/src/lsp/index.ts +395 -0
- package/src/lsp/types.ts +15 -4
- package/src/main.ts +35 -14
- package/src/mcp/manager.ts +22 -0
- package/src/mcp/oauth-flow.ts +1 -1
- package/src/memories/index.ts +1 -1
- package/src/modes/acp/acp-event-mapper.ts +1 -1
- package/src/modes/components/{python-execution.ts → eval-execution.ts} +11 -4
- package/src/modes/components/login-dialog.ts +1 -1
- package/src/modes/components/oauth-selector.ts +2 -1
- package/src/modes/components/tool-execution.ts +3 -4
- package/src/modes/controllers/command-controller.ts +28 -8
- package/src/modes/controllers/input-controller.ts +4 -4
- package/src/modes/controllers/selector-controller.ts +2 -1
- package/src/modes/interactive-mode.ts +4 -5
- package/src/modes/rpc/rpc-client.ts +9 -0
- package/src/modes/rpc/rpc-mode.ts +6 -0
- package/src/modes/rpc/rpc-types.ts +9 -0
- package/src/modes/types.ts +3 -3
- package/src/modes/utils/ui-helpers.ts +2 -2
- package/src/prompts/system/system-prompt.md +3 -3
- package/src/prompts/tools/eval.md +92 -0
- package/src/prompts/tools/lsp.md +7 -3
- package/src/sdk.ts +64 -35
- package/src/session/agent-session.ts +152 -46
- package/src/session/messages.ts +1 -1
- package/src/slash-commands/builtin-registry.ts +1 -1
- package/src/system-prompt.ts +34 -66
- package/src/task/agents.ts +4 -5
- package/src/task/executor.ts +5 -9
- package/src/tools/archive-reader.ts +9 -3
- package/src/tools/browser/launch.ts +22 -0
- package/src/tools/browser/readable.ts +11 -6
- package/src/tools/browser/registry.ts +25 -244
- package/src/tools/browser/render.ts +1 -1
- package/src/tools/browser/tab-protocol.ts +101 -0
- package/src/tools/browser/tab-supervisor.ts +429 -0
- package/src/tools/browser/tab-worker-entry.ts +21 -0
- package/src/tools/browser/tab-worker.ts +1006 -0
- package/src/tools/browser.ts +17 -32
- package/src/tools/checkpoint.ts +2 -2
- package/src/tools/{python.ts → eval.ts} +324 -315
- package/src/tools/exit-plan-mode.ts +1 -1
- package/src/tools/image-gen.ts +2 -2
- package/src/tools/index.ts +62 -100
- package/src/tools/read.ts +0 -6
- package/src/tools/recipe/runners/pkg.ts +34 -32
- package/src/tools/renderers.ts +2 -2
- package/src/tools/resolve.ts +7 -2
- package/src/tools/todo-write.ts +0 -1
- package/src/tools/tool-timeouts.ts +2 -2
- package/src/tools/write.ts +8 -1
- package/src/utils/markit.ts +15 -7
- package/src/utils/tools-manager.ts +5 -5
- package/src/web/scrapers/crossref.ts +3 -3
- package/src/web/scrapers/devto.ts +1 -1
- package/src/web/scrapers/discourse.ts +5 -5
- package/src/web/scrapers/firefox-addons.ts +1 -1
- package/src/web/scrapers/flathub.ts +2 -2
- package/src/web/scrapers/gitlab.ts +1 -1
- package/src/web/scrapers/go-pkg.ts +2 -2
- package/src/web/scrapers/jetbrains-marketplace.ts +1 -1
- package/src/web/scrapers/mastodon.ts +9 -9
- package/src/web/scrapers/mdn.ts +11 -7
- package/src/web/scrapers/pub-dev.ts +1 -1
- package/src/web/scrapers/rawg.ts +3 -3
- package/src/web/scrapers/readthedocs.ts +1 -1
- package/src/web/scrapers/spdx.ts +1 -1
- package/src/web/scrapers/stackoverflow.ts +2 -2
- package/src/web/scrapers/types.ts +53 -39
- package/src/web/scrapers/w3c.ts +1 -1
- package/src/web/search/index.ts +5 -5
- package/src/web/search/provider.ts +121 -39
- package/src/web/search/providers/gemini.ts +4 -4
- package/src/web/search/render.ts +2 -2
- package/src/ipy/modules.ts +0 -144
- package/src/prompts/tools/python.md +0 -57
- package/src/tools/browser/vm.ts +0 -792
- /package/src/{ipy → eval/py}/cancellation.ts +0 -0
- /package/src/{ipy → eval/py}/prelude.ts +0 -0
- /package/src/{ipy → eval/py}/runtime.ts +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import * as os from "node:os";
|
|
2
2
|
import * as path from "node:path";
|
|
3
3
|
|
|
4
|
-
import { getOAuthProviders } from "@oh-my-pi/pi-ai";
|
|
4
|
+
import { getOAuthProviders } from "@oh-my-pi/pi-ai/utils/oauth";
|
|
5
5
|
import { getConfigDirName } from "@oh-my-pi/pi-utils";
|
|
6
6
|
import { invalidate as invalidateFsCache } from "../capability/fs";
|
|
7
7
|
import type { SettingPath, SettingValue } from "../config/settings";
|
package/src/system-prompt.ts
CHANGED
|
@@ -2,10 +2,9 @@
|
|
|
2
2
|
* System prompt construction and project context loading
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import * as fs from "node:fs";
|
|
6
5
|
import * as os from "node:os";
|
|
7
|
-
import * as path from "node:path";
|
|
8
6
|
import type { AgentTool } from "@oh-my-pi/pi-agent-core";
|
|
7
|
+
import { FileType, glob } from "@oh-my-pi/pi-natives";
|
|
9
8
|
import { $env, getGpuCachePath, getProjectDir, hasFsCode, isEnoent, logger, prompt } from "@oh-my-pi/pi-utils";
|
|
10
9
|
import { $ } from "bun";
|
|
11
10
|
import { contextFileCapability } from "./capability/context-file";
|
|
@@ -89,81 +88,44 @@ const AGENTS_MD_LIMIT = 200;
|
|
|
89
88
|
const SYSTEM_PROMPT_PREP_TIMEOUT_MS = 5000;
|
|
90
89
|
const AGENTS_MD_EXCLUDED_DIRS = new Set(["node_modules", ".git"]);
|
|
91
90
|
|
|
92
|
-
interface AgentsMdSearch {
|
|
91
|
+
export interface AgentsMdSearch {
|
|
93
92
|
scopePath: string;
|
|
94
93
|
limit: number;
|
|
95
94
|
pattern: string;
|
|
96
95
|
files: string[];
|
|
97
96
|
}
|
|
98
97
|
|
|
99
|
-
function normalizePath(value: string): string {
|
|
100
|
-
return value.replace(/\\/g, "/");
|
|
101
|
-
}
|
|
102
|
-
|
|
103
|
-
function shouldSkipAgentsDir(name: string): boolean {
|
|
104
|
-
if (AGENTS_MD_EXCLUDED_DIRS.has(name)) return true;
|
|
105
|
-
return name.startsWith(".");
|
|
106
|
-
}
|
|
107
|
-
|
|
108
|
-
async function collectAgentsMdFiles(
|
|
109
|
-
root: string,
|
|
110
|
-
dir: string,
|
|
111
|
-
depth: number,
|
|
112
|
-
limit: number,
|
|
113
|
-
discovered: Set<string>,
|
|
114
|
-
): Promise<void> {
|
|
115
|
-
if (depth > AGENTS_MD_MAX_DEPTH || discovered.size >= limit) {
|
|
116
|
-
return;
|
|
117
|
-
}
|
|
118
|
-
|
|
119
|
-
let entries: fs.Dirent[];
|
|
120
|
-
try {
|
|
121
|
-
entries = await fs.promises.readdir(dir, { withFileTypes: true });
|
|
122
|
-
} catch {
|
|
123
|
-
return;
|
|
124
|
-
}
|
|
125
|
-
|
|
126
|
-
if (depth >= AGENTS_MD_MIN_DEPTH) {
|
|
127
|
-
const hasAgentsMd = entries.some(entry => entry.isFile() && entry.name === "AGENTS.md");
|
|
128
|
-
if (hasAgentsMd) {
|
|
129
|
-
const relPath = normalizePath(path.relative(root, path.join(dir, "AGENTS.md")));
|
|
130
|
-
if (relPath.length > 0) {
|
|
131
|
-
discovered.add(relPath);
|
|
132
|
-
}
|
|
133
|
-
if (discovered.size >= limit) {
|
|
134
|
-
return;
|
|
135
|
-
}
|
|
136
|
-
}
|
|
137
|
-
}
|
|
138
|
-
|
|
139
|
-
if (depth === AGENTS_MD_MAX_DEPTH) {
|
|
140
|
-
return;
|
|
141
|
-
}
|
|
142
|
-
|
|
143
|
-
const childDirs = entries
|
|
144
|
-
.filter(entry => entry.isDirectory() && !shouldSkipAgentsDir(entry.name))
|
|
145
|
-
.map(entry => entry.name)
|
|
146
|
-
.sort();
|
|
147
|
-
|
|
148
|
-
await Promise.all(
|
|
149
|
-
childDirs.map(async child => {
|
|
150
|
-
if (discovered.size >= limit) return;
|
|
151
|
-
await collectAgentsMdFiles(root, path.join(dir, child), depth + 1, limit, discovered);
|
|
152
|
-
}),
|
|
153
|
-
);
|
|
154
|
-
}
|
|
155
|
-
|
|
156
98
|
async function listAgentsMdFiles(root: string, limit: number): Promise<string[]> {
|
|
157
99
|
try {
|
|
158
|
-
const
|
|
159
|
-
|
|
160
|
-
|
|
100
|
+
const result = await glob({
|
|
101
|
+
pattern: "**/AGENTS.md",
|
|
102
|
+
path: root,
|
|
103
|
+
fileType: FileType.File,
|
|
104
|
+
recursive: true,
|
|
105
|
+
hidden: false,
|
|
106
|
+
gitignore: true,
|
|
107
|
+
maxResults: limit * 4,
|
|
108
|
+
cache: true,
|
|
109
|
+
});
|
|
110
|
+
const files: string[] = [];
|
|
111
|
+
for (const m of result.matches) {
|
|
112
|
+
const rel = m.path.replace(/\\/g, "/");
|
|
113
|
+
if (!rel?.endsWith("AGENTS.md")) continue;
|
|
114
|
+
const segments = rel.split("/");
|
|
115
|
+
const depth = segments.length - 1;
|
|
116
|
+
if (depth < AGENTS_MD_MIN_DEPTH || depth > AGENTS_MD_MAX_DEPTH) continue;
|
|
117
|
+
const dirSegments = segments.slice(0, -1);
|
|
118
|
+
if (dirSegments.some(seg => AGENTS_MD_EXCLUDED_DIRS.has(seg) || seg.startsWith("."))) continue;
|
|
119
|
+
files.push(rel);
|
|
120
|
+
if (files.length >= limit) break;
|
|
121
|
+
}
|
|
122
|
+
return Array.from(new Set(files)).sort().slice(0, limit);
|
|
161
123
|
} catch {
|
|
162
124
|
return [];
|
|
163
125
|
}
|
|
164
126
|
}
|
|
165
127
|
|
|
166
|
-
async function buildAgentsMdSearch(cwd: string): Promise<AgentsMdSearch> {
|
|
128
|
+
export async function buildAgentsMdSearch(cwd: string): Promise<AgentsMdSearch> {
|
|
167
129
|
const files = await listAgentsMdFiles(cwd, AGENTS_MD_LIMIT);
|
|
168
130
|
return {
|
|
169
131
|
scopePath: ".",
|
|
@@ -445,6 +407,8 @@ export interface BuildSystemPromptOptions {
|
|
|
445
407
|
alwaysApplyRules?: AlwaysApplyRule[];
|
|
446
408
|
/** Whether secret obfuscation is active. When true, explains the redaction format in the prompt. */
|
|
447
409
|
secretsEnabled?: boolean;
|
|
410
|
+
/** Pre-loaded AGENTS.md search (skips discovery if provided). May be a Promise to allow early kick-off. */
|
|
411
|
+
agentsMdSearch?: AgentsMdSearch | Promise<AgentsMdSearch>;
|
|
448
412
|
}
|
|
449
413
|
|
|
450
414
|
/** Build the system prompt with tools, guidelines, and context */
|
|
@@ -470,6 +434,7 @@ export async function buildSystemPrompt(options: BuildSystemPromptOptions = {}):
|
|
|
470
434
|
mcpDiscoveryServerSummaries = [],
|
|
471
435
|
eagerTasks = false,
|
|
472
436
|
secretsEnabled = false,
|
|
437
|
+
agentsMdSearch: providedAgentsMdSearch,
|
|
473
438
|
} = options;
|
|
474
439
|
const resolvedCwd = cwd ?? getProjectDir();
|
|
475
440
|
|
|
@@ -480,7 +445,10 @@ export async function buildSystemPrompt(options: BuildSystemPromptOptions = {}):
|
|
|
480
445
|
const contextFilesPromise = providedContextFiles
|
|
481
446
|
? Promise.resolve(providedContextFiles)
|
|
482
447
|
: logger.time("loadProjectContextFiles", loadProjectContextFiles, { cwd: resolvedCwd });
|
|
483
|
-
const agentsMdSearchPromise =
|
|
448
|
+
const agentsMdSearchPromise =
|
|
449
|
+
providedAgentsMdSearch !== undefined
|
|
450
|
+
? Promise.resolve(providedAgentsMdSearch)
|
|
451
|
+
: logger.time("buildAgentsMdSearch", buildAgentsMdSearch, resolvedCwd);
|
|
484
452
|
const skillsPromise: Promise<Skill[]> =
|
|
485
453
|
providedSkills !== undefined
|
|
486
454
|
? Promise.resolve(providedSkills)
|
|
@@ -572,7 +540,7 @@ export async function buildSystemPrompt(options: BuildSystemPromptOptions = {}):
|
|
|
572
540
|
toolNames = Array.from(tools.keys());
|
|
573
541
|
} else {
|
|
574
542
|
// Use defaults
|
|
575
|
-
toolNames = ["read", "bash", "
|
|
543
|
+
toolNames = ["read", "bash", "eval", "edit", "write"]; // TODO: Why?
|
|
576
544
|
}
|
|
577
545
|
}
|
|
578
546
|
|
package/src/task/agents.ts
CHANGED
|
@@ -69,10 +69,7 @@ const EMBEDDED_AGENT_DEFS: EmbeddedAgentDef[] = [
|
|
|
69
69
|
},
|
|
70
70
|
];
|
|
71
71
|
|
|
72
|
-
|
|
73
|
-
name: def.fileName,
|
|
74
|
-
content: buildAgentContent(def),
|
|
75
|
-
}));
|
|
72
|
+
// Computed lazily on first loadBundledAgents() call to avoid eager prompt.render at module load.
|
|
76
73
|
|
|
77
74
|
export class AgentParsingError extends Error {
|
|
78
75
|
constructor(
|
|
@@ -133,7 +130,9 @@ export function loadBundledAgents(): AgentDefinition[] {
|
|
|
133
130
|
if (bundledAgentsCache !== null) {
|
|
134
131
|
return bundledAgentsCache;
|
|
135
132
|
}
|
|
136
|
-
bundledAgentsCache =
|
|
133
|
+
bundledAgentsCache = EMBEDDED_AGENT_DEFS.map(def =>
|
|
134
|
+
parseAgent(`embedded:${def.fileName}`, buildAgentContent(def), "bundled"),
|
|
135
|
+
);
|
|
137
136
|
return bundledAgentsCache;
|
|
138
137
|
}
|
|
139
138
|
|
package/src/task/executor.ts
CHANGED
|
@@ -532,16 +532,12 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
|
|
|
532
532
|
if (atMaxDepth && toolNames?.includes("task")) {
|
|
533
533
|
toolNames = toolNames.filter(name => name !== "task");
|
|
534
534
|
}
|
|
535
|
-
const pythonToolMode = settings.get("python.toolMode") ?? "both";
|
|
536
535
|
if (toolNames?.includes("exec")) {
|
|
536
|
+
const allowEvalPy = settings.get("eval.py") ?? true;
|
|
537
|
+
const allowEvalJs = settings.get("eval.js") ?? true;
|
|
537
538
|
const expanded = toolNames.filter(name => name !== "exec");
|
|
538
|
-
if (
|
|
539
|
-
|
|
540
|
-
} else if (pythonToolMode === "ipy-only") {
|
|
541
|
-
expanded.push("python");
|
|
542
|
-
} else {
|
|
543
|
-
expanded.push("python", "bash");
|
|
544
|
-
}
|
|
539
|
+
if (allowEvalPy || allowEvalJs) expanded.push("eval");
|
|
540
|
+
expanded.push("bash");
|
|
545
541
|
toolNames = Array.from(new Set(expanded));
|
|
546
542
|
}
|
|
547
543
|
|
|
@@ -557,7 +553,7 @@ export async function runSubprocess(options: ExecutorOptions): Promise<SingleRes
|
|
|
557
553
|
|
|
558
554
|
const lspEnabled = enableLsp ?? true;
|
|
559
555
|
const ircEnabled = subagentSettings.get("irc.enabled") === true;
|
|
560
|
-
const skipPythonPreflight = Array.isArray(toolNames) && !toolNames.includes("
|
|
556
|
+
const skipPythonPreflight = Array.isArray(toolNames) && !toolNames.includes("eval");
|
|
561
557
|
|
|
562
558
|
const outputChunks: string[] = [];
|
|
563
559
|
const finalOutputChunks: string[] = [];
|
|
@@ -1,6 +1,11 @@
|
|
|
1
|
-
import { unzipSync } from "fflate";
|
|
2
1
|
import { ToolError } from "./tool-errors";
|
|
3
2
|
|
|
3
|
+
let fflateModulePromise: Promise<typeof import("fflate")> | undefined;
|
|
4
|
+
async function loadFflate(): Promise<typeof import("fflate")> {
|
|
5
|
+
if (!fflateModulePromise) fflateModulePromise = import("fflate");
|
|
6
|
+
return fflateModulePromise;
|
|
7
|
+
}
|
|
8
|
+
|
|
4
9
|
export type ArchiveFormat = "zip" | "tar" | "tar.gz";
|
|
5
10
|
|
|
6
11
|
export interface ArchivePathCandidate {
|
|
@@ -150,7 +155,8 @@ async function readTarEntries(bytes: Uint8Array): Promise<ArchiveIndexEntry[]> {
|
|
|
150
155
|
return entries;
|
|
151
156
|
}
|
|
152
157
|
|
|
153
|
-
function readZipEntries(bytes: Uint8Array): ArchiveIndexEntry[] {
|
|
158
|
+
async function readZipEntries(bytes: Uint8Array): Promise<ArchiveIndexEntry[]> {
|
|
159
|
+
const { unzipSync } = await loadFflate();
|
|
154
160
|
let files: Record<string, Uint8Array>;
|
|
155
161
|
try {
|
|
156
162
|
files = unzipSync(bytes);
|
|
@@ -310,6 +316,6 @@ export async function openArchive(filePath: string): Promise<ArchiveReader> {
|
|
|
310
316
|
}
|
|
311
317
|
|
|
312
318
|
const bytes = await Bun.file(filePath).bytes();
|
|
313
|
-
const entries = format === "zip" ? readZipEntries(bytes) : await readTarEntries(bytes);
|
|
319
|
+
const entries = format === "zip" ? await readZipEntries(bytes) : await readTarEntries(bytes);
|
|
314
320
|
return new ArchiveReader(format, entries);
|
|
315
321
|
}
|
|
@@ -22,6 +22,14 @@ import stealthWorkerScript from "../puppeteer/13_stealth_worker.txt" with { type
|
|
|
22
22
|
import { ToolError } from "../tool-errors";
|
|
23
23
|
|
|
24
24
|
export const DEFAULT_VIEWPORT = { width: 1365, height: 768, deviceScaleFactor: 1.25 };
|
|
25
|
+
|
|
26
|
+
/**
|
|
27
|
+
* Per-CDP-message timeout applied to every puppeteer launch/connect. Set above
|
|
28
|
+
* `TOOL_TIMEOUTS.browser.max` (30s) so the agent-side wall-clock is the canonical
|
|
29
|
+
* limit; this constant only catches genuinely stuck CDP sockets (renderer wedged,
|
|
30
|
+
* connection dropped, etc.).
|
|
31
|
+
*/
|
|
32
|
+
export const BROWSER_PROTOCOL_TIMEOUT_MS = 60_000;
|
|
25
33
|
export const STEALTH_IGNORE_DEFAULT_ARGS = [
|
|
26
34
|
"--disable-extensions",
|
|
27
35
|
"--disable-default-apps",
|
|
@@ -55,6 +63,19 @@ export async function loadPuppeteer(): Promise<typeof Puppeteer> {
|
|
|
55
63
|
}
|
|
56
64
|
}
|
|
57
65
|
|
|
66
|
+
let puppeteerModuleWorker: typeof Puppeteer | undefined;
|
|
67
|
+
export async function loadPuppeteerInWorker(safeDir: string): Promise<typeof Puppeteer> {
|
|
68
|
+
if (puppeteerModuleWorker) return puppeteerModuleWorker;
|
|
69
|
+
const orig = process.cwd;
|
|
70
|
+
Object.defineProperty(process, "cwd", { value: () => safeDir, configurable: true });
|
|
71
|
+
try {
|
|
72
|
+
puppeteerModuleWorker = (await import("puppeteer-core")).default;
|
|
73
|
+
return puppeteerModuleWorker;
|
|
74
|
+
} finally {
|
|
75
|
+
Object.defineProperty(process, "cwd", { value: orig, configurable: true });
|
|
76
|
+
}
|
|
77
|
+
}
|
|
78
|
+
|
|
58
79
|
/**
|
|
59
80
|
* Lazily download Chromium on first browser launch via @puppeteer/browsers.
|
|
60
81
|
* Skipped when a system Chromium (NixOS) or PUPPETEER_EXECUTABLE_PATH is set.
|
|
@@ -243,6 +264,7 @@ export async function launchHeadlessBrowser(opts: LaunchHeadlessOptions): Promis
|
|
|
243
264
|
executablePath: await ensureChromiumExecutable(),
|
|
244
265
|
args: launchArgs,
|
|
245
266
|
ignoreDefaultArgs: [...STEALTH_IGNORE_DEFAULT_ARGS],
|
|
267
|
+
protocolTimeout: BROWSER_PROTOCOL_TIMEOUT_MS,
|
|
246
268
|
});
|
|
247
269
|
}
|
|
248
270
|
|
|
@@ -26,13 +26,17 @@ function normalize(text: string | null | undefined): string | undefined {
|
|
|
26
26
|
* CSS selector chain over the same pre-parsed DOM. Returns null if neither
|
|
27
27
|
* path yields usable content.
|
|
28
28
|
*/
|
|
29
|
-
export function extractReadableFromHtml(
|
|
29
|
+
export async function extractReadableFromHtml(
|
|
30
|
+
html: string,
|
|
31
|
+
url: string,
|
|
32
|
+
format: ReadableFormat,
|
|
33
|
+
): Promise<ReadableResult | null> {
|
|
30
34
|
const { document } = parseHTML(html);
|
|
31
35
|
|
|
32
36
|
// --- Primary: Readability article extraction ---
|
|
33
37
|
const article = new Readability(document).parse();
|
|
34
38
|
if (article) {
|
|
35
|
-
const result = toReadableResult(url, format, article.textContent, article.content, {
|
|
39
|
+
const result = await toReadableResult(url, format, article.textContent, article.content, {
|
|
36
40
|
title: article.title,
|
|
37
41
|
byline: article.byline,
|
|
38
42
|
excerpt: article.excerpt,
|
|
@@ -55,7 +59,7 @@ export function extractReadableFromHtml(html: string, url: string, format: Reada
|
|
|
55
59
|
const innerHTML = el.innerHTML?.trim();
|
|
56
60
|
const textContent = el.textContent?.trim();
|
|
57
61
|
if (!innerHTML || !textContent) continue;
|
|
58
|
-
const result = toReadableResult(url, format, textContent, innerHTML, {
|
|
62
|
+
const result = await toReadableResult(url, format, textContent, innerHTML, {
|
|
59
63
|
title: document.title,
|
|
60
64
|
excerpt: textContent.slice(0, 240),
|
|
61
65
|
length: textContent.length,
|
|
@@ -67,15 +71,16 @@ export function extractReadableFromHtml(html: string, url: string, format: Reada
|
|
|
67
71
|
}
|
|
68
72
|
|
|
69
73
|
/** Shared builder for both extraction paths. */
|
|
70
|
-
function toReadableResult(
|
|
74
|
+
async function toReadableResult(
|
|
71
75
|
url: string,
|
|
72
76
|
format: ReadableFormat,
|
|
73
77
|
textContent: string | null | undefined,
|
|
74
78
|
htmlContent: string | null | undefined,
|
|
75
79
|
meta: { title?: string | null; byline?: string | null; excerpt?: string | null; length?: number | null },
|
|
76
|
-
): ReadableResult | null {
|
|
80
|
+
): Promise<ReadableResult | null> {
|
|
77
81
|
const text = normalize(textContent);
|
|
78
|
-
const markdown =
|
|
82
|
+
const markdown =
|
|
83
|
+
format === "markdown" ? (normalize(await htmlToBasicMarkdown(htmlContent ?? "")) ?? text) : undefined;
|
|
79
84
|
const normalizedText = format === "text" ? text : undefined;
|
|
80
85
|
if (!normalizedText && !markdown) return null;
|
|
81
86
|
return {
|