@kenkaiiii/ggcoder 4.3.231 → 4.3.232
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +3 -0
- package/dist/cli.js.map +1 -1
- package/dist/config.d.ts +1 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +7 -1
- package/dist/config.js.map +1 -1
- package/dist/config.test.d.ts +2 -0
- package/dist/config.test.d.ts.map +1 -0
- package/dist/config.test.js +29 -0
- package/dist/config.test.js.map +1 -0
- package/dist/core/ideal-review.d.ts +20 -0
- package/dist/core/ideal-review.d.ts.map +1 -0
- package/dist/core/ideal-review.js +55 -0
- package/dist/core/ideal-review.js.map +1 -0
- package/dist/core/ideal-review.test.d.ts +2 -0
- package/dist/core/ideal-review.test.d.ts.map +1 -0
- package/dist/core/ideal-review.test.js +59 -0
- package/dist/core/ideal-review.test.js.map +1 -0
- package/dist/core/settings-manager.d.ts +1 -0
- package/dist/core/settings-manager.d.ts.map +1 -1
- package/dist/core/settings-manager.js +2 -0
- package/dist/core/settings-manager.js.map +1 -1
- package/dist/tools/html-extract.d.ts +58 -0
- package/dist/tools/html-extract.d.ts.map +1 -0
- package/dist/tools/html-extract.js +130 -0
- package/dist/tools/html-extract.js.map +1 -0
- package/dist/tools/html-extract.test.d.ts +2 -0
- package/dist/tools/html-extract.test.d.ts.map +1 -0
- package/dist/tools/html-extract.test.js +60 -0
- package/dist/tools/html-extract.test.js.map +1 -0
- package/dist/tools/pdf-extract.d.ts +18 -0
- package/dist/tools/pdf-extract.d.ts.map +1 -0
- package/dist/tools/pdf-extract.js +43 -0
- package/dist/tools/pdf-extract.js.map +1 -0
- package/dist/tools/pdf-extract.test.d.ts +2 -0
- package/dist/tools/pdf-extract.test.d.ts.map +1 -0
- package/dist/tools/pdf-extract.test.js +15 -0
- package/dist/tools/pdf-extract.test.js.map +1 -0
- package/dist/tools/prompt-hints.d.ts.map +1 -1
- package/dist/tools/prompt-hints.js +2 -1
- package/dist/tools/prompt-hints.js.map +1 -1
- package/dist/tools/web-fetch.d.ts +16 -1
- package/dist/tools/web-fetch.d.ts.map +1 -1
- package/dist/tools/web-fetch.js +357 -45
- package/dist/tools/web-fetch.js.map +1 -1
- package/dist/tools/web-fetch.test.js +263 -2
- package/dist/tools/web-fetch.test.js.map +1 -1
- package/dist/tools/web-search.d.ts +14 -0
- package/dist/tools/web-search.d.ts.map +1 -1
- package/dist/tools/web-search.js +321 -35
- package/dist/tools/web-search.js.map +1 -1
- package/dist/tools/web-search.test.js +144 -1
- package/dist/tools/web-search.test.js.map +1 -1
- package/dist/ui/App.d.ts +2 -0
- package/dist/ui/App.d.ts.map +1 -1
- package/dist/ui/App.js +54 -1
- package/dist/ui/App.js.map +1 -1
- package/dist/ui/app-items.d.ts +14 -1
- package/dist/ui/app-items.d.ts.map +1 -1
- package/dist/ui/app-items.js +2 -0
- package/dist/ui/app-items.js.map +1 -1
- package/dist/ui/components/IdealHookMessage.d.ts +12 -0
- package/dist/ui/components/IdealHookMessage.d.ts.map +1 -0
- package/dist/ui/components/IdealHookMessage.js +24 -0
- package/dist/ui/components/IdealHookMessage.js.map +1 -0
- package/dist/ui/hooks/useAgentLoop.d.ts +2 -0
- package/dist/ui/hooks/useAgentLoop.d.ts.map +1 -1
- package/dist/ui/hooks/useAgentLoop.js +45 -1
- package/dist/ui/hooks/useAgentLoop.js.map +1 -1
- package/dist/ui/render.d.ts +3 -0
- package/dist/ui/render.d.ts.map +1 -1
- package/dist/ui/render.js +2 -0
- package/dist/ui/render.js.map +1 -1
- package/dist/ui/terminal-history.d.ts.map +1 -1
- package/dist/ui/terminal-history.js +8 -0
- package/dist/ui/terminal-history.js.map +1 -1
- package/dist/ui/transcript/TranscriptRenderer.d.ts.map +1 -1
- package/dist/ui/transcript/TranscriptRenderer.js +3 -0
- package/dist/ui/transcript/TranscriptRenderer.js.map +1 -1
- package/dist/ui/transcript/spacing.d.ts +2 -2
- package/dist/ui/transcript/spacing.d.ts.map +1 -1
- package/dist/ui/transcript/spacing.js +1 -0
- package/dist/ui/transcript/spacing.js.map +1 -1
- package/dist/ui/transcript/spacing.test.js +2 -0
- package/dist/ui/transcript/spacing.test.js.map +1 -1
- package/dist/ui/tui-history-parity.test.js +8 -0
- package/dist/ui/tui-history-parity.test.js.map +1 -1
- package/dist/utils/format.js +7 -0
- package/dist/utils/format.js.map +1 -1
- package/package.json +10 -4
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
/**
 * Minimum article length, in characters, for a Readability extraction to be
 * trusted. The value is handed to Readability as its `charThreshold` option
 * and is also compared against the parsed article's `length`; anything
 * shorter makes `extractToMarkdown` return `null` so the caller can fall back
 * to the regex-based `htmlToCleanText` instead of emitting degraded output
 * for short or empty pages.
 */
|
|
6
|
+
const CHAR_THRESHOLD = 250;
|
|
7
|
+
/**
 * Error signalling that the HTML→Markdown extraction stack cannot be used.
 * `loadExtractor` throws it when an optional dependency fails to import or
 * when a module loads without the export this file expects, which lets
 * callers tell "extractor missing" apart from other failures.
 */
export class ExtractorUnavailable extends Error {
    /** Overrides the inherited `Error` name so the error is identifiable by name. */
    name = "ExtractorUnavailable";
    /**
     * @param message - Human-readable reason the extractor is unavailable.
     */
    constructor(message) {
        super(message);
    }
}
|
|
14
|
+
/** Memoised module bundle; stays `null` until the first successful load. */
let cached = null;
/**
 * Resolve the optional HTML→Markdown stack (linkedom, Readability, Turndown
 * and its GFM plugin) on first use and hand back the same bundle afterwards.
 *
 * The specifiers are passed to `import()` as runtime values rather than
 * string literals so `tsc` does not statically require the optional
 * packages.
 *
 * @returns An object with `parseHTML`, `Readability`, `TurndownService` and `gfm`.
 * @throws ExtractorUnavailable if any import fails or an expected export is
 *   missing, so callers can degrade to the regex path.
 */
export async function loadExtractor() {
    if (cached !== null) {
        return cached;
    }
    const specifiers = ["linkedom", "@mozilla/readability", "turndown", "turndown-plugin-gfm"];
    let modules;
    try {
        modules = await Promise.all(specifiers.map((specifier) => import(specifier)));
    }
    catch (err) {
        // Surface every load failure as the one error type callers check for.
        const reason = err instanceof Error ? err.message : "failed to load extraction modules";
        throw new ExtractorUnavailable(reason);
    }
    const [linkedom, readability, turndown, gfmMod] = modules;
    const bundle = {
        parseHTML: linkedom.parseHTML,
        Readability: readability.Readability,
        TurndownService: turndown.default,
        gfm: gfmMod.gfm,
    };
    // A module that imports but lacks its expected export is as unusable as a missing one.
    if (Object.values(bundle).some((exported) => !exported)) {
        throw new ExtractorUnavailable("extraction modules loaded but expected exports are missing");
    }
    cached = bundle;
    return cached;
}
|
|
53
|
+
/**
 * Build a Turndown service configured for this module's Markdown output:
 * ATX headings, fenced code blocks, `-` bullets, and the GFM plugin, plus two
 * custom rules (`fencedPre`, `bareAnchor`).
 *
 * @param mods - Module bundle; only `TurndownService` and `gfm` are read here.
 * @returns The configured Turndown service instance.
 */
function buildTurndown(mods) {
    const service = new mods.TurndownService({
        headingStyle: "atx",
        codeBlockStyle: "fenced",
        bulletListMarker: "-",
    });
    service.use(mods.gfm);
    // Emit every <pre> as a fenced block built from its raw text content, so
    // whitespace inside the block is kept rather than collapsed.
    service.addRule("fencedPre", {
        filter: ["pre"],
        replacement(_content, node) {
            // Drop a single trailing newline; the closing fence adds its own.
            const code = (node.textContent ?? "").replace(/\n$/, "");
            // A line inside the code that starts with 3+ backticks would close
            // a plain ``` fence early. Make the fence one backtick longer than
            // the longest such run so the whole block stays fenced.
            let fenceSize = 3;
            for (const match of code.matchAll(/^ {0,3}(`{3,})/gm)) {
                fenceSize = Math.max(fenceSize, match[1].length + 1);
            }
            const fence = "`".repeat(fenceSize);
            return `\n\n${fence}\n${code}\n${fence}\n\n`;
        },
    });
    // Drop anchors without an href (navigation/JS noise) — keep their text only.
    service.addRule("bareAnchor", {
        filter(node) {
            return node.nodeName === "A" && !node.getAttribute("href");
        },
        replacement(content) {
            return content;
        },
    });
    return service;
}
|
|
80
|
+
/**
 * Collapse every run of two or more empty lines into a single empty line,
 * but only outside backtick-fenced code blocks; lines between an opening
 * fence and its matching closing fence are copied through verbatim so blank
 * lines that belong to code are not lost.
 */
function collapseBlankLinesOutsideFences(markdown) {
    const fencePattern = /^ {0,3}(`{3,})/;
    const kept = [];
    let openFence = null;
    let blankRun = 0;
    for (const line of markdown.split("\n")) {
        const marker = fencePattern.exec(line)?.[1];
        if (openFence !== null) {
            kept.push(line);
            // A fence closes on a line holding only backticks, at least as many as opened it.
            if (marker !== undefined && marker.length >= openFence.length && line.trim() === marker) {
                openFence = null;
            }
            continue;
        }
        if (line === "") {
            blankRun += 1;
            if (blankRun > 1) {
                continue;
            }
        }
        else {
            blankRun = 0;
            if (marker !== undefined) {
                openFence = marker;
            }
        }
        kept.push(line);
    }
    return kept.join("\n");
}
/**
 * Run Readability over the page HTML and convert the extracted main content
 * to Markdown.
 *
 * @param html - Raw page HTML.
 * @param url - Page URL, used as the `<base href>` when the page has none.
 * @returns `{ markdown, title }`, or `null` when parsing, Readability or the
 *   Markdown conversion fails, when the article is shorter than
 *   `CHAR_THRESHOLD`, or when the resulting Markdown is empty, so the caller
 *   can fall back to `htmlToCleanText`.
 * @throws Whatever `loadExtractor` throws (documented there as
 *   `ExtractorUnavailable`) when the optional extraction modules cannot be
 *   loaded; that call is not wrapped here.
 */
export async function extractToMarkdown(html, url) {
    const mods = await loadExtractor();
    let document;
    try {
        document = mods.parseHTML(html).document;
    }
    catch {
        return null;
    }
    // Give the document a <base href> when it has none, so relative links can
    // resolve against the page URL. Skipped when no URL was supplied, which
    // would otherwise produce a meaningless base.
    try {
        if (url && !document.querySelector("base")) {
            const base = document.createElement("base");
            base.setAttribute("href", url);
            document.head?.appendChild(base);
        }
    }
    catch {
        // best-effort; ignore if the DOM shim disallows it
    }
    let article;
    try {
        article = new mods.Readability(document, { charThreshold: CHAR_THRESHOLD }).parse();
    }
    catch {
        return null;
    }
    if (!article || !article.content)
        return null;
    if ((article.length ?? 0) < CHAR_THRESHOLD)
        return null;
    let markdown;
    try {
        markdown = buildTurndown(mods).turndown(article.content);
    }
    catch {
        return null;
    }
    // Tidy blank-line runs in prose only; fenced code keeps its blank lines.
    markdown = collapseBlankLinesOutsideFences(markdown).trim();
    if (!markdown)
        return null;
    return { markdown, title: article.title ?? undefined };
}
|
|
130
|
+
//# sourceMappingURL=html-extract.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-extract.js","sourceRoot":"","sources":["../../src/tools/html-extract.ts"],"names":[],"mappings":"AAEA;;;;GAIG;AACH,MAAM,cAAc,GAAG,GAAG,CAAC;AAE3B,gFAAgF;AAChF,MAAM,OAAO,oBAAqB,SAAQ,KAAK;IAC7C,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,sBAAsB,CAAC;IACrC,CAAC;CACF;AA0CD,IAAI,MAAM,GAA4B,IAAI,CAAC;AAE3C;;;;;;GAMG;AACH,MAAM,CAAC,KAAK,UAAU,aAAa;IACjC,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC;IAE1B,MAAM,YAAY,GAAW,UAAU,CAAC;IACxC,MAAM,eAAe,GAAW,sBAAsB,CAAC;IACvD,MAAM,YAAY,GAAW,UAAU,CAAC;IACxC,MAAM,OAAO,GAAW,qBAAqB,CAAC;IAE9C,IAAI,CAAC;QACH,MAAM,CAAC,QAAQ,EAAE,WAAW,EAAE,QAAQ,EAAE,MAAM,CAAC,GAAG,CAAC,MAAM,OAAO,CAAC,GAAG,CAAC;YACnE,MAAM,CAAC,YAAY,CAAC;YACpB,MAAM,CAAC,eAAe,CAAC;YACvB,MAAM,CAAC,YAAY,CAAC;YACpB,MAAM,CAAC,OAAO,CAAC;SAChB,CAAC,CAAmE,CAAC;QAEtE,IAAI,CAAC,QAAQ,CAAC,SAAS,IAAI,CAAC,WAAW,CAAC,WAAW,IAAI,CAAC,QAAQ,CAAC,OAAO,IAAI,CAAC,MAAM,CAAC,GAAG,EAAE,CAAC;YACxF,MAAM,IAAI,oBAAoB,CAAC,4DAA4D,CAAC,CAAC;QAC/F,CAAC;QAED,MAAM,GAAG;YACP,SAAS,EAAE,QAAQ,CAAC,SAAS;YAC7B,WAAW,EAAE,WAAW,CAAC,WAAW;YACpC,eAAe,EAAE,QAAQ,CAAC,OAAO;YACjC,GAAG,EAAE,MAAM,CAAC,GAAG;SAChB,CAAC;QACF,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,oBAAoB;YAAE,MAAM,GAAG,CAAC;QACnD,MAAM,IAAI,oBAAoB,CAC5B,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,mCAAmC,CACzE,CAAC;IACJ,CAAC;AACH,CAAC;AAED,SAAS,aAAa,CAAC,IAAsB;IAC3C,MAAM,OAAO,GAAG,IAAI,IAAI,CAAC,eAAe,CAAC;QACvC,YAAY,EAAE,KAAK;QACnB,cAAc,EAAE,QAAQ;QACxB,gBAAgB,EAAE,GAAG;KACtB,CAAC,CAAC;IACH,OAAO,CAAC,GAAG,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACtB,yEAAyE;IACzE,0CAA0C;IAC1C,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE;QAC3B,MAAM,EAAE,CAAC,KAAK,CAAC;QACf,WAAW,CAAC,QAAgB,EAAE,IAAqC;YACjE,MAAM,IAAI,GAAG,CAAC,IAAI,CAAC,WAAW,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,KAAK,EAAE,EAAE,CAAC,CAAC;YACzD,OAAO,eAAe,IAAI,cAAc,CAAC;QAC3C,CAAC;KACF,CAAC,CAAC;IACH,6EAA6E;IAC7E,OAAO,CAAC,OAAO,CAAC,YAAY,EAAE;QAC5B,MAAM,CAAC,IAAqE;YAC1E,OAAO,IAAI,CAAC,QAAQ,KAAK,GAAG,IAAI,CAAC,IAAI,CAAC,YAAY,CAAC,MAAM,CAAC,CAAC;QAC7D,CAAC;QACD,WAAW,CAAC,OAAe;
YACzB,OAAO,OAAO,CAAC;QACjB,CAAC;KACF,CAAC,CAAC;IACH,OAAO,OAAO,CAAC;AACjB,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,iBAAiB,CACrC,IAAY,EACZ,GAAW;IAEX,MAAM,IAAI,GAAG,MAAM,aAAa,EAAE,CAAC;IAEnC,IAAI,QAAkB,CAAC;IACvB,IAAI,CAAC;QACH,QAAQ,GAAG,IAAI,CAAC,SAAS,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC;IAC3C,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IAED,2EAA2E;IAC3E,wEAAwE;IACxE,IAAI,CAAC;QACH,IAAI,CAAC,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,EAAE,CAAC;YACpC,MAAM,IAAI,GAAG,QAAQ,CAAC,aAAa,CAAC,MAAM,CAAC,CAAC;YAC5C,IAAI,CAAC,YAAY,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;YAC/B,QAAQ,CAAC,IAAI,EAAE,WAAW,CAAC,IAAI,CAAC,CAAC;QACnC,CAAC;IACH,CAAC;IAAC,MAAM,CAAC;QACP,mDAAmD;IACrD,CAAC;IAED,IAAI,OAAkC,CAAC;IACvC,IAAI,CAAC;QACH,OAAO,GAAG,IAAI,IAAI,CAAC,WAAW,CAAC,QAAQ,EAAE,EAAE,aAAa,EAAE,cAAc,EAAE,CAAC,CAAC,KAAK,EAAE,CAAC;IACtF,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IAED,IAAI,CAAC,OAAO,IAAI,CAAC,OAAO,CAAC,OAAO;QAAE,OAAO,IAAI,CAAC;IAC9C,IAAI,CAAC,OAAO,CAAC,MAAM,IAAI,CAAC,CAAC,GAAG,cAAc;QAAE,OAAO,IAAI,CAAC;IAExD,IAAI,QAAgB,CAAC;IACrB,IAAI,CAAC;QACH,QAAQ,GAAG,aAAa,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,OAAO,CAAC,CAAC;IAC3D,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,IAAI,CAAC;IACd,CAAC;IAED,QAAQ,GAAG,QAAQ,CAAC,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC;IACtD,IAAI,CAAC,QAAQ;QAAE,OAAO,IAAI,CAAC;IAE3B,OAAO,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,CAAC,KAAK,IAAI,SAAS,EAAE,CAAC;AACzD,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-extract.test.d.ts","sourceRoot":"","sources":["../../src/tools/html-extract.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { extractToMarkdown, loadExtractor } from "./html-extract.js";
|
|
3
|
+
// `describe.skipIf` reads its condition while the suite is being collected,
// so the availability probe is settled with top-level await before any suite
// is declared: a successful load yields true, any rejection yields false.
const extractorInstalled = await loadExtractor().then(() => true, () => false);
|
|
7
|
+
const ARTICLE_HTML = `
|
|
8
|
+
<html>
|
|
9
|
+
<head><title>Test Article</title></head>
|
|
10
|
+
<body>
|
|
11
|
+
<nav>Home Products Pricing Docs</nav>
|
|
12
|
+
<aside class="advert">Sponsored: buy now</aside>
|
|
13
|
+
<article>
|
|
14
|
+
<h1>Understanding the Fetch API</h1>
|
|
15
|
+
<p>The Fetch API provides a modern interface for making HTTP requests in
|
|
16
|
+
JavaScript. It returns promises and is widely supported across browsers.
|
|
17
|
+
This paragraph is intentionally long enough to clear the Readability
|
|
18
|
+
content-length threshold so extraction succeeds in tests reliably.</p>
|
|
19
|
+
<h2>Example</h2>
|
|
20
|
+
<pre><code>const res = await fetch(url);
|
|
21
|
+
const data = await res.json();</code></pre>
|
|
22
|
+
<h2>Comparison</h2>
|
|
23
|
+
<table>
|
|
24
|
+
<thead><tr><th>Method</th><th>Returns</th></tr></thead>
|
|
25
|
+
<tbody>
|
|
26
|
+
<tr><td>fetch</td><td>Promise</td></tr>
|
|
27
|
+
<tr><td>XHR</td><td>callback</td></tr>
|
|
28
|
+
</tbody>
|
|
29
|
+
</table>
|
|
30
|
+
<ul><li>First item</li><li>Second item</li></ul>
|
|
31
|
+
<p>A closing paragraph with more substantial body text so the article
|
|
32
|
+
comfortably exceeds the minimum length that Readability requires before it
|
|
33
|
+
is considered a real article worth extracting.</p>
|
|
34
|
+
</article>
|
|
35
|
+
<footer>Legal links and copyright</footer>
|
|
36
|
+
</body>
|
|
37
|
+
</html>
|
|
38
|
+
`;
|
|
39
|
+
// Suite is skipped entirely when the optional extraction modules are absent.
describe.skipIf(!extractorInstalled)("extractToMarkdown", () => {
    it("extracts main content as markdown with title, code fence, and table", async () => {
        const extracted = await extractToMarkdown(ARTICLE_HTML, "https://example.com/article");
        expect(extracted).not.toBeNull();
        const markdown = extracted?.markdown ?? "";
        expect(extracted?.title).toBe("Test Article");
        // Heading, code fence and code body all survive the conversion.
        for (const fragment of ["Understanding the Fetch API", "```", "await fetch(url)"]) {
            expect(markdown).toContain(fragment);
        }
        // GFM table rendered with pipe separators.
        expect(markdown).toMatch(/\|\s*Method\s*\|/);
        expect(markdown).toContain("First item");
        // Nav, ads, and footer boilerplate dropped.
        for (const boilerplate of ["Sponsored: buy now", "Home Products Pricing"]) {
            expect(markdown).not.toContain(boilerplate);
        }
    });
    it("returns null for trivially short content", async () => {
        const shortPage = "<html><body><p>Too short.</p></body></html>";
        const extracted = await extractToMarkdown(shortPage, "https://example.com/short");
        expect(extracted).toBeNull();
    });
});
|
|
60
|
+
//# sourceMappingURL=html-extract.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"html-extract.test.js","sourceRoot":"","sources":["../../src/tools/html-extract.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,aAAa,EAAE,MAAM,mBAAmB,CAAC;AAErE,kFAAkF;AAClF,MAAM,kBAAkB,GAAG,MAAM,aAAa,EAAE;KAC7C,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC;KAChB,KAAK,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;AAEtB,MAAM,YAAY,GAAG;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;CA+BpB,CAAC;AAEF,QAAQ,CAAC,MAAM,CAAC,CAAC,kBAAkB,CAAC,CAAC,mBAAmB,EAAE,GAAG,EAAE;IAC7D,EAAE,CAAC,qEAAqE,EAAE,KAAK,IAAI,EAAE;QACnF,MAAM,MAAM,GAAG,MAAM,iBAAiB,CAAC,YAAY,EAAE,6BAA6B,CAAC,CAAC;QACpF,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,CAAC,QAAQ,EAAE,CAAC;QAC9B,MAAM,EAAE,GAAG,MAAM,EAAE,QAAQ,IAAI,EAAE,CAAC;QAElC,MAAM,CAAC,MAAM,EAAE,KAAK,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QAC3C,MAAM,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,6BAA6B,CAAC,CAAC;QACpD,MAAM,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;QAC5B,MAAM,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,kBAAkB,CAAC,CAAC;QACzC,2CAA2C;QAC3C,MAAM,CAAC,EAAE,CAAC,CAAC,OAAO,CAAC,kBAAkB,CAAC,CAAC;QACvC,MAAM,CAAC,EAAE,CAAC,CAAC,SAAS,CAAC,YAAY,CAAC,CAAC;QACnC,4CAA4C;QAC5C,MAAM,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,oBAAoB,CAAC,CAAC;QAC/C,MAAM,CAAC,EAAE,CAAC,CAAC,GAAG,CAAC,SAAS,CAAC,uBAAuB,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,KAAK,IAAI,EAAE;QACxD,MAAM,MAAM,GAAG,MAAM,iBAAiB,CACpC,6CAA6C,EAC7C,2BAA2B,CAC5B,CAAC;QACF,MAAM,CAAC,MAAM,CAAC,CAAC,QAAQ,EAAE,CAAC;IAC5B,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDF text extraction via the optional `unpdf` dependency (bundles pdf.js,
|
|
3
|
+
* zero-config). Lazy-loaded behind a function so the base install works without
|
|
4
|
+
* it; callers degrade to an install-hint string when it is absent.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Error raised when the optional `unpdf` dependency cannot be used. In the
 * accompanying implementation it is thrown both when importing `unpdf` fails
 * and when the module loads without `getDocumentProxy` / `extractText`.
 */
|
|
7
|
+
export declare class PdfExtractorUnavailable extends Error {
|
|
8
|
+
constructor(message: string);
|
|
9
|
+
}
|
|
10
|
+
/**
 * Extract merged page text from a PDF buffer.
 *
 * @param bytes - Raw PDF file contents.
 * @returns The concatenated text of the document (`text`) and its total page
 *   count (`pages`).
 * @throws PdfExtractorUnavailable if `unpdf` is not present.
 */
|
|
14
|
+
export declare function extractPdfText(bytes: Uint8Array): Promise<{
|
|
15
|
+
text: string;
|
|
16
|
+
pages: number;
|
|
17
|
+
}>;
|
|
18
|
+
//# sourceMappingURL=pdf-extract.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf-extract.d.ts","sourceRoot":"","sources":["../../src/tools/pdf-extract.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,oEAAoE;AACpE,qBAAa,uBAAwB,SAAQ,KAAK;gBACpC,OAAO,EAAE,MAAM;CAI5B;AA6BD;;;GAGG;AACH,wBAAsB,cAAc,CAAC,KAAK,EAAE,UAAU,GAAG,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,KAAK,EAAE,MAAM,CAAA;CAAE,CAAC,CAKhG"}
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* PDF text extraction via the optional `unpdf` dependency (bundles pdf.js,
|
|
3
|
+
* zero-config). Lazy-loaded behind a function so the base install works without
|
|
4
|
+
* it; callers degrade to an install-hint string when it is absent.
|
|
5
|
+
*/
|
|
6
|
+
/**
 * Error signalling that the optional `unpdf` dependency cannot be used,
 * either because importing it failed or because it loaded without the
 * exports this module relies on.
 */
export class PdfExtractorUnavailable extends Error {
    /** Overrides the inherited `Error` name so the error is identifiable by name. */
    name = "PdfExtractorUnavailable";
    /**
     * @param message - Human-readable reason `unpdf` is unavailable.
     */
    constructor(message) {
        super(message);
    }
}
|
|
13
|
+
/** Memoised `unpdf` module namespace; stays `null` until a load succeeds. */
let cached = null;
/**
 * Import `unpdf` on first use and reuse the loaded module afterwards.
 *
 * @returns The `unpdf` module namespace.
 * @throws PdfExtractorUnavailable if the import fails or the module lacks
 *   `getDocumentProxy` / `extractText`.
 */
async function loadUnpdf() {
    if (cached !== null) {
        return cached;
    }
    // Non-literal specifier so tsc does not statically resolve the optional dep.
    const moduleName = "unpdf";
    let mod;
    try {
        mod = await import(moduleName);
    }
    catch (err) {
        // Report every import failure as the one error type callers check for.
        const reason = err instanceof Error ? err.message : "failed to load unpdf";
        throw new PdfExtractorUnavailable(reason);
    }
    const hasExpectedExports = Boolean(mod.getDocumentProxy) && Boolean(mod.extractText);
    if (!hasExpectedExports) {
        throw new PdfExtractorUnavailable("unpdf loaded but expected exports are missing");
    }
    cached = mod;
    return cached;
}
|
|
33
|
+
/**
 * Extract merged page text from a PDF buffer.
 *
 * @param bytes - Raw PDF contents, passed straight to `unpdf.getDocumentProxy`.
 * @returns `{ text, pages }`, where `text` is the result of
 *   `unpdf.extractText` with `mergePages: true` and `pages` is its
 *   `totalPages`.
 * @throws PdfExtractorUnavailable if `unpdf` is not present. Errors raised by
 *   `unpdf` while opening or reading the document are not caught here and
 *   propagate to the caller.
 */
export async function extractPdfText(bytes) {
    const unpdf = await loadUnpdf();
    const pdf = await unpdf.getDocumentProxy(bytes);
    try {
        const { totalPages, text } = await unpdf.extractText(pdf, { mergePages: true });
        return { text, pages: totalPages };
    }
    finally {
        // Release the document proxy once extraction is done, on success or
        // failure, so repeated fetches do not accumulate open documents.
        // NOTE(review): assumes the proxy exposes pdf.js's `destroy()`; the
        // optional call makes this a no-op if it does not.
        try {
            await pdf.destroy?.();
        }
        catch {
            // best-effort cleanup; must not mask the extraction result or error
        }
    }
}
|
|
43
|
+
//# sourceMappingURL=pdf-extract.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf-extract.js","sourceRoot":"","sources":["../../src/tools/pdf-extract.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AAEH,oEAAoE;AACpE,MAAM,OAAO,uBAAwB,SAAQ,KAAK;IAChD,YAAY,OAAe;QACzB,KAAK,CAAC,OAAO,CAAC,CAAC;QACf,IAAI,CAAC,IAAI,GAAG,yBAAyB,CAAC;IACxC,CAAC;CACF;AAUD,IAAI,MAAM,GAAuB,IAAI,CAAC;AAEtC,KAAK,UAAU,SAAS;IACtB,IAAI,MAAM;QAAE,OAAO,MAAM,CAAC;IAC1B,6EAA6E;IAC7E,MAAM,UAAU,GAAW,OAAO,CAAC;IACnC,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,CAAC,MAAM,MAAM,CAAC,UAAU,CAAC,CAA2B,CAAC;QACjE,IAAI,CAAC,GAAG,CAAC,gBAAgB,IAAI,CAAC,GAAG,CAAC,WAAW,EAAE,CAAC;YAC9C,MAAM,IAAI,uBAAuB,CAAC,+CAA+C,CAAC,CAAC;QACrF,CAAC;QACD,MAAM,GAAG,GAAG,CAAC;QACb,OAAO,MAAM,CAAC;IAChB,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,IAAI,GAAG,YAAY,uBAAuB;YAAE,MAAM,GAAG,CAAC;QACtD,MAAM,IAAI,uBAAuB,CAAC,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,sBAAsB,CAAC,CAAC;IACjG,CAAC;AACH,CAAC;AAED;;;GAGG;AACH,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,KAAiB;IACpD,MAAM,KAAK,GAAG,MAAM,SAAS,EAAE,CAAC;IAChC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,gBAAgB,CAAC,KAAK,CAAC,CAAC;IAChD,MAAM,EAAE,UAAU,EAAE,IAAI,EAAE,GAAG,MAAM,KAAK,CAAC,WAAW,CAAC,GAAG,EAAE,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC,CAAC;IAChF,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,UAAU,EAAE,CAAC;AACrC,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf-extract.test.d.ts","sourceRoot":"","sources":["../../src/tools/pdf-extract.test.ts"],"names":[],"mappings":""}
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
import { readFile } from "node:fs/promises";
|
|
2
|
+
import { fileURLToPath } from "node:url";
|
|
3
|
+
import { describe, expect, it } from "vitest";
|
|
4
|
+
import { extractPdfText } from "./pdf-extract.js";
|
|
5
|
+
// Filesystem path of the sample PDF, resolved relative to this test module.
const fixtureUrl = new URL("./__fixtures__/sample.pdf", import.meta.url);
const fixturePath = fileURLToPath(fixtureUrl);
// Settled at module load so `describe.skipIf` sees the real value during
// collection: true when `unpdf` imports, false on any rejection.
const unpdfInstalled = await import("unpdf").then(() => true, () => false);
describe.skipIf(!unpdfInstalled)("extractPdfText", () => {
    it("extracts text and page count from a minimal PDF", async () => {
        const fileContents = await readFile(fixturePath);
        const result = await extractPdfText(new Uint8Array(fileContents));
        expect(result.text).toContain("Hello PDF World");
        expect(result.pages).toBe(1);
    });
});
|
|
15
|
+
//# sourceMappingURL=pdf-extract.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"pdf-extract.test.js","sourceRoot":"","sources":["../../src/tools/pdf-extract.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,kBAAkB,CAAC;AAC5C,OAAO,EAAE,aAAa,EAAE,MAAM,UAAU,CAAC;AACzC,OAAO,EAAE,QAAQ,EAAE,MAAM,EAAE,EAAE,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,cAAc,EAAE,MAAM,kBAAkB,CAAC;AAElD,MAAM,WAAW,GAAG,aAAa,CAAC,IAAI,GAAG,CAAC,2BAA2B,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;AAEzF,MAAM,cAAc,GAAG,MAAM,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,GAAG,EAAE,CAAC,IAAI,CAAC,CAAC,KAAK,CAAC,GAAG,EAAE,CAAC,KAAK,CAAC,CAAC;AAEjF,QAAQ,CAAC,MAAM,CAAC,CAAC,cAAc,CAAC,CAAC,gBAAgB,EAAE,GAAG,EAAE;IACtD,EAAE,CAAC,iDAAiD,EAAE,KAAK,IAAI,EAAE;QAC/D,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,MAAM,QAAQ,CAAC,WAAW,CAAC,CAAC,CAAC;QAC1D,MAAM,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,MAAM,cAAc,CAAC,KAAK,CAAC,CAAC;QAEpD,MAAM,CAAC,IAAI,CAAC,CAAC,SAAS,CAAC,iBAAiB,CAAC,CAAC;QAC1C,MAAM,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IACxB,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prompt-hints.d.ts","sourceRoot":"","sources":["../../src/tools/prompt-hints.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,
|
|
1
|
+
{"version":3,"file":"prompt-hints.d.ts","sourceRoot":"","sources":["../../src/tools/prompt-hints.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,eAAO,MAAM,iBAAiB,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CA4BpD,CAAC;AAEF,oEAAoE;AACpE,eAAO,MAAM,kBAAkB,EAAE,SAAS,MAAM,EAoB/C,CAAC"}
|
|
@@ -12,7 +12,8 @@ export const TOOL_PROMPT_HINTS = {
|
|
|
12
12
|
find: "Find files/dirs by name pattern. Faster than bash find, respects .gitignore.",
|
|
13
13
|
grep: "Regex search across files. Use for usages, definitions, imports.",
|
|
14
14
|
source_path: "Resolve installed package/repo source via opensrc. Use before assuming dependency APIs; inspect returned absolute path with read/grep/find/ls.",
|
|
15
|
-
web_search: "Search the web. Use before web_fetch to find pages.",
|
|
15
|
+
web_search: "Search the web. Use before web_fetch to find pages; supports include/exclude_domains and a time_range recency filter.",
|
|
16
|
+
web_fetch: "Fetch page content as Markdown (or text/html). Pass `urls` to fetch many at once; reads PDFs, follows safe redirects, and prefers a site's /llms.txt for docs.",
|
|
16
17
|
task_output: "Read new output from a background process by id.",
|
|
17
18
|
task_stop: "Stop a background process by id.",
|
|
18
19
|
goals: "Manage durable Goal runs for /goal and Ctrl+G workflows. Use for Goal setup, coordinator evidence, worker tasks, verifier records, final completion audits, blockers, and completion state.",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"prompt-hints.js","sourceRoot":"","sources":["../../src/tools/prompt-hints.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAA2B;IACvD,IAAI,EAAE,uDAAuD;IAC7D,KAAK,EAAE,gFAAgF;IACvF,IAAI,EAAE,kIAAkI;IACxI,IAAI,EAAE,oHAAoH;IAC1H,IAAI,EAAE,8EAA8E;IACpF,IAAI,EAAE,kEAAkE;IACxE,WAAW,EACT,gJAAgJ;IAClJ,UAAU,
|
|
1
|
+
{"version":3,"file":"prompt-hints.js","sourceRoot":"","sources":["../../src/tools/prompt-hints.ts"],"names":[],"mappings":"AAAA;;;;;GAKG;AACH,MAAM,CAAC,MAAM,iBAAiB,GAA2B;IACvD,IAAI,EAAE,uDAAuD;IAC7D,KAAK,EAAE,gFAAgF;IACvF,IAAI,EAAE,kIAAkI;IACxI,IAAI,EAAE,oHAAoH;IAC1H,IAAI,EAAE,8EAA8E;IACpF,IAAI,EAAE,kEAAkE;IACxE,WAAW,EACT,gJAAgJ;IAClJ,UAAU,EACR,uHAAuH;IACzH,SAAS,EACP,gKAAgK;IAClK,WAAW,EAAE,kDAAkD;IAC/D,SAAS,EAAE,kCAAkC;IAC7C,KAAK,EACH,6LAA6L;IAC/L,UAAU,EACR,yGAAyG;IAC3G,SAAS,EAAE,0EAA0E;IACrF,QAAQ,EAAE,uEAAuE;IACjF,KAAK,EAAE,oDAAoD;IAC3D,uCAAuC,EACrC,qNAAqN;IACvN,oCAAoC,EAClC,6LAA6L;IAC/L,iCAAiC,EAC/B,+OAA+O;CAClP,CAAC;AAEF,oEAAoE;AACpE,MAAM,CAAC,MAAM,kBAAkB,GAAsB;IACnD,MAAM;IACN,OAAO;IACP,MAAM;IACN,MAAM;IACN,MAAM;IACN,MAAM;IACN,IAAI;IACJ,aAAa;IACb,WAAW;IACX,aAAa;IACb,WAAW;IACX,OAAO;IACP,YAAY;IACZ,WAAW;IACX,UAAU;IACV,OAAO;IACP,uCAAuC;IACvC,oCAAoC;IACpC,iCAAiC;CAClC,CAAC"}
|
|
@@ -6,10 +6,25 @@ import type { AgentTool } from "@kenkaiiii/gg-agent";
|
|
|
6
6
|
*/
|
|
7
7
|
export declare function isBlockedUrl(urlString: string): boolean;
|
|
8
8
|
export declare function htmlToCleanText(html: string): string;
|
|
9
|
+
type LlmsCandidateKind = "llms" | "llms-full" | "llms-ctx" | "page-md";
|
|
10
|
+
interface LlmsCandidate {
|
|
11
|
+
url: string;
|
|
12
|
+
label: string;
|
|
13
|
+
kind: LlmsCandidateKind;
|
|
14
|
+
priority: number;
|
|
15
|
+
}
|
|
16
|
+
export declare function buildLlmsCandidates(url: string, maxLength: number): LlmsCandidate[];
|
|
9
17
|
export declare function createWebFetchTool(): AgentTool<typeof parameters>;
|
|
10
18
|
declare const parameters: z.ZodObject<{
|
|
11
|
-
url: z.ZodString
|
|
19
|
+
url: z.ZodOptional<z.ZodString>;
|
|
20
|
+
urls: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
12
21
|
max_length: z.ZodOptional<z.ZodNumber>;
|
|
22
|
+
format: z.ZodOptional<z.ZodEnum<{
|
|
23
|
+
text: "text";
|
|
24
|
+
html: "html";
|
|
25
|
+
markdown: "markdown";
|
|
26
|
+
}>>;
|
|
27
|
+
prefer_llms_txt: z.ZodOptional<z.ZodBoolean>;
|
|
13
28
|
}, z.core.$strip>;
|
|
14
29
|
export {};
|
|
15
30
|
//# sourceMappingURL=web-fetch.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"web-fetch.d.ts","sourceRoot":"","sources":["../../src/tools/web-fetch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,SAAS,
|
|
1
|
+
{"version":3,"file":"web-fetch.d.ts","sourceRoot":"","sources":["../../src/tools/web-fetch.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,CAAC,EAAE,MAAM,KAAK,CAAC;AACxB,OAAO,KAAK,EAAE,SAAS,EAAe,MAAM,qBAAqB,CAAC;AAIlE;;;GAGG;AACH,wBAAgB,YAAY,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CA0CvD;AAmGD,wBAAgB,eAAe,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,CAYpD;AAqBD,KAAK,iBAAiB,GAAG,MAAM,GAAG,WAAW,GAAG,UAAU,GAAG,SAAS,CAAC;AAEvE,UAAU,aAAa;IACrB,GAAG,EAAE,MAAM,CAAC;IACZ,KAAK,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,iBAAiB,CAAC;IACxB,QAAQ,EAAE,MAAM,CAAC;CAClB;AAuOD,wBAAgB,mBAAmB,CAAC,GAAG,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,aAAa,EAAE,CAuCnF;AAuED,wBAAgB,kBAAkB,IAAI,SAAS,CAAC,OAAO,UAAU,CAAC,CAkCjE;AA0BD,QAAA,MAAM,UAAU;;;;;;;;;;iBAoBZ,CAAC"}
|