@steipete/summarize 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +52 -0
- package/LICENSE +21 -0
- package/README.md +185 -0
- package/dist/cli.cjs +74333 -0
- package/dist/cli.cjs.map +7 -0
- package/dist/esm/cli-main.js +80 -0
- package/dist/esm/cli-main.js.map +1 -0
- package/dist/esm/cli.js +18 -0
- package/dist/esm/cli.js.map +1 -0
- package/dist/esm/config.js +33 -0
- package/dist/esm/config.js.map +1 -0
- package/dist/esm/content/asset.js +167 -0
- package/dist/esm/content/asset.js.map +1 -0
- package/dist/esm/content/index.js +4 -0
- package/dist/esm/content/index.js.map +1 -0
- package/dist/esm/content/link-preview/client.js +20 -0
- package/dist/esm/content/link-preview/client.js.map +1 -0
- package/dist/esm/content/link-preview/content/article.js +150 -0
- package/dist/esm/content/link-preview/content/article.js.map +1 -0
- package/dist/esm/content/link-preview/content/cleaner.js +55 -0
- package/dist/esm/content/link-preview/content/cleaner.js.map +1 -0
- package/dist/esm/content/link-preview/content/fetcher.js +120 -0
- package/dist/esm/content/link-preview/content/fetcher.js.map +1 -0
- package/dist/esm/content/link-preview/content/index.js +275 -0
- package/dist/esm/content/link-preview/content/index.js.map +1 -0
- package/dist/esm/content/link-preview/content/parsers.js +77 -0
- package/dist/esm/content/link-preview/content/parsers.js.map +1 -0
- package/dist/esm/content/link-preview/content/types.js +4 -0
- package/dist/esm/content/link-preview/content/types.js.map +1 -0
- package/dist/esm/content/link-preview/content/utils.js +127 -0
- package/dist/esm/content/link-preview/content/utils.js.map +1 -0
- package/dist/esm/content/link-preview/content/youtube.js +82 -0
- package/dist/esm/content/link-preview/content/youtube.js.map +1 -0
- package/dist/esm/content/link-preview/deps.js +2 -0
- package/dist/esm/content/link-preview/deps.js.map +1 -0
- package/dist/esm/content/link-preview/fetch-with-timeout.js +35 -0
- package/dist/esm/content/link-preview/fetch-with-timeout.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/cache.js +73 -0
- package/dist/esm/content/link-preview/transcript/cache.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/index.js +95 -0
- package/dist/esm/content/link-preview/transcript/index.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/normalize.js +43 -0
- package/dist/esm/content/link-preview/transcript/normalize.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/generic.js +11 -0
- package/dist/esm/content/link-preview/transcript/providers/generic.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/podcast.js +12 -0
- package/dist/esm/content/link-preview/transcript/providers/podcast.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/twitter.js +12 -0
- package/dist/esm/content/link-preview/transcript/providers/twitter.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/api.js +257 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/api.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/apify.js +55 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/apify.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js +409 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/captions.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js +114 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube/ytdlp.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube.js +74 -0
- package/dist/esm/content/link-preview/transcript/providers/youtube.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/types.js +2 -0
- package/dist/esm/content/link-preview/transcript/types.js.map +1 -0
- package/dist/esm/content/link-preview/transcript/utils.js +193 -0
- package/dist/esm/content/link-preview/transcript/utils.js.map +1 -0
- package/dist/esm/content/link-preview/types.js +2 -0
- package/dist/esm/content/link-preview/types.js.map +1 -0
- package/dist/esm/costs.js +57 -0
- package/dist/esm/costs.js.map +1 -0
- package/dist/esm/firecrawl.js +54 -0
- package/dist/esm/firecrawl.js.map +1 -0
- package/dist/esm/flags.js +97 -0
- package/dist/esm/flags.js.map +1 -0
- package/dist/esm/index.js +4 -0
- package/dist/esm/index.js.map +1 -0
- package/dist/esm/llm/generate-text.js +296 -0
- package/dist/esm/llm/generate-text.js.map +1 -0
- package/dist/esm/llm/google-models.js +112 -0
- package/dist/esm/llm/google-models.js.map +1 -0
- package/dist/esm/llm/html-to-markdown.js +44 -0
- package/dist/esm/llm/html-to-markdown.js.map +1 -0
- package/dist/esm/llm/model-id.js +45 -0
- package/dist/esm/llm/model-id.js.map +1 -0
- package/dist/esm/pricing/litellm.js +25 -0
- package/dist/esm/pricing/litellm.js.map +1 -0
- package/dist/esm/prompts/file.js +14 -0
- package/dist/esm/prompts/file.js.map +1 -0
- package/dist/esm/prompts/index.js +3 -0
- package/dist/esm/prompts/index.js.map +1 -0
- package/dist/esm/prompts/link-summary.js +105 -0
- package/dist/esm/prompts/link-summary.js.map +1 -0
- package/dist/esm/run.js +1674 -0
- package/dist/esm/run.js.map +1 -0
- package/dist/esm/shared/contracts.js +2 -0
- package/dist/esm/shared/contracts.js.map +1 -0
- package/dist/esm/summarizeHome.js +20 -0
- package/dist/esm/summarizeHome.js.map +1 -0
- package/dist/esm/tty/live-markdown.js +52 -0
- package/dist/esm/tty/live-markdown.js.map +1 -0
- package/dist/esm/tty/osc-progress.js +8 -0
- package/dist/esm/tty/osc-progress.js.map +1 -0
- package/dist/esm/tty/spinner.js +33 -0
- package/dist/esm/tty/spinner.js.map +1 -0
- package/dist/esm/version.js +44 -0
- package/dist/esm/version.js.map +1 -0
- package/dist/types/cli-main.d.ts +11 -0
- package/dist/types/cli.d.ts +1 -0
- package/dist/types/config.d.ts +15 -0
- package/dist/types/content/asset.d.ts +44 -0
- package/dist/types/content/index.d.ts +4 -0
- package/dist/types/content/link-preview/client.d.ts +14 -0
- package/dist/types/content/link-preview/content/article.d.ts +4 -0
- package/dist/types/content/link-preview/content/cleaner.d.ts +12 -0
- package/dist/types/content/link-preview/content/fetcher.d.ts +16 -0
- package/dist/types/content/link-preview/content/index.d.ts +4 -0
- package/dist/types/content/link-preview/content/parsers.d.ts +7 -0
- package/dist/types/content/link-preview/content/types.d.ts +44 -0
- package/dist/types/content/link-preview/content/utils.d.ts +16 -0
- package/dist/types/content/link-preview/content/youtube.d.ts +1 -0
- package/dist/types/content/link-preview/deps.d.ts +70 -0
- package/dist/types/content/link-preview/fetch-with-timeout.d.ts +4 -0
- package/dist/types/content/link-preview/transcript/cache.d.ts +29 -0
- package/dist/types/content/link-preview/transcript/index.d.ts +9 -0
- package/dist/types/content/link-preview/transcript/normalize.d.ts +3 -0
- package/dist/types/content/link-preview/transcript/providers/generic.d.ts +3 -0
- package/dist/types/content/link-preview/transcript/providers/podcast.d.ts +3 -0
- package/dist/types/content/link-preview/transcript/providers/twitter.d.ts +3 -0
- package/dist/types/content/link-preview/transcript/providers/youtube/api.d.ts +26 -0
- package/dist/types/content/link-preview/transcript/providers/youtube/apify.d.ts +1 -0
- package/dist/types/content/link-preview/transcript/providers/youtube/captions.d.ts +7 -0
- package/dist/types/content/link-preview/transcript/providers/youtube/ytdlp.d.ts +3 -0
- package/dist/types/content/link-preview/transcript/providers/youtube.d.ts +3 -0
- package/dist/types/content/link-preview/transcript/types.d.ts +23 -0
- package/dist/types/content/link-preview/transcript/utils.d.ts +7 -0
- package/dist/types/content/link-preview/types.d.ts +36 -0
- package/dist/types/costs.d.ts +31 -0
- package/dist/types/firecrawl.d.ts +5 -0
- package/dist/types/flags.d.ts +23 -0
- package/dist/types/index.d.ts +4 -0
- package/dist/types/llm/generate-text.d.ts +43 -0
- package/dist/types/llm/google-models.d.ts +10 -0
- package/dist/types/llm/html-to-markdown.d.ts +15 -0
- package/dist/types/llm/model-id.d.ts +14 -0
- package/dist/types/pricing/litellm.d.ts +13 -0
- package/dist/types/prompts/file.d.ts +6 -0
- package/dist/types/prompts/index.d.ts +3 -0
- package/dist/types/prompts/link-summary.d.ts +27 -0
- package/dist/types/run.d.ts +8 -0
- package/dist/types/shared/contracts.d.ts +2 -0
- package/dist/types/summarizeHome.d.ts +6 -0
- package/dist/types/tty/live-markdown.d.ts +10 -0
- package/dist/types/tty/osc-progress.d.ts +3 -0
- package/dist/types/tty/spinner.d.ts +10 -0
- package/dist/types/version.d.ts +2 -0
- package/docs/README.md +11 -0
- package/docs/config.md +28 -0
- package/docs/extract-only.md +13 -0
- package/docs/firecrawl.md +17 -0
- package/docs/llm.md +33 -0
- package/docs/openai.md +18 -0
- package/docs/site/.nojekyll +1 -0
- package/docs/site/404.html +37 -0
- package/docs/site/assets/site.css +577 -0
- package/docs/site/assets/site.js +69 -0
- package/docs/site/docs/config.html +73 -0
- package/docs/site/docs/extract-only.html +79 -0
- package/docs/site/docs/firecrawl.html +72 -0
- package/docs/site/docs/index.html +89 -0
- package/docs/site/docs/llm.html +70 -0
- package/docs/site/docs/openai.html +66 -0
- package/docs/site/docs/website.html +70 -0
- package/docs/site/docs/youtube.html +62 -0
- package/docs/site/index.html +125 -0
- package/docs/website.md +27 -0
- package/docs/youtube.md +32 -0
- package/package.json +76 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
import { runCli } from './run.js';
|
|
2
|
+
/**
 * Attach an 'error' listener to `stream` that treats a closed pipe as a clean exit.
 *
 * @param stream - Object exposing `on('error', listener)`.
 * @param exit - Callback invoked with exit code 0 when the error code is 'EPIPE'.
 */
export function handlePipeErrors(stream, exit) {
    const onStreamError = (err) => {
        // Anything other than a broken pipe is rethrown unchanged.
        if (err?.code !== 'EPIPE') {
            throw err;
        }
        exit(0);
    };
    stream.on('error', onStreamError);
}
|
|
12
|
+
/**
 * Remove ANSI escape sequences from a string.
 *
 * Minimal, good-enough ANSI stripper for error output. We only use this for non-verbose errors.
 *
 * Handles three cases:
 *  - CSI: `ESC [ ... <final>` where the final byte is any character in the range `@`..`~`
 *    (0x40-0x7E). This covers letter finals such as `m` as well as non-letter finals such as `~`.
 *  - OSC: `ESC ] ... ` terminated by BEL or by `ESC \`.
 *  - Any other ESC: the ESC and the single character after it are dropped.
 *
 * An unterminated CSI/OSC sequence consumes the rest of the input.
 *
 * @param {string} input - Text that may contain escape sequences.
 * @returns {string} The text with escape sequences removed.
 */
function stripAnsi(input) {
    const ESC = '\u001b';
    const BEL = '\u0007';
    let out = '';
    for (let i = 0; i < input.length; i += 1) {
        const ch = input[i];
        if (ch !== ESC) {
            out += ch;
            continue;
        }
        const next = input[i + 1];
        if (next === '[') {
            // CSI: skip parameter/intermediate bytes until the final byte (0x40-0x7E).
            // Checking only letters would miss finals like '~' and swallow the text that follows.
            i += 2;
            while (i < input.length) {
                const c = input[i];
                if (c >= '@' && c <= '~') {
                    break;
                }
                i += 1;
            }
            // The loop increment steps past the final byte.
            continue;
        }
        if (next === ']') {
            // OSC: ESC ] ... (BEL | ESC \)
            i += 2;
            while (i < input.length) {
                const c = input[i];
                if (c === BEL) {
                    break;
                }
                if (c === ESC && input[i + 1] === '\\') {
                    // Land on the backslash so the loop increment steps past the terminator.
                    i += 1;
                    break;
                }
                i += 1;
            }
            continue;
        }
        // Unknown ESC sequence (or stray ESC): drop the next character too to avoid leaving artifacts.
        if (typeof next === 'string') {
            i += 1;
        }
    }
    return out;
}
|
|
55
|
+
/**
 * Run the CLI and report any failure on stderr.
 *
 * Pipe-error handlers are installed on both output streams first. If `runCli` rejects,
 * the error is written to `stderr` and `setExitCode(1)` is called; the error is not rethrown.
 * With `--verbose` (or `--verbose=true`) in `argv`, Error instances print their stack and,
 * when present, the stack of `error.cause`. Otherwise only the message is printed, with ANSI
 * escape sequences stripped.
 */
export async function runCliMain({ argv, env, fetch, stdout, stderr, exit, setExitCode, }) {
    for (const stream of [stdout, stderr]) {
        handlePipeErrors(stream, exit);
    }
    const wantsVerbose = ['--verbose', '--verbose=true'].some((flag) => argv.includes(flag));
    try {
        await runCli(argv, { env, fetch, stdout, stderr });
    }
    catch (failure) {
        // On a TTY, start the error on a fresh line.
        if (stderr.isTTY) {
            stderr.write('\n');
        }
        const isError = failure instanceof Error;
        if (wantsVerbose && isError && typeof failure.stack === 'string') {
            stderr.write(`${failure.stack}\n`);
            const { cause } = failure;
            if (cause instanceof Error && typeof cause.stack === 'string') {
                stderr.write(`Caused by: ${cause.stack}\n`);
            }
        }
        else {
            let text;
            if (isError) {
                text = failure.message;
            }
            else if (failure) {
                text = String(failure);
            }
            else {
                text = 'Unknown error';
            }
            stderr.write(`${stripAnsi(text)}\n`);
        }
        setExitCode(1);
    }
}
|
|
80
|
+
//# sourceMappingURL=cli-main.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli-main.js","sourceRoot":"","sources":["../../src/cli-main.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,UAAU,CAAA;AAYjC,MAAM,UAAU,gBAAgB,CAAC,MAA6B,EAAE,IAA4B;IAC1F,MAAM,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,KAAc,EAAE,EAAE;QACpC,MAAM,IAAI,GAAI,KAAmC,EAAE,IAAI,CAAA;QACvD,IAAI,IAAI,KAAK,OAAO,EAAE,CAAC;YACrB,IAAI,CAAC,CAAC,CAAC,CAAA;YACP,OAAM;QACR,CAAC;QACD,MAAM,KAAK,CAAA;IACb,CAAC,CAAC,CAAA;AACJ,CAAC;AAED,SAAS,SAAS,CAAC,KAAa;IAC9B,gGAAgG;IAChG,IAAI,GAAG,GAAG,EAAE,CAAA;IAEZ,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,IAAI,CAAC,EAAE,CAAC;QACzC,MAAM,EAAE,GAAG,KAAK,CAAC,CAAC,CAAC,CAAA;QACnB,IAAI,EAAE,KAAK,QAAQ,EAAE,CAAC;YACpB,GAAG,IAAI,EAAE,CAAA;YACT,SAAQ;QACV,CAAC;QAED,MAAM,IAAI,GAAG,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,CAAA;QACzB,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;YACjB,yBAAyB;YACzB,CAAC,IAAI,CAAC,CAAA;YACN,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;gBACxB,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAA;gBAClB,IAAI,CAAC,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,CAAC,IAAI,CAAC,CAAC,IAAI,GAAG,IAAI,CAAC,IAAI,GAAG,CAAC;oBAAE,MAAK;gBAC3D,CAAC,IAAI,CAAC,CAAA;YACR,CAAC;YACD,SAAQ;QACV,CAAC;QAED,IAAI,IAAI,KAAK,GAAG,EAAE,CAAC;YACjB,+BAA+B;YAC/B,CAAC,IAAI,CAAC,CAAA;YACN,OAAO,CAAC,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC;gBACxB,MAAM,CAAC,GAAG,KAAK,CAAC,CAAC,CAAC,CAAA;gBAClB,IAAI,CAAC,KAAK,QAAQ;oBAAE,MAAK;gBACzB,IAAI,CAAC,KAAK,QAAQ,IAAI,KAAK,CAAC,CAAC,GAAG,CAAC,CAAC,KAAK,IAAI,EAAE,CAAC;oBAC5C,CAAC,IAAI,CAAC,CAAA;oBACN,MAAK;gBACP,CAAC;gBACD,CAAC,IAAI,CAAC,CAAA;YACR,CAAC;YACD,SAAQ;QACV,CAAC;QAED,+FAA+F;QAC/F,IAAI,OAAO,IAAI,KAAK,QAAQ,EAAE,CAAC;YAC7B,CAAC,IAAI,CAAC,CAAA;QACR,CAAC;IACH,CAAC;IAED,OAAO,GAAG,CAAA;AACZ,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,UAAU,CAAC,EAC/B,IAAI,EACJ,GAAG,EACH,KAAK,EACL,MAAM,EACN,MAAM,EACN,IAAI,EACJ,WAAW,GACC;IACZ,gBAAgB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAA;IAC9B,gBAAgB,CAAC,MAAM,EAAE,IAAI,CAAC,CAAA;IAE9B,MAAM,OAAO,GAAG,IAAI,CAAC,QAAQ,CAAC,WAAW,CAAC,IAAI,IAAI,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAA;IAE7E,IAAI,CAAC;QACH,MAAM,MAAM,CAAC,IAAI,EAAE,EAAE,GAAG,EAAE,KAAK,EAAE,MAAM,EA
AE,MAAM,EAAE,CAAC,CAAA;IACpD,CAAC;IAAC,OAAO,KAAc,EAAE,CAAC;QACxB,MAAM,KAAK,GAAG,OAAO,CAAE,MAAyC,CAAC,KAAK,CAAC,CAAA;QACvE,IAAI,KAAK;YAAE,MAAM,CAAC,KAAK,CAAC,IAAI,CAAC,CAAA;QAE7B,IAAI,OAAO,IAAI,KAAK,YAAY,KAAK,IAAI,OAAO,KAAK,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;YACzE,MAAM,CAAC,KAAK,CAAC,GAAG,KAAK,CAAC,KAAK,IAAI,CAAC,CAAA;YAChC,MAAM,KAAK,GAAI,KAAqC,CAAC,KAAK,CAAA;YAC1D,IAAI,KAAK,YAAY,KAAK,IAAI,OAAO,KAAK,CAAC,KAAK,KAAK,QAAQ,EAAE,CAAC;gBAC9D,MAAM,CAAC,KAAK,CAAC,cAAc,KAAK,CAAC,KAAK,IAAI,CAAC,CAAA;YAC7C,CAAC;YACD,WAAW,CAAC,CAAC,CAAC,CAAA;YACd,OAAM;QACR,CAAC;QAED,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,eAAe,CAAA;QAChG,MAAM,CAAC,KAAK,CAAC,GAAG,SAAS,CAAC,OAAO,CAAC,IAAI,CAAC,CAAA;QACvC,WAAW,CAAC,CAAC,CAAC,CAAA;IAChB,CAAC;AACH,CAAC"}
|
package/dist/esm/cli.js
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
import { runCliMain } from './cli-main.js';
|
|
2
|
+
// Entry point: hand the real process arguments, environment, streams and fetch to runCliMain.
const writeFallbackError = (error) => {
    // Last-resort fallback; runCliMain should already format errors nicely.
    let message = 'Unknown error';
    if (error instanceof Error) {
        message = error.message;
    }
    else if (error) {
        message = String(error);
    }
    process.stderr.write(`${message}\n`);
    process.exitCode = 1;
};
void runCliMain({
    argv: process.argv.slice(2),
    env: process.env,
    fetch: globalThis.fetch.bind(globalThis),
    stdout: process.stdout,
    stderr: process.stderr,
    exit: (code) => process.exit(code),
    setExitCode: (code) => {
        process.exitCode = code;
    },
}).catch(writeFallbackError);
|
|
18
|
+
//# sourceMappingURL=cli.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cli.js","sourceRoot":"","sources":["../../src/cli.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,eAAe,CAAA;AAE1C,KAAK,UAAU,CAAC;IACd,IAAI,EAAE,OAAO,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,CAAC;IAC3B,GAAG,EAAE,OAAO,CAAC,GAAG;IAChB,KAAK,EAAE,UAAU,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,CAAC;IACxC,MAAM,EAAE,OAAO,CAAC,MAAM;IACtB,MAAM,EAAE,OAAO,CAAC,MAAM;IACtB,IAAI,EAAE,CAAC,IAAI,EAAE,EAAE,CAAC,OAAO,CAAC,IAAI,CAAC,IAAI,CAAC;IAClC,WAAW,EAAE,CAAC,IAAI,EAAE,EAAE;QACpB,OAAO,CAAC,QAAQ,GAAG,IAAI,CAAA;IACzB,CAAC;CACF,CAAC,CAAC,KAAK,CAAC,CAAC,KAAK,EAAE,EAAE;IACjB,wEAAwE;IACxE,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,KAAK,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,eAAe,CAAA;IAChG,OAAO,CAAC,MAAM,CAAC,KAAK,CAAC,GAAG,OAAO,IAAI,CAAC,CAAA;IACpC,OAAO,CAAC,QAAQ,GAAG,CAAC,CAAA;AACtB,CAAC,CAAC,CAAA"}
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
import { readFileSync } from 'node:fs';
|
|
2
|
+
import { homedir } from 'node:os';
|
|
3
|
+
import { join } from 'node:path';
|
|
4
|
+
/** Return true when `value` is a non-null object that is not an array. */
function isRecord(value) {
    if (value === null || Array.isArray(value)) {
        return false;
    }
    return typeof value === 'object';
}
|
|
7
|
+
/**
 * Load `<home>/.summarize/config.json`.
 *
 * The home directory is `env.HOME` (trimmed) when non-empty, otherwise `os.homedir()`.
 *
 * @returns `{ config: null, path: null }` when no home directory can be determined,
 *   `{ config: null, path }` when the file cannot be read, and otherwise
 *   `{ config: { model }, path }` where `model` is the file's `model` value if it is a
 *   string and `undefined` if not.
 * @throws {Error} When the file is not valid JSON or its top-level value is not an object.
 */
export function loadSummarizeConfig({ env }) {
    const homeDir = env.HOME?.trim() || homedir();
    if (!homeDir) {
        return { config: null, path: null };
    }
    const path = join(homeDir, '.summarize', 'config.json');
    // Any read failure is treated as "no config"; null is the sentinel for that case.
    const readConfigText = () => {
        try {
            return readFileSync(path, 'utf8');
        }
        catch {
            return null;
        }
    };
    const raw = readConfigText();
    if (raw === null) {
        return { config: null, path };
    }
    let parsed;
    try {
        parsed = JSON.parse(raw);
    }
    catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        throw new Error(`Invalid JSON in config file ${path}: ${message}`);
    }
    if (!isRecord(parsed)) {
        throw new Error(`Invalid config file ${path}: expected an object at the top level`);
    }
    const { model: rawModel } = parsed;
    const model = typeof rawModel === 'string' ? rawModel : undefined;
    return { config: { model }, path };
}
|
|
33
|
+
//# sourceMappingURL=config.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"config.js","sourceRoot":"","sources":["../../src/config.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,SAAS,CAAA;AACtC,OAAO,EAAE,OAAO,EAAE,MAAM,SAAS,CAAA;AACjC,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAYhC,SAAS,QAAQ,CAAC,KAAc;IAC9B,OAAO,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,KAAK,IAAI,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAA;AAC7E,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,EAAE,GAAG,EAA+C;IAItF,MAAM,IAAI,GAAG,GAAG,CAAC,IAAI,EAAE,IAAI,EAAE,IAAI,OAAO,EAAE,CAAA;IAC1C,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAC9C,MAAM,IAAI,GAAG,IAAI,CAAC,IAAI,EAAE,YAAY,EAAE,aAAa,CAAC,CAAA;IAEpD,IAAI,GAAW,CAAA;IACf,IAAI,CAAC;QACH,GAAG,GAAG,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAA;IAClC,CAAC;IAAC,MAAM,CAAC;QACP,OAAO,EAAE,MAAM,EAAE,IAAI,EAAE,IAAI,EAAE,CAAA;IAC/B,CAAC;IAED,IAAI,MAAe,CAAA;IACnB,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;IAC1B,CAAC;IAAC,OAAO,KAAK,EAAE,CAAC;QACf,MAAM,OAAO,GAAG,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAA;QACtE,MAAM,IAAI,KAAK,CAAC,+BAA+B,IAAI,KAAK,OAAO,EAAE,CAAC,CAAA;IACpE,CAAC;IAED,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,EAAE,CAAC;QACtB,MAAM,IAAI,KAAK,CAAC,uBAAuB,IAAI,uCAAuC,CAAC,CAAA;IACrF,CAAC;IAED,MAAM,KAAK,GAAG,OAAO,MAAM,CAAC,KAAK,KAAK,QAAQ,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC,CAAC,SAAS,CAAA;IACzE,OAAO,EAAE,MAAM,EAAE,EAAE,KAAK,EAAE,EAAE,IAAI,EAAE,CAAA;AACpC,CAAC"}
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
import { existsSync } from 'node:fs';
|
|
2
|
+
import fs from 'node:fs/promises';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
import { fileTypeFromBuffer } from 'file-type';
|
|
5
|
+
import mime from 'mime';
|
|
6
|
+
const MAX_ASSET_BYTES_DEFAULT = 50 * 1024 * 1024;
|
|
7
|
+
/**
 * Reduce a Content-Type style header value to its lowercase media type.
 *
 * Everything from the first ';' onward is discarded.
 *
 * @returns The trimmed, lowercased text before the first ';', or null when `value` is
 *   falsy or contains only whitespace.
 */
function normalizeHeaderMediaType(value) {
    const trimmed = value ? value.trim() : '';
    if (trimmed === '') {
        return null;
    }
    const [mediaType] = trimmed.split(';');
    return mediaType.trim().toLowerCase();
}
|
|
15
|
+
/** Return true when `mediaType` is exactly 'text/html' or 'application/xhtml+xml'. */
function isHtmlMediaType(mediaType) {
    return ['text/html', 'application/xhtml+xml'].includes(mediaType);
}
|
|
20
|
+
/**
 * Sniff the first 256 bytes for an HTML document prefix.
 *
 * The bytes are decoded with a default TextDecoder, leading whitespace is removed and the
 * text is lowercased before checking for '<!doctype html', '<html' or '<head' at the start.
 */
function looksLikeHtml(bytes) {
    const prefix = bytes.slice(0, 256);
    const text = new TextDecoder().decode(prefix).trimStart().toLowerCase();
    return ['<!doctype html', '<html', '<head'].some((marker) => text.startsWith(marker));
}
|
|
24
|
+
/**
 * Guess from a URL pathname whether it points at a downloadable file.
 *
 * A pathname counts as an asset when it has a file extension other than
 * .html, .htm, .php, .asp or .aspx (compared case-insensitively).
 */
function isLikelyAssetPathname(pathname) {
    const extension = path.extname(pathname).toLowerCase();
    if (extension === '') {
        return false;
    }
    return !['.html', '.htm', '.php', '.asp', '.aspx'].includes(extension);
}
|
|
33
|
+
/**
 * Classify a raw CLI input as a local file or a web URL.
 *
 * Resolution order:
 *  1. If the trimmed input, resolved as a filesystem path, exists, it is a file.
 *  2. Otherwise it must parse as a URL. `file:` URLs become a file target from the
 *     decoded pathname; `http:`/`https:` URLs are returned as a URL target.
 *  3. For any other scheme, the text from the last embedded 'https://' or 'http://'
 *     onward is resolved instead, if there is one.
 *
 * @returns `{ kind: 'file', filePath }` or `{ kind: 'url', url }`.
 * @throws {Error} On empty input, on input that is neither an existing path nor a parseable
 *   URL, and on URLs with an unsupported scheme and no embedded http(s) URL.
 */
export function resolveInputTarget(raw) {
    const input = raw.trim();
    if (input.length === 0) {
        throw new Error('Missing input');
    }
    const localPath = path.resolve(input);
    if (existsSync(localPath)) {
        return { kind: 'file', filePath: localPath };
    }
    let target;
    try {
        target = new URL(input);
    }
    catch {
        throw new Error(`Invalid URL or file path: ${raw}`);
    }
    const { protocol } = target;
    if (protocol === 'file:') {
        return { kind: 'file', filePath: path.resolve(decodeURIComponent(target.pathname)) };
    }
    if (protocol === 'http:' || protocol === 'https:') {
        return { kind: 'url', url: input };
    }
    // Unsupported scheme: fall back to the last http(s) URL embedded in the input, if any.
    const embeddedStart = Math.max(input.lastIndexOf('https://'), input.lastIndexOf('http://'));
    if (embeddedStart >= 0) {
        return resolveInputTarget(input.slice(embeddedStart));
    }
    throw new Error('Only HTTP and HTTPS URLs can be summarized');
}
|
|
67
|
+
/**
 * Classify a URL as an asset or a website using only its pathname.
 *
 * `fetchImpl` and `timeoutMs` are accepted but not used; no request is made.
 *
 * @returns `{ kind: 'asset' }` when the pathname looks like a file, else `{ kind: 'website' }`.
 */
export async function classifyUrl({ url, fetchImpl, timeoutMs, }) {
    void fetchImpl;
    void timeoutMs;
    const { pathname } = new URL(url);
    return { kind: isLikelyAssetPathname(pathname) ? 'asset' : 'website' };
}
|
|
76
|
+
/**
 * Determine a media type for a payload, trying sources in this order:
 *  1. content sniffing via `fileTypeFromBuffer`,
 *  2. the normalized header content type, unless it is 'application/octet-stream',
 *  3. `mime.getType` applied to the name hint.
 *
 * Falls back to 'application/octet-stream' when none of them yields a type.
 */
async function detectMediaType({ bytes, headerContentType, nameHint, }) {
    const fromContent = (await fileTypeFromBuffer(bytes))?.mime;
    if (fromContent) {
        return fromContent;
    }
    const fromHeader = normalizeHeaderMediaType(headerContentType);
    if (fromHeader && fromHeader !== 'application/octet-stream') {
        return fromHeader;
    }
    const fromName = nameHint ? mime.getType(nameHint) : null;
    if (typeof fromName === 'string' && fromName.length > 0) {
        return fromName;
    }
    return 'application/octet-stream';
}
|
|
90
|
+
/**
 * Wrap raw bytes in an attachment record with a message content part.
 *
 * Media types starting with 'image/' produce an `image` part; everything else produces a
 * `file` part, whose `filename` is `undefined` when no filename was given.
 *
 * @returns `{ mediaType, filename, part }`.
 */
function buildAttachment({ bytes, mediaType, filename, }) {
    const part = mediaType.startsWith('image/')
        ? { type: 'image', image: bytes, mediaType }
        : { type: 'file', data: bytes, filename: filename ?? undefined, mediaType };
    return { mediaType, filename, part };
}
|
|
107
|
+
/**
 * Read a local file into an attachment.
 *
 * @param filePath - Path of the file to read.
 * @param maxBytes - Size limit in bytes; defaults to MAX_ASSET_BYTES_DEFAULT.
 * @returns `{ sourceLabel, attachment }` where `sourceLabel` is `filePath`.
 * @throws {Error} When the path is not a regular file or its size exceeds `maxBytes`.
 *   Errors from `fs.stat` / `fs.readFile` propagate unchanged.
 */
export async function loadLocalAsset({ filePath, maxBytes = MAX_ASSET_BYTES_DEFAULT, }) {
    const info = await fs.stat(filePath);
    if (!info.isFile()) {
        throw new Error(`Not a file: ${filePath}`);
    }
    const { size } = info;
    if (size > maxBytes) {
        throw new Error(`File too large (${size} bytes). Limit is ${maxBytes} bytes.`);
    }
    const contents = await fs.readFile(filePath);
    const bytes = new Uint8Array(contents);
    const filename = path.basename(filePath);
    // No HTTP header for local files; detection relies on content and file name.
    const mediaType = await detectMediaType({ bytes, headerContentType: null, nameHint: filename });
    const attachment = buildAttachment({ bytes, mediaType, filename });
    return { sourceLabel: filePath, attachment };
}
|
|
123
|
+
/**
 * Download a remote file into an attachment.
 *
 * The request is aborted after `timeoutMs` milliseconds. The size limit is checked twice:
 * against a finite `content-length` header before the body is read, and against the actual
 * byte length after.
 *
 * @param url - URL to download.
 * @param fetchImpl - fetch-compatible function used for the request.
 * @param timeoutMs - Abort timeout in milliseconds.
 * @param maxBytes - Size limit in bytes; defaults to MAX_ASSET_BYTES_DEFAULT.
 * @returns `{ sourceLabel, attachment }` where `sourceLabel` is `url`.
 * @throws {Error} On a non-OK response, an oversized payload, or a payload that is HTML
 *   by media type or by content sniffing.
 */
export async function loadRemoteAsset({ url, fetchImpl, timeoutMs, maxBytes = MAX_ASSET_BYTES_DEFAULT, }) {
    const aborter = new AbortController();
    const timer = setTimeout(() => aborter.abort(), timeoutMs);
    const tooLarge = (size) => new Error(`Remote file too large (${size} bytes). Limit is ${maxBytes} bytes.`);
    try {
        const response = await fetchImpl(url, { signal: aborter.signal });
        if (!response.ok) {
            throw new Error(`Download failed: ${response.status} ${response.statusText}`);
        }
        // Reject early when the server declares a size over the limit.
        const declaredLength = response.headers.get('content-length');
        if (declaredLength) {
            const declared = Number(declaredLength);
            if (Number.isFinite(declared) && declared > maxBytes) {
                throw tooLarge(declared);
            }
        }
        const body = await response.arrayBuffer();
        if (body.byteLength > maxBytes) {
            throw tooLarge(body.byteLength);
        }
        const bytes = new Uint8Array(body);
        const filename = path.basename(new URL(url).pathname) || null;
        const mediaType = await detectMediaType({
            bytes,
            headerContentType: response.headers.get('content-type'),
            nameHint: filename,
        });
        if (isHtmlMediaType(mediaType) || looksLikeHtml(bytes)) {
            throw new Error('URL appears to be a website (HTML), not a file');
        }
        const attachment = buildAttachment({ bytes, mediaType, filename });
        return { sourceLabel: url, attachment };
    }
    finally {
        // Always clear the abort timer, whether the download succeeded or failed.
        clearTimeout(timer);
    }
}
|
|
159
|
+
/**
 * Build the message list for an asset prompt: a single 'user' message whose content is
 * the prompt text part followed by the attachment's part.
 */
export function buildAssetPromptMessages({ promptText, attachment, }) {
    const textPart = { type: 'text', text: promptText };
    const userMessage = { role: 'user', content: [textPart, attachment.part] };
    return [userMessage];
}
|
|
167
|
+
//# sourceMappingURL=asset.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"asset.js","sourceRoot":"","sources":["../../../src/content/asset.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AACpC,OAAO,EAAE,MAAM,kBAAkB,CAAA;AACjC,OAAO,IAAI,MAAM,WAAW,CAAA;AAG5B,OAAO,EAAE,kBAAkB,EAAE,MAAM,WAAW,CAAA;AAC9C,OAAO,IAAI,MAAM,MAAM,CAAA;AAYvB,MAAM,uBAAuB,GAAG,EAAE,GAAG,IAAI,GAAG,IAAI,CAAA;AAEhD,SAAS,wBAAwB,CAAC,KAAoB;IACpD,IAAI,CAAC,KAAK;QAAE,OAAO,IAAI,CAAA;IACvB,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,EAAE,CAAA;IAC5B,IAAI,CAAC,OAAO;QAAE,OAAO,IAAI,CAAA;IACzB,OAAO,OAAO,CAAC,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,IAAI,EAAE,CAAC,WAAW,EAAE,IAAI,IAAI,CAAA;AAC5D,CAAC;AAED,SAAS,eAAe,CAAC,SAAwB;IAC/C,IAAI,CAAC,SAAS;QAAE,OAAO,KAAK,CAAA;IAC5B,OAAO,SAAS,KAAK,WAAW,IAAI,SAAS,KAAK,uBAAuB,CAAA;AAC3E,CAAC;AAED,SAAS,aAAa,CAAC,KAAiB;IACtC,MAAM,IAAI,GAAG,IAAI,WAAW,EAAE,CAAC,MAAM,CAAC,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC,CAAC,SAAS,EAAE,CAAC,WAAW,EAAE,CAAA;IACpF,OAAO,IAAI,CAAC,UAAU,CAAC,gBAAgB,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,IAAI,IAAI,CAAC,UAAU,CAAC,OAAO,CAAC,CAAA;AAClG,CAAC;AAED,SAAS,qBAAqB,CAAC,QAAgB;IAC7C,MAAM,GAAG,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC,WAAW,EAAE,CAAA;IAChD,IAAI,CAAC,GAAG;QAAE,OAAO,KAAK,CAAA;IACtB,IAAI,GAAG,KAAK,OAAO,IAAI,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,MAAM,IAAI,GAAG,KAAK,OAAO,EAAE,CAAC;QAC7F,OAAO,KAAK,CAAA;IACd,CAAC;IACD,OAAO,IAAI,CAAA;AACb,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,GAAW;IAC5C,MAAM,UAAU,GAAG,GAAG,CAAC,IAAI,EAAE,CAAA;IAC7B,IAAI,CAAC,UAAU,EAAE,CAAC;QAChB,MAAM,IAAI,KAAK,CAAC,eAAe,CAAC,CAAA;IAClC,CAAC;IAED,MAAM,MAAM,GAAG,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,CAAA;IACvC,IAAI,UAAU,CAAC,MAAM,CAAC,EAAE,CAAC;QACvB,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,EAAE,CAAA;IAC3C,CAAC;IAED,IAAI,MAAW,CAAA;IACf,IAAI,CAAC;QACH,MAAM,GAAG,IAAI,GAAG,CAAC,UAAU,CAAC,CAAA;IAC9B,CAAC;IAAC,MAAM,CAAC;QACP,MAAM,IAAI,KAAK,CAAC,6BAA6B,GAAG,EAAE,CAAC,CAAA;IACrD,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,KAAK,OAAO,IAAI,MAAM,CAAC,QAAQ,KAAK,QAAQ,IAAI,MAAM,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;QAC/F,MAAM,QAAQ,GAAG,UAAU,CAAC,WAAW,CAAC,UAAU,CAA
C,CAAA;QACnD,MAAM,YAAY,GAAG,UAAU,CAAC,WAAW,CAAC,SAAS,CAAC,CAAA;QACtD,MAAM,GAAG,GAAG,IAAI,CAAC,GAAG,CAAC,QAAQ,EAAE,YAAY,CAAC,CAAA;QAC5C,IAAI,GAAG,IAAI,CAAC,EAAE,CAAC;YACb,MAAM,SAAS,GAAG,UAAU,CAAC,KAAK,CAAC,GAAG,CAAC,CAAA;YACvC,OAAO,kBAAkB,CAAC,SAAS,CAAC,CAAA;QACtC,CAAC;IACH,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;QAChC,MAAM,QAAQ,GAAG,IAAI,CAAC,OAAO,CAAC,kBAAkB,CAAC,MAAM,CAAC,QAAQ,CAAC,CAAC,CAAA;QAClE,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,QAAQ,EAAE,CAAA;IACnC,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,KAAK,OAAO,IAAI,MAAM,CAAC,QAAQ,KAAK,QAAQ,EAAE,CAAC;QAChE,MAAM,IAAI,KAAK,CAAC,4CAA4C,CAAC,CAAA;IAC/D,CAAC;IACD,OAAO,EAAE,IAAI,EAAE,KAAK,EAAE,GAAG,EAAE,UAAU,EAAE,CAAA;AACzC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,WAAW,CAAC,EAChC,GAAG,EACH,SAAS,EACT,SAAS,GAKV;IACC,MAAM,MAAM,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;IAC3B,IAAI,qBAAqB,CAAC,MAAM,CAAC,QAAQ,CAAC,EAAE,CAAC;QAC3C,OAAO,EAAE,IAAI,EAAE,OAAO,EAAE,CAAA;IAC1B,CAAC;IAED,KAAK,SAAS,CAAA;IACd,KAAK,SAAS,CAAA;IACd,OAAO,EAAE,IAAI,EAAE,SAAS,EAAE,CAAA;AAC5B,CAAC;AAED,KAAK,UAAU,eAAe,CAAC,EAC7B,KAAK,EACL,iBAAiB,EACjB,QAAQ,GAKT;IACC,MAAM,OAAO,GAAG,MAAM,kBAAkB,CAAC,KAAK,CAAC,CAAA;IAC/C,IAAI,OAAO,EAAE,IAAI;QAAE,OAAO,OAAO,CAAC,IAAI,CAAA;IAEtC,MAAM,MAAM,GAAG,wBAAwB,CAAC,iBAAiB,CAAC,CAAA;IAC1D,IAAI,MAAM,IAAI,MAAM,KAAK,0BAA0B;QAAE,OAAO,MAAM,CAAA;IAElE,IAAI,QAAQ,EAAE,CAAC;QACb,MAAM,KAAK,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAA;QACpC,IAAI,OAAO,KAAK,KAAK,QAAQ,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;YAAE,OAAO,KAAK,CAAA;IACjE,CAAC;IAED,OAAO,0BAA0B,CAAA;AACnC,CAAC;AAED,SAAS,eAAe,CAAC,EACvB,KAAK,EACL,SAAS,EACT,QAAQ,GAKT;IACC,IAAI,SAAS,CAAC,UAAU,CAAC,QAAQ,CAAC,EAAE,CAAC;QACnC,MAAM,IAAI,GAAc;YACtB,IAAI,EAAE,OAAO;YACb,KAAK,EAAE,KAAK;YACZ,SAAS;SACV,CAAA;QACD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;IACtC,CAAC;IAED,MAAM,IAAI,GAAa;QACrB,IAAI,EAAE,MAAM;QACZ,IAAI,EAAE,KAAK;QACX,QAAQ,EAAE,QAAQ,IAAI,SAAS;QAC/B,SAAS;KACV,CAAA;IACD,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,IAAI,EAAE,CAAA;AACtC,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,cAAc,CAAC,EACnC,QAAQ,EACR,QAAQ,GAAG,uBAAuB,GAInC;IACC,MAAM,IAAI,GAAG,MAAM,EAAE,CA
AC,IAAI,CAAC,QAAQ,CAAC,CAAA;IACpC,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,CAAC;QACnB,MAAM,IAAI,KAAK,CAAC,eAAe,QAAQ,EAAE,CAAC,CAAA;IAC5C,CAAC;IACD,IAAI,IAAI,CAAC,IAAI,GAAG,QAAQ,EAAE,CAAC;QACzB,MAAM,IAAI,KAAK,CAAC,mBAAmB,IAAI,CAAC,IAAI,qBAAqB,QAAQ,SAAS,CAAC,CAAA;IACrF,CAAC;IAED,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAA;IACzD,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAA;IACxC,MAAM,SAAS,GAAG,MAAM,eAAe,CAAC,EAAE,KAAK,EAAE,iBAAiB,EAAE,IAAI,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC,CAAA;IAC/F,OAAO;QACL,WAAW,EAAE,QAAQ;QACrB,UAAU,EAAE,eAAe,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;KAC5D,CAAA;AACH,CAAC;AAED,MAAM,CAAC,KAAK,UAAU,eAAe,CAAC,EACpC,GAAG,EACH,SAAS,EACT,SAAS,EACT,QAAQ,GAAG,uBAAuB,GAMnC;IACC,MAAM,UAAU,GAAG,IAAI,eAAe,EAAE,CAAA;IACxC,MAAM,OAAO,GAAG,UAAU,CAAC,GAAG,EAAE,CAAC,UAAU,CAAC,KAAK,EAAE,EAAE,SAAS,CAAC,CAAA;IAC/D,IAAI,CAAC;QACH,MAAM,GAAG,GAAG,MAAM,SAAS,CAAC,GAAG,EAAE,EAAE,MAAM,EAAE,UAAU,CAAC,MAAM,EAAE,CAAC,CAAA;QAC/D,IAAI,CAAC,GAAG,CAAC,EAAE,EAAE,CAAC;YACZ,MAAM,IAAI,KAAK,CAAC,oBAAoB,GAAG,CAAC,MAAM,IAAI,GAAG,CAAC,UAAU,EAAE,CAAC,CAAA;QACrE,CAAC;QAED,MAAM,aAAa,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,gBAAgB,CAAC,CAAA;QACvD,IAAI,aAAa,EAAE,CAAC;YAClB,MAAM,MAAM,GAAG,MAAM,CAAC,aAAa,CAAC,CAAA;YACpC,IAAI,MAAM,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,MAAM,GAAG,QAAQ,EAAE,CAAC;gBACjD,MAAM,IAAI,KAAK,CAAC,0BAA0B,MAAM,qBAAqB,QAAQ,SAAS,CAAC,CAAA;YACzF,CAAC;QACH,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,GAAG,CAAC,WAAW,EAAE,CAAA;QAC3C,IAAI,WAAW,CAAC,UAAU,GAAG,QAAQ,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CACb,0BAA0B,WAAW,CAAC,UAAU,qBAAqB,QAAQ,SAAS,CACvF,CAAA;QACH,CAAC;QAED,MAAM,KAAK,GAAG,IAAI,UAAU,CAAC,WAAW,CAAC,CAAA;QACzC,MAAM,SAAS,GAAG,IAAI,GAAG,CAAC,GAAG,CAAC,CAAA;QAC9B,MAAM,QAAQ,GAAG,IAAI,CAAC,QAAQ,CAAC,SAAS,CAAC,QAAQ,CAAC,IAAI,IAAI,CAAA;QAC1D,MAAM,iBAAiB,GAAG,GAAG,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,CAAA;QACzD,MAAM,SAAS,GAAG,MAAM,eAAe,CAAC,EAAE,KAAK,EAAE,iBAAiB,EAAE,QAAQ,EAAE,QAAQ,EAAE,CAAC,CAAA;QAEzF,IAAI,eAAe,CAAC,SAAS,CAAC,IAAI,aAAa,CAAC,KAAK,CAAC,EAAE,CAAC;YACvD,MAAM,IAAI,KAAK,CAAC,gDAAgD,
CAAC,CAAA;QACnE,CAAC;QAED,OAAO;YACL,WAAW,EAAE,GAAG;YAChB,UAAU,EAAE,eAAe,CAAC,EAAE,KAAK,EAAE,SAAS,EAAE,QAAQ,EAAE,CAAC;SAC5D,CAAA;IACH,CAAC;YAAS,CAAC;QACT,YAAY,CAAC,OAAO,CAAC,CAAA;IACvB,CAAC;AACH,CAAC;AAED,MAAM,UAAU,wBAAwB,CAAC,EACvC,UAAU,EACV,UAAU,GAIX;IACC,OAAO;QACL;YACE,IAAI,EAAE,MAAM;YACZ,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,UAAU,CAAC,IAAI,CAAC;SAC/D;KACF,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
export { createLinkPreviewClient, } from './link-preview/client.js';
|
|
2
|
+
export { DEFAULT_CACHE_MODE, DEFAULT_MAX_CONTENT_CHARACTERS, DEFAULT_TIMEOUT_MS, } from './link-preview/content/types.js';
|
|
3
|
+
export { CACHE_MODES, } from './link-preview/types.js';
|
|
4
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"index.js","sourceRoot":"","sources":["../../../src/content/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,GAGxB,MAAM,0BAA0B,CAAA;AACjC,OAAO,EACL,kBAAkB,EAClB,8BAA8B,EAC9B,kBAAkB,GAGnB,MAAM,iCAAiC,CAAA;AAOxC,OAAO,EACL,WAAW,GAIZ,MAAM,yBAAyB,CAAA"}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import { fetchLinkContent } from './content/index.js';
|
|
2
|
+
/**
 * Builds a link-preview client whose `fetchLinkContent` method forwards to the
 * module-level `fetchLinkContent`, supplying the dependencies resolved here.
 *
 * @param {object} [options] - Optional overrides.
 * @param {Function} [options.fetch] - Fetch implementation; defaults to a wrapper
 *   that calls `globalThis.fetch` at invocation time.
 * @param {*} [options.scrapeWithFirecrawl] - Passed through, or `null` when nullish.
 * @param {string} [options.apifyApiToken] - Kept only when it is a string, otherwise `null`.
 * @param {*} [options.convertHtmlToMarkdown] - Passed through, or `null` when nullish.
 * @param {*} [options.transcriptCache] - Passed through, or `null` when nullish.
 * @param {Function} [options.onProgress] - Kept only when it is a function, otherwise `null`.
 * @returns {{ fetchLinkContent: Function }} Client object exposing `fetchLinkContent(url, contentOptions)`.
 */
export function createLinkPreviewClient(options = {}) {
  // Resolve `globalThis.fetch` lazily so a later replacement of the global is honoured.
  const defaultFetch = (...args) => globalThis.fetch(...args);

  const resolved = {
    fetch: options.fetch ?? defaultFetch,
    scrapeWithFirecrawl: options.scrapeWithFirecrawl ?? null,
    apifyApiToken: typeof options.apifyApiToken === 'string' ? options.apifyApiToken : null,
    convertHtmlToMarkdown: options.convertHtmlToMarkdown ?? null,
    transcriptCache: options.transcriptCache ?? null,
    onProgress: typeof options.onProgress === 'function' ? options.onProgress : null,
  };

  return {
    // A fresh dependency object is handed over on every call, as before.
    fetchLinkContent: (url, contentOptions) =>
      fetchLinkContent(url, contentOptions, {
        fetch: resolved.fetch,
        scrapeWithFirecrawl: resolved.scrapeWithFirecrawl,
        apifyApiToken: resolved.apifyApiToken,
        convertHtmlToMarkdown: resolved.convertHtmlToMarkdown,
        transcriptCache: resolved.transcriptCache,
        onProgress: resolved.onProgress,
      }),
  };
}
|
|
20
|
+
//# sourceMappingURL=client.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"client.js","sourceRoot":"","sources":["../../../../src/content/link-preview/client.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,oBAAoB,CAAA;AAsBrD,MAAM,UAAU,uBAAuB,CAAC,UAAoC,EAAE;IAC5E,MAAM,SAAS,GACb,OAAO,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,IAA8B,EAAE,EAAE,CAAC,UAAU,CAAC,KAAK,CAAC,GAAG,IAAI,CAAC,CAAC,CAAA;IACrF,MAAM,MAAM,GAA+B,OAAO,CAAC,mBAAmB,IAAI,IAAI,CAAA;IAC9E,MAAM,aAAa,GAAG,OAAO,OAAO,CAAC,aAAa,KAAK,QAAQ,CAAC,CAAC,CAAC,OAAO,CAAC,aAAa,CAAC,CAAC,CAAC,IAAI,CAAA;IAC9F,MAAM,qBAAqB,GAAiC,OAAO,CAAC,qBAAqB,IAAI,IAAI,CAAA;IACjG,MAAM,eAAe,GAA2B,OAAO,CAAC,eAAe,IAAI,IAAI,CAAA;IAC/E,MAAM,UAAU,GAAG,OAAO,OAAO,CAAC,UAAU,KAAK,UAAU,CAAC,CAAC,CAAC,OAAO,CAAC,UAAU,CAAC,CAAC,CAAC,IAAI,CAAA;IAEvF,OAAO;QACL,gBAAgB,EAAE,CAAC,GAAW,EAAE,cAAwC,EAAE,EAAE,CAC1E,gBAAgB,CAAC,GAAG,EAAE,cAAc,EAAE;YACpC,KAAK,EAAE,SAAS;YAChB,mBAAmB,EAAE,MAAM;YAC3B,aAAa;YACb,qBAAqB;YACrB,eAAe;YACf,UAAU;SACX,CAAC;KACL,CAAA;AACH,CAAC"}
|
|
@@ -0,0 +1,150 @@
|
|
|
1
|
+
import { load } from 'cheerio';
|
|
2
|
+
import sanitizeHtml from 'sanitize-html';
|
|
3
|
+
import { decodeHtmlEntities, normalizeWhitespace } from './cleaner.js';
|
|
4
|
+
// Minimum text length for a non-heading, non-list-item segment to be kept by
// collectSegmentsFromHtml; mergeConsecutiveSegments appends segments shorter
// than half of this value to the preceding segment.
const MIN_SEGMENT_LENGTH = 30;
|
|
5
|
+
/**
 * Runs `sanitizeHtml` over the markup with a fixed allow-list of structural and
 * inline tags, keeping only the `href` attribute on anchors.
 *
 * @param {string} html - Markup to sanitize.
 * @returns {string} Whatever `sanitizeHtml` returns for the configured options.
 */
export function sanitizeHtmlForMarkdownConversion(html) {
  const headingTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
  const allowedTags = [
    'article',
    'section',
    'div',
    'p',
    ...headingTags,
    'ol',
    'ul',
    'li',
    'blockquote',
    'pre',
    'code',
    'span',
    'strong',
    'em',
    'br',
    'a',
  ];
  // Tags listed here are passed as `nonTextTags` to sanitize-html.
  const nonTextTags = [
    'style',
    'script',
    'noscript',
    'template',
    'svg',
    'canvas',
    'iframe',
    'object',
    'embed',
  ];
  const options = {
    allowedTags,
    allowedAttributes: { a: ['href'] },
    nonTextTags,
    // Each text node is passed through the module's entity decoder.
    textFilter: (text) => decodeHtmlEntities(text),
  };
  return sanitizeHtml(html, options);
}
|
|
49
|
+
/**
 * Produces newline-joined article text from HTML.
 *
 * Uses the segments from `collectSegmentsFromHtml` when there are any;
 * otherwise falls back to the whitespace-normalized plain text of the markup.
 *
 * @param {string} html - Markup to extract text from.
 * @returns {string} Segments joined with `\n`, or the normalized plain-text fallback.
 */
export function extractArticleContent(html) {
  const collected = collectSegmentsFromHtml(html);
  if (collected.length === 0) {
    const plainText = extractPlainText(html);
    return normalizeWhitespace(plainText) ?? '';
  }
  return collected.join('\n');
}
|
|
57
|
+
/**
 * Sanitizes the markup, then collects text segments from headings, list items,
 * paragraphs, blockquotes and `pre` elements.
 *
 * Length thresholds: headings need at least 10 characters, list items at least
 * 20 (and are prefixed with "• "), everything else at least MIN_SEGMENT_LENGTH.
 * When nothing qualifies, the normalized body text (or the sanitized markup if
 * the body text is empty) is returned as a single segment.
 *
 * @param {string} html - Markup to split into segments.
 * @returns {string[]} Collected segments after `mergeConsecutiveSegments`, a
 *   single-element fallback array, or an empty array.
 */
export function collectSegmentsFromHtml(html) {
  const headingTags = ['h1', 'h2', 'h3', 'h4', 'h5', 'h6'];
  const sanitizeOptions = {
    allowedTags: [
      'article',
      'section',
      'div',
      'p',
      ...headingTags,
      'ol',
      'ul',
      'li',
      'blockquote',
      'pre',
      'code',
      'span',
      'strong',
      'em',
      'br',
    ],
    allowedAttributes: {},
    nonTextTags: [
      'style',
      'script',
      'noscript',
      'template',
      'svg',
      'canvas',
      'iframe',
      'object',
      'embed',
    ],
    textFilter: (text) => decodeHtmlEntities(text),
  };
  const sanitized = sanitizeHtml(html, sanitizeOptions);
  const $ = load(sanitized);
  const collected = [];

  $('h1,h2,h3,h4,h5,h6,li,p,blockquote,pre').each((_, node) => {
    const hasTagName = 'tagName' in node && typeof node.tagName === 'string';
    if (!hasTagName) {
      return;
    }
    const tagName = node.tagName.toLowerCase();
    const rawText = $(node).text();
    // Flatten the element's text onto a single line.
    const text = normalizeWhitespace(rawText).replaceAll(/\n+/g, ' ');
    if (!text) {
      return;
    }

    // Among the selected tags only h1-h6 start with "h".
    const isHeading = tagName.startsWith('h');
    const isListItem = !isHeading && tagName === 'li';
    let minimumLength = MIN_SEGMENT_LENGTH;
    if (isHeading) {
      minimumLength = 10;
    } else if (isListItem) {
      minimumLength = 20;
    }
    if (text.length < minimumLength) {
      return;
    }
    collected.push(isListItem ? `• ${text}` : text);
  });

  if (collected.length > 0) {
    return mergeConsecutiveSegments(collected);
  }

  const wholeText = normalizeWhitespace($('body').text() || sanitized);
  return wholeText ? [wholeText] : [];
}
|
|
132
|
+
/**
 * Strips every tag and attribute from the markup via `sanitizeHtml`, then
 * passes the result through `decodeHtmlEntities`.
 *
 * @param {string} html - Markup to reduce to text.
 * @returns {string} Entity-decoded text content.
 */
export function extractPlainText(html) {
  const stripAllMarkup = { allowedTags: [], allowedAttributes: {} };
  return decodeHtmlEntities(sanitizeHtml(html, stripAllMarkup));
}
|
|
136
|
+
/**
 * Folds very short, non-bullet segments into the segment that precedes them.
 *
 * A segment is appended (space-separated) to the previous entry when a previous
 * entry exists, the segment does not start with "•", and it is shorter than
 * half of MIN_SEGMENT_LENGTH. Falsy segments are skipped.
 *
 * @param {string[]} segments - Segments in document order.
 * @returns {string[]} New array with short fragments merged into their predecessor.
 */
function mergeConsecutiveSegments(segments) {
  const result = [];
  for (const current of segments) {
    if (current) {
      const previous = result.at(-1);
      const shouldAppend =
        previous && !current.startsWith('•') && current.length < MIN_SEGMENT_LENGTH / 2;
      if (shouldAppend) {
        result[result.length - 1] = `${previous} ${current}`;
      } else {
        result.push(current);
      }
    }
  }
  return result;
}
|
|
150
|
+
//# sourceMappingURL=article.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"article.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/article.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,SAAS,CAAA;AAC9B,OAAO,YAAY,MAAM,eAAe,CAAA;AAExC,OAAO,EAAE,kBAAkB,EAAE,mBAAmB,EAAE,MAAM,cAAc,CAAA;AAEtE,MAAM,kBAAkB,GAAG,EAAE,CAAA;AAE7B,MAAM,UAAU,iCAAiC,CAAC,IAAY;IAC5D,OAAO,YAAY,CAAC,IAAI,EAAE;QACxB,WAAW,EAAE;YACX,SAAS;YACT,SAAS;YACT,KAAK;YACL,GAAG;YACH,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,YAAY;YACZ,KAAK;YACL,MAAM;YACN,MAAM;YACN,QAAQ;YACR,IAAI;YACJ,IAAI;YACJ,GAAG;SACJ;QACD,iBAAiB,EAAE;YACjB,CAAC,EAAE,CAAC,MAAM,CAAC;SACZ;QACD,WAAW,EAAE;YACX,OAAO;YACP,QAAQ;YACR,UAAU;YACV,UAAU;YACV,KAAK;YACL,QAAQ;YACR,QAAQ;YACR,QAAQ;YACR,OAAO;SACR;QACD,UAAU,CAAC,IAAY;YACrB,OAAO,kBAAkB,CAAC,IAAI,CAAC,CAAA;QACjC,CAAC;KACF,CAAC,CAAA;AACJ,CAAC;AAED,MAAM,UAAU,qBAAqB,CAAC,IAAY;IAChD,MAAM,QAAQ,GAAG,uBAAuB,CAAC,IAAI,CAAC,CAAA;IAC9C,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;QACxB,OAAO,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAC5B,CAAC;IACD,MAAM,QAAQ,GAAG,mBAAmB,CAAC,gBAAgB,CAAC,IAAI,CAAC,CAAC,CAAA;IAC5D,OAAO,QAAQ,IAAI,EAAE,CAAA;AACvB,CAAC;AAED,MAAM,UAAU,uBAAuB,CAAC,IAAY;IAClD,MAAM,SAAS,GAAG,YAAY,CAAC,IAAI,EAAE;QACnC,WAAW,EAAE;YACX,SAAS;YACT,SAAS;YACT,KAAK;YACL,GAAG;YACH,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,IAAI;YACJ,YAAY;YACZ,KAAK;YACL,MAAM;YACN,MAAM;YACN,QAAQ;YACR,IAAI;YACJ,IAAI;SACL;QACD,iBAAiB,EAAE,EAAE;QACrB,WAAW,EAAE;YACX,OAAO;YACP,QAAQ;YACR,UAAU;YACV,UAAU;YACV,KAAK;YACL,QAAQ;YACR,QAAQ;YACR,QAAQ;YACR,OAAO;SACR;QACD,UAAU,CAAC,IAAY;YACrB,OAAO,kBAAkB,CAAC,IAAI,CAAC,CAAA;QACjC,CAAC;KACF,CAAC,CAAA;IAEF,MAAM,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAA;IACzB,MAAM,QAAQ,GAAa,EAAE,CAAA;IAE7B,CAAC,CAAC,uCAAuC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,EAAE;QAC7D,IAAI,CAAC,CAAC,SAAS,IAAI,OAAO,CAAC,IAAI,OAAO,OAAO,CAAC,OAAO,KAAK,QAAQ,EAAE,CAAC;YACnE,OAAM;QACR,CAAC;QAED,MAAM,GAAG,GAAG,OAAO,CAAC,OAAO,CAAC,WAAW,EAAE,CAAA;QAEzC,MAAM,GAAG,GAAG,CAAC,CAAC,OA
AO,CAAC,CAAC,IAAI,EAAE,CAAA;QAC7B,MAAM,IAAI,GAAG,mBAAmB,CAAC,GAAG,CAAC,CAAC,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC,CAAA;QAC7D,IAAI,CAAC,IAAI,IAAI,IAAI,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;YAC/B,OAAM;QACR,CAAC;QAED,IAAI,GAAG,CAAC,UAAU,CAAC,GAAG,CAAC,EAAE,CAAC;YACxB,IAAI,IAAI,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;gBACtB,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;YACrB,CAAC;YACD,OAAM;QACR,CAAC;QAED,IAAI,GAAG,KAAK,IAAI,EAAE,CAAC;YACjB,IAAI,IAAI,CAAC,MAAM,IAAI,EAAE,EAAE,CAAC;gBACtB,QAAQ,CAAC,IAAI,CAAC,KAAK,IAAI,EAAE,CAAC,CAAA;YAC5B,CAAC;YACD,OAAM;QACR,CAAC;QAED,IAAI,IAAI,CAAC,MAAM,GAAG,kBAAkB,EAAE,CAAC;YACrC,OAAM;QACR,CAAC;QAED,QAAQ,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACrB,CAAC,CAAC,CAAA;IAEF,IAAI,QAAQ,CAAC,MAAM,KAAK,CAAC,EAAE,CAAC;QAC1B,MAAM,QAAQ,GAAG,mBAAmB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,IAAI,SAAS,CAAC,CAAA;QACnE,OAAO,QAAQ,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,EAAE,CAAA;IACnC,CAAC;IAED,OAAO,wBAAwB,CAAC,QAAQ,CAAC,CAAA;AAC3C,CAAC;AAED,MAAM,UAAU,gBAAgB,CAAC,IAAY;IAC3C,MAAM,QAAQ,GAAG,YAAY,CAAC,IAAI,EAAE,EAAE,WAAW,EAAE,EAAE,EAAE,iBAAiB,EAAE,EAAE,EAAE,CAAC,CAAA;IAC/E,OAAO,kBAAkB,CAAC,QAAQ,CAAC,CAAA;AACrC,CAAC;AAED,SAAS,wBAAwB,CAAC,QAAkB;IAClD,MAAM,MAAM,GAAa,EAAE,CAAA;IAC3B,KAAK,MAAM,OAAO,IAAI,QAAQ,EAAE,CAAC;QAC/B,IAAI,CAAC,OAAO;YAAE,SAAQ;QACtB,MAAM,IAAI,GAAG,MAAM,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC,CAAA;QAC1B,IAAI,IAAI,IAAI,CAAC,OAAO,CAAC,UAAU,CAAC,GAAG,CAAC,IAAI,OAAO,CAAC,MAAM,GAAG,kBAAkB,GAAG,CAAC,EAAE,CAAC;YAChF,MAAM,CAAC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAC,GAAG,GAAG,IAAI,IAAI,OAAO,EAAE,CAAA;YAChD,SAAQ;QACV,CAAC;QACD,MAAM,CAAC,IAAI,CAAC,OAAO,CAAC,CAAA;IACtB,CAAC;IACD,OAAO,MAAM,CAAA;AACf,CAAC"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import { compact } from 'es-toolkit';
|
|
2
|
+
// Matches runs of whitespace; applyContentBudget splits content on it to count words.
const WORD_SPLIT_PATTERN = /\s+/g;
|
|
3
|
+
/**
 * Normalizes whitespace while keeping paragraph breaks.
 *
 * Steps: non-breaking spaces become regular spaces, runs of tabs/spaces collapse
 * to one space, whitespace around each line break is removed, runs of three or
 * more line breaks collapse to a single blank line, and the result is trimmed.
 *
 * The line-break step deliberately matches only non-newline whitespace around a
 * single `\n`. Matching `\s*\n\s*` would swallow whole runs of newlines, which
 * made the blank-line collapse below unreachable and erased paragraph breaks.
 *
 * @param {string} input - Text to normalize.
 * @returns {string} Normalized text with at most one blank line between paragraphs.
 */
export function normalizeForPrompt(input) {
  return input
    .replaceAll('\u00A0', ' ')
    .replaceAll(/[\t ]+/g, ' ')
    // `[^\S\n]` is "whitespace except newline", so `\r\n` also becomes `\n`.
    .replaceAll(/[^\S\n]*\n[^\S\n]*/g, '\n')
    .replaceAll(/\n{3,}/g, '\n\n')
    .trim();
}
|
|
11
|
+
/**
 * Collapses whitespace: non-breaking spaces become regular spaces, runs of
 * tabs/spaces become one space, any whitespace run containing a line break
 * becomes a single `\n`, and the result is trimmed.
 *
 * @param {string} input - Text to normalize.
 * @returns {string} Normalized text.
 */
export function normalizeWhitespace(input) {
  const withoutNbsp = input.replaceAll('\u00A0', ' ');
  const singleSpaced = withoutNbsp.replaceAll(/[\t ]+/g, ' ');
  const tightLines = singleSpaced.replaceAll(/\s*\n\s*/g, '\n');
  return tightLines.trim();
}
|
|
18
|
+
// Entities recognised by decodeHtmlEntities, mapped to their literal characters.
const HTML_ENTITY_REPLACEMENTS = new Map([
  ['&amp;', '&'],
  ['&lt;', '<'],
  ['&gt;', '>'],
  ['&quot;', '"'],
  ['&#39;', "'"],
  ['&#x27;', "'"],
  ['&#x2F;', '/'],
  ['&nbsp;', ' '],
]);

// Matches exactly the keys of HTML_ENTITY_REPLACEMENTS (case-sensitive).
const HTML_ENTITY_PATTERN = /&(?:amp|lt|gt|quot|#39|#x27|#x2F|nbsp);/g;

/**
 * Decodes a small fixed set of HTML entities (`&amp;`, `&lt;`, `&gt;`, `&quot;`,
 * `&#39;`, `&#x27;`, `&#x2F;`, `&nbsp;`) into their literal characters.
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded again: `&amp;lt;` yields `&lt;`, not `<`.
 *
 * @param {string} input - Text that may contain HTML entities.
 * @returns {string} Text with the supported entities replaced; anything else is untouched.
 */
export function decodeHtmlEntities(input) {
  return input.replaceAll(HTML_ENTITY_PATTERN, (entity) => HTML_ENTITY_REPLACEMENTS.get(entity));
}
|
|
29
|
+
/**
 * Collapses all whitespace runs in a candidate string to single spaces and trims it.
 *
 * @param {string | null | undefined} value - Candidate text.
 * @returns {string | null} The cleaned string, or `null` when the input is falsy
 *   or contains only whitespace.
 */
export function normalizeCandidate(value) {
  const collapsed = value ? value.replaceAll(/\s+/g, ' ').trim() : '';
  return collapsed.length === 0 ? null : collapsed;
}
|
|
36
|
+
/**
 * Cuts text down to `maxLength` characters, preferring to end on a sentence.
 *
 * Within the first `maxLength` characters it finds the last occurrence of
 * ". ", "! ", "? " or a blank line. If that position lies beyond half of
 * `maxLength`, the text is cut just after the first character of the marker;
 * otherwise the hard cut at `maxLength` is returned.
 *
 * @param {string} input - Text to clip.
 * @param {number} maxLength - Maximum number of characters to keep.
 * @returns {string} The input unchanged when it already fits, else the clipped text.
 */
export function clipAtSentenceBoundary(input, maxLength) {
  if (input.length <= maxLength) {
    return input;
  }
  const head = input.slice(0, maxLength);
  let breakIndex = -1;
  for (const marker of ['. ', '! ', '? ', '\n\n']) {
    breakIndex = Math.max(breakIndex, head.lastIndexOf(marker));
  }
  const isLateEnough = breakIndex > maxLength * 0.5;
  return isLateEnough ? head.slice(0, breakIndex + 1) : head;
}
|
|
47
|
+
/**
 * Applies a character budget to content and reports basic statistics.
 *
 * Content longer than `maxCharacters` is clipped with `clipAtSentenceBoundary`;
 * the result is trimmed and its whitespace-separated words are counted.
 *
 * @param {string} baseContent - Content to budget.
 * @param {number} maxCharacters - Maximum number of characters allowed.
 * @returns {{ content: string, truncated: boolean, totalCharacters: number, wordCount: number }}
 *   The trimmed (possibly clipped) content, whether clipping occurred, the
 *   original length, and the word count of the returned content.
 */
export function applyContentBudget(baseContent, maxCharacters) {
  const totalCharacters = baseContent.length;
  const truncated = totalCharacters > maxCharacters;

  let budgeted = baseContent;
  if (truncated) {
    budgeted = clipAtSentenceBoundary(baseContent, maxCharacters);
  }
  const content = budgeted.trim();

  let wordCount = 0;
  if (content.length > 0) {
    const words = compact(content.split(WORD_SPLIT_PATTERN));
    wordCount = words.length;
  }

  return { content, truncated, totalCharacters, wordCount };
}
|
|
55
|
+
//# sourceMappingURL=cleaner.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"cleaner.js","sourceRoot":"","sources":["../../../../../src/content/link-preview/content/cleaner.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,YAAY,CAAA;AAEpC,MAAM,kBAAkB,GAAG,MAAM,CAAA;AASjC,MAAM,UAAU,kBAAkB,CAAC,KAAa;IAC9C,OAAO,KAAK;SACT,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC;SACzB,UAAU,CAAC,SAAS,EAAE,GAAG,CAAC;SAC1B,UAAU,CAAC,WAAW,EAAE,IAAI,CAAC;SAC7B,UAAU,CAAC,SAAS,EAAE,MAAM,CAAC;SAC7B,IAAI,EAAE,CAAA;AACX,CAAC;AAED,MAAM,UAAU,mBAAmB,CAAC,KAAa;IAC/C,OAAO,KAAK;SACT,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC;SACzB,UAAU,CAAC,SAAS,EAAE,GAAG,CAAC;SAC1B,UAAU,CAAC,WAAW,EAAE,IAAI,CAAC;SAC7B,IAAI,EAAE,CAAA;AACX,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,KAAa;IAC9C,OAAO,KAAK;SACT,UAAU,CAAC,OAAO,EAAE,GAAG,CAAC;SACxB,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC;SACvB,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC;SACvB,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC;SACzB,UAAU,CAAC,OAAO,EAAE,GAAG,CAAC;SACxB,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC;SACzB,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC;SACzB,UAAU,CAAC,QAAQ,EAAE,GAAG,CAAC,CAAA;AAC9B,CAAC;AAED,MAAM,UAAU,kBAAkB,CAAC,KAAgC;IACjE,IAAI,CAAC,KAAK,EAAE,CAAC;QACX,OAAO,IAAI,CAAA;IACb,CAAC;IACD,MAAM,OAAO,GAAG,KAAK,CAAC,UAAU,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAA;IACpD,OAAO,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAA;AAC5C,CAAC;AAED,MAAM,UAAU,sBAAsB,CAAC,KAAa,EAAE,SAAiB;IACrE,IAAI,KAAK,CAAC,MAAM,IAAI,SAAS,EAAE,CAAC;QAC9B,OAAO,KAAK,CAAA;IACd,CAAC;IACD,MAAM,KAAK,GAAG,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAA;IACvC,MAAM,iBAAiB,GAAG,IAAI,CAAC,GAAG,CAChC,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,EACvB,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,EACvB,KAAK,CAAC,WAAW,CAAC,IAAI,CAAC,EACvB,KAAK,CAAC,WAAW,CAAC,MAAM,CAAC,CAC1B,CAAA;IACD,IAAI,iBAAiB,GAAG,SAAS,GAAG,GAAG,EAAE,CAAC;QACxC,OAAO,KAAK,CAAC,KAAK,CAAC,CAAC,EAAE,iBAAiB,GAAG,CAAC,CAAC,CAAA;IAC9C,CAAC;IACD,OAAO,KAAK,CAAA;AACd,CAAC;AAED,MAAM,UAAU,kBAAkB,CAChC,WAAmB,EACnB,aAAqB;IAErB,MAAM,eAAe,GAAG,WAAW,CAAC,MAAM,CAAA;IAC1C,MAAM,SAAS,GAAG,eAAe,GAAG,aAAa,CAAA;IACjD,MAAM,OAAO,GAAG,SAAS,CAAC,CAAC,CAAC,sBAAsB,CAAC,WAAW,EAAE,aAAa,CAAC,CAAC,CAAC,CAAC,WAAW,C
AAA;IAC5F,MAAM,OAAO,GAAG,OAAO,CAAC,IAAI,EAAE,CAAA;IAC9B,MAAM,SAAS,GAAG,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,OAAO,CAAC,OAAO,CAAC,KAAK,CAAC,kBAAkB,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,CAAA;IAC5F,OAAO,EAAE,OAAO,EAAE,SAAS,EAAE,eAAe,EAAE,SAAS,EAAE,CAAA;AAC3D,CAAC"}
|