@mrclrchtr/supi-web 0.1.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/node_modules/@mrclrchtr/supi-core/package.json +1 -5
- package/package.json +4 -10
- package/src/convert.ts +17 -4
- package/src/web.ts +25 -7
package/node_modules/@mrclrchtr/supi-core/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mrclrchtr/supi-core",
|
|
3
|
-
"version": "
|
|
3
|
+
"version": "1.0.0",
|
|
4
4
|
"description": "SuPi core — shared infrastructure for SuPi extensions (XML context tags, config system)",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -22,9 +22,5 @@
|
|
|
22
22
|
"@earendil-works/pi-coding-agent": "*",
|
|
23
23
|
"@earendil-works/pi-tui": "*"
|
|
24
24
|
},
|
|
25
|
-
"devDependencies": {
|
|
26
|
-
"@types/node": "^25.6.0",
|
|
27
|
-
"vitest": "^4.1.4"
|
|
28
|
-
},
|
|
29
25
|
"main": "src/index.ts"
|
|
30
26
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@mrclrchtr/supi-web",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "1.0.0",
|
|
4
4
|
"description": "SuPi Web extension — fetch web pages as clean Markdown (web_fetch_md) and library docs via Context7 (web_docs_search, web_docs_fetch)",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"repository": {
|
|
@@ -20,12 +20,12 @@
|
|
|
20
20
|
"README.md"
|
|
21
21
|
],
|
|
22
22
|
"dependencies": {
|
|
23
|
-
"@mrclrchtr/supi-core": "workspace:*",
|
|
24
23
|
"@upstash/context7-sdk": "^0.3.0",
|
|
25
|
-
"jsdom": "^
|
|
24
|
+
"jsdom": "^29.0.0",
|
|
26
25
|
"@mozilla/readability": "^0.6.0",
|
|
27
26
|
"turndown": "^7.2.0",
|
|
28
|
-
"turndown-plugin-gfm": "^1.0.2"
|
|
27
|
+
"turndown-plugin-gfm": "^1.0.2",
|
|
28
|
+
"@mrclrchtr/supi-core": "1.0.0"
|
|
29
29
|
},
|
|
30
30
|
"bundledDependencies": [
|
|
31
31
|
"@mrclrchtr/supi-core"
|
|
@@ -34,12 +34,6 @@
|
|
|
34
34
|
"@earendil-works/pi-coding-agent": "*",
|
|
35
35
|
"typebox": "*"
|
|
36
36
|
},
|
|
37
|
-
"devDependencies": {
|
|
38
|
-
"vitest": "^4.1.5",
|
|
39
|
-
"@types/jsdom": "^21.1.7",
|
|
40
|
-
"@types/turndown": "^5.0.6",
|
|
41
|
-
"@mrclrchtr/supi-test-utils": "workspace:*"
|
|
42
|
-
},
|
|
43
37
|
"pi": {
|
|
44
38
|
"extensions": [
|
|
45
39
|
"./src/web.ts",
|
package/src/convert.ts
CHANGED
|
@@ -110,6 +110,17 @@ function absolutizeLinks(root: Element, baseUrl: string): void {
|
|
|
110
110
|
}
|
|
111
111
|
}
|
|
112
112
|
|
|
113
|
+
/** Dangerous URI schemes that must never be used in href/src attributes. */
|
|
114
|
+
const DANGEROUS_SCHEMES = ["javascript:", "data:", "vbscript:", "file:"];
|
|
115
|
+
|
|
116
|
+
function hasDangerousScheme(value: string): boolean {
|
|
117
|
+
// Case-insensitive scheme check: split on first ':' and compare lowercased
|
|
118
|
+
const colonIndex = value.indexOf(":");
|
|
119
|
+
if (colonIndex === -1) return false;
|
|
120
|
+
const scheme = value.slice(0, colonIndex + 1).toLowerCase();
|
|
121
|
+
return DANGEROUS_SCHEMES.includes(scheme);
|
|
122
|
+
}
|
|
123
|
+
|
|
113
124
|
function resolveUrl(href: string, baseUrl: string): string {
|
|
114
125
|
const trimmed = String(href || "").trim();
|
|
115
126
|
if (
|
|
@@ -120,14 +131,16 @@ function resolveUrl(href: string, baseUrl: string): string {
|
|
|
120
131
|
) {
|
|
121
132
|
return trimmed;
|
|
122
133
|
}
|
|
123
|
-
if (trimmed
|
|
134
|
+
if (hasDangerousScheme(trimmed)) {
|
|
124
135
|
return "";
|
|
125
136
|
}
|
|
126
137
|
try {
|
|
127
138
|
const resolved = new URL(trimmed, baseUrl);
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
139
|
+
// Even after URL resolution, reject any non-http/https protocols
|
|
140
|
+
if (resolved.protocol !== "http:" && resolved.protocol !== "https:") {
|
|
141
|
+
return "";
|
|
142
|
+
}
|
|
143
|
+
return resolved.toString();
|
|
131
144
|
} catch {
|
|
132
145
|
return trimmed;
|
|
133
146
|
}
|
package/src/web.ts
CHANGED
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
* SuPi Web extension entry point — registers the `web_fetch_md` tool with pi.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
+
import { spawnSync } from "node:child_process";
|
|
5
6
|
import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
6
7
|
import { Type } from "typebox";
|
|
7
8
|
import { htmlToMarkdown, wrapAsCodeBlock } from "./convert.ts";
|
|
@@ -26,12 +27,29 @@ Links and images are absolutized by default. Use \`abs_links: false\` to keep th
|
|
|
26
27
|
const PROMPT_SNIPPET =
|
|
27
28
|
"web_fetch_md — fetch a URL and convert it to clean Markdown suitable for LLM ingestion.";
|
|
28
29
|
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
30
|
+
function isGhAvailable(): boolean {
|
|
31
|
+
try {
|
|
32
|
+
const result = spawnSync("gh", ["--version"], { stdio: "ignore" });
|
|
33
|
+
return result.status === 0;
|
|
34
|
+
} catch {
|
|
35
|
+
return false;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
function buildPromptGuidelines(): string[] {
|
|
40
|
+
const guidelines = [
|
|
41
|
+
"Use web_fetch_md to fetch web pages and convert them to clean Markdown for LLM ingestion.",
|
|
42
|
+
"Only accept real `http://` or `https://` URLs; stop and ask the user for an allowed source if the page is access-controlled.",
|
|
43
|
+
"Prefer `output_mode: auto` (default) so large pages are written to temp files instead of flooding the context window.",
|
|
44
|
+
"Set `abs_links: false` only when relative links are intentional (e.g., local documentation).",
|
|
45
|
+
];
|
|
46
|
+
if (isGhAvailable()) {
|
|
47
|
+
guidelines.push(
|
|
48
|
+
"For GitHub URLs (e.g., repos, issues, PRs, releases), prefer the `gh` CLI via `bash` over this tool.",
|
|
49
|
+
);
|
|
50
|
+
}
|
|
51
|
+
return guidelines;
|
|
52
|
+
}
|
|
35
53
|
|
|
36
54
|
const OutputModeEnum = Type.Union(
|
|
37
55
|
[Type.Literal("auto"), Type.Literal("inline"), Type.Literal("file")],
|
|
@@ -44,7 +62,7 @@ export default function webExtension(pi: ExtensionAPI): void {
|
|
|
44
62
|
label: TOOL_LABEL,
|
|
45
63
|
description: TOOL_DESCRIPTION,
|
|
46
64
|
promptSnippet: PROMPT_SNIPPET,
|
|
47
|
-
promptGuidelines:
|
|
65
|
+
promptGuidelines: buildPromptGuidelines(),
|
|
48
66
|
parameters: Type.Object({
|
|
49
67
|
url: Type.String({ description: "http(s) URL to fetch" }),
|
|
50
68
|
output_mode: Type.Optional(OutputModeEnum),
|