@mrclrchtr/supi-web 0.1.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mrclrchtr/supi-core",
3
- "version": "0.1.0",
3
+ "version": "1.0.0",
4
4
  "description": "SuPi core — shared infrastructure for SuPi extensions (XML context tags, config system)",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -22,9 +22,5 @@
22
22
  "@earendil-works/pi-coding-agent": "*",
23
23
  "@earendil-works/pi-tui": "*"
24
24
  },
25
- "devDependencies": {
26
- "@types/node": "^25.6.0",
27
- "vitest": "^4.1.4"
28
- },
29
25
  "main": "src/index.ts"
30
26
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@mrclrchtr/supi-web",
3
- "version": "0.1.0",
3
+ "version": "1.0.0",
4
4
  "description": "SuPi Web extension — fetch web pages as clean Markdown (web_fetch_md) and library docs via Context7 (web_docs_search, web_docs_fetch)",
5
5
  "license": "MIT",
6
6
  "repository": {
@@ -20,12 +20,12 @@
20
20
  "README.md"
21
21
  ],
22
22
  "dependencies": {
23
- "@mrclrchtr/supi-core": "workspace:*",
24
23
  "@upstash/context7-sdk": "^0.3.0",
25
- "jsdom": "^26.1.0",
24
+ "jsdom": "^29.0.0",
26
25
  "@mozilla/readability": "^0.6.0",
27
26
  "turndown": "^7.2.0",
28
- "turndown-plugin-gfm": "^1.0.2"
27
+ "turndown-plugin-gfm": "^1.0.2",
28
+ "@mrclrchtr/supi-core": "1.0.0"
29
29
  },
30
30
  "bundledDependencies": [
31
31
  "@mrclrchtr/supi-core"
@@ -34,12 +34,6 @@
34
34
  "@earendil-works/pi-coding-agent": "*",
35
35
  "typebox": "*"
36
36
  },
37
- "devDependencies": {
38
- "vitest": "^4.1.5",
39
- "@types/jsdom": "^21.1.7",
40
- "@types/turndown": "^5.0.6",
41
- "@mrclrchtr/supi-test-utils": "workspace:*"
42
- },
43
37
  "pi": {
44
38
  "extensions": [
45
39
  "./src/web.ts",
package/src/convert.ts CHANGED
@@ -110,6 +110,17 @@ function absolutizeLinks(root: Element, baseUrl: string): void {
110
110
  }
111
111
  }
112
112
 
113
+ /** Dangerous URI schemes that must never be used in href/src attributes. */
114
+ const DANGEROUS_SCHEMES = ["javascript:", "data:", "vbscript:", "file:"];
115
+
116
+ function hasDangerousScheme(value: string): boolean {
117
+ // Case-insensitive scheme check: split on first ':' and compare lowercased
118
+ const colonIndex = value.indexOf(":");
119
+ if (colonIndex === -1) return false;
120
+ const scheme = value.slice(0, colonIndex + 1).toLowerCase();
121
+ return DANGEROUS_SCHEMES.includes(scheme);
122
+ }
123
+
113
124
  function resolveUrl(href: string, baseUrl: string): string {
114
125
  const trimmed = String(href || "").trim();
115
126
  if (
@@ -120,14 +131,16 @@ function resolveUrl(href: string, baseUrl: string): string {
120
131
  ) {
121
132
  return trimmed;
122
133
  }
123
- if (trimmed.startsWith("javascript:")) {
134
+ if (hasDangerousScheme(trimmed)) {
124
135
  return "";
125
136
  }
126
137
  try {
127
138
  const resolved = new URL(trimmed, baseUrl);
128
- return resolved.protocol === "http:" || resolved.protocol === "https:"
129
- ? resolved.toString()
130
- : trimmed;
139
+ // Even after URL resolution, reject any non-http/https protocols
140
+ if (resolved.protocol !== "http:" && resolved.protocol !== "https:") {
141
+ return "";
142
+ }
143
+ return resolved.toString();
131
144
  } catch {
132
145
  return trimmed;
133
146
  }
package/src/web.ts CHANGED
@@ -2,6 +2,7 @@
2
2
  * SuPi Web extension entry point — registers the `web_fetch_md` tool with pi.
3
3
  */
4
4
 
5
+ import { spawnSync } from "node:child_process";
5
6
  import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
6
7
  import { Type } from "typebox";
7
8
  import { htmlToMarkdown, wrapAsCodeBlock } from "./convert.ts";
@@ -26,12 +27,29 @@ Links and images are absolutized by default. Use \`abs_links: false\` to keep th
26
27
  const PROMPT_SNIPPET =
27
28
  "web_fetch_md — fetch a URL and convert it to clean Markdown suitable for LLM ingestion.";
28
29
 
29
- const PROMPT_GUIDELINES = [
30
- "Use web_fetch_md to fetch web pages and convert them to clean Markdown for LLM ingestion.",
31
- "Only accept real `http://` or `https://` URLs; stop and ask the user for an allowed source if the page is access-controlled.",
32
- "Prefer `output_mode: auto` (default) so large pages are written to temp files instead of flooding the context window.",
33
- "Set `abs_links: false` only when relative links are intentional (e.g., local documentation).",
34
- ];
30
+ function isGhAvailable(): boolean {
31
+ try {
32
+ const result = spawnSync("gh", ["--version"], { stdio: "ignore" });
33
+ return result.status === 0;
34
+ } catch {
35
+ return false;
36
+ }
37
+ }
38
+
39
+ function buildPromptGuidelines(): string[] {
40
+ const guidelines = [
41
+ "Use web_fetch_md to fetch web pages and convert them to clean Markdown for LLM ingestion.",
42
+ "Only accept real `http://` or `https://` URLs; stop and ask the user for an allowed source if the page is access-controlled.",
43
+ "Prefer `output_mode: auto` (default) so large pages are written to temp files instead of flooding the context window.",
44
+ "Set `abs_links: false` only when relative links are intentional (e.g., local documentation).",
45
+ ];
46
+ if (isGhAvailable()) {
47
+ guidelines.push(
48
+ "For GitHub URLs (e.g., repos, issues, PRs, releases), prefer the `gh` CLI via `bash` over this tool.",
49
+ );
50
+ }
51
+ return guidelines;
52
+ }
35
53
 
36
54
  const OutputModeEnum = Type.Union(
37
55
  [Type.Literal("auto"), Type.Literal("inline"), Type.Literal("file")],
@@ -44,7 +62,7 @@ export default function webExtension(pi: ExtensionAPI): void {
44
62
  label: TOOL_LABEL,
45
63
  description: TOOL_DESCRIPTION,
46
64
  promptSnippet: PROMPT_SNIPPET,
47
- promptGuidelines: PROMPT_GUIDELINES,
65
+ promptGuidelines: buildPromptGuidelines(),
48
66
  parameters: Type.Object({
49
67
  url: Type.String({ description: "http(s) URL to fetch" }),
50
68
  output_mode: Type.Optional(OutputModeEnum),