@cfbender/cesium 0.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/ARCHITECTURE.md +304 -0
- package/CHANGELOG.md +335 -0
- package/LICENSE +21 -0
- package/README.md +479 -0
- package/agents/cesium.md +39 -0
- package/assets/styleguide.html +857 -0
- package/package.json +61 -0
- package/src/cli/commands/ls.ts +186 -0
- package/src/cli/commands/open.ts +208 -0
- package/src/cli/commands/prune.ts +348 -0
- package/src/cli/commands/restart.ts +38 -0
- package/src/cli/commands/serve.ts +214 -0
- package/src/cli/commands/stop.ts +130 -0
- package/src/cli/commands/theme.ts +333 -0
- package/src/cli/index.ts +78 -0
- package/src/config.ts +94 -0
- package/src/index.ts +35 -0
- package/src/prompt/system-fragment.md +97 -0
- package/src/render/client-js.ts +316 -0
- package/src/render/controls.ts +302 -0
- package/src/render/critique.ts +360 -0
- package/src/render/extract.ts +83 -0
- package/src/render/scrub.ts +141 -0
- package/src/render/theme.ts +712 -0
- package/src/render/validate.ts +524 -0
- package/src/render/wrap.ts +165 -0
- package/src/server/api.ts +166 -0
- package/src/server/http.ts +195 -0
- package/src/server/lifecycle.ts +331 -0
- package/src/server/stop.ts +124 -0
- package/src/storage/index-cache.ts +71 -0
- package/src/storage/index-gen.ts +447 -0
- package/src/storage/lock.ts +108 -0
- package/src/storage/mutate.ts +396 -0
- package/src/storage/paths.ts +159 -0
- package/src/storage/project-summaries.ts +19 -0
- package/src/storage/theme-write.ts +19 -0
- package/src/storage/write.ts +75 -0
- package/src/tools/ask.ts +353 -0
- package/src/tools/critique.ts +66 -0
- package/src/tools/publish.ts +404 -0
- package/src/tools/stop.ts +53 -0
- package/src/tools/styleguide.ts +23 -0
- package/src/tools/wait.ts +192 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
// Extracts visible plain text from an HTML body fragment.
|
|
2
|
+
// Used to populate the bodyText field on IndexEntry for full-text search.
|
|
3
|
+
|
|
4
|
+
import { parseFragment, defaultTreeAdapter as ta } from "parse5";
|
|
5
|
+
import type { DefaultTreeAdapterTypes } from "parse5";
|
|
6
|
+
|
|
7
|
+
type ChildNode = DefaultTreeAdapterTypes.ChildNode;
|
|
8
|
+
|
|
9
|
+
// Tags whose text content should be excluded from extraction.
// collectText lowercases each element's tag name before looking it up here,
// so entries must be lowercase.
const SKIP_TAGS = new Set(["script", "style", "noscript"]);
|
|
11
|
+
|
|
12
|
+
/**
 * Depth-first walk that appends the content of every text node under `node`
 * to `parts`, in document order.
 *
 * Elements listed in SKIP_TAGS are not descended into, so none of their text
 * is collected. Nodes that are neither elements nor text are ignored.
 *
 * @param node  Node to visit.
 * @param parts Accumulator that receives text-node contents (mutated in place).
 */
function collectText(node: ChildNode, parts: string[]): void {
  if (ta.isElementNode(node)) {
    const tagName = ta.getTagName(node).toLowerCase();
    if (!SKIP_TAGS.has(tagName)) {
      const kids = ta.getChildNodes(node) as ChildNode[];
      kids.forEach((kid) => {
        collectText(kid, parts);
      });
    }
  } else if (ta.isTextNode(node)) {
    parts.push(ta.getTextNodeContent(node));
  }
  // Anything else (comment, doctype) contributes no text.
}
|
|
34
|
+
|
|
35
|
+
/**
 * Extracts visible text content from an HTML body fragment.
 *
 * - Skips contents of <script>, <style>, and <noscript>.
 * - Joins text nodes with a space, then collapses all whitespace runs to
 *   single spaces and trims leading/trailing whitespace.
 * - Truncates to maxChars (default 5000), breaking at a word boundary
 *   (nearest preceding whitespace within the last 100 chars) when possible.
 *   When no whitespace is available the text is hard-cut, without splitting
 *   a UTF-16 surrogate pair.
 *
 * Returns text as-written (not lowercased). Lowercasing happens at
 * search-attribute write time in index-gen.
 *
 * HTML entities are decoded automatically by parse5.
 *
 * This function is pure: same input always yields the same output.
 *
 * @param htmlBody HTML fragment to extract text from.
 * @param maxChars Maximum length of the result in UTF-16 code units.
 *                 Fractional values are floored; zero, negative, or NaN
 *                 yields an empty string.
 * @returns The extracted text, at most `maxChars` code units long.
 */
export function extractTextContent(htmlBody: string, maxChars: number = 5000): string {
  // A negative limit would make slice() count from the end of the string and
  // return far more than requested, so treat every non-positive limit as "nothing".
  const limit = Math.floor(maxChars);
  if (!(limit > 0) || htmlBody.trim() === "") return "";

  const fragment = parseFragment(htmlBody);
  const parts: string[] = [];
  for (const child of ta.getChildNodes(fragment) as ChildNode[]) {
    collectText(child, parts);
  }

  // Join all text parts, collapse whitespace, trim
  const collapsed = parts.join(" ").replace(/\s+/g, " ").trim();
  if (collapsed.length <= limit) {
    return collapsed;
  }

  // Truncate: try to break at the last space inside the final 100 chars
  // before the limit (indices windowStart .. limit - 1).
  const windowStart = Math.max(0, limit - 100);
  const lastSpace = collapsed.lastIndexOf(" ", limit - 1);
  if (lastSpace >= windowStart) {
    return collapsed.slice(0, lastSpace).trimEnd();
  }

  // No whitespace found in window — hard cut. If the cut would land between
  // the two halves of a surrogate pair, drop the dangling high surrogate too.
  const lastUnit = collapsed.charCodeAt(limit - 1);
  const cut = lastUnit >= 0xd800 && lastUnit <= 0xdbff ? limit - 1 : limit;
  return collapsed.slice(0, cut);
}
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
// Strips external resources (remote links, scripts, images) from agent-supplied HTML.
|
|
2
|
+
|
|
3
|
+
import { parseFragment, serialize, defaultTreeAdapter as ta } from "parse5";
|
|
4
|
+
import type { DefaultTreeAdapterTypes } from "parse5";
|
|
5
|
+
|
|
6
|
+
type ChildNode = DefaultTreeAdapterTypes.ChildNode;
|
|
7
|
+
type Element = DefaultTreeAdapterTypes.Element;
|
|
8
|
+
|
|
9
|
+
/**
 * Identifies which scrub rule removed something:
 * - "script-src": a <script> element that had a src attribute
 * - "stylesheet-href": a <link rel="stylesheet"> whose href matched HTTP_RE
 * - "img-http": an <img> whose src matched HTTP_RE
 * - "url-http": a url(http...) reference inside an inline style attribute
 */
export type ScrubReason = "script-src" | "stylesheet-href" | "img-http" | "url-http";

/** One removal recorded by scrubNode. */
export interface ScrubRemoval {
  /** The rule that triggered the removal. */
  reason: ScrubReason;
  /**
   * A reconstructed description of what was removed (for example
   * `<img src="...">` or `url(...)`), not the exact source markup.
   */
  original: string;
}

/** Result returned by scrub(). */
export interface ScrubResult {
  /** The fragment serialized back to HTML after scrubbing. */
  html: string;
  /** Every removal that was recorded while scrubbing. */
  removed: ScrubRemoval[];
}
|
|
20
|
+
|
|
21
|
+
// Matches a string that begins with "http://" or "https://" (case-insensitive).
const HTTP_RE = /^https?:\/\//i;
// Matches a CSS url(...) reference, optionally quoted, whose target starts with
// http:// or https://; capture group 1 is the URL. The regex is global, so its
// lastIndex is stateful when used with .test() — scrubNode resets it around use.
const URL_HTTP_RE = /url\(\s*['"]?(https?:\/\/[^)'"]+)['"]?\s*\)/gi;
|
|
23
|
+
|
|
24
|
+
/**
 * Looks up an attribute on an element by exact name.
 *
 * @param node Element whose attribute list is searched.
 * @param name Attribute name to match (compared with ===).
 * @returns The value of the first matching attribute, or undefined if none.
 */
function attrVal(node: Element, name: string): string | undefined {
  for (const attribute of ta.getAttrList(node)) {
    if (attribute.name === name) {
      return attribute.value;
    }
  }
  return undefined;
}
|
|
28
|
+
|
|
29
|
+
/**
 * Creates a comment node carrying `text`, made safe for serialization.
 *
 * Callers interpolate attribute values from untrusted HTML into `text`, and
 * comment data is written out verbatim between `<!--` and `-->`. Without
 * neutralisation, a value containing `-->` (or `--!>`) would end the comment
 * early and the rest of the value would come back as live markup.
 *
 * @param text Desired comment text.
 * @returns A comment node whose data cannot terminate the comment early.
 */
function makeComment(text: string): ChildNode {
  const safeText = text
    // Put a space after every hyphen that is followed by another hyphen, so
    // no "--" remains and neither "-->" nor "--!>" can occur.
    .replace(/-(?=-)/g, "- ")
    // Comment data that starts with ">" or "->" closes the comment at once.
    .replace(/^(-?>)/, " $1");
  return ta.createCommentNode(safeText) as unknown as ChildNode;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Reports whether an attribute value would be fetched as an http(s) URL.
 *
 * URL parsing strips leading C0-control/space characters and removes ASCII
 * tab and newline characters anywhere in the input, so " http://x" and
 * "ht\ntp://x" must be treated the same as "http://x".
 */
function isRemoteHttpUrl(value: string): boolean {
  const normalized = value.replace(/[\t\n\r]/g, "").replace(/^[\u0000-\u0020]+/, "");
  return HTTP_RE.test(normalized);
}

/**
 * Scrubs external resources from `node` and, recursively, its descendants.
 *
 * Rules applied to elements:
 * - <script> with any src attribute (local or remote) is replaced by a comment.
 * - <link> whose rel tokens include "stylesheet" and whose href is http(s)
 *   is replaced by a comment.
 * - <img> whose src is http(s) is replaced by a comment.
 * - url(http...) references inside an inline style attribute become url().
 *
 * Non-element nodes are returned untouched.
 *
 * @param node    Node to scrub. Its attributes and children may be mutated.
 * @param removed Accumulator that receives one entry per removal, in
 *                document order.
 * @returns `node` itself when it is kept, or a replacement comment node that
 *          the caller must swap in. `null` (drop the node) is part of the
 *          signature but is not produced by the current rules.
 */
function scrubNode(node: ChildNode, removed: ScrubRemoval[]): ChildNode | null {
  if (!ta.isElementNode(node)) return node;
  const el = node as Element;
  const tag = ta.getTagName(el);

  // <script src="..."> — remove any script with a src (local or remote)
  if (tag === "script") {
    const src = attrVal(el, "src");
    if (src !== undefined) {
      removed.push({ reason: "script-src", original: `<script src="${src}">` });
      return makeComment(` cesium: removed external <script src="${src}"> `);
    }
  }

  // <link rel="stylesheet" href="http...">
  if (tag === "link") {
    // rel is a set of space-separated, case-insensitive tokens, so
    // rel="stylesheet preload" still loads a stylesheet.
    const relTokens = (attrVal(el, "rel") ?? "").toLowerCase().split(/[ \t\n\f\r]+/);
    const href = attrVal(el, "href") ?? "";
    if (relTokens.includes("stylesheet") && isRemoteHttpUrl(href)) {
      removed.push({
        reason: "stylesheet-href",
        original: `<link rel="stylesheet" href="${href}">`,
      });
      return makeComment(` cesium: removed external <link rel="stylesheet" href="${href}"> `);
    }
  }

  // <img src="http...">
  if (tag === "img") {
    const src = attrVal(el, "src") ?? "";
    if (isRemoteHttpUrl(src)) {
      removed.push({ reason: "img-http", original: `<img src="${src}">` });
      return makeComment(` cesium: removed external <img src="${src}"> `);
    }
  }

  // Scrub url(http...) from inline style attributes.
  // String.prototype.replace with a global regex starts from index 0 and
  // leaves lastIndex at 0, so no manual lastIndex bookkeeping is required.
  for (const attr of ta.getAttrList(el)) {
    if (attr.name !== "style") continue;
    const cleaned = attr.value.replace(URL_HTTP_RE, (_match, url: string) => {
      removed.push({ reason: "url-http", original: `url(${url})` });
      return "url()";
    });
    if (cleaned !== attr.value) {
      attr.value = cleaned;
    }
  }

  // Recurse into children. Iterate over a snapshot because insertBefore and
  // detachNode mutate the element's live child list.
  const snapshot = (ta.getChildNodes(el) as ChildNode[]).slice();
  for (const child of snapshot) {
    const result = scrubNode(child, removed);
    if (result === null) {
      ta.detachNode(child);
    } else if (result !== child) {
      // Put the replacement where the child was, then drop the child.
      ta.insertBefore(el, result, child);
      ta.detachNode(child);
    }
  }

  return node;
}
|
|
114
|
+
|
|
115
|
+
/**
 * Parses an HTML body fragment, scrubs every top-level node with scrubNode,
 * and serializes the result.
 *
 * @param htmlBody HTML fragment to scrub.
 * @returns The scrubbed HTML together with the list of recorded removals.
 */
export function scrub(htmlBody: string): ScrubResult {
  const fragment = parseFragment(htmlBody);
  const removed: ScrubRemoval[] = [];

  // Walk a copy of the top-level list: swapping nodes edits the live one.
  const topLevel = (ta.getChildNodes(fragment) as ChildNode[]).slice();
  for (const current of topLevel) {
    const outcome = scrubNode(current, removed);
    if (outcome === null) {
      ta.detachNode(current);
      continue;
    }
    if (outcome !== current) {
      // Swap the original node for its replacement at the same position.
      ta.insertBefore(fragment, outcome, current);
      ta.detachNode(current);
    }
  }

  const html = serialize(fragment);
  return { html, removed };
}
|