@hulistmi/hulistmi 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE.md +22 -0
- package/README.md +289 -0
- package/bin/hulistmi.mjs +21 -0
- package/package.json +73 -0
- package/public/SKILL.md +26 -0
- package/public/_headers +3 -0
- package/public/favicon.svg +5 -0
- package/public/index.html +47 -0
- package/public/llms.txt +28 -0
- package/public/robots.txt +6 -0
- package/public/sitemap.xml +6 -0
- package/public/webmcp.js +3 -0
- package/src/cli.ts +67 -0
- package/src/index.ts +260 -0
- package/src/lib/catalog.ts +113 -0
- package/src/lib/cli-endpoints.ts +62 -0
- package/src/lib/documents.ts +164 -0
- package/src/lib/fetch.ts +165 -0
- package/src/lib/guides/fetch.ts +8 -0
- package/src/lib/guides/index.ts +3 -0
- package/src/lib/guides/render.ts +13 -0
- package/src/lib/guides/types.ts +5 -0
- package/src/lib/mcp.ts +119 -0
- package/src/lib/rate-limit.ts +27 -0
- package/src/lib/reference/fetch.ts +8 -0
- package/src/lib/reference/index.ts +3 -0
- package/src/lib/reference/render.ts +13 -0
- package/src/lib/reference/types.ts +5 -0
- package/src/lib/render.ts +216 -0
- package/src/lib/search.ts +81 -0
- package/src/lib/skill.ts +100 -0
- package/src/lib/types.ts +18 -0
- package/src/lib/upstream-contract.json +103 -0
- package/src/lib/upstream-contract.ts +3 -0
- package/src/lib/url.ts +50 -0
- package/src/lib/webmcp.ts +19 -0
- package/wrangler.jsonc +18 -0
package/src/lib/fetch.ts
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
import { UPSTREAM_CONTRACT } from "./upstream-contract";
|
|
2
|
+
|
|
3
|
+
/**
 * Raised when the upstream payload reports a 404 code for the requested
 * document.
 */
export class NotFoundError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    super(...args);
    // A bare subclass inherits `name === "Error"`, which makes logs and
    // serialized errors indistinguishable from generic failures.
    this.name = "NotFoundError";
  }
}
|
|
4
|
+
/**
 * Raised when the upstream response carries an `X-Robots-Tag` directive that
 * forbids rendering the content.
 */
export class UpstreamPolicyError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    super(...args);
    // A bare subclass inherits `name === "Error"`; set it so the failure kind
    // is visible in logs and serialized errors.
    this.name = "UpstreamPolicyError";
  }
}
|
|
5
|
+
/**
 * Raised when an upstream response body or the rendered Markdown exceeds its
 * configured byte limit.
 */
export class UpstreamSizeError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    super(...args);
    // A bare subclass inherits `name === "Error"`; set it so the failure kind
    // is visible in logs and serialized errors.
    this.name = "UpstreamSizeError";
  }
}
|
|
6
|
+
/**
 * Raised when an upstream request is aborted because it ran past the
 * configured timeout.
 */
export class UpstreamTimeoutError extends Error {
  constructor(...args: ConstructorParameters<typeof Error>) {
    super(...args);
    // A bare subclass inherits `name === "Error"`; set it so the failure kind
    // is visible in logs and serialized errors.
    this.name = "UpstreamTimeoutError";
  }
}
|
|
7
|
+
|
|
8
|
+
/** `User-Agent` value attached to every upstream request. */
export const HULISTMI_USER_AGENT = "hulistmi-ai/1.0 (+https://hulistmi.ai/#bot)";

/** Milliseconds an upstream request may run before it is aborted. */
export const UPSTREAM_TIMEOUT_MS = 10_000;

/** Default cap on an upstream response body: 1 MiB. */
export const MAX_UPSTREAM_RESPONSE_BYTES = 0x10_0000;

/** Cap on the UTF-8 size of rendered Markdown: 512 KiB. */
export const MAX_RENDERED_MARKDOWN_BYTES = 0x8_0000;

/**
 * 128 KiB. Not referenced in this file; presumably the cap applied to incoming
 * MCP request bodies (confirm against the caller).
 */
export const MAX_MCP_REQUEST_BYTES = 0x2_0000;
|
|
14
|
+
|
|
15
|
+
/** Description of a single POST request to the Huawei upstream. */
export interface VerifiedHuaweiRequest {
  /** Absolute URL; it is checked against the upstream allow-list before use. */
  url: string;
  /** Extra headers; only verified `Origin`/`Referer` values are forwarded. */
  headers?: Record<string, string>;
  /** Payload that is JSON-serialized into the request body. */
  body: unknown;
}
|
|
20
|
+
|
|
21
|
+
/**
 * Parses `input` and confirms it targets an allow-listed Huawei endpoint.
 *
 * The URL must use HTTPS on the default port, carry no embedded credentials,
 * and match both the host and path allow-lists in the upstream contract.
 *
 * @param input Candidate upstream URL.
 * @returns The parsed URL when every check passes.
 * @throws TypeError when `input` is not a parseable absolute URL.
 * @throws Error when the URL is outside the allow-list.
 */
function assertAllowedHuaweiUrl(input: string): URL {
  const url = new URL(input);
  const isAllowed =
    url.protocol === "https:" &&
    // `hostname` excludes the port, so without this check
    // `https://allowed-host:8443/...` would pass the host allow-list.
    url.port === "" &&
    // Reject `user:pass@host` forms; the contract never uses credentials.
    url.username === "" &&
    url.password === "" &&
    UPSTREAM_CONTRACT.allowedHosts.includes(url.hostname) &&
    UPSTREAM_CONTRACT.allowedPaths.includes(url.pathname);
  if (!isAllowed) {
    throw new Error(`Unverified Huawei upstream URL: ${input}`);
  }
  return url;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Gathers every request description declared in the upstream contract:
 * one per catalog, the document endpoints that each document entry defines,
 * and the search request.
 */
function collectVerifiedRequests(): VerifiedHuaweiRequest[] {
  const catalogRequests = Object.values(UPSTREAM_CONTRACT.catalogs).map(
    (entry) => entry.request,
  );

  const documentRequests = Object.values(UPSTREAM_CONTRACT.documents).flatMap(
    (doc) =>
      [
        doc.checkCenterGrayUser,
        doc.getDocumentById,
        // These two endpoints are only present on some document entries.
        "getCenterRootNodeTree" in doc ? doc.getCenterRootNodeTree : undefined,
        "getCenterDocument" in doc ? doc.getCenterDocument : undefined,
      ].filter((request): request is VerifiedHuaweiRequest => Boolean(request)),
  );

  const searchRequest = {
    url: UPSTREAM_CONTRACT.search.url,
    headers: UPSTREAM_CONTRACT.search.headers,
    body: UPSTREAM_CONTRACT.search.bodyForUIAbility,
  };

  return [...catalogRequests, ...documentRequests, searchRequest];
}
|
|
51
|
+
|
|
52
|
+
/**
 * Reports whether any contract request declares header `name` with exactly
 * `value`.
 *
 * @param name Header name; compared against lower-cased contract header names.
 * @param value Header value; compared exactly.
 */
function isVerifiedHeaderValue(name: string, value: string): boolean {
  for (const request of collectVerifiedRequests()) {
    const declared = Object.entries(request.headers ?? {});
    for (const [headerName, headerValue] of declared) {
      if (headerName.toLowerCase() === name && headerValue === value) {
        return true;
      }
    }
  }
  return false;
}
|
|
59
|
+
|
|
60
|
+
/**
 * Builds the header set for an upstream request.
 *
 * It always sends JSON content negotiation headers and the bot user agent.
 * From the caller's headers, only `Origin` and `Referer` are forwarded, and
 * only when their value is declared somewhere in the upstream contract.
 */
function verifiedHeaders(request: VerifiedHuaweiRequest): HeadersInit {
  const outgoing = new Headers();
  outgoing.set("Content-Type", "application/json");
  outgoing.set("Accept", "application/json");
  outgoing.set("User-Agent", HULISTMI_USER_AGENT);

  const supplied = Object.entries(request.headers ?? {});
  supplied.forEach(([key, value]) => {
    const normalizedName = key.toLowerCase();
    const isForwardable = ["origin", "referer"].includes(normalizedName);
    if (isForwardable && isVerifiedHeaderValue(normalizedName, value)) {
      outgoing.set(key, value);
    }
  });
  return outgoing;
}
|
|
76
|
+
|
|
77
|
+
/**
 * Reads a response body as UTF-8 text while enforcing a byte budget.
 *
 * A `Content-Length` above the budget is rejected before any data is read.
 * Otherwise the body is streamed, and reading stops as soon as the running
 * total exceeds the budget.
 *
 * @param response Response whose body should be read.
 * @param maxBytes Maximum number of body bytes to accept.
 * @returns The decoded body text.
 * @throws UpstreamSizeError when the declared or actual size exceeds `maxBytes`.
 */
async function readCappedText(
  response: Response,
  maxBytes: number,
): Promise<string> {
  const declaredLength = Number(response.headers.get("Content-Length") ?? "0");
  if (declaredLength > maxBytes) {
    throw new UpstreamSizeError(
      "Huawei upstream response exceeded maximum size",
    );
  }

  const stream = response.body;
  if (!stream) return response.text();

  const reader = stream.getReader();
  const parts: Uint8Array[] = [];
  let received = 0;
  for (
    let step = await reader.read();
    !step.done;
    step = await reader.read()
  ) {
    received += step.value.byteLength;
    if (received > maxBytes) {
      await reader.cancel();
      throw new UpstreamSizeError(
        "Huawei upstream response exceeded maximum size",
      );
    }
    parts.push(step.value);
  }

  // Stitch the chunks into one buffer so multi-byte characters that straddle
  // chunk boundaries decode correctly.
  const merged = new Uint8Array(received);
  parts.reduce((offset, part) => {
    merged.set(part, offset);
    return offset + part.byteLength;
  }, 0);
  return new TextDecoder().decode(merged);
}
|
|
112
|
+
|
|
113
|
+
/**
 * POSTs an allow-listed request to the Huawei upstream and returns the parsed
 * JSON payload.
 *
 * The timeout covers the whole exchange, including reading the body. It used
 * to be cleared as soon as the headers arrived, which left a stalled body read
 * unbounded.
 *
 * @param request Request description; its URL must pass the allow-list check.
 * @param maxBytes Maximum accepted response body size in bytes.
 * @returns The parsed JSON body, typed as `T` without validation.
 * @throws UpstreamTimeoutError when the exchange exceeds `UPSTREAM_TIMEOUT_MS`.
 * @throws UpstreamPolicyError when `X-Robots-Tag` contains `noai`, `none`, or
 *   `noindex`.
 * @throws UpstreamSizeError when the body exceeds `maxBytes`.
 * @throws NotFoundError when the payload's `code` is 404.
 * @throws Error for a disallowed URL or a non-2xx status.
 */
export async function fetchHuaweiJson<T>(
  request: VerifiedHuaweiRequest,
  maxBytes = MAX_UPSTREAM_RESPONSE_BYTES,
): Promise<T> {
  const verifiedUrl = assertAllowedHuaweiUrl(request.url);
  const controller = new AbortController();
  const timeout = setTimeout(() => controller.abort(), UPSTREAM_TIMEOUT_MS);

  try {
    const response = await fetch(verifiedUrl, {
      method: "POST",
      headers: verifiedHeaders(request),
      body: JSON.stringify(request.body),
      signal: controller.signal,
    });

    const robots = response.headers.get("X-Robots-Tag")?.toLowerCase() ?? "";
    if (
      robots.includes("noai") ||
      robots.includes("none") ||
      robots.includes("noindex")
    ) {
      throw new UpstreamPolicyError(
        "Upstream policy disallows rendering this content",
      );
    }

    if (!response.ok)
      throw new Error(`Huawei upstream request failed: ${response.status}`);

    const data = JSON.parse(await readCappedText(response, maxBytes)) as T & {
      code?: number | string;
    };
    // The upstream may report "not found" inside a successful HTTP response.
    if (data.code === 404 || data.code === "404")
      throw new NotFoundError("Huawei document not found");
    return data;
  } catch (error) {
    // An abort surfaces as an AbortError from either fetch() or the body read.
    if (error instanceof DOMException && error.name === "AbortError")
      throw new UpstreamTimeoutError("Huawei upstream request timed out");
    throw error;
  } finally {
    clearTimeout(timeout);
    // Releases any body left unread by an early throw; this is a no-op once
    // the exchange has completed.
    controller.abort();
  }
}
|
|
157
|
+
|
|
158
|
+
/**
 * Returns `content` unchanged when its UTF-8 encoding fits within
 * `MAX_RENDERED_MARKDOWN_BYTES`.
 *
 * @throws UpstreamSizeError when the encoded size exceeds the limit.
 */
export function assertRenderedMarkdownWithinLimit(content: string): string {
  // Measure encoded bytes rather than string length: non-ASCII characters
  // take more than one byte.
  const encodedSize = new TextEncoder().encode(content).byteLength;
  if (encodedSize <= MAX_RENDERED_MARKDOWN_BYTES) return content;
  throw new UpstreamSizeError("Rendered markdown exceeded maximum size");
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { fetchHarmonyDocumentPageData } from "../documents";
|
|
2
|
+
import type { HarmonyDocumentValue } from "../types";
|
|
3
|
+
|
|
4
|
+
/**
 * Loads the page data for a guide document.
 *
 * @param path Document path, passed to the loader alongside the
 *   "harmonyos-guides" section.
 */
export async function fetchGuidePageData(
  path: string,
): Promise<HarmonyDocumentValue> {
  const pageData = fetchHarmonyDocumentPageData("harmonyos-guides", path);
  return pageData;
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { renderDocumentMarkdown } from "../render";
|
|
2
|
+
import type { HarmonyDocumentValue } from "../types";
|
|
3
|
+
|
|
4
|
+
/**
 * Renders a guide document as Markdown.
 *
 * @param value Document data to render.
 * @param path Document path; it is prefixed with "harmonyos-guides/" before
 *   being passed to the renderer.
 */
export function renderGuideMarkdown(
  value: HarmonyDocumentValue,
  path: string,
): string {
  const documentPath = `harmonyos-guides/${path}`;
  return renderDocumentMarkdown(value, documentPath, "HarmonyOS Guides");
}
|
package/src/lib/mcp.ts
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { fetchHarmonyOSCatalog, renderCatalogMarkdown } from "./catalog";
|
|
4
|
+
import { assertRenderedMarkdownWithinLimit } from "./fetch";
|
|
5
|
+
import { fetchGuidePageData, renderGuideMarkdown } from "./guides";
|
|
6
|
+
import { fetchReferencePageData, renderReferenceMarkdown } from "./reference";
|
|
7
|
+
import { renderSearchMarkdown, searchHarmonyOSDocs } from "./search";
|
|
8
|
+
|
|
9
|
+
/** Name and version passed to the MCP server constructor. */
export const MCP_SERVER_INFO = { name: "hulistmi.ai", version: "1.0.0" } as const;

/**
 * Annotation hints shared by every registered tool: read-only,
 * non-destructive, idempotent, and open-world.
 */
const readOnlyAnnotations = {
  readOnlyHint: true,
  destructiveHint: false,
  idempotentHint: true,
  openWorldHint: true,
} as const;
|
|
20
|
+
|
|
21
|
+
/**
 * Static metadata for each tool: its description plus an HTTP route shape
 * (`path` and a `query` map of parameter names).
 */
export const TOOL_DEFINITIONS = {
  searchHarmonyOSDocumentation: {
    description: "Search HarmonyOS developer documentation.",
    http: { path: "/search", query: { q: "query" } },
  },
  fetchHarmonyOSDocumentation: {
    description: "Fetch a HarmonyOS documentation page as Markdown.",
    http: { path: "/{path}", query: {} },
  },
  fetchHarmonyOSCatalog: {
    description: "Fetch a HarmonyOS documentation catalog.",
    http: {
      path: "/catalog",
      query: {
        catalogName: "catalogName",
        language: "language",
        depth: "depth",
      },
    },
  },
} as const;
|
|
38
|
+
|
|
39
|
+
/**
 * Creates an MCP server exposing three read-only tools: documentation search,
 * single-page fetch, and catalog fetch. Every tool returns one Markdown text
 * item that has passed the rendered-size limit check.
 *
 * @returns The configured server; transport wiring is left to the caller.
 */
export function createMcpServer(): McpServer {
  const server = new McpServer(MCP_SERVER_INFO);

  server.registerTool(
    "searchHarmonyOSDocumentation",
    {
      title: "Search HarmonyOS Documentation",
      description: TOOL_DEFINITIONS.searchHarmonyOSDocumentation.description,
      inputSchema: { query: z.string().min(1).max(120) },
      annotations: readOnlyAnnotations,
    },
    async ({ query }) => ({
      content: [
        {
          type: "text",
          text: assertRenderedMarkdownWithinLimit(
            renderSearchMarkdown(await searchHarmonyOSDocs(query)),
          ),
        },
      ],
    }),
  );

  server.registerTool(
    "fetchHarmonyOSDocumentation",
    {
      title: "Fetch HarmonyOS Documentation",
      description: TOOL_DEFINITIONS.fetchHarmonyOSDocumentation.description,
      inputSchema: { path: z.string().min(1) },
      annotations: readOnlyAnnotations,
    },
    async ({ path }) => {
      const referencePrefix = "harmonyos-references/";
      const guidePrefix = "harmonyos-guides/";
      const normalized = path.replace(/^\/+/, "");

      let content: string;
      if (normalized.startsWith(referencePrefix)) {
        const documentPath = normalized.slice(referencePrefix.length);
        content = renderReferenceMarkdown(
          await fetchReferencePageData(documentPath),
          documentPath,
        );
      } else {
        // Strip the section prefix only when it leads the path.
        // `String.replace` removed the first occurrence anywhere, which
        // mangled guide paths that merely contain "harmonyos-guides/".
        const documentPath = normalized.startsWith(guidePrefix)
          ? normalized.slice(guidePrefix.length)
          : normalized;
        content = renderGuideMarkdown(
          await fetchGuidePageData(documentPath),
          documentPath,
        );
      }

      return {
        content: [
          { type: "text", text: assertRenderedMarkdownWithinLimit(content) },
        ],
      };
    },
  );

  server.registerTool(
    "fetchHarmonyOSCatalog",
    {
      title: "Fetch HarmonyOS Catalog",
      description: TOOL_DEFINITIONS.fetchHarmonyOSCatalog.description,
      inputSchema: {
        catalogName: z
          .enum(["harmonyos-guides", "harmonyos-references"])
          .default("harmonyos-guides"),
        language: z.literal("en").default("en"),
        depth: z.number().int().min(1).optional(),
      },
      annotations: readOnlyAnnotations,
    },
    async ({ catalogName, language, depth }) => ({
      content: [
        {
          type: "text",
          text: assertRenderedMarkdownWithinLimit(
            renderCatalogMarkdown(
              await fetchHarmonyOSCatalog(catalogName, language),
              depth,
            ),
          ),
        },
      ],
    }),
  );

  return server;
}
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/** Minimal shape of the rate limiter binding used by this module. */
export interface RateLimitBinding {
  /** Resolves with `success: false` once the given key is over its limit. */
  limit(options: { key: string }): Promise<{ success: boolean }>;
}

/** Environment slice that may carry a rate limiter binding. */
export interface RateLimitEnv {
  RATE_LIMITER?: RateLimitBinding;
}

/**
 * Applies per-client, per-route rate limiting to an incoming request.
 *
 * The limiter key is `<ip>:<route>`. `<ip>` is the `CF-Connecting-IP` header,
 * or "anonymous" when it is absent. `<route>` is the first path segment, or
 * "root" when there is none.
 *
 * @returns `null` when the request may proceed (including when no limiter is
 *   configured), otherwise a 429 JSON response.
 */
export async function enforceRateLimit(
  request: Request,
  env: RateLimitEnv,
): Promise<Response | null> {
  const limiter = env.RATE_LIMITER;
  if (!limiter) return null;

  const [, firstSegment] = new URL(request.url).pathname.split("/");
  const clientIp = request.headers.get("CF-Connecting-IP") ?? "anonymous";
  const bucket = `${clientIp}:${firstSegment || "root"}`;

  const { success } = await limiter.limit({ key: bucket });
  if (success) return null;

  const payload = JSON.stringify({ error: "Rate limit exceeded" });
  return new Response(payload, {
    status: 429,
    headers: {
      "Content-Type": "application/json",
      "Retry-After": "60",
      "Cache-Control": "no-store",
    },
  });
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
import { fetchHarmonyDocumentPageData } from "../documents";
|
|
2
|
+
import type { HarmonyDocumentValue } from "../types";
|
|
3
|
+
|
|
4
|
+
/**
 * Loads the page data for a reference document.
 *
 * @param path Document path, passed to the loader alongside the
 *   "harmonyos-references" section.
 */
export async function fetchReferencePageData(
  path: string,
): Promise<HarmonyDocumentValue> {
  const pageData = fetchHarmonyDocumentPageData("harmonyos-references", path);
  return pageData;
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { renderDocumentMarkdown } from "../render";
|
|
2
|
+
import type { HarmonyDocumentValue } from "../types";
|
|
3
|
+
|
|
4
|
+
/**
 * Renders a reference document as Markdown.
 *
 * @param value Document data to render.
 * @param path Document path; it is prefixed with "harmonyos-references/"
 *   before being passed to the renderer.
 */
export function renderReferenceMarkdown(
  value: HarmonyDocumentValue,
  path: string,
): string {
  const documentPath = `harmonyos-references/${path}`;
  return renderDocumentMarkdown(value, documentPath, "HarmonyOS References");
}
|
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
import * as cheerio from "cheerio";
|
|
2
|
+
import type { HarmonyDocumentValue } from "./types";
|
|
3
|
+
import { generateHuaweiDocUrl } from "./url";
|
|
4
|
+
|
|
5
|
+
/**
 * Structural view of a parsed HTML node, limited to the fields the renderer
 * reads.
 */
interface NodeLike {
  /** Node kind; the renderer distinguishes "text" and "tag". */
  type?: string;
  /** Element name; read in preference to `name`. */
  tagName?: string;
  /** Fallback element name used when `tagName` is missing. */
  name?: string;
  /** Text content of a text node. */
  data?: string;
  /** Child nodes in document order. */
  children?: NodeLike[];
}
|
|
12
|
+
|
|
13
|
+
/**
 * Converts an HTML fragment or document into Markdown.
 *
 * `script`, `style`, and `noscript` elements are removed first. The children
 * of `<body>` (or of the document root when there is no body) are rendered as
 * blocks. Runs of three or more newlines collapse to one blank line and the
 * result is trimmed.
 */
export function htmlToMarkdown(html: string): string {
  const $ = cheerio.load(html);
  $("script,style,noscript").remove();

  const container = $("body").length ? $("body") : $.root();
  const rendered = renderChildren(container.get(0) as NodeLike, $);
  const collapsed = rendered.replace(/\n{3,}/g, "\n\n");
  return collapsed.trim();
}
|
|
22
|
+
|
|
23
|
+
/**
 * Renders each child of `node` as a block and joins the non-empty results
 * with a blank line.
 */
function renderChildren(node: NodeLike, $: cheerio.CheerioAPI): string {
  const blocks: string[] = [];
  for (const child of node.children ?? []) {
    const rendered = renderBlock(child, $);
    if (rendered) blocks.push(rendered);
  }
  return blocks.join("\n\n");
}
|
|
29
|
+
|
|
30
|
+
/**
 * Renders one node as a Markdown block.
 *
 * Text nodes are whitespace-normalized and non-tag nodes render as an empty
 * string. Headings, paragraphs, code blocks, lists, tables, rules, and
 * blockquotes get dedicated handling. Structural containers render their
 * children as blocks. Any other element is rendered as blocks when it has
 * block-level children, and as inline text otherwise.
 */
function renderBlock(node: NodeLike, $: cheerio.CheerioAPI): string {
  if (node.type === "text") return normalizeText(node.data ?? "");
  if (node.type !== "tag") return "";

  const tag = (node.tagName ?? node.name ?? "").toLowerCase();
  const element = $(node);
  switch (tag) {
    case "h1":
    case "h2":
    case "h3":
    case "h4":
    case "h5":
    case "h6": {
      const depth = Number(tag.slice(1));
      return `${"#".repeat(depth)} ${inlineText(node, $)}`;
    }
    case "p":
      return inlineText(node, $);
    case "pre": {
      const code = element.text().trim();
      // A fixed ``` fence is closed early by code that contains a ``` line
      // of its own (e.g. Markdown samples). Use a fence longer than the
      // longest backtick run in the content instead.
      let longestRun = 0;
      for (const run of code.match(/`+/g) ?? []) {
        longestRun = Math.max(longestRun, run.length);
      }
      const fence = "`".repeat(Math.max(3, longestRun + 1));
      return `${fence}\n${code}\n${fence}`;
    }
    case "ul":
      return renderList(node, $, false);
    case "ol":
      return renderList(node, $, true);
    case "table":
      return renderTable(node, $);
    case "br":
      return "\n";
    case "hr":
      return "---";
    case "blockquote":
      return renderChildren(node, $)
        .split("\n")
        .map((line) => (line ? `> ${line}` : ">"))
        .join("\n");
    case "div":
    case "section":
    case "article":
    case "main":
    case "body":
    case "thead":
    case "tbody":
    case "tfoot":
      return renderChildren(node, $);
    default:
      return hasBlockChildren(node)
        ? renderChildren(node, $)
        : inlineText(node, $);
  }
}
|
|
80
|
+
|
|
81
|
+
/**
 * Renders the `<li>` children of a list element as Markdown list items.
 *
 * @param node The `<ul>` or `<ol>` element.
 * @param $ Cheerio instance for the document.
 * @param ordered When true, items are numbered from 1; otherwise `-` is used.
 * @returns One line per item, with nested lists indented beneath their parent.
 */
function renderList(
  node: NodeLike,
  $: cheerio.CheerioAPI,
  ordered: boolean,
): string {
  return (node.children ?? [])
    .filter((child) => child.type === "tag" && tagName(child) === "li")
    .map((item, index) => {
      const marker = ordered ? `${index + 1}.` : "-";
      // A nested list only belongs to its parent item when it is indented to
      // the parent's content column: marker width plus one space. A narrower
      // fixed indent turns nested lists into siblings.
      const indent = " ".repeat(marker.length + 1);
      const text = inlineTextWithoutNestedBlocks(item, $);
      const nested = (item.children ?? [])
        .filter((child) => ["ul", "ol"].includes(tagName(child)))
        .map((child) =>
          renderList(child, $, tagName(child) === "ol")
            .split("\n")
            .map((line) => `${indent}${line}`)
            .join("\n"),
        )
        .filter(Boolean)
        .join("\n");
      return nested ? `${marker} ${text}\n${nested}` : `${marker} ${text}`;
    })
    .join("\n");
}
|
|
105
|
+
|
|
106
|
+
/**
 * Renders a `<table>` as a Markdown pipe table.
 *
 * Every `<tr>` found under the table contributes a row built from its direct
 * `th`/`td` children, and rows without cells are skipped. The first row
 * becomes the header. Shorter rows are padded with empty cells to the widest
 * row. Returns an empty string when no row has cells.
 */
function renderTable(node: NodeLike, $: cheerio.CheerioAPI): string {
  const rows: string[][] = [];
  for (const row of $(node).find("tr").toArray()) {
    const cells = $(row)
      .children("th,td")
      .toArray()
      .map((cell) => escapeTableCell(inlineText(cell as NodeLike, $)));
    if (cells.length > 0) rows.push(cells);
  }
  if (rows.length === 0) return "";

  const columnCount = rows.reduce(
    (widest, row) => Math.max(widest, row.length),
    0,
  );
  const padded = rows.map((row) =>
    Array.from({ length: columnCount }, (_, column) => row[column] ?? ""),
  );

  const [header, ...body] = padded;
  const divider = header.map(() => "---");
  const lines = [tableRow(header), tableRow(divider)];
  for (const row of body) lines.push(tableRow(row));
  return lines.join("\n");
}
|
|
130
|
+
|
|
131
|
+
/** Formats cells as a single Markdown table row: `| a | b |`. */
function tableRow(cells: string[]): string {
  const inner = cells.join(" | ");
  return "| " + inner + " |";
}
|
|
134
|
+
|
|
135
|
+
/**
 * Makes text safe for a Markdown table cell: pipes are backslash-escaped and
 * each run of newlines becomes a single `<br>`.
 */
function escapeTableCell(text: string): string {
  const pipesEscaped = text.split("|").join("\\|");
  return pipesEscaped.replace(/\n+/g, "<br>");
}
|
|
138
|
+
|
|
139
|
+
/**
 * Renders a node's children inline while skipping any direct `ul`/`ol`
 * child, so a list item's own text can be rendered separately from its
 * nested lists. Whitespace runs collapse to one space and the result is
 * trimmed.
 */
function inlineTextWithoutNestedBlocks(
  node: NodeLike,
  $: cheerio.CheerioAPI,
): string {
  let combined = "";
  for (const child of node.children ?? []) {
    const childTag = tagName(child);
    if (childTag === "ul" || childTag === "ol") continue;
    combined += inlineNode(child, $);
  }
  return combined.replace(/\s+/g, " ").trim();
}
|
|
150
|
+
|
|
151
|
+
/**
 * Renders all children of `node` inline, collapses whitespace runs to a
 * single space, and trims the result.
 */
function inlineText(node: NodeLike, $: cheerio.CheerioAPI): string {
  let combined = "";
  for (const child of node.children ?? []) {
    combined += inlineNode(child, $);
  }
  return combined.replace(/\s+/g, " ").trim();
}
|
|
158
|
+
|
|
159
|
+
/**
 * Renders one node as inline Markdown.
 *
 * Text nodes return their raw data and non-tag nodes return an empty string.
 * `<br>` becomes a newline, `<code>` a backtick span, `<a>` a link when it
 * has both text and an href, and `<img>` an image when it has alt or title
 * text. Any other element renders its children inline.
 */
function inlineNode(node: NodeLike, $: cheerio.CheerioAPI): string {
  if (node.type === "text") return node.data ?? "";
  if (node.type !== "tag") return "";

  const tag = tagName(node);
  if (tag === "br") return "\n";
  if (tag === "code") return `\`${normalizeText($(node).text())}\``;
  if (tag === "a") {
    const text = inlineText(node, $);
    const href = $(node).attr("href");
    return href && text ? `[${text}](${href})` : text;
  }
  if (tag === "img") {
    const alt = $(node).attr("alt") ?? $(node).attr("title") ?? "";
    // Restored Markdown image syntax: this template literal was truncated
    // and left unterminated. Images with no alt/title text are dropped.
    return alt ? `![${alt}](${$(node).attr("src") ?? ""})` : "";
  }
  return inlineText(node, $);
}
|
|
177
|
+
|
|
178
|
+
/**
 * Reports whether any direct child of `node` is a block-level element:
 * a heading, paragraph, list, table, `pre`, `div`, `section`, or `article`.
 */
function hasBlockChildren(node: NodeLike): boolean {
  const blockTags = new Set([
    "h1",
    "h2",
    "h3",
    "h4",
    "h5",
    "h6",
    "p",
    "ul",
    "ol",
    "table",
    "pre",
    "div",
    "section",
    "article",
  ]);
  for (const child of node.children ?? []) {
    if (blockTags.has(tagName(child))) return true;
  }
  return false;
}
|
|
198
|
+
|
|
199
|
+
/**
 * Returns the node's lower-cased element name, reading `tagName` first and
 * `name` second. Returns an empty string when neither is set.
 */
function tagName(node: NodeLike): string {
  const rawName = node.tagName ?? node.name ?? "";
  return rawName.toLowerCase();
}
|
|
202
|
+
|
|
203
|
+
/** Collapses every whitespace run to one space and trims both ends. */
function normalizeText(text: string): string {
  const words = text.trim().split(/\s+/);
  return words.join(" ");
}
|
|
206
|
+
|
|
207
|
+
/**
 * Renders a document as a Markdown page: a front-matter block (title, source
 * URL, current timestamp, category, language), a top-level heading, the
 * converted body, and an attribution footer.
 *
 * @param value Document data; `title` falls back to "Untitled" and missing
 *   content renders as an empty body.
 * @param path Path used to build the source URL.
 * @param category Category label written to the front matter.
 */
export function renderDocumentMarkdown(
  value: HarmonyDocumentValue,
  path: string,
  category: string,
): string {
  const title = value.title || "Untitled";
  const sourceUrl = generateHuaweiDocUrl(path);
  const body = htmlToMarkdown(value.content?.content ?? "");
  const timestamp = new Date().toISOString();

  const lines = [
    "---",
    `title: ${title}`,
    `source: ${sourceUrl}`,
    `timestamp: ${timestamp}`,
    `category: ${category}`,
    "language: en",
    "---",
    "",
    `# ${title}`,
    "",
    body,
    "",
    "---",
    "",
    "*Extracted by [hulistmi.ai](https://hulistmi-ai.y6vd2dkjgb.workers.dev) - Making HarmonyOS docs AI-readable.*",
    "*This is unofficial content. Source documentation belongs to Huawei.*",
    // The trailing empty entry yields the final newline.
    "",
  ];
  return lines.join("\n");
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import { fetchHuaweiJson } from "./fetch";
|
|
2
|
+
import { UPSTREAM_CONTRACT } from "./upstream-contract";
|
|
3
|
+
|
|
4
|
+
/** One normalized search hit. */
export interface SearchResult {
  /** Display title of the hit. */
  title: string;
  /** Absolute URL of the hit. */
  url: string;
  /** Short description; may be an empty string. */
  description: string;
  /** Breadcrumb trail. */
  breadcrumbs: string[];
  /** Tags attached to the hit. */
  tags: string[];
  /** Kind of result. */
  type: string;
}
|
|
12
|
+
|
|
13
|
+
/** Subset of the upstream search response read by this module. */
interface HuaweiSearchResponse {
  /** Result groups; each group may carry a list of entries. */
  searchResult?: Array<{ developerInfos?: HuaweiDeveloperInfo[] }>;
}

/** One raw search entry as returned by the upstream. */
interface HuaweiDeveloperInfo {
  /** Entry title. */
  name?: string;
  /** Absolute or site-relative link. */
  url?: string;
  /** Entry summary. */
  description?: string;
  /** Text fragments used as the description when `description` is missing. */
  highlightInfos?: string[];
  /** JSON-encoded string of extra metadata; tags are parsed from it. */
  ext?: string;
}
|
|
24
|
+
|
|
25
|
+
/**
 * Runs a documentation search against the upstream search endpoint.
 *
 * @param query Raw query; surrounding whitespace is ignored.
 * @returns The trimmed query and the normalized hits, capped at the
 *   contract's `maxLength`.
 * @throws Error when the trimmed query is empty or longer than the
 *   contract's `maxQueryLength`.
 */
export async function searchHarmonyOSDocs(
  query: string,
): Promise<{ query: string; results: SearchResult[] }> {
  const keyword = query.trim();
  if (keyword.length === 0) throw new Error("Search query is required");

  const { search } = UPSTREAM_CONTRACT;
  if (keyword.length > search.maxQueryLength) {
    throw new Error("Search query is too long");
  }

  const response = await fetchHuaweiJson<HuaweiSearchResponse>({
    url: search.url,
    headers: search.headers,
    // Reuse the contract's body template and override only the keyword.
    body: { ...search.bodyForUIAbility, keyWord: keyword },
  });

  const results = normalizeSearchResults(response).slice(0, search.maxLength);
  return { query: keyword, results };
}
|
|
45
|
+
|
|
46
|
+
/**
 * Flattens the upstream search payload into normalized results.
 *
 * Absolute `http(s)://` URLs are kept as they are. Anything else is resolved
 * against `https://developer.huawei.com`, inserting the `/` separator when
 * the upstream value lacks a leading slash. A missing title becomes
 * "Untitled", and a missing description falls back to the joined highlight
 * fragments.
 */
function normalizeSearchResults(data: HuaweiSearchResponse): SearchResult[] {
  const origin = "https://developer.huawei.com";

  const toAbsoluteUrl = (raw: string | undefined): string => {
    const url = raw ?? "";
    // Require a real scheme separator: a bare "http" prefix check also
    // matched relative paths such as "httpclient/overview".
    if (/^https?:\/\//i.test(url)) return url;
    if (url === "" || url.startsWith("/")) return `${origin}${url}`;
    // Without the separator the path would be fused onto the host name.
    return `${origin}/${url}`;
  };

  return (data.searchResult ?? [])
    .flatMap((group) => group.developerInfos ?? [])
    .map((item) => ({
      title: item.name ?? "Untitled",
      url: toAbsoluteUrl(item.url),
      description: item.description ?? item.highlightInfos?.join(" ") ?? "",
      breadcrumbs: [],
      tags: parseTags(item.ext),
      type: "documentation",
    }));
}
|
|
60
|
+
|
|
61
|
+
/**
 * Extracts tag strings from a JSON-encoded `ext` payload.
 *
 * It reads `domain` and `nextSubType` and keeps only non-empty strings. A
 * missing, malformed, or non-object payload yields an empty list, because
 * tags are best-effort metadata.
 *
 * @param ext JSON text, if any.
 * @returns Zero to two tags, `domain` first.
 */
function parseTags(ext: string | undefined): string[] {
  if (!ext) return [];

  let parsed: unknown;
  try {
    parsed = JSON.parse(ext);
  } catch {
    return [];
  }
  if (typeof parsed !== "object" || parsed === null) return [];

  const { domain, nextSubType } = parsed as {
    domain?: unknown;
    nextSubType?: unknown;
  };
  // Check the runtime type: a truthiness filter plus a cast let numbers and
  // objects through as if they were strings.
  return [domain, nextSubType].filter(
    (tag): tag is string => typeof tag === "string" && tag.length > 0,
  );
}
|
|
70
|
+
|
|
71
|
+
/**
 * Renders search results as Markdown: a heading with the query, a blank
 * line, then one bullet link per hit. A hit's description, when non-empty,
 * follows on its own indented line.
 */
export function renderSearchMarkdown(result: {
  query: string;
  results: SearchResult[];
}): string {
  const entries = result.results.flatMap((item) => {
    const link = `- [${item.title}](${item.url})`;
    return item.description ? [link, ` ${item.description}`] : [link];
  });
  return [`# HarmonyOS search: ${result.query}`, "", ...entries].join("\n");
}
|