@zenalexa/unicli 0.220.0 → 0.220.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. package/AGENTS.md +10 -19
  2. package/README.md +17 -11
  3. package/README.zh-CN.md +17 -11
  4. package/dist/adapters/anilist/web.d.ts +11 -0
  5. package/dist/adapters/anilist/web.d.ts.map +1 -0
  6. package/dist/adapters/anilist/web.js +284 -0
  7. package/dist/adapters/anilist/web.js.map +1 -0
  8. package/dist/adapters/bangumi/web.d.ts +14 -0
  9. package/dist/adapters/bangumi/web.d.ts.map +1 -0
  10. package/dist/adapters/bangumi/web.js +257 -0
  11. package/dist/adapters/bangumi/web.js.map +1 -0
  12. package/dist/adapters/dlsite/web.d.ts +31 -0
  13. package/dist/adapters/dlsite/web.d.ts.map +1 -0
  14. package/dist/adapters/dlsite/web.js +455 -0
  15. package/dist/adapters/dlsite/web.js.map +1 -0
  16. package/dist/adapters/ehentai/web.d.ts +66 -0
  17. package/dist/adapters/ehentai/web.d.ts.map +1 -0
  18. package/dist/adapters/ehentai/web.js +608 -0
  19. package/dist/adapters/ehentai/web.js.map +1 -0
  20. package/dist/adapters/jikan/web.d.ts +9 -0
  21. package/dist/adapters/jikan/web.d.ts.map +1 -0
  22. package/dist/adapters/jikan/web.js +154 -0
  23. package/dist/adapters/jikan/web.js.map +1 -0
  24. package/dist/adapters/kitsu/web.d.ts +9 -0
  25. package/dist/adapters/kitsu/web.d.ts.map +1 -0
  26. package/dist/adapters/kitsu/web.js +97 -0
  27. package/dist/adapters/kitsu/web.js.map +1 -0
  28. package/dist/adapters/mangadex/web.d.ts +10 -0
  29. package/dist/adapters/mangadex/web.d.ts.map +1 -0
  30. package/dist/adapters/mangadex/web.js +188 -0
  31. package/dist/adapters/mangadex/web.js.map +1 -0
  32. package/dist/adapters/moegirl/web.d.ts +23 -0
  33. package/dist/adapters/moegirl/web.d.ts.map +1 -0
  34. package/dist/adapters/moegirl/web.js +269 -0
  35. package/dist/adapters/moegirl/web.js.map +1 -0
  36. package/dist/adapters/safebooru/web.d.ts +10 -0
  37. package/dist/adapters/safebooru/web.d.ts.map +1 -0
  38. package/dist/adapters/safebooru/web.js +120 -0
  39. package/dist/adapters/safebooru/web.js.map +1 -0
  40. package/dist/adapters/vndb/web.d.ts +10 -0
  41. package/dist/adapters/vndb/web.d.ts.map +1 -0
  42. package/dist/adapters/vndb/web.js +321 -0
  43. package/dist/adapters/vndb/web.js.map +1 -0
  44. package/dist/agents/codex-pack.d.ts +62 -0
  45. package/dist/agents/codex-pack.d.ts.map +1 -0
  46. package/dist/agents/codex-pack.js +163 -0
  47. package/dist/agents/codex-pack.js.map +1 -0
  48. package/dist/commands/agents.d.ts.map +1 -1
  49. package/dist/commands/agents.js +6 -43
  50. package/dist/commands/agents.js.map +1 -1
  51. package/dist/commands/browser/adapter.d.ts.map +1 -1
  52. package/dist/commands/browser/adapter.js +17 -3
  53. package/dist/commands/browser/adapter.js.map +1 -1
  54. package/dist/commands/describe.d.ts.map +1 -1
  55. package/dist/commands/describe.js +6 -7
  56. package/dist/commands/describe.js.map +1 -1
  57. package/dist/commands/dispatch.d.ts +1 -1
  58. package/dist/commands/dispatch.d.ts.map +1 -1
  59. package/dist/commands/dispatch.js +4 -2
  60. package/dist/commands/dispatch.js.map +1 -1
  61. package/dist/commands/mcp.d.ts +1 -1
  62. package/dist/commands/mcp.d.ts.map +1 -1
  63. package/dist/commands/mcp.js +10 -5
  64. package/dist/commands/mcp.js.map +1 -1
  65. package/dist/core/command-contract-lint.d.ts +10 -0
  66. package/dist/core/command-contract-lint.d.ts.map +1 -0
  67. package/dist/core/command-contract-lint.js +41 -0
  68. package/dist/core/command-contract-lint.js.map +1 -0
  69. package/dist/core/command-contract.d.ts +100 -0
  70. package/dist/core/command-contract.d.ts.map +1 -0
  71. package/dist/core/command-contract.js +174 -0
  72. package/dist/core/command-contract.js.map +1 -0
  73. package/dist/core/index.d.ts +2 -0
  74. package/dist/core/index.d.ts.map +1 -1
  75. package/dist/core/index.js +2 -0
  76. package/dist/core/index.js.map +1 -1
  77. package/dist/discovery/aliases.d.ts +2 -2
  78. package/dist/discovery/aliases.d.ts.map +1 -1
  79. package/dist/discovery/aliases.js +464 -6
  80. package/dist/discovery/aliases.js.map +1 -1
  81. package/dist/discovery/search.d.ts.map +1 -1
  82. package/dist/discovery/search.js +147 -2
  83. package/dist/discovery/search.js.map +1 -1
  84. package/dist/engine/args.d.ts.map +1 -1
  85. package/dist/engine/args.js +18 -1
  86. package/dist/engine/args.js.map +1 -1
  87. package/dist/engine/artifact-validation.d.ts +29 -0
  88. package/dist/engine/artifact-validation.d.ts.map +1 -0
  89. package/dist/engine/artifact-validation.js +211 -0
  90. package/dist/engine/artifact-validation.js.map +1 -0
  91. package/dist/engine/browser/diagnostics.d.ts +38 -0
  92. package/dist/engine/browser/diagnostics.d.ts.map +1 -0
  93. package/dist/engine/browser/diagnostics.js +40 -0
  94. package/dist/engine/browser/diagnostics.js.map +1 -0
  95. package/dist/engine/invoke.d.ts +1 -0
  96. package/dist/engine/invoke.d.ts.map +1 -1
  97. package/dist/engine/invoke.js +1 -0
  98. package/dist/engine/invoke.js.map +1 -1
  99. package/dist/engine/kernel/errors.d.ts +11 -0
  100. package/dist/engine/kernel/errors.d.ts.map +1 -0
  101. package/dist/engine/kernel/errors.js +15 -0
  102. package/dist/engine/kernel/errors.js.map +1 -0
  103. package/dist/engine/kernel/execute.d.ts +7 -18
  104. package/dist/engine/kernel/execute.d.ts.map +1 -1
  105. package/dist/engine/kernel/execute.js +25 -410
  106. package/dist/engine/kernel/execute.js.map +1 -1
  107. package/dist/engine/kernel/stages.d.ts +44 -0
  108. package/dist/engine/kernel/stages.d.ts.map +1 -0
  109. package/dist/engine/kernel/stages.js +428 -0
  110. package/dist/engine/kernel/stages.js.map +1 -0
  111. package/dist/engine/kernel/types.d.ts +21 -1
  112. package/dist/engine/kernel/types.d.ts.map +1 -1
  113. package/dist/engine/steps/download.d.ts +1 -0
  114. package/dist/engine/steps/download.d.ts.map +1 -1
  115. package/dist/engine/steps/download.js +10 -6
  116. package/dist/engine/steps/download.js.map +1 -1
  117. package/dist/fast-path/render.js +1 -1
  118. package/dist/fast-path/render.js.map +1 -1
  119. package/dist/manifest-compact.txt +3 -3
  120. package/dist/manifest-search.json +1 -1
  121. package/dist/manifest.json +3074 -3
  122. package/dist/mcp/handler.d.ts.map +1 -1
  123. package/dist/mcp/handler.js +11 -1
  124. package/dist/mcp/handler.js.map +1 -1
  125. package/dist/mcp/server.d.ts +1 -1
  126. package/dist/mcp/server.js +1 -1
  127. package/dist/mcp/tools.d.ts.map +1 -1
  128. package/dist/mcp/tools.js +18 -10
  129. package/dist/mcp/tools.js.map +1 -1
  130. package/dist/output/error-map.d.ts.map +1 -1
  131. package/dist/output/error-map.js +1 -1
  132. package/dist/output/error-map.js.map +1 -1
  133. package/dist/registry.d.ts.map +1 -1
  134. package/dist/registry.js +2 -1
  135. package/dist/registry.js.map +1 -1
  136. package/package.json +2 -2
  137. package/server.json +3 -3
  138. package/skills/unicli/SKILL.md +1 -1
  139. package/skills/unicli-claude-code/SKILL.md +1 -1
  140. package/skills/unicli-hermes/SKILL.md +1 -1
  141. package/src/adapters/anilist/web.test.ts +93 -0
  142. package/src/adapters/anilist/web.ts +341 -0
  143. package/src/adapters/arxiv/download.yaml +53 -0
  144. package/src/adapters/bangumi/web.test.ts +109 -0
  145. package/src/adapters/bangumi/web.ts +295 -0
  146. package/src/adapters/danbooru/artists.yaml +44 -0
  147. package/src/adapters/danbooru/comments.yaml +45 -0
  148. package/src/adapters/danbooru/detail.yaml +78 -0
  149. package/src/adapters/danbooru/download.yaml +51 -0
  150. package/src/adapters/danbooru/pools.yaml +56 -0
  151. package/src/adapters/danbooru/search.yaml +69 -0
  152. package/src/adapters/danbooru/tags.yaml +42 -0
  153. package/src/adapters/danbooru/wiki.yaml +44 -0
  154. package/src/adapters/dlsite/web.test.ts +132 -0
  155. package/src/adapters/dlsite/web.ts +557 -0
  156. package/src/adapters/ehentai/web.test.ts +157 -0
  157. package/src/adapters/ehentai/web.ts +750 -0
  158. package/src/adapters/jikan/web.test.ts +50 -0
  159. package/src/adapters/jikan/web.ts +177 -0
  160. package/src/adapters/kitsu/web.test.ts +29 -0
  161. package/src/adapters/kitsu/web.ts +109 -0
  162. package/src/adapters/konachan/detail.yaml +62 -0
  163. package/src/adapters/konachan/download.yaml +55 -0
  164. package/src/adapters/konachan/search.yaml +65 -0
  165. package/src/adapters/konachan/tags.yaml +40 -0
  166. package/src/adapters/mangadex/web.test.ts +46 -0
  167. package/src/adapters/mangadex/web.ts +210 -0
  168. package/src/adapters/moegirl/web.test.ts +87 -0
  169. package/src/adapters/moegirl/web.ts +343 -0
  170. package/src/adapters/pdf/read.yaml +49 -0
  171. package/src/adapters/pixiv/download.yaml +15 -2
  172. package/src/adapters/safebooru/detail.yaml +63 -0
  173. package/src/adapters/safebooru/download.yaml +58 -0
  174. package/src/adapters/safebooru/search.yaml +69 -0
  175. package/src/adapters/safebooru/web.test.ts +60 -0
  176. package/src/adapters/safebooru/web.ts +130 -0
  177. package/src/adapters/vndb/web.test.ts +86 -0
  178. package/src/adapters/vndb/web.ts +393 -0
  179. package/src/adapters/yandere/detail.yaml +61 -0
  180. package/src/adapters/yandere/download.yaml +56 -0
  181. package/src/adapters/yandere/search.yaml +67 -0
  182. package/src/adapters/yandere/tags.yaml +41 -0
@@ -0,0 +1,210 @@
1
+ /**
2
+ * @owner src/adapters/mangadex/web.ts
3
+ * @does Register MangaDex public manga and author search commands.
4
+ * @needs MangaDex public API and relationship include semantics.
5
+ * @feeds Manga/doujin-adjacent title and creator discovery.
6
+ * @breaks MangaDex API schema or public rate limits can block lookup.
7
+ */
8
+
9
+ import { USER_AGENT } from "../../constants.js";
10
+ import { cli, Strategy } from "../../registry.js";
11
+
12
+ const API = "https://api.mangadex.org";
13
+
14
/**
 * Coerce an arbitrary value to a string.
 *
 * @param value Any value; `null` and `undefined` become the empty string.
 * @returns `String(value)`, or "" for nullish input.
 */
function str(value: unknown): string {
  if (value === undefined || value === null) {
    return "";
  }
  return String(value);
}
17
+
18
/**
 * Return the trimmed string form of a mandatory argument.
 *
 * @param value Raw argument value.
 * @param label Argument name used in the error message.
 * @returns The trimmed, non-empty text.
 * @throws Error when the value is nullish or blank after trimming.
 */
function required(value: unknown, label: string): string {
  const trimmed = str(value).trim();
  if (trimmed.length === 0) {
    throw new Error(`mangadex ${label} cannot be empty.`);
  }
  return trimmed;
}
23
+
24
/**
 * Validate the `limit` argument.
 *
 * @param value Raw argument; nullish or "" selects the default of 10.
 * @returns An integer between 1 and 50 inclusive.
 * @throws Error when the value is not an integer in [1, 50].
 */
function requireLimit(value: unknown): number {
  const missing = value === undefined || value === null || value === "";
  if (missing) return 10;

  const limit = Number(value);
  const inRange = Number.isInteger(limit) && limit >= 1 && limit <= 50;
  if (!inRange) {
    throw new Error("mangadex limit must be an integer in [1, 50].");
  }
  return limit;
}
32
+
33
/**
 * Validate the optional `year` filter.
 *
 * @param value Raw argument; nullish or "" means "no filter".
 * @returns An integer between 1900 and 2100 inclusive, or `undefined` when absent.
 * @throws Error when a value is supplied but is not an integer in [1900, 2100].
 */
function optionalYear(value: unknown): number | undefined {
  const absent = value === undefined || value === null || value === "";
  if (absent) return undefined;

  const year = Number(value);
  const inRange = Number.isInteger(year) && year >= 1900 && year <= 2100;
  if (!inRange) {
    throw new Error("mangadex year must be an integer in [1900, 2100].");
  }
  return year;
}
41
+
42
/**
 * Supported `sort` values mapped to the MangaDex query parameter they set.
 * `relevance` maps to `undefined`: no order parameter is sent for it.
 */
const SORT_PARAMS: Record<string, [string, string] | undefined> = {
  relevance: undefined,
  latest: ["order[latestUploadedChapter]", "desc"],
  followed: ["order[followedCount]", "desc"],
  year: ["order[year]", "desc"],
};

/**
 * Validate the `sort` argument and apply the matching order parameter.
 *
 * @param url Request URL whose search params are mutated in place.
 * @param value Raw sort argument; nullish selects "relevance".
 * @throws Error when the value is not one of the keys of SORT_PARAMS.
 */
function applySort(url: URL, value: unknown): void {
  const key = String(value ?? "relevance").trim();
  // Own-property check: the `in` operator also matches inherited members such
  // as "constructor" or "toString", which would slip through validation and
  // then be written to the URL as an "undefined=undefined" parameter.
  if (!Object.prototype.hasOwnProperty.call(SORT_PARAMS, key)) {
    throw new Error(
      `mangadex sort must be one of: ${Object.keys(SORT_PARAMS).join(", ")}.`,
    );
  }
  const spec = SORT_PARAMS[key];
  if (spec) url.searchParams.set(spec[0], spec[1]);
}
59
+
60
/** Accepted `content-rating` values; "all" is a local shorthand, not an API value. */
const CONTENT_RATINGS = new Set([
  "safe",
  "suggestive",
  "erotica",
  "pornographic",
  "all",
]);

/**
 * Validate the content-rating argument and apply it to the request URL.
 *
 * @param url Request URL whose search params are mutated in place.
 * @param value Raw argument; nullish or "" leaves the URL untouched so the
 *   API's own default filter applies.
 * @throws Error when the value is not a member of CONTENT_RATINGS.
 */
function applyContentRating(url: URL, value: unknown): void {
  if (value === undefined || value === null || value === "") return;
  const rating = String(value).trim();
  if (!CONTENT_RATINGS.has(rating)) {
    throw new Error(
      `mangadex content_rating must be one of: ${Array.from(CONTENT_RATINGS).join(", ")}.`,
    );
  }
  if (rating !== "all") {
    url.searchParams.append("contentRating[]", rating);
    return;
  }
  // Omitting contentRating[] does not mean "everything": the MangaDex API
  // falls back to its default subset, which leaves out "pornographic". To
  // honour "all", every concrete rating has to be listed explicitly.
  for (const concrete of CONTENT_RATINGS) {
    if (concrete !== "all") {
      url.searchParams.append("contentRating[]", concrete);
    }
  }
}
78
+
79
/**
 * GET a MangaDex API URL and return the `data` array of the JSON envelope.
 *
 * @param url Fully built request URL.
 * @returns The envelope's `data` array, or an empty array when it is absent.
 * @throws Error on a non-2xx HTTP status, or when the envelope carries a
 *   `result` other than "ok".
 */
async function getJson(url: URL): Promise<unknown[]> {
  const headers = { Accept: "application/json", "User-Agent": USER_AGENT };
  const response = await fetch(url, { headers });
  if (!response.ok) {
    throw new Error(`mangadex request failed with HTTP ${response.status}.`);
  }

  const envelope = (await response.json()) as {
    data?: unknown[];
    result?: string;
  };
  const outcome = envelope.result;
  if (outcome && outcome !== "ok") {
    throw new Error(`mangadex API returned ${outcome}.`);
  }
  return envelope.data ?? [];
}
90
+
91
/**
 * Pick one display string out of a language-keyed map.
 *
 * Preference order is `en`, `ja`, `ja-ro`, `zh`, then the first value of the
 * object; empty strings are skipped because the chain uses truthiness.
 *
 * @param values Language-keyed object; any non-object is treated as empty.
 * @returns The chosen text, or "" when nothing usable is present.
 */
function localized(values: unknown): string {
  let table: Record<string, unknown> = {};
  if (values && typeof values === "object") {
    table = values as Record<string, unknown>;
  }
  const preferred = table.en || table.ja || table["ja-ro"] || table.zh;
  return str(preferred || Object.values(table)[0]);
}
100
+
101
/**
 * Flatten MangaDex manga entities into table rows.
 *
 * @param rows Entities as returned in the API's `data` array.
 * @returns One row per entity with a 1-based `rank`, localized title and
 *   description (description capped at 700 characters) and a site URL.
 */
export function mapMangaDexManga(rows: unknown[]): Record<string, unknown>[] {
  const toRow = (row: unknown, index: number): Record<string, unknown> => {
    const entity = row as Record<string, unknown>;
    const attrs =
      (entity.attributes as Record<string, unknown> | undefined) ?? {};
    const { id } = entity;
    return {
      rank: index + 1,
      id,
      title: localized(attrs.title),
      status: str(attrs.status),
      year: attrs.year ?? null,
      content_rating: str(attrs.contentRating),
      latest_uploaded_chapter: str(attrs.latestUploadedChapter),
      description: localized(attrs.description).slice(0, 700),
      url: `https://mangadex.org/title/${id}`,
    };
  };
  return rows.map((row, index) => toRow(row, index));
}
119
+
120
/**
 * Flatten MangaDex author entities into table rows.
 *
 * @param rows Entities as returned in the API's `data` array.
 * @returns One row per entity with a 1-based `rank`, the author's name,
 *   social links as strings ("" when missing) and a site URL.
 */
export function mapMangaDexAuthors(rows: unknown[]): Record<string, unknown>[] {
  const toRow = (row: unknown, index: number): Record<string, unknown> => {
    const entity = row as Record<string, unknown>;
    const attrs =
      (entity.attributes as Record<string, unknown> | undefined) ?? {};
    const { id } = entity;
    return {
      rank: index + 1,
      id,
      name: str(attrs.name),
      twitter: str(attrs.twitter),
      pixiv: str(attrs.pixiv),
      website: str(attrs.website),
      url: `https://mangadex.org/author/${id}`,
    };
  };
  return rows.map((row, index) => toRow(row, index));
}
136
+
137
/**
 * Run a MangaDex manga title search.
 *
 * Arguments are validated in the order query, limit, year, sort and
 * content-rating before any request is made.
 *
 * @param kwargs Command arguments: `query`, `limit`, `year`, `sort`, `content-rating`.
 * @returns Mapped manga rows.
 * @throws Error on invalid arguments, on a failed request, or when the search is empty.
 */
async function searchManga(kwargs: Record<string, unknown>) {
  const query = required(kwargs.query, "query");
  const endpoint = new URL(`${API}/manga`);
  const params = endpoint.searchParams;

  params.set("title", query);
  params.set("limit", String(requireLimit(kwargs.limit)));

  const year = optionalYear(kwargs.year);
  if (year) {
    params.set("year", String(year));
  }

  applySort(endpoint, kwargs.sort);
  applyContentRating(endpoint, kwargs["content-rating"]);

  for (const relation of ["author", "artist"]) {
    params.append("includes[]", relation);
  }

  const rows = mapMangaDexManga(await getJson(endpoint));
  if (rows.length === 0) {
    throw new Error(`No MangaDex manga found for "${query}".`);
  }
  return rows;
}
153
+
154
/**
 * Run a MangaDex author/artist name search.
 *
 * @param kwargs Command arguments: `query` and `limit`.
 * @returns Mapped author rows.
 * @throws Error on invalid arguments, on a failed request, or when the search is empty.
 */
async function searchAuthors(kwargs: Record<string, unknown>) {
  const query = required(kwargs.query, "query");
  const endpoint = new URL(`${API}/author`);
  const params = endpoint.searchParams;

  params.set("name", query);
  params.set("limit", String(requireLimit(kwargs.limit)));

  const rows = mapMangaDexAuthors(await getJson(endpoint));
  if (rows.length === 0) {
    throw new Error(`No MangaDex authors found for "${query}".`);
  }
  return rows;
}
164
+
165
/** Arguments of the `authors` command: a positional query plus a result cap. */
const SEARCH_ARGS = [
  {
    name: "query",
    type: "str" as const,
    required: true,
    positional: true,
  },
  {
    name: "limit",
    type: "int" as const,
    default: 10,
  },
];

/**
 * Arguments of the `manga` command: the search arguments plus optional
 * year, sort order and content-rating filters.
 */
const MANGA_ARGS = [
  {
    name: "query",
    type: "str" as const,
    required: true,
    positional: true,
  },
  {
    name: "limit",
    type: "int" as const,
    default: 10,
  },
  {
    name: "year",
    type: "int" as const,
  },
  {
    name: "sort",
    type: "str" as const,
    default: "relevance",
    choices: ["relevance", "latest", "followed", "year"],
  },
  {
    name: "content-rating",
    type: "str" as const,
    choices: ["safe", "suggestive", "erotica", "pornographic", "all"],
  },
];
186
+
187
// Register `mangadex manga`: public title search, no browser session.
cli({
  site: "mangadex",
  name: "manga",
  description:
    "Search MangaDex manga by Japanese title, romaji, alias, or keyword",
  domain: "mangadex.org",
  strategy: Strategy.PUBLIC,
  browser: false,
  args: MANGA_ARGS,
  columns: [
    "rank",
    "id",
    "title",
    "status",
    "year",
    "content_rating",
    "url",
  ],
  func: async (_page, kwargs) => {
    return searchManga(kwargs);
  },
});

// Register `mangadex authors`: public author/artist search, no browser session.
cli({
  site: "mangadex",
  name: "authors",
  description: "Search MangaDex authors and artists by public name or romaji",
  domain: "mangadex.org",
  strategy: Strategy.PUBLIC,
  browser: false,
  args: SEARCH_ARGS,
  columns: [
    "rank",
    "id",
    "name",
    "twitter",
    "pixiv",
    "website",
    "url",
  ],
  func: async (_page, kwargs) => {
    return searchAuthors(kwargs);
  },
});
@@ -0,0 +1,87 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import { resolveCommand } from "../../registry.js";
3
+ import {
4
+ mapMoegirlOpenSearch,
5
+ parseMoegirlLinksHtml,
6
+ parseMoegirlPageHtml,
7
+ } from "./web.js";
8
+
9
+ describe("moegirl public commands", () => {
10
+ it("registers search, page, and links", () => {
11
+ expect(
12
+ Object.keys(resolveCommand("moegirl", "search")!.adapter.commands),
13
+ ).toEqual(expect.arrayContaining(["search", "page", "links"]));
14
+ });
15
+
16
+ it("maps OpenSearch results", () => {
17
+ expect(
18
+ mapMoegirlOpenSearch(
19
+ [
20
+ "花火",
21
+ ["花火", "花火(星穹铁道)"],
22
+ ["", ""],
23
+ [
24
+ "https://zh.moegirl.org.cn/%E8%8A%B1%E7%81%AB",
25
+ "https://zh.moegirl.org.cn/%E8%8A%B1%E7%81%AB(%E6%98%9F%E7%A9%B9%E9%93%81%E9%81%93)",
26
+ ],
27
+ ],
28
+ 5,
29
+ ),
30
+ ).toEqual([
31
+ {
32
+ rank: 1,
33
+ title: "花火",
34
+ description: "",
35
+ url: "https://zh.moegirl.org.cn/%E8%8A%B1%E7%81%AB",
36
+ },
37
+ {
38
+ rank: 2,
39
+ title: "花火(星穹铁道)",
40
+ description: "",
41
+ url: "https://zh.moegirl.org.cn/%E8%8A%B1%E7%81%AB(%E6%98%9F%E7%A9%B9%E9%93%81%E9%81%93)",
42
+ },
43
+ ]);
44
+ });
45
+
46
+ it("parses page text and metadata", () => {
47
+ const row = parseMoegirlPageHtml(
48
+ `<title>花火(星穹铁道) - 萌娘百科 万物皆可萌的百科全书</title>
49
+ <meta name="description" content="花火是游戏《崩坏:星穹铁道》中的登场角色。">
50
+ <link rel="canonical" href="https://zh.moegirl.org.cn/%E8%8A%B1%E7%81%AB(%E6%98%9F%E7%A9%B9%E9%93%81%E9%81%93)">
51
+ <script>RLCONF={"wgCategories":["崩坏:星穹铁道角色","游戏角色"]};</script>
52
+ <template id="MOE_SKIN_TEMPLATE_BODYCONTENT"><div class="mw-parser-output">
53
+ <p><b>花火</b>是游戏《崩坏:星穹铁道》中的角色。</p>
54
+ <p>她属于假面愚者。</p></div></template>`,
55
+ "花火(星穹铁道)",
56
+ 1,
57
+ );
58
+ expect(row).toMatchObject({
59
+ title: "花火(星穹铁道)",
60
+ description: "花火是游戏《崩坏:星穹铁道》中的登场角色。",
61
+ categories: ["崩坏:星穹铁道角色", "游戏角色"],
62
+ paragraphs: 1,
63
+ url: "https://zh.moegirl.org.cn/%E8%8A%B1%E7%81%AB(%E6%98%9F%E7%A9%B9%E9%93%81%E9%81%93)",
64
+ });
65
+ expect(String(row.extract)).toContain("崩坏:星穹铁道");
66
+ });
67
+
68
+ it("extracts disambiguation links with contains filter", () => {
69
+ expect(
70
+ parseMoegirlLinksHtml(
71
+ `<template id="MOE_SKIN_TEMPLATE_BODYCONTENT"><div>
72
+ <a href="/%E8%8A%B1%E7%81%AB" title="花火">花火</a>
73
+ <a href="/%E8%8A%B1%E7%81%AB(%E6%98%9F%E7%A9%B9%E9%93%81%E9%81%93)" title="花火(星穹铁道)">花火(星穹铁道)</a>
74
+ <a href="/index.php?title=X&amp;action=edit&amp;redlink=1" title="X(页面不存在)">X</a>
75
+ </div></template>`,
76
+ 5,
77
+ "星穹铁道",
78
+ ),
79
+ ).toEqual([
80
+ {
81
+ rank: 1,
82
+ title: "花火(星穹铁道)",
83
+ url: "https://zh.moegirl.org.cn/%E8%8A%B1%E7%81%AB(%E6%98%9F%E7%A9%B9%E9%93%81%E9%81%93)",
84
+ },
85
+ ]);
86
+ });
87
+ });
@@ -0,0 +1,343 @@
1
+ /**
2
+ * @owner src/adapters/moegirl/web.ts
3
+ * @does Register Moegirl public search, page reading, and article link extraction commands.
4
+ * @needs Moegirl public OpenSearch endpoint and rendered MediaWiki HTML.
5
+ * @feeds ACG entity discovery, character disambiguation, and wiki-backed content research.
6
+ * @breaks Moegirl skin/template markup drift can reduce page and link extraction quality.
7
+ */
8
+
9
+ import { USER_AGENT } from "../../constants.js";
10
+ import { cli, Strategy } from "../../registry.js";
11
+
12
+ const ORIGIN = "https://zh.moegirl.org.cn";
13
+
14
/** One link extracted from an article: 1-based rank, link title, absolute URL. */
interface LinkRow {
  rank: number;
  title: string;
  url: string;
}

/** One OpenSearch hit: the link fields plus the result's description text. */
interface SearchRow extends LinkRow {
  description: string;
}
26
+
27
/**
 * Coerce an arbitrary value to a string.
 *
 * @param value Any value; `null` and `undefined` become the empty string.
 * @returns `String(value)`, or "" for nullish input.
 */
function str(value: unknown): string {
  const isNullish = value === undefined || value === null;
  return isNullish ? "" : String(value);
}
30
+
31
/**
 * Decode HTML entities and squash the result onto a single line.
 *
 * @param value Raw attribute or text value.
 * @returns Decoded text with every whitespace run collapsed to one space and
 *   leading/trailing whitespace removed.
 */
function decodeHtml(value: unknown): string {
  const decoded = decodeHtmlEntities(value);
  const singleLine = decoded.replace(/\s+/g, " ");
  return singleLine.trim();
}
34
+
35
/**
 * Decode HTML entities while keeping paragraph structure.
 *
 * Unlike `decodeHtml`, newlines survive: horizontal whitespace runs collapse
 * to one space, indentation after a newline is dropped, and three or more
 * consecutive newlines shrink to a single blank line.
 *
 * @param value Raw multi-line text.
 * @returns The normalized, trimmed text.
 */
function decodeHtmlText(value: unknown): string {
  let text = decodeHtmlEntities(value);
  text = text.replace(/[ \t\f\v]+/g, " ");
  text = text.replace(/\n[ \t]+/g, "\n");
  text = text.replace(/\n{3,}/g, "\n\n");
  return text.trim();
}
42
+
43
/**
 * Decode the HTML character references this adapter understands.
 *
 * Handles decimal (`&#28779;`) and hexadecimal (`&#x82B1;`) references and
 * the named entities `nbsp`, `amp`, `lt`, `gt` and `quot`. `&nbsp;` becomes a
 * regular space. Anything else is left as written.
 *
 * The decode is a single pass so a replacement's output is never decoded
 * again: `&amp;lt;` yields the literal text `&lt;`, not `<`.
 *
 * @param value Raw text; nullish input yields "".
 * @returns The decoded string.
 */
function decodeHtmlEntities(value: unknown): string {
  const named: Record<string, string> = {
    nbsp: " ",
    amp: "&",
    lt: "<",
    gt: ">",
    quot: '"',
  };
  return str(value).replace(
    /&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(nbsp|amp|lt|gt|quot));/g,
    (
      entity: string,
      decimal: string | undefined,
      hex: string | undefined,
      name: string | undefined,
    ) => {
      if (name !== undefined) return named[name];
      const codePoint =
        decimal !== undefined
          ? Number.parseInt(decimal, 10)
          : Number.parseInt(hex as string, 16);
      // String.fromCodePoint throws RangeError above U+10FFFF; malformed
      // markup should not abort the whole parse, so keep the text verbatim.
      if (!Number.isInteger(codePoint) || codePoint > 0x10ffff) return entity;
      return String.fromCodePoint(codePoint);
    },
  );
}
59
+
60
/**
 * Return the trimmed string form of a mandatory argument.
 *
 * @param value Raw argument value.
 * @param label Argument name used in the error message.
 * @returns The trimmed, non-empty text.
 * @throws Error when the value is nullish or blank after trimming.
 */
function requireText(value: unknown, label: string): string {
  const trimmed = str(value).trim();
  if (trimmed === "") {
    throw new Error(`moegirl ${label} cannot be empty.`);
  }
  return trimmed;
}
65
+
66
/**
 * Validate a `limit` argument.
 *
 * @param value Raw argument; nullish or "" selects `fallback`.
 * @param fallback Value returned when no limit was supplied (default 10).
 *   It is returned as-is, without range checking.
 * @returns An integer between 1 and 50 inclusive, or `fallback`.
 * @throws Error when a supplied value is not an integer in [1, 50].
 */
function requireLimit(value: unknown, fallback = 10): number {
  const missing = value === undefined || value === null || value === "";
  if (missing) return fallback;

  const limit = Number(value);
  const inRange = Number.isInteger(limit) && limit >= 1 && limit <= 50;
  if (!inRange) {
    throw new Error("moegirl limit must be an integer in [1, 50].");
  }
  return limit;
}
74
+
75
/**
 * Validate the `paragraphs` argument.
 *
 * @param value Raw argument; nullish or "" selects 0.
 * @returns An integer between 0 and 100 inclusive.
 * @throws Error when a supplied value is not an integer in [0, 100].
 */
function requireParagraphCap(value: unknown): number {
  const missing = value === undefined || value === null || value === "";
  if (missing) return 0;

  const cap = Number(value);
  const inRange = Number.isInteger(cap) && cap >= 0 && cap <= 100;
  if (!inRange) {
    throw new Error("moegirl paragraphs must be an integer in [0, 100].");
  }
  return cap;
}
83
+
84
/**
 * Build the article URL for a page title.
 *
 * @param title Page title; spaces are turned into underscores before the
 *   title is percent-encoded.
 * @returns Absolute URL under ORIGIN.
 */
function articleUrl(title: string): string {
  const slug = title.split(" ").join("_");
  const encoded = encodeURIComponent(slug);
  return `${ORIGIN}/${encoded}`;
}
87
+
88
/**
 * GET a URL and parse the response body as JSON.
 *
 * @param url Absolute request URL.
 * @returns The parsed JSON payload.
 * @throws Error on a non-2xx HTTP status.
 */
async function fetchJson(url: string): Promise<unknown> {
  const headers = {
    Accept: "application/json",
    "User-Agent": USER_AGENT,
  };
  const response = await fetch(url, { headers });
  if (response.ok) {
    return response.json();
  }
  throw new Error(`moegirl request failed with HTTP ${response.status}.`);
}
100
+
101
/**
 * GET a URL and return the response body as text.
 *
 * @param url Absolute request URL.
 * @returns The response body.
 * @throws Error on a non-2xx HTTP status.
 */
async function fetchText(url: string): Promise<string> {
  const headers = {
    Accept: "text/html,application/xhtml+xml",
    "User-Agent": USER_AGENT,
  };
  const response = await fetch(url, { headers });
  if (response.ok) {
    return response.text();
  }
  throw new Error(`moegirl request failed with HTTP ${response.status}.`);
}
113
+
114
/**
 * Convert an OpenSearch response tuple into result rows.
 *
 * The payload is read as `[query, titles[], descriptions[], urls[]]`; the
 * titles and urls arrays are mandatory, descriptions are optional.
 *
 * @param data Parsed JSON payload.
 * @param limit Maximum number of rows to return.
 * @returns Rows with a 1-based `rank`, in response order.
 * @throws Error when the payload does not have the expected shape.
 */
export function mapMoegirlOpenSearch(
  data: unknown,
  limit: number,
): SearchRow[] {
  const shapeOk =
    Array.isArray(data) && Array.isArray(data[1]) && Array.isArray(data[3]);
  if (!shapeOk) {
    throw new Error("moegirl OpenSearch response shape changed.");
  }

  const payload = data as unknown[];
  const titles = payload[1] as unknown[];
  const urls = payload[3] as unknown[];
  const descriptions = Array.isArray(payload[2])
    ? (payload[2] as unknown[])
    : [];

  const toRow = (title: unknown, index: number): SearchRow => ({
    rank: index + 1,
    title: str(title),
    description: str(descriptions[index]),
    url: str(urls[index]),
  });
  return titles.slice(0, limit).map((title, index) => toRow(title, index));
}
135
+
136
/**
 * Return capture group 1 of the first match, entity-decoded and flattened
 * to a single line.
 *
 * @param value Text to search.
 * @param re Pattern whose first capture group holds the wanted text.
 * @returns The cleaned capture, or "" when the pattern does not match.
 */
function firstMatch(value: string, re: RegExp): string {
  const found = value.match(re);
  if (!found) {
    return "";
  }
  return decodeHtml(found[1]);
}
140
+
141
/**
 * Isolate the article body from a full page.
 *
 * Tries, in order: the `MOE_SKIN_TEMPLATE_BODYCONTENT` template element, a
 * `mw-parser-output` div, and finally the whole document.
 *
 * @param html Full page HTML.
 * @returns The inner HTML of the first container that yields non-empty
 *   content, otherwise `html` unchanged.
 */
function bodyHtml(html: string): string {
  const fromTemplate = firstRawMatch(
    html,
    /<template id="MOE_SKIN_TEMPLATE_BODYCONTENT">([\s\S]*?)<\/template>/,
  );
  if (fromTemplate) {
    return fromTemplate;
  }

  const fromParserOutput = firstRawMatch(
    html,
    /<div[^>]+class="[^"]*\bmw-parser-output\b[^"]*"[^>]*>([\s\S]*?)<\/div>\s*<\/div>/,
  );
  if (fromParserOutput) {
    return fromParserOutput;
  }

  return html;
}
154
+
155
/**
 * Return capture group 1 of the first match without any decoding.
 *
 * @param value Text to search.
 * @param re Pattern whose first capture group holds the wanted text.
 * @returns The raw capture, or "" when the pattern does not match.
 */
function firstRawMatch(value: string, re: RegExp): string {
  const found = value.match(re);
  if (!found) {
    return "";
  }
  return found[1];
}
159
+
160
/**
 * Reduce an article page to plain text.
 *
 * Style, script and superscript elements are dropped with their contents,
 * block-level closers and `<br>`/`<div>` openers become newlines, every
 * remaining tag becomes a space, and entities are decoded last so that
 * escaped markup in the text is never mistaken for a tag.
 *
 * @param html Full page HTML.
 * @returns Plain text with newline-separated blocks.
 */
function cleanArticleText(html: string): string {
  const steps: Array<[RegExp, string]> = [
    [/<style[\s\S]*?<\/style>/gi, " "],
    [/<script[\s\S]*?<\/script>/gi, " "],
    [/<sup[\s\S]*?<\/sup>/gi, " "],
    [/<\/(?:p|li|h[1-6]|tr)>/gi, "\n\n"],
    [/<(?:br|div)\b[^>]*>/gi, "\n"],
    [/<[^>]+>/g, " "],
    [/\n\s+/g, "\n"],
    [/\n{3,}/g, "\n\n"],
  ];
  let text = bodyHtml(html);
  for (const [pattern, replacement] of steps) {
    text = text.replace(pattern, replacement);
  }
  return decodeHtmlText(text);
}
173
+
174
/**
 * Read the category names from the page's inline `"wgCategories":[...]`
 * config entry.
 *
 * The array is JSON, so it is parsed as JSON: names containing `,` or `]`
 * stay intact and escapes such as `\u82b1` or `\"` are decoded. If the
 * captured text is not valid JSON, fall back to a plain comma split.
 */
function parseWgCategories(html: string): string[] {
  // The inner group accepts whole quoted strings, so a `]` inside a category
  // name does not end the match early.
  const raw = html.match(
    /"wgCategories":\[((?:[^"\]]|"(?:[^"\\]|\\.)*")*)\]/,
  )?.[1];
  if (raw === undefined) return [];
  let names: unknown[];
  try {
    const parsed: unknown = JSON.parse(`[${raw}]`);
    names = Array.isArray(parsed) ? parsed : [];
  } catch {
    names = raw.split(",").map((item) => item.replace(/^"|"$/g, ""));
  }
  return names.map((name) => decodeHtml(name)).filter(Boolean);
}

/**
 * Extract title, description, categories, body text and canonical URL from a
 * rendered article page.
 *
 * @param html Full page HTML.
 * @param requestedTitle Title the caller asked for. It is the last-resort
 *   title, and it wins outright when it contains a full-width "(".
 * @param paragraphCap Maximum number of paragraphs in `extract`; 0 or less
 *   keeps all of them.
 * @returns A row with `title`, `description`, `categories`, `paragraphs`
 *   (count of paragraphs returned), `extract` (paragraphs joined by blank
 *   lines) and `url`.
 * @throws Error when no readable text can be extracted.
 */
export function parseMoegirlPageHtml(
  html: string,
  requestedTitle: string,
  paragraphCap: number,
): Record<string, unknown> {
  const pageTitle =
    firstMatch(html, /<title>([^<]+?)(?: - 萌娘百科[^<]*)?<\/title>/) ||
    firstMatch(
      html,
      /<meta property="og:title" content="(?:&lt;[^&]+&gt;)?([^"<]+)(?:&lt;\/[^&]+&gt;)?"/,
    ) ||
    requestedTitle;
  const title = requestedTitle.includes("(") ? requestedTitle : pageTitle;
  const description =
    firstMatch(html, /<meta name="description" content="([^"]*)"/) ||
    firstMatch(html, /<meta property="og:description" content="([^"]*)"/);
  const url =
    firstMatch(html, /<link rel="canonical" href="([^"]+)"/) ||
    articleUrl(title);
  const categories = parseWgCategories(html);
  const text = cleanArticleText(html);
  if (!text) {
    throw new Error(
      `Moegirl article "${requestedTitle}" has no readable text.`,
    );
  }
  const paragraphs = text
    .split(/\n+/)
    .map((part) => part.trim())
    .filter(Boolean);
  const selected =
    paragraphCap > 0 ? paragraphs.slice(0, paragraphCap) : paragraphs;
  return {
    title,
    description,
    categories,
    paragraphs: selected.length,
    extract: selected.join("\n\n"),
    url,
  };
}
218
+
219
/**
 * Collect site-internal article links from a page body.
 *
 * Only `<a>` tags that carry both `href` and `title` attributes are
 * considered. A link is kept when its href is a site-relative path, it is
 * not a red link ("redlink=1" in the href or "页面不存在" in the title), and
 * its title contains `contains` when that filter is non-blank. Duplicate
 * (title, href) pairs are reported once.
 *
 * @param html Full page HTML.
 * @param limit Collection stops once this many rows have been gathered.
 * @param contains Substring filter on the link title; blank disables it.
 * @returns Rows with a 1-based `rank`, in document order.
 */
export function parseMoegirlLinksHtml(
  html: string,
  limit: number,
  contains: string,
): LinkRow[] {
  const rows: LinkRow[] = [];
  const seen = new Set<string>();
  const needle = contains.trim();
  for (const match of bodyHtml(html).matchAll(
    /<a\b(?=[^>]*\bhref="([^"]+)")(?=[^>]*\btitle="([^"]+)")[^>]*>/g,
  )) {
    const href = decodeHtml(match[1]);
    const title = decodeHtml(match[2]);
    // "//host/path" is protocol-relative and points at another host. It also
    // starts with "/", so it must be rejected explicitly or it would be
    // glued onto ORIGIN and produce a bogus internal URL.
    if (!href.startsWith("/") || href.startsWith("//")) continue;
    if (href.includes("redlink=1")) continue;
    if (title.includes("页面不存在")) continue;
    if (needle && !title.includes(needle)) continue;
    const key = `${title}\n${href}`;
    if (seen.has(key)) continue;
    seen.add(key);
    rows.push({
      rank: rows.length + 1,
      title,
      url: `${ORIGIN}${href}`,
    });
    if (rows.length >= limit) break;
  }
  return rows;
}
247
+
248
// Register `moegirl search`: article lookup through the wiki's OpenSearch API.
cli({
  site: "moegirl",
  name: "search",
  description:
    "Search Moegirl ACG wiki articles for characters, works, songs, games, and studios",
  domain: "zh.moegirl.org.cn",
  strategy: Strategy.PUBLIC,
  browser: false,
  args: [
    { name: "query", type: "str", required: true, positional: true },
    { name: "limit", type: "int", default: 10 },
  ],
  columns: ["rank", "title", "description", "url"],
  func: async (_page, kwargs) => {
    const query = requireText(kwargs.query, "query");
    const limit = requireLimit(kwargs.limit);

    const endpoint = new URL(`${ORIGIN}/api.php`);
    const params: Array<[string, string]> = [
      ["action", "opensearch"],
      ["search", query],
      ["limit", String(limit)],
      ["namespace", "0"],
      ["format", "json"],
    ];
    for (const [key, value] of params) {
      endpoint.searchParams.set(key, value);
    }

    const payload = await fetchJson(endpoint.toString());
    const rows = mapMoegirlOpenSearch(payload, limit);
    if (rows.length === 0) {
      throw new Error(`No Moegirl pages found for "${query}".`);
    }
    return rows;
  },
});
276
+
277
// Register `moegirl page`: fetch one article and return it as a single row
// of plain text plus metadata.
cli({
  site: "moegirl",
  name: "page",
  description: "Read a Moegirl article as plain text with metadata",
  domain: "zh.moegirl.org.cn",
  strategy: Strategy.PUBLIC,
  browser: false,
  args: [
    { name: "title", type: "str", required: true, positional: true },
    {
      name: "paragraphs",
      type: "int",
      default: 0,
      description: "Paragraph cap, 0 means full",
    },
  ],
  columns: [
    "title",
    "description",
    "categories",
    "paragraphs",
    "extract",
    "url",
  ],
  func: async (_page, kwargs) => {
    // Both arguments are validated before the request is made.
    const title = requireText(kwargs.title, "title");
    const paragraphCap = requireParagraphCap(kwargs.paragraphs);
    const html = await fetchText(articleUrl(title));
    const row = parseMoegirlPageHtml(html, title, paragraphCap);
    return [row];
  },
});
313
+
314
// Register `moegirl links`: list the internal article links found on a page,
// optionally filtered by a substring of the link title.
cli({
  site: "moegirl",
  name: "links",
  description:
    "Extract internal Moegirl article links from a page, useful for disambiguation pages",
  domain: "zh.moegirl.org.cn",
  strategy: Strategy.PUBLIC,
  browser: false,
  args: [
    { name: "title", type: "str", required: true, positional: true },
    { name: "limit", type: "int", default: 20 },
    {
      name: "contains",
      type: "str",
      description: "Only return links whose title contains this text",
    },
  ],
  columns: ["rank", "title", "url"],
  func: async (_page, kwargs) => {
    const title = requireText(kwargs.title, "title");
    // Pass the declared default (20) as the fallback: requireLimit otherwise
    // falls back to 10 whenever `limit` reaches this handler unset.
    // Validating here also rejects a bad limit before any network request.
    const limit = requireLimit(kwargs.limit, 20);
    const html = await fetchText(articleUrl(title));
    const rows = parseMoegirlLinksHtml(html, limit, str(kwargs.contains));
    if (rows.length === 0) {
      throw new Error(`No Moegirl links found on "${title}".`);
    }
    return rows;
  },
});
@@ -0,0 +1,49 @@
1
+ site: pdf
2
+ name: read
3
+ description: Extract text from a local PDF file
4
+ type: desktop
5
+ strategy: public
6
+ binary: pdftotext
7
+ detect: which pdftotext
8
+
9
+ args:
10
+ file:
11
+ type: str
12
+ required: true
13
+ positional: true
14
+ description: Local PDF file path
15
+ x-unicli-kind: path
16
+ first_page:
17
+ type: int
18
+ default: 1
19
+ description: First page to read
20
+ last_page:
21
+ type: int
22
+ default: 20
23
+ description: Last page to read
24
+
25
+ pipeline:
26
+ - exec:
27
+ command: pdftotext
28
+ args:
29
+ - "-layout"
30
+ - "-enc"
31
+ - "UTF-8"
32
+ - "-f"
33
+ - "${{ args.first_page }}"
34
+ - "-l"
35
+ - "${{ args.last_page }}"
36
+ - "${{ args.file }}"
37
+ - "-"
38
+ parse: text
39
+ timeout: 60000
40
+
41
+ columns: []
42
+
43
+ # schema-v2 metadata — injected by `unicli migrate schema-v2`
44
+ capabilities: ["subprocess.exec"]
45
+ minimum_capability: subprocess.exec
46
+ trust: user
47
+ confidentiality: private
48
+ quarantine: false
49
+ schema_version: v2