@zenalexa/unicli 0.223.4 → 0.224.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENTS.md +5 -5
- package/README.md +4 -4
- package/README.zh-CN.md +4 -4
- package/dist/adapters/marxists-cn/archive.d.ts +61 -0
- package/dist/adapters/marxists-cn/archive.d.ts.map +1 -0
- package/dist/adapters/marxists-cn/archive.js +861 -0
- package/dist/adapters/marxists-cn/archive.js.map +1 -0
- package/dist/adapters/twitter/lists-extra.d.ts +17 -1
- package/dist/adapters/twitter/lists-extra.d.ts.map +1 -1
- package/dist/adapters/twitter/lists-extra.js +123 -21
- package/dist/adapters/twitter/lists-extra.js.map +1 -1
- package/dist/adapters/twitter/post.js +1 -0
- package/dist/adapters/twitter/post.js.map +1 -1
- package/dist/adapters/twitter/thread.d.ts +13 -1
- package/dist/adapters/twitter/thread.d.ts.map +1 -1
- package/dist/adapters/twitter/thread.js +76 -33
- package/dist/adapters/twitter/thread.js.map +1 -1
- package/dist/cli.d.ts.map +1 -1
- package/dist/cli.js +3 -0
- package/dist/cli.js.map +1 -1
- package/dist/commands/architecture.d.ts +21 -0
- package/dist/commands/architecture.d.ts.map +1 -0
- package/dist/commands/architecture.js +47 -0
- package/dist/commands/architecture.js.map +1 -0
- package/dist/commands/compute.d.ts.map +1 -1
- package/dist/commands/compute.js +31 -6
- package/dist/commands/compute.js.map +1 -1
- package/dist/commands/doctor-compute.d.ts.map +1 -1
- package/dist/commands/doctor-compute.js +88 -1
- package/dist/commands/doctor-compute.js.map +1 -1
- package/dist/compute/action-execution.d.ts +30 -0
- package/dist/compute/action-execution.d.ts.map +1 -0
- package/dist/compute/action-execution.js +112 -0
- package/dist/compute/action-execution.js.map +1 -0
- package/dist/compute/capture-reference.d.ts.map +1 -1
- package/dist/compute/capture-reference.js +6 -1
- package/dist/compute/capture-reference.js.map +1 -1
- package/dist/compute/capture.d.ts +2 -0
- package/dist/compute/capture.d.ts.map +1 -1
- package/dist/compute/capture.js +6 -1
- package/dist/compute/capture.js.map +1 -1
- package/dist/compute/cursor-visual-style.d.ts +35 -0
- package/dist/compute/cursor-visual-style.d.ts.map +1 -0
- package/dist/compute/cursor-visual-style.js +39 -0
- package/dist/compute/cursor-visual-style.js.map +1 -0
- package/dist/compute/linux-overlay.d.ts +38 -0
- package/dist/compute/linux-overlay.d.ts.map +1 -0
- package/dist/compute/linux-overlay.js +274 -0
- package/dist/compute/linux-overlay.js.map +1 -0
- package/dist/compute/macos-overlay.d.ts +64 -0
- package/dist/compute/macos-overlay.d.ts.map +1 -0
- package/dist/compute/macos-overlay.js +590 -0
- package/dist/compute/macos-overlay.js.map +1 -0
- package/dist/compute/overlay-daemon.d.ts +47 -0
- package/dist/compute/overlay-daemon.d.ts.map +1 -0
- package/dist/compute/overlay-daemon.js +206 -0
- package/dist/compute/overlay-daemon.js.map +1 -0
- package/dist/compute/overlay.d.ts +42 -0
- package/dist/compute/overlay.d.ts.map +1 -0
- package/dist/compute/overlay.js +111 -0
- package/dist/compute/overlay.js.map +1 -0
- package/dist/compute/platform-overlays.d.ts +20 -0
- package/dist/compute/platform-overlays.d.ts.map +1 -0
- package/dist/compute/platform-overlays.js +31 -0
- package/dist/compute/platform-overlays.js.map +1 -0
- package/dist/compute/visual-timeline.d.ts +132 -0
- package/dist/compute/visual-timeline.d.ts.map +1 -0
- package/dist/compute/visual-timeline.js +431 -0
- package/dist/compute/visual-timeline.js.map +1 -0
- package/dist/compute/windows-overlay.d.ts +38 -0
- package/dist/compute/windows-overlay.d.ts.map +1 -0
- package/dist/compute/windows-overlay.js +282 -0
- package/dist/compute/windows-overlay.js.map +1 -0
- package/dist/core/architecture-tree.d.ts +68 -0
- package/dist/core/architecture-tree.d.ts.map +1 -0
- package/dist/core/architecture-tree.js +215 -0
- package/dist/core/architecture-tree.js.map +1 -0
- package/dist/discovery/aliases.d.ts.map +1 -1
- package/dist/discovery/aliases.js +93 -0
- package/dist/discovery/aliases.js.map +1 -1
- package/dist/discovery/core-catalog.d.ts.map +1 -1
- package/dist/discovery/core-catalog.js +14 -0
- package/dist/discovery/core-catalog.js.map +1 -1
- package/dist/discovery/intents.d.ts.map +1 -1
- package/dist/discovery/intents.js +124 -0
- package/dist/discovery/intents.js.map +1 -1
- package/dist/discovery/loader.d.ts +12 -6
- package/dist/discovery/loader.d.ts.map +1 -1
- package/dist/discovery/loader.js +37 -10
- package/dist/discovery/loader.js.map +1 -1
- package/dist/discovery/search.d.ts +27 -28
- package/dist/discovery/search.d.ts.map +1 -1
- package/dist/discovery/search.js +118 -120
- package/dist/discovery/search.js.map +1 -1
- package/dist/engine/text-normalize.d.ts +14 -0
- package/dist/engine/text-normalize.d.ts.map +1 -1
- package/dist/engine/text-normalize.js +64 -0
- package/dist/engine/text-normalize.js.map +1 -1
- package/dist/fast-path/handlers/discovery.d.ts +12 -5
- package/dist/fast-path/handlers/discovery.d.ts.map +1 -1
- package/dist/fast-path/handlers/discovery.js +42 -7
- package/dist/fast-path/handlers/discovery.js.map +1 -1
- package/dist/manifest-compact.txt +2 -2
- package/dist/manifest.json +352 -3
- package/dist/mcp/profiles/computer-use.d.ts.map +1 -1
- package/dist/mcp/profiles/computer-use.js +76 -8
- package/dist/mcp/profiles/computer-use.js.map +1 -1
- package/dist/registry.d.ts +14 -5
- package/dist/registry.d.ts.map +1 -1
- package/dist/registry.js +33 -6
- package/dist/registry.js.map +1 -1
- package/dist/transport/cascade.d.ts +1 -0
- package/dist/transport/cascade.d.ts.map +1 -1
- package/dist/transport/cascade.js +2 -2
- package/dist/transport/cascade.js.map +1 -1
- package/docs/operate/compute.md +66 -1
- package/docs/operate/troubleshooting.md +42 -0
- package/package.json +9 -5
- package/server.json +3 -3
- package/skills/unicli/SKILL.md +1 -1
- package/skills/unicli-claude-code/SKILL.md +1 -1
- package/skills/unicli-hermes/SKILL.md +1 -1
- package/src/adapters/marxists-cn/archive.test.ts +173 -0
- package/src/adapters/marxists-cn/archive.ts +1049 -0
- package/src/adapters/twitter/lists-extra.test.ts +115 -0
- package/src/adapters/twitter/lists-extra.ts +146 -26
- package/src/adapters/twitter/post.ts +1 -0
- package/src/adapters/twitter/thread.test.ts +25 -1
- package/src/adapters/twitter/thread.ts +99 -47
- package/dist/manifest-search.json +0 -1
|
@@ -0,0 +1,861 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @owner src/adapters/marxists-cn/archive.ts
|
|
3
|
+
* @does Register agent-facing Chinese Marxists Internet Archive index, works, search, read, and reading-list commands.
|
|
4
|
+
* @needs Public www.marxists.org/chinese HTML, GB18030 decoding, bounded archive crawling.
|
|
5
|
+
* @feeds surface coverage ledger, Marxist philosophy, Western Marxism, and socialist-history retrieval workflows.
|
|
6
|
+
* @breaks Charset drift, silent off-site URLs, or weak crawl bounds hide primary-source archive content.
|
|
7
|
+
* @invariants All fetched pages are constrained to https://www.marxists.org/chinese/.
|
|
8
|
+
* @side-effects Performs public HTTP GET requests only.
|
|
9
|
+
* @perf Search crawls bounded author/topic index pages and optionally bounded full-text pages.
|
|
10
|
+
* @concurrency Search fetches index pages in small batches to avoid hammering the archive.
|
|
11
|
+
* @test src/adapters/marxists-cn/archive.test.ts
|
|
12
|
+
* @stability stable
|
|
13
|
+
* @since 0.224.0
|
|
14
|
+
*/
|
|
15
|
+
import { decodeHtmlEntities } from "../../engine/text-normalize.js";
|
|
16
|
+
import { cli, Strategy } from "../../registry.js";
|
|
17
|
+
// Root of the Chinese-language archive; relative archive paths are resolved against it.
const MARXISTS_CN_BASE = "https://www.marxists.org/chinese/";
// Hostname that parsed URLs are compared against by the path/URL validators.
const MARXISTS_HOST = "www.marxists.org";
// Sent as the User-Agent header on archive requests.
const USER_AGENT = "unicli-marxists-cn/1.0 (https://github.com/olo-dot-io/Uni-CLI)";

// Output column lists handed to the cli() registrations.
const INDEX_COLUMNS = [
    "rank",
    "title",
    "latinName",
    "kind",
    "path",
    "url",
];
const WORK_COLUMNS = ["rank", "scope", "section", "title", "note", "format", "path", "url"];
const SEARCH_COLUMNS = ["rank", "type", "scope", "section", "title", "snippet", "path", "url", "score"];
const READ_COLUMNS = [
    "title",
    "author",
    "date",
    "chars",
    "text",
    "url",
];
const READING_LIST_COLUMNS = ["rank", "preset", "author", "title", "year", "theme", "path", "url", "readCommand", "note"];

// The only reading-list preset name accepted by marxistsReadingListRows().
const WESTERN_MARXISM_PRESET = "western-marxism";
|
|
56
|
+
// Curated reading list. Each tuple is [author, title, year, theme, path, note];
// `path` is relative to the Chinese archive root. The mapper below emits the
// keys in the order author, title, year, theme, path, note.
const WESTERN_MARXISM_READINGS = [
    ["葛兰西", "《现代君主论》", "1931-1934", "hegemony-party-state", "gramsci/1931-1934/index.htm", "站内 HTML 目录;适合作为葛兰西政治理论入口。"],
    ["葛兰西", "《狱中札记》选读:马克思主义问题", "1929-1935", "praxis-hegemony", "gramsci/marxist/index.htm", "站内标注为译自《狱中札记》的专题选读。"],
    ["卢卡奇", "《历史与阶级意识》", "1922", "reification-class-consciousness", "georg-lukacs/1922/index.htm", "站内 HTML 目录;西方马克思主义常用起点。"],
    ["卢卡奇", "《列宁——关于列宁思想统一性的研究》", "1924", "party-dialectics", "georg-lukacs/1924/index.htm", "站内 HTML 目录;与《历史与阶级意识》配套阅读。"],
    ["科尔施", "《马克思主义与哲学》", "1923", "marxism-philosophy", "korsch-karl/mia-chinese-korsch-karl-1923.htm", "站内 HTML 正文;早期西方马克思主义核心文本。"],
    ["本雅明", "《机械复制时代的艺术作品》", "1936", "aesthetics-technology", "walter-benjamin/mia-chinese-walter-benjamin-1936.htm", "站内 HTML 正文;文化与技术批判入口。"],
    ["本雅明", "《历史哲学论纲》", "1940", "history-messianism", "walter-benjamin/mia-chinese-walter-benjamin-1940.htm", "站内 HTML 正文;历史哲学短文本。"],
    ["阿多诺", "《文化工业再思考》", "1967", "frankfurt-culture-industry", "adorno/mia-chinese-adorno-1967.htm", "站内 HTML 正文;法兰克福学派文化批判入口。"],
    ["马尔库塞", "《单向度的人》", "1964", "advanced-industrial-society", "marcuse/marxist.org-chinese-marcuse-1964.htm", "站内 HTML 正文;发达工业社会意识形态批判。"],
    ["马尔库塞", "《苏联的马克思主义——一种批判的分析》", "1958", "soviet-marxism", "marcuse/1958/index.htm", "站内 HTML 目录;与其社会批判文本互补。"],
    ["阿尔都塞", "《保卫马克思》", "1965", "structural-marxism", "althusser/1965/index.htm", "站内 HTML 目录;结构主义马克思主义入口。"],
    ["阿尔都塞", "《意识形态和意识形态国家机器》", "1970", "ideology-state-apparatuses", "althusser/mia-chinese-althusser-197004.htm", "站内 HTML 正文;意识形态理论核心文本。"],
    ["曼德尔", "《论马克思主义经济学》", "1962", "marxist-economics", "ernest-mandel/1962book/index.htm", "站内 HTML 目录;马克思主义经济学导论性文本。"],
    ["曼德尔", "《晚期资本主义》序言", "1975", "late-capitalism", "ernest-mandel/mia-chinese-mandel-1975.htm", "站内 HTML 正文;完整书另有 PDF,HTML 入口可直接读。"],
].map(([author, title, year, theme, path, note]) => ({ author, title, year, theme, path, note }));
|
|
170
|
+
/**
 * Coerce an arbitrary value into a trimmed string field.
 *
 * @param {unknown} value Candidate value.
 * @returns {string} `value.trim()` for strings, "" for every other type.
 */
function stringField(value) {
    if (typeof value !== "string") {
        return "";
    }
    return value.trim();
}
|
|
173
|
+
/**
 * Flatten an HTML fragment into a single line of text.
 *
 * Tags are replaced by spaces, entities are decoded, whitespace runs are
 * collapsed, and whitespace is removed before closing punctuation and after
 * the opening brackets listed in the patterns below.
 *
 * @param {string} value HTML fragment.
 * @returns {string} Trimmed single-line text.
 */
function cleanInlineText(value) {
    const withoutTags = value.replace(/<[^>]+>/g, " ");
    const decoded = decodeHtmlEntities(withoutTags);
    const singleSpaced = decoded.replace(/\s+/g, " ");
    const tightBeforePunctuation = singleSpaced.replace(/\s+([,.;:!?,。;:!?、)】])/g, "$1");
    const tightAfterOpeners = tightBeforePunctuation.replace(/([(【])\s+/g, "$1");
    return tightAfterOpeners.trim();
}
|
|
180
|
+
/**
 * Normalise multi-line text: decode entities, unify line endings to "\n",
 * strip spaces/tabs adjacent to line breaks, and cap blank runs at one empty line.
 *
 * @param {string} value Text that may still contain HTML entities.
 * @returns {string} Trimmed, normalised text.
 */
function cleanBlockText(value) {
    const decoded = decodeHtmlEntities(value);
    const unixNewlines = decoded.replace(/\r\n?/g, "\n");
    const noTrailingBlanks = unixNewlines.replace(/[ \t]+\n/g, "\n");
    const noLeadingBlanks = noTrailingBlanks.replace(/\n[ \t]+/g, "\n");
    const cappedBlankLines = noLeadingBlanks.replace(/\n{3,}/g, "\n\n");
    return cappedBlankLines.trim();
}
|
|
188
|
+
/**
 * Normalise a raw attribute value: whitespace runs become single spaces,
 * entities are decoded, and the result is trimmed.
 *
 * @param {string} value Raw attribute value.
 * @returns {string} Decoded, trimmed value.
 */
function decodeAttribute(value) {
    const singleSpaced = value.replace(/\s+/g, " ");
    const decoded = decodeHtmlEntities(singleSpaced);
    return decoded.trim();
}
|
|
191
|
+
/**
 * Read one attribute value out of a raw attribute string (the text between a
 * tag name and its closing ">").
 *
 * @param {string} attrs Raw attribute text, e.g. ` href="a.htm" title="A"`.
 * @param {string} name Attribute name to look up; matched case-insensitively.
 * @returns {string} The decoded value (double-quoted, single-quoted, or bare),
 *   or "" when the attribute is absent.
 */
function htmlAttribute(attrs, name) {
    // Escape the name so it is always matched literally inside the pattern.
    const literalName = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
    // Require a boundary before the name. Without it, looking up "href" also
    // matched the tail of "data-href" and returned the wrong attribute's value.
    const re = new RegExp(`(?:^|[\\s"'/])${literalName}\\s*=\\s*(?:"([^"]*)"|'([^']*)'|([^\\s>]+))`, "i");
    const match = attrs.match(re);
    return decodeAttribute(match?.[1] ?? match?.[2] ?? match?.[3] ?? "");
}
|
|
196
|
+
/**
 * Return the `alt` text of the first <img> tag inside an HTML fragment.
 *
 * @param {string} inner HTML fragment (an anchor's inner HTML at the call site).
 * @returns {string} The alt attribute value, or "" when there is no <img>.
 */
function imageAlt(inner) {
    const imageTag = /<img\b([^>]*)>/i.exec(inner);
    if (!imageTag) {
        return "";
    }
    return htmlAttribute(imageTag[1], "alt");
}
|
|
200
|
+
/**
 * Validate a numeric limit argument.
 *
 * @param {unknown} value User-supplied value; undefined, null and "" select `fallback`.
 * @param {number} fallback Value used when `value` is missing.
 * @param {number} max Inclusive upper bound.
 * @param {string} label Argument name used in the error message.
 * @returns {number} The limit as an integer in [1, max].
 * @throws {Error} When the value is not an integer within [1, max].
 */
export function requireMarxistsLimit(value, fallback, max, label) {
    const isMissing = value === undefined || value === null || value === "";
    const parsed = Number(isMissing ? fallback : value);
    const withinBounds = Number.isInteger(parsed) && parsed >= 1 && parsed <= max;
    if (withinBounds) {
        return parsed;
    }
    throw new Error(`marxists-cn ${label} must be an integer in [1, ${max}].`);
}
|
|
208
|
+
/**
 * Interpret a CLI argument as a boolean.
 *
 * @param {unknown} value Booleans pass through unchanged; anything else is
 *   stringified, trimmed and lower-cased.
 * @returns {boolean} True only for "1", "true" or "yes".
 */
function boolArg(value) {
    if (typeof value === "boolean") {
        return value;
    }
    const normalized = String(value ?? "").trim().toLowerCase();
    return ["1", "true", "yes"].includes(normalized);
}
|
|
216
|
+
/**
 * Convert a user-supplied path or URL into a decoded path relative to the
 * Chinese archive root.
 *
 * Accepts absolute http(s) URLs and relative paths (with or without a leading
 * "/" or "chinese/" prefix). Empty input, and a relative input that is empty
 * after prefix stripping (e.g. "/" or "/chinese/"), resolve to "index.html".
 *
 * @param {unknown} value Path or URL; coerced with String().
 * @param {{ directoryIndex?: boolean }} [options] When `directoryIndex` is set,
 *   a relative path without a file extension gets "/index.htm" appended.
 * @returns {string} Percent-decoded path below /chinese/, without query or hash.
 * @throws {Error} When the URL leaves www.marxists.org or /chinese/, or when
 *   the path is empty, malformed, or contains a NUL or ".." segment.
 */
export function normalizeMarxistsPath(value, options = {}) {
    const invalidPath = (cause) => new Error("marxists-cn path is not valid.", { cause });
    let raw = String(value ?? "").trim();
    if (!raw) {
        raw = "index.html";
    }
    let url;
    if (/^https?:\/\//i.test(raw)) {
        try {
            url = new URL(raw);
        }
        catch (cause) {
            // e.g. "http://" — report it as an invalid path instead of a bare TypeError.
            throw invalidPath(cause);
        }
    }
    else {
        raw = raw.replace(/^\/+/, "");
        if (raw.startsWith("chinese/")) {
            raw = raw.slice("chinese/".length);
        }
        // The archive root itself: without this, directoryIndex would build
        // "/index.htm", which resolves outside /chinese/.
        if (!raw) {
            raw = "index.html";
        }
        if (options.directoryIndex && !/\.[a-z0-9]{1,8}(?:[#?].*)?$/i.test(raw)) {
            raw = `${raw.replace(/\/+$/, "")}/index.htm`;
        }
        try {
            url = new URL(raw, MARXISTS_CN_BASE);
        }
        catch (cause) {
            throw invalidPath(cause);
        }
    }
    if (url.hostname !== MARXISTS_HOST) {
        throw new Error(`marxists-cn URL must stay on ${MARXISTS_HOST}.`);
    }
    if (!url.pathname.startsWith("/chinese/")) {
        throw new Error("marxists-cn URL must stay under /chinese/.");
    }
    let path;
    try {
        path = decodeURIComponent(url.pathname.slice("/chinese/".length));
    }
    catch (cause) {
        // Malformed percent-escape (e.g. "%zz") — decodeURIComponent throws URIError.
        throw invalidPath(cause);
    }
    if (!path || path.includes("\0") || path.split("/").includes("..")) {
        throw invalidPath();
    }
    return path;
}
|
|
248
|
+
/**
 * Build the canonical https URL for an archive-relative path.
 *
 * @param {string} path Path relative to the Chinese archive root.
 * @returns {string} Absolute URL with https scheme and no query or fragment.
 * @throws {Error} When the resolved URL is not on the archive host or not under /chinese/.
 */
export function marxistsUrl(path) {
    const resolved = new URL(path, MARXISTS_CN_BASE);
    const onArchiveHost = resolved.hostname === MARXISTS_HOST;
    const underChinese = resolved.pathname.startsWith("/chinese/");
    if (!(onArchiveHost && underChinese)) {
        throw new Error("marxists-cn resolved URL left the Chinese archive.");
    }
    resolved.protocol = "https:";
    resolved.hash = "";
    resolved.search = "";
    return resolved.toString();
}
|
|
258
|
+
/**
 * Resolve an anchor href against the page URL and return its archive-relative path.
 *
 * @param {string} href Raw href attribute value.
 * @param {string} baseUrl URL of the page the anchor was found on.
 * @returns {string|null} Archive-relative path, or null for empty, fragment,
 *   mailto: and javascript: links, for links outside the archive, and for
 *   paths that cannot be converted back into an archive URL.
 */
function linkPath(href, baseUrl) {
    if (!href || /^(?:#|mailto:|javascript:)/i.test(href)) {
        return null;
    }
    try {
        const path = normalizeMarxistsPath(new URL(href, baseUrl).toString());
        // Callers pass the returned path straight to marxistsUrl() with no guard.
        // A decoded path such as "mia:notes.htm" re-parses as an absolute URL and
        // makes marxistsUrl throw, aborting the whole page parse. Check the round
        // trip here so one odd link is skipped instead.
        marxistsUrl(path);
        return path;
    }
    catch {
        return null;
    }
}
|
|
268
|
+
/**
 * Derive a format label from a path's file extension.
 *
 * @param {string} path Archive-relative path.
 * @returns {string} Lower-cased alphanumeric extension at the end of the path,
 *   or "html" when there is none.
 */
function fileFormat(path) {
    const extension = /\.([a-z0-9]+)$/i.exec(path);
    return extension ? extension[1].toLowerCase() : "html";
}
|
|
272
|
+
/**
 * Classify an archive path into a coarse kind used by index rows.
 *
 * Checks run in order: downloadable file formats (returned as-is), site-meta
 * sections, reference sections, the pdf/ library, directory index pages, and
 * finally plain "html".
 *
 * @param {string} path Archive-relative path.
 * @returns {string} One of the file formats pdf/chm/mp3/zip/rar, or
 *   "site-meta", "reference", "library", "directory", "html".
 */
function archiveKind(path) {
    const format = fileFormat(path);
    const downloadFormats = ["pdf", "chm", "mp3", "zip", "rar"];
    if (downloadFormats.includes(format)) {
        return format;
    }
    const orderedRules = [
        [/^(?:search|update|whoweare)\//, "site-meta"],
        [/^(?:abc|dictionary-of-marxism|reference-books)\//, "reference"],
        [/^pdf\//, "library"],
        // Matches both ".../index.htm" and ".../index.html".
        [/\/index\.html?$/i, "directory"],
    ];
    for (const [pattern, kind] of orderedRules) {
        if (pattern.test(path)) {
            return kind;
        }
    }
    return "html";
}
|
|
288
|
+
/**
 * Extract the trailing Latin-script name from an anchor's title attribute,
 * falling back to its visible text when the title is empty.
 *
 * @param {string} title Anchor title attribute.
 * @param {string} visibleText Anchor text, used only when `title` is falsy.
 * @returns {string} The trailing Latin run, trimmed, or "" when the text does not end in one.
 */
function latinName(title, visibleText) {
    const candidate = title || visibleText;
    const trailingLatin = /[A-Za-z][A-Za-z0-9 .,'’\-·]+$/.exec(candidate);
    if (trailingLatin === null) {
        return "";
    }
    return trailingLatin[0].trim();
}
|
|
293
|
+
/**
 * Choose the display title for an anchor: its text (or title attribute when
 * the text is empty), with a trailing Latin name removed when present.
 *
 * @param {{ text: string, title: string }} anchor Parsed anchor.
 * @returns {string} Title without the trailing Latin name.
 */
function displayTitle(anchor) {
    const shown = anchor.text || anchor.title;
    const latinSuffix = latinName(anchor.title, anchor.text);
    if (latinSuffix && shown.endsWith(latinSuffix)) {
        return shown.slice(0, -latinSuffix.length).trim();
    }
    return shown;
}
|
|
300
|
+
/**
 * Collect every <a>...</a> element in an HTML string.
 *
 * @param {string} html Page HTML.
 * @returns {Array<{ href: string, title: string, text: string, rawAfter: string }>}
 *   One entry per anchor in document order. `text` is the cleaned inner text,
 *   or the first image's alt text when the inner text is empty. `rawAfter` is
 *   the raw HTML between this anchor's end and the next anchor's start (or the
 *   end of the document).
 */
export function extractMarxistsAnchors(html) {
    const found = [...html.matchAll(/<a\b([^>]*)>([\s\S]*?)<\/a>/gi)];
    return found.map((hit, position) => {
        const attributes = hit[1] ?? "";
        const body = hit[2] ?? "";
        const tailStart = (hit.index ?? 0) + hit[0].length;
        const tailEnd = found[position + 1]?.index ?? html.length;
        const text = cleanInlineText(body) || imageAlt(body);
        return {
            href: htmlAttribute(attributes, "href"),
            title: htmlAttribute(attributes, "title"),
            text,
            rawAfter: html.slice(tailStart, tailEnd),
        };
    });
}
|
|
320
|
+
/**
 * Turn an index page into de-duplicated link rows.
 *
 * @param {string} html Index page HTML.
 * @param {string} [baseUrl] URL the page was fetched from; used to resolve relative hrefs.
 * @returns {Array<{ title: string, latinName: string, kind: string, path: string, url: string }>}
 *   One row per distinct archive path, in document order. Anchors without a
 *   resolvable archive path or without a title are skipped.
 */
export function parseMarxistsIndex(html, baseUrl = MARXISTS_CN_BASE) {
    const visited = new Set();
    const entries = [];
    extractMarxistsAnchors(html).forEach((anchor) => {
        const path = linkPath(anchor.href, baseUrl);
        const title = displayTitle(anchor);
        const usable = Boolean(path) && Boolean(title) && !visited.has(path);
        if (!usable) {
            return;
        }
        visited.add(path);
        entries.push({
            title,
            latinName: latinName(anchor.title, anchor.text),
            kind: archiveKind(path),
            path,
            url: marxistsUrl(path),
        });
    });
    return entries;
}
|
|
339
|
+
/**
 * Find the most recent section heading in a stretch of HTML.
 *
 * A heading is either a <font> element whose size attribute is 5, or an
 * <h2>-<h6> element. The last one whose cleaned text is non-empty and at most
 * 80 characters long wins.
 *
 * @param {string} segment Raw HTML.
 * @returns {string} Heading text, or "" when none qualifies.
 */
function sectionFromSegment(segment) {
    const headingPattern = /<font\b(?=[^>]*\bsize\s*=\s*["']?5\b)[^>]*>([\s\S]*?)<\/font>|<h[2-6]\b[^>]*>([\s\S]*?)<\/h[2-6]>/gi;
    const newestFirst = [...segment.matchAll(headingPattern)].reverse();
    for (const heading of newestFirst) {
        const label = cleanInlineText(heading[1] ?? heading[2] ?? "");
        if (label && label.length <= 80) {
            return label;
        }
    }
    return "";
}
|
|
348
|
+
/**
 * Build a short note from the HTML that follows an anchor: nested anchors are
 * dropped, <br> becomes a line break, the rest is flattened to inline text and
 * cut to 240 characters.
 *
 * @param {string} segment Raw HTML following an anchor.
 * @returns {string} Note text, at most 240 characters.
 */
function noteFromSegment(segment) {
    const withoutAnchors = segment.replace(/<a\b[\s\S]*?<\/a>/gi, " ");
    const withLineBreaks = withoutAnchors.replace(/<br\s*\/?>/gi, "\n");
    const flattened = cleanInlineText(withLineBreaks);
    return flattened.slice(0, 240);
}
|
|
351
|
+
/**
 * Parse an author/topic page into work rows.
 *
 * Walks the page's anchors in order while tracking the latest section heading
 * found in the HTML between consecutive anchors.
 *
 * @param {string} html Page HTML.
 * @param {string} scope Archive-relative path of the page; copied onto each
 *   row, and links pointing back to it are skipped.
 * @param {string} baseUrl URL of the page, used to resolve relative hrefs.
 * @returns {Array<{ scope: string, section: string, title: string, note: string, format: string, path: string, url: string }>}
 *   One row per distinct linked path. Links to `scope` and to "index.html" are excluded.
 */
export function parseMarxistsWorks(html, scope, baseUrl) {
    const anchors = extractMarxistsAnchors(html);
    // Same anchor pattern again, this time only for the match offsets needed
    // to slice the HTML between anchors.
    const anchorSpans = [...html.matchAll(/<a\b[^>]*>[\s\S]*?<\/a>/gi)];
    const visited = new Set();
    const works = [];
    let currentSection = "";
    let cursor = 0;
    anchors.forEach((anchor, position) => {
        const span = anchorSpans[position];
        const heading = sectionFromSegment(html.slice(cursor, span?.index ?? cursor));
        if (heading) {
            currentSection = heading;
        }
        cursor = (span?.index ?? 0) + (span?.[0].length ?? 0);
        const path = linkPath(anchor.href, baseUrl);
        const title = displayTitle(anchor);
        const skip = !path ||
            !title ||
            visited.has(path) ||
            path === scope ||
            path === "index.html";
        if (skip) {
            return;
        }
        visited.add(path);
        works.push({
            scope,
            section: currentSection,
            title,
            note: noteFromSegment(anchor.rawAfter),
            format: fileFormat(path),
            path,
            url: marxistsUrl(path),
        });
    });
    return works;
}
|
|
388
|
+
/**
 * Return the cleaned text of the document's <title> element.
 *
 * @param {string} html Page HTML.
 * @returns {string} Title text, or "" when there is no <title>.
 */
function htmlTitle(html) {
    const titleTag = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html);
    const rawTitle = titleTag?.[1] ?? "";
    return cleanInlineText(rawTitle);
}
|
|
392
|
+
/**
 * Return the cleaned text of the first element whose class attribute equals
 * `className` exactly.
 *
 * @param {string} html Page HTML.
 * @param {string} className Class name; interpolated into the pattern as-is.
 * @returns {string} Cleaned inner text up to the first closing tag, or "" when not found.
 */
function classText(html, className) {
    const pattern = new RegExp(`<[^>]+class\\s*=\\s*["']${className}["'][^>]*>([\\s\\S]*?)<\\/[^>]+>`, "i");
    const captured = pattern.exec(html);
    return cleanInlineText(captured ? captured[1] : "");
}
|
|
396
|
+
export function marxistsHtmlToText(html) {
|
|
397
|
+
const body = html.match(/<body\b[^>]*>([\s\S]*?)<\/body>/i)?.[1] ?? html;
|
|
398
|
+
return cleanBlockText(body
|
|
399
|
+
.replace(/<script\b[\s\S]*?<\/script>/gi, " ")
|
|
400
|
+
.replace(/<style\b[\s\S]*?<\/style>/gi, " ")
|
|
401
|
+
.replace(/<!--[\s\S]*?-->/g, " ")
|
|
402
|
+
.replace(/<br\s*\/?>/gi, "\n")
|
|
403
|
+
.replace(/<\/(?:p|div|blockquote|h[1-6]|li|tr|pre)>/gi, "\n")
|
|
404
|
+
.replace(/<a\b[^>]*>([\s\S]*?)<\/a>/gi, "$1")
|
|
405
|
+
.replace(/<[^>]+>/g, " ")
|
|
406
|
+
.replace(/[ \t]+\n/g, "\n"));
|
|
407
|
+
}
|
|
408
|
+
/**
 * Cut text to `maxLength` UTF-16 code units and append a truncation marker
 * when something was removed.
 *
 * @param {string} text Full text.
 * @param {number} maxLength Maximum number of code units kept.
 * @returns {string} The original text, or its prefix followed by "\n\n... [truncated]".
 */
function truncateText(text, maxLength) {
    if (text.length <= maxLength) {
        return text;
    }
    const kept = text.slice(0, maxLength);
    return `${kept}\n\n... [truncated]`;
}
|
|
413
|
+
/**
 * Build the row returned by the read command from a fetched page.
 *
 * @param {string} html Page HTML.
 * @param {string} url URL the page was fetched from; copied onto the row.
 * @param {number} maxLength Maximum length of the returned text before truncation.
 * @returns {{ title: string, author: string, date: string, chars: number, text: string, url: string }}
 *   `title` prefers the element with class "title0" and falls back to <title>;
 *   `chars` is the length of the full text before truncation.
 * @throws {Error} When the page yields no text.
 */
export function mapMarxistsReadRow(html, url, maxLength) {
    const fullText = marxistsHtmlToText(html);
    if (!fullText) {
        throw new Error("marxists-cn read produced no text.");
    }
    const title = classText(html, "title0") || htmlTitle(html);
    const author = classText(html, "author");
    const date = classText(html, "date");
    return {
        title,
        author,
        date,
        chars: fullText.length,
        text: truncateText(fullText, maxLength),
        url,
    };
}
|
|
426
|
+
/**
 * Determine the declared character set of a response.
 *
 * The Content-Type header wins; otherwise the first 4096 bytes are scanned
 * (decoded as latin1) for a `charset=` declaration such as a <meta> tag.
 *
 * @param {ArrayBuffer|Uint8Array} buffer Raw response body.
 * @param {string} contentType Content-Type header value ("" when absent).
 * @returns {string} Lower-cased charset label; "gb18030" when nothing is declared.
 */
function sniffCharset(buffer, contentType) {
    const headerMatch = contentType.match(/charset=([^;\s]+)/i);
    if (headerMatch) {
        return headerMatch[1].toLowerCase();
    }
    const prefix = new TextDecoder("latin1").decode(buffer.slice(0, 4096));
    const metaMatch = prefix.match(/charset\s*=\s*["']?([a-zA-Z0-9_-]+)/i);
    return metaMatch ? metaMatch[1].toLowerCase() : "gb18030";
}
/**
 * Decode an archive response body into a string.
 *
 * UTF-8 and Big5 pages use their own decoders; GB2312/GBK/GB18030 and any
 * unrecognised or undeclared charset are decoded as GB18030.
 *
 * @param {ArrayBuffer|Uint8Array} buffer Raw response body.
 * @param {string} [contentType] Content-Type header value.
 * @returns {string} Decoded text.
 */
export function decodeMarxistsBuffer(buffer, contentType = "") {
    const charset = sniffCharset(buffer, contentType);
    let label = "gb18030";
    if (/utf-?8/.test(charset)) {
        label = "utf-8";
    }
    else if (/big5/.test(charset)) {
        // Big5 is a different encoding from the GB family; decoding it with
        // the GB18030 decoder yields mojibake.
        label = "big5";
    }
    let decoder;
    try {
        decoder = new TextDecoder(label);
    }
    catch (error) {
        // A runtime built without the Big5 table rejects the label with a
        // RangeError; fall back to the previous GB18030 behaviour in that case.
        if (label !== "big5" || !(error instanceof RangeError)) {
            throw error;
        }
        decoder = new TextDecoder("gb18030");
    }
    return decoder.decode(buffer);
}
|
|
443
|
+
/**
 * Fetch one archive page and return its decoded HTML/text.
 *
 * @param {string} path Archive-relative path.
 * @returns {Promise<string>} Decoded page body.
 * @throws {Error} When the request is redirected outside the Chinese archive,
 *   the page is missing (404), the server returns another non-OK status, or
 *   the content type is not HTML/XHTML/plain text.
 */
async function fetchArchiveHtml(path) {
    const url = marxistsUrl(path);
    const response = await fetch(url, {
        headers: {
            Accept: "text/html,application/xhtml+xml,text/plain",
            "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
            "User-Agent": USER_AGENT,
        },
        redirect: "follow",
    });
    // Redirects are followed automatically, so the request URL being inside the
    // archive does not guarantee the response is. `response.url` is the final
    // URL after redirects; re-check it so off-site content is never returned as
    // archive content.
    if (response.url) {
        const finalUrl = new URL(response.url);
        if (finalUrl.hostname !== MARXISTS_HOST || !finalUrl.pathname.startsWith("/chinese/")) {
            await response.body?.cancel();
            throw new Error(`marxists-cn request for ${path} was redirected outside the Chinese archive.`);
        }
    }
    if (response.status === 404) {
        throw new Error(`marxists-cn path not found: ${path}.`);
    }
    if (!response.ok) {
        throw new Error(`marxists-cn returned HTTP ${response.status} for ${url}.`);
    }
    const contentType = response.headers.get("content-type") ?? "";
    // An empty content type is accepted (the trailing `^$` alternative).
    if (!/text\/html|text\/plain|application\/xhtml\+xml|^$/i.test(contentType)) {
        throw new Error(`marxists-cn read only supports text/html pages, got ${contentType || "unknown"} for ${path}.`);
    }
    return decodeMarxistsBuffer(await response.arrayBuffer(), contentType);
}
|
|
465
|
+
/**
 * Keep the first `limit` rows and prefix each with a 1-based `rank`.
 *
 * @param {object[]} rows Rows in display order.
 * @param {number} limit Maximum number of rows returned.
 * @returns {object[]} New row objects with `rank` first; a row's own `rank`
 *   property, if any, overrides the generated one.
 */
function withRank(rows, limit) {
    const kept = rows.slice(0, limit);
    return kept.map((row, position) => Object.assign({ rank: position + 1 }, row));
}
|
|
470
|
+
/**
 * Decide whether an index row is a directory page worth crawling for works.
 *
 * @param {{ kind: string, path: string }} row Index row.
 * @returns {boolean} True for rows of kind "directory" that are not under
 *   search/, update/, pdf/ or whoweare/.
 */
function isDirectoryLink(row) {
    if (row.kind !== "directory") {
        return false;
    }
    const isExcludedSection = /^(?:search|update|pdf|whoweare)\//.test(row.path);
    return !isExcludedSection;
}
|
|
474
|
+
/**
 * Fold text for matching: NFKC-normalise, lower-case, and remove all whitespace.
 *
 * @param {string} value Text or query.
 * @returns {string} Folded text with no whitespace.
 */
function normalizeSearchText(value) {
    const folded = value.normalize("NFKC").toLowerCase();
    return folded.replace(/[\s ]+/g, "");
}
|
|
480
|
+
/**
 * Score how well a query matches a set of weighted text fields.
 *
 * Per non-empty field: an exact match of the folded text adds 2×weight, a
 * substring match adds weight (so an exact match earns both), and each
 * whitespace-separated query token found in the field adds
 * max(1, round(weight / 3)) unless the token equals the whole folded query.
 *
 * @param {string} query Search query.
 * @param {Array<[string, number]>} fields Pairs of [text, weight].
 * @returns {number} Total score; 0 when the folded query is empty.
 */
function scoreText(query, fields) {
    const foldedQuery = normalizeSearchText(query);
    if (!foldedQuery) {
        return 0;
    }
    // Fold each token once instead of once per field.
    const partialTokens = query
        .split(/\s+/)
        .filter(Boolean)
        .map((token) => normalizeSearchText(token))
        .filter((token) => token !== foldedQuery);
    let total = 0;
    for (const [fieldText, weight] of fields) {
        const foldedField = normalizeSearchText(fieldText);
        if (!foldedField) {
            continue;
        }
        if (foldedField === foldedQuery) {
            total += weight * 2;
        }
        if (foldedField.includes(foldedQuery)) {
            total += weight;
        }
        const tokenBonus = Math.max(1, Math.round(weight / 3));
        for (const token of partialTokens) {
            if (foldedField.includes(token)) {
                total += tokenBonus;
            }
        }
    }
    return total;
}
|
|
502
|
+
/**
 * Extract a snippet of `text` around the first occurrence of `query`.
 *
 * Matching is done on folded text (see normalizeSearchText), which removes
 * whitespace, so positions in the folded text are not positions in the raw
 * text. The raw text is folded one code point at a time while recording the
 * raw offset of each folded character, so the match can be mapped back to the
 * raw text.
 *
 * @param {string} text Raw page text.
 * @param {string} query Search query.
 * @param {number} [maxLength=220] Maximum snippet length in raw characters.
 * @returns {string} Up to 80 characters of context before the match and the
 *   remainder after it, whitespace-collapsed; or the trimmed head of the text
 *   when the query does not occur.
 */
function snippetFor(text, query, maxLength = 220) {
    const q = normalizeSearchText(query);
    const foldedParts = [];
    const rawOffsets = [];
    let rawOffset = 0;
    for (const char of text) {
        const piece = normalizeSearchText(char);
        for (let i = 0; i < piece.length; i += 1) {
            rawOffsets.push(rawOffset);
        }
        foldedParts.push(piece);
        rawOffset += char.length;
    }
    const foldedIndex = foldedParts.join("").indexOf(q);
    let rawIndex;
    if (foldedIndex >= 0) {
        rawIndex = rawOffsets[foldedIndex] ?? text.length;
    }
    else {
        // Whole-string NFKC can compose sequences that per-character folding
        // leaves apart, so retry on the fully folded text. Its index is only
        // an approximation of the raw offset.
        const wholeIndex = normalizeSearchText(text).indexOf(q);
        if (wholeIndex < 0) {
            return text.slice(0, maxLength).trim();
        }
        rawIndex = Math.max(0, Math.min(text.length, wholeIndex));
    }
    const start = Math.max(0, rawIndex - 80);
    const end = Math.min(text.length, rawIndex + maxLength - 80);
    return text.slice(start, end).replace(/\s+/g, " ").trim();
}
|
|
513
|
+
/**
 * Fetch a scope's directory page and parse it into work rows.
 *
 * @param {string} scope Archive path or URL of an author/topic directory; a
 *   path without a file extension is resolved to its index.htm.
 * @returns {Promise<object[]>} Rows produced by parseMarxistsWorks for that page.
 */
async function fetchWorksForScope(scope) {
    const pagePath = normalizeMarxistsPath(scope, { directoryIndex: true });
    const pageHtml = await fetchArchiveHtml(pagePath);
    const pageUrl = marxistsUrl(pagePath);
    return parseMarxistsWorks(pageHtml, pagePath, pageUrl);
}
|
|
517
|
+
/**
 * Map an async function over items with a cap on concurrent calls.
 *
 * Starts up to `concurrency` workers that each pull the next unclaimed index
 * until the list is exhausted.
 *
 * @param {Array} items Input items.
 * @param {number} concurrency Maximum number of workers.
 * @param {(item: any) => Promise<any>} fn Async mapper; called with the item only.
 * @returns {Promise<Array>} Results in input order. Rejects if any call rejects.
 */
async function mapLimit(items, concurrency, fn) {
    const output = [];
    let cursor = 0;
    const drain = async () => {
        while (cursor < items.length) {
            // Claim a slot before awaiting so workers never process the same index.
            const slot = cursor;
            cursor += 1;
            output[slot] = await fn(items[slot]);
        }
    };
    const workerCount = Math.min(concurrency, items.length);
    await Promise.all(Array.from({ length: workerCount }, () => drain()));
    return output;
}
|
|
530
|
+
/**
 * Collect, de-duplicate and rank search candidates for a query.
 *
 * Three tiers are searched in order:
 *   1. entries of the top-level index page;
 *   2. works listed on scope pages — either `options.scope` alone or the first
 *      `options.scanPages` directory links of the index (4 pages at a time);
 *   3. when `options.fullText` is set, the text of the first
 *      `options.contentPages` HTML works (3 pages at a time).
 * Pages that fail to fetch or parse in tiers 2 and 3 are skipped.
 *
 * @param {{ query: string, scope?: string, scanPages: number, fullText?: boolean, contentPages: number }} options
 * @returns {Promise<object[]>} Candidates with type, scope, section, title,
 *   snippet, path, url and score; the best-scoring one per (type, path, title),
 *   sorted by score descending and then by title.
 */
async function buildSearchCandidates(options) {
    const indexHtml = await fetchArchiveHtml("index.html");
    const indexRows = parseMarxistsIndex(indexHtml, MARXISTS_CN_BASE);
    // Tier 1: top-level index entries.
    const indexHits = [];
    for (const row of indexRows) {
        const score = scoreText(options.query, [
            [row.title, 40],
            [row.latinName, 24],
            [row.path, 8],
        ]);
        if (score <= 0) {
            continue;
        }
        indexHits.push({
            type: row.kind === "directory" ? "person_or_topic" : row.kind,
            scope: row.path,
            section: "",
            title: row.title,
            snippet: row.latinName,
            path: row.path,
            url: row.url,
            score,
        });
    }
    // Tier 2: works listed on the selected scope pages.
    let scopes;
    if (options.scope) {
        scopes = [normalizeMarxistsPath(options.scope, { directoryIndex: true })];
    }
    else {
        scopes = indexRows
            .filter(isDirectoryLink)
            .slice(0, options.scanPages)
            .map((row) => row.path);
    }
    // Best effort: a scope page that cannot be fetched contributes no works.
    const workGroups = await mapLimit(scopes, 4, (scope) => fetchWorksForScope(scope).catch(() => []));
    const works = workGroups.flat();
    const workHits = [];
    for (const work of works) {
        const score = scoreText(options.query, [
            [work.title, 50],
            [work.note, 18],
            [work.section, 10],
            [work.scope, 8],
            [work.path, 8],
        ]);
        if (score <= 0) {
            continue;
        }
        const isHtml = work.format === "htm" || work.format === "html";
        workHits.push({
            type: isHtml ? "work" : work.format,
            scope: work.scope,
            section: work.section,
            title: work.title,
            snippet: work.note,
            path: work.path,
            url: work.url,
            score,
        });
    }
    // Tier 3 (optional): page text of a bounded number of HTML works.
    let textHits = [];
    if (options.fullText) {
        const htmlWorks = works
            .filter((work) => /html?/i.test(work.format))
            .slice(0, options.contentPages);
        const textMatches = await mapLimit(htmlWorks, 3, async (work) => {
            try {
                const text = marxistsHtmlToText(await fetchArchiveHtml(work.path));
                const score = scoreText(options.query, [[text, 80]]);
                if (score <= 0) {
                    return null;
                }
                return {
                    type: "text",
                    scope: work.scope,
                    section: work.section,
                    title: work.title,
                    snippet: snippetFor(text, options.query),
                    path: work.path,
                    url: work.url,
                    score: score + 10,
                };
            }
            catch {
                // Best effort: unreadable pages produce no full-text hit.
                return null;
            }
        });
        textHits = textMatches.filter((row) => row !== null);
    }
    // Keep only the strongest candidate per (type, path, title).
    const strongest = new Map();
    for (const candidate of [...indexHits, ...workHits, ...textHits]) {
        const key = `${candidate.type}:${candidate.path}:${candidate.title}`;
        const current = strongest.get(key);
        if (current && current.score >= candidate.score) {
            continue;
        }
        strongest.set(key, candidate);
    }
    return [...strongest.values()].sort((a, b) => b.score - a.score || a.title.localeCompare(b.title));
}
|
|
628
|
+
/**
 * Reject paths whose format the read command cannot handle.
 *
 * @param {string} path Archive-relative path.
 * @throws {Error} When the path's format is not htm, html or txt.
 */
function assertHtmlReadablePath(path) {
    const format = fileFormat(path);
    const readable = format === "htm" || format === "html" || format === "txt";
    if (readable) {
        return;
    }
    throw new Error(`marxists-cn read supports HTML/text paths only; "${path}" is ${format}.`);
}
|
|
634
|
+
/**
 * Return the rows of a curated reading list.
 *
 * @param {string} preset Preset name; trimmed and matched case-insensitively.
 *   Only "western-marxism" is accepted.
 * @returns {Array<object>} One row per reading with preset, author, title,
 *   year, theme, path, note, url and a ready-to-run `readCommand`.
 * @throws {Error} When the preset is missing, not a string, or unknown.
 */
export function marxistsReadingListRows(preset) {
    // A missing or non-string preset is treated as unknown so callers get the
    // descriptive error below rather than a TypeError from `.trim()`.
    const normalizedPreset = typeof preset === "string" ? preset.trim().toLowerCase() : "";
    if (normalizedPreset !== WESTERN_MARXISM_PRESET) {
        throw new Error(`marxists-cn reading-list preset must be ${WESTERN_MARXISM_PRESET}.`);
    }
    return WESTERN_MARXISM_READINGS.map((row) => ({
        preset: WESTERN_MARXISM_PRESET,
        ...row,
        url: marxistsUrl(row.path),
        readCommand: `unicli marxists-cn read ${row.path}`,
    }));
}
|
|
646
|
+
// Command registration: `marxists-cn index`.
// Fetches the archive's index.html, parses it and returns ranked rows.
cli({
    site: "marxists-cn",
    name: "index",
    description: "List Chinese Marxists Internet Archive top-level people, topics, and library links",
    domain: MARXISTS_HOST,
    base: MARXISTS_CN_BASE,
    strategy: Strategy.PUBLIC,
    args: [
        {
            name: "limit",
            type: "int",
            default: 80,
            description: "Max rows",
        },
    ],
    columns: INDEX_COLUMNS,
    capabilities: [
        "http.fetch",
        "archive.index",
        "marxism.reference",
    ],
    minimum_capability: "http.fetch",
    func: async (_page, kwargs) => {
        // The limit is validated before any network access.
        // NOTE(review): 80 / 300 look like (fallback, upper bound) - confirm against requireMarxistsLimit.
        const maxRows = requireMarxistsLimit(kwargs.limit, 80, 300, "limit");
        const indexHtml = await fetchArchiveHtml("index.html");
        const entries = parseMarxistsIndex(indexHtml);
        return withRank(entries, maxRows);
    },
});
|
|
662
|
+
// Command registration: `marxists-cn reading-list <preset>`.
// Returns the rows of marxistsReadingListRows(preset), ranked and limited.
cli({
    site: "marxists-cn",
    name: "reading-list",
    description: "Return curated Chinese Marxists archive reading lists with directly readable paths",
    domain: MARXISTS_HOST,
    base: MARXISTS_CN_BASE,
    strategy: Strategy.PUBLIC,
    args: [
        {
            name: "preset",
            type: "str",
            required: true,
            positional: true,
            description: "Reading list preset, currently western-marxism",
        },
        {
            name: "limit",
            type: "int",
            default: 40,
            description: "Max rows",
        },
    ],
    columns: READING_LIST_COLUMNS,
    capabilities: [
        "archive.reading-list",
        "archive.read",
        "marxism.reference",
        "western-marxism.reference",
    ],
    minimum_capability: "archive.reading-list",
    func: async (_page, kwargs) => {
        // The limit is validated first, so a bad limit is reported before a missing preset.
        const maxRows = requireMarxistsLimit(kwargs.limit, 40, 100, "limit");
        const presetName = stringField(kwargs.preset);
        if (presetName) {
            return withRank(marxistsReadingListRows(presetName), maxRows);
        }
        throw new Error("marxists-cn reading-list preset is required.");
    },
});
|
|
695
|
+
// Command registration: `marxists-cn western-marxism`.
// Same rows as the reading-list command, with the preset fixed to
// WESTERN_MARXISM_PRESET so no positional argument is needed.
cli({
    site: "marxists-cn",
    name: "western-marxism",
    description: "List famous Western Marxist authors and works from the Chinese Marxists archive with read commands",
    domain: MARXISTS_HOST,
    base: MARXISTS_CN_BASE,
    strategy: Strategy.PUBLIC,
    args: [
        {
            name: "limit",
            type: "int",
            default: 40,
            description: "Max rows",
        },
    ],
    columns: READING_LIST_COLUMNS,
    capabilities: [
        "archive.reading-list",
        "archive.read",
        "marxism.reference",
        "western-marxism.reference",
    ],
    minimum_capability: "archive.reading-list",
    func: async (_page, kwargs) => {
        const maxRows = requireMarxistsLimit(kwargs.limit, 40, 100, "limit");
        const readings = marxistsReadingListRows(WESTERN_MARXISM_PRESET);
        return withRank(readings, maxRows);
    },
});
|
|
716
|
+
// Command registration: `marxists-cn authors`.
// Parses index.html like the index command, but keeps only the entries
// accepted by isDirectoryLink.
cli({
    site: "marxists-cn",
    name: "authors",
    description: "List people, authors, organizations, and topic directories in the Chinese Marxists archive",
    domain: MARXISTS_HOST,
    base: MARXISTS_CN_BASE,
    strategy: Strategy.PUBLIC,
    args: [
        {
            name: "limit",
            type: "int",
            default: 120,
            description: "Max rows",
        },
    ],
    columns: INDEX_COLUMNS,
    capabilities: [
        "http.fetch",
        "archive.people",
        "marxism.reference",
    ],
    minimum_capability: "http.fetch",
    func: async (_page, kwargs) => {
        // The limit is validated before any network access.
        const maxRows = requireMarxistsLimit(kwargs.limit, 120, 300, "limit");
        const indexHtml = await fetchArchiveHtml("index.html");
        const directories = parseMarxistsIndex(indexHtml).filter(isDirectoryLink);
        return withRank(directories, maxRows);
    },
});
|
|
733
|
+
// Command registration: `marxists-cn works <scope>`.
// Delegates to fetchWorksForScope with the raw `scope` argument and returns
// the result ranked and limited.
cli({
    site: "marxists-cn",
    name: "works",
    description: "List works/books/articles under a Chinese Marxists archive author or topic path",
    domain: MARXISTS_HOST,
    base: MARXISTS_CN_BASE,
    strategy: Strategy.PUBLIC,
    args: [
        {
            name: "scope",
            type: "str",
            required: true,
            positional: true,
            description: "Author/topic path, e.g. marx, lenin, georg-lukacs",
        },
        {
            name: "limit",
            type: "int",
            default: 120,
            description: "Max rows",
        },
    ],
    columns: WORK_COLUMNS,
    capabilities: [
        "http.fetch",
        "archive.works",
        "marxism.reference",
    ],
    minimum_capability: "http.fetch",
    func: async (_page, kwargs) => {
        // The limit is validated before the scope is fetched.
        const maxRows = requireMarxistsLimit(kwargs.limit, 120, 400, "limit");
        // NOTE(review): scope is passed through unvalidated here; presumably
        // fetchWorksForScope normalizes/validates it - confirm.
        const works = await fetchWorksForScope(kwargs.scope);
        return withRank(works, maxRows);
    },
});
|
|
758
|
+
// Command registration: `marxists-cn read <path>`.
// Normalizes the path, rejects formats other than htm/html/txt, then fetches
// the page and returns a single row built by mapMarxistsReadRow.
cli({
    site: "marxists-cn",
    name: "read",
    description: "Read a Chinese Marxists archive HTML page as clean plain text",
    domain: MARXISTS_HOST,
    base: MARXISTS_CN_BASE,
    strategy: Strategy.PUBLIC,
    args: [
        {
            name: "path",
            type: "str",
            required: true,
            positional: true,
            description: "Archive path or URL under /chinese/",
        },
        {
            name: "max-length",
            type: "int",
            default: 30000,
            description: "Max text characters",
        },
    ],
    columns: READ_COLUMNS,
    capabilities: [
        "http.fetch",
        "archive.read",
        "marxism.reference",
    ],
    minimum_capability: "http.fetch",
    func: async (_page, kwargs) => {
        // Path checks run first so an unreadable path fails before any other validation.
        const archivePath = normalizeMarxistsPath(kwargs.path);
        assertHtmlReadablePath(archivePath);
        // Accept the option under both its dashed and camelCase spellings.
        const requestedLength = kwargs["max-length"] ?? kwargs.maxLength;
        const textBudget = requireMarxistsLimit(requestedLength, 30000, 200000, "max-length");
        const pageUrl = marxistsUrl(archivePath);
        const pageHtml = await fetchArchiveHtml(archivePath);
        return [mapMarxistsReadRow(pageHtml, pageUrl, textBudget)];
    },
});
|
|
791
|
+
// Command registration: `marxists-cn search <query>`.
// Validates the options, then hands them to buildSearchCandidates and returns
// the candidates ranked and limited. Full-text search is refused without a scope.
cli({
    site: "marxists-cn",
    name: "search",
    description: "Search Chinese Marxists archive people, books, works, and scoped full text",
    domain: MARXISTS_HOST,
    base: MARXISTS_CN_BASE,
    strategy: Strategy.PUBLIC,
    args: [
        {
            name: "query",
            type: "str",
            required: true,
            positional: true,
            description: "Chinese or English search text",
        },
        {
            name: "limit",
            type: "int",
            default: 20,
            description: "Max rows",
        },
        {
            name: "scope",
            type: "str",
            default: "",
            description: "Optional author/topic path for focused search, e.g. marx",
        },
        {
            name: "scan-pages",
            type: "int",
            default: 24,
            description: "Top-level index pages to scan when scope is omitted",
        },
        {
            name: "full-text",
            type: "bool",
            default: false,
            description: "When true, search text inside scoped HTML work pages",
        },
        {
            name: "content-pages",
            type: "int",
            default: 40,
            description: "Max HTML pages to read for full-text scoped search",
        },
    ],
    columns: SEARCH_COLUMNS,
    capabilities: [
        "http.fetch",
        "archive.search",
        "archive.read",
        "marxism.reference",
    ],
    minimum_capability: "http.fetch",
    func: async (_page, kwargs) => {
        const searchText = stringField(kwargs.query);
        if (!searchText) {
            throw new Error("marxists-cn search query cannot be empty.");
        }
        const maxRows = requireMarxistsLimit(kwargs.limit, 20, 100, "limit");
        // Dashed option names are also accepted in camelCase form.
        const rawScanPages = kwargs["scan-pages"] ?? kwargs.scanPages;
        const scanPageCount = requireMarxistsLimit(rawScanPages, 24, 120, "scan-pages");
        const rawContentPages = kwargs["content-pages"] ?? kwargs.contentPages;
        const contentPageCount = requireMarxistsLimit(rawContentPages, 40, 120, "content-pages");
        const scopePath = stringField(kwargs.scope);
        const wantsFullText = boolArg(kwargs["full-text"] ?? kwargs.fullText);
        if (wantsFullText && !scopePath) {
            throw new Error("marxists-cn full-text search requires --scope to bound the crawl.");
        }
        const candidates = await buildSearchCandidates({
            query: searchText,
            scope: scopePath,
            scanPages: scanPageCount,
            fullText: wantsFullText,
            contentPages: contentPageCount,
        });
        return withRank(candidates, maxRows);
    },
});
|
|
861
|
+
//# sourceMappingURL=archive.js.map
|