lumina-wiki 1.2.0 → 1.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +75 -1
- package/README.md +8 -2
- package/README.vi.md +8 -2
- package/README.zh.md +8 -2
- package/bin/lumina.js +65 -10
- package/package.json +4 -3
- package/src/installer/commands.js +9 -1
- package/src/installer/manifest.js +39 -3
- package/src/installer/prompts.js +9 -9
- package/src/skills/core/help/SKILL.md +136 -0
- package/src/templates/README.md +1 -0
- package/src/templates/_lumina/schema/lumi-help-runbook.md +220 -0
- package/src/templates/_lumina/schema/lumi-help.csv +23 -0
- package/src/tools/prepare_source.py +154 -5
- package/src/tools/requirements.txt +8 -0
package/CHANGELOG.md
CHANGED
|
@@ -3,6 +3,76 @@
|
|
|
3
3
|
All notable changes to Lumina-Wiki are documented here.
|
|
4
4
|
Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|
5
5
|
|
|
6
|
+
## [1.4.0] - 2026-05-09
|
|
7
|
+
|
|
8
|
+
### Added — `/lumi-help` orientation skill (PR #9)
|
|
9
|
+
|
|
10
|
+
- New core skill `/lumi-help` with three modes:
|
|
11
|
+
- **Mode A — Orientation** (default): reads live workspace state
|
|
12
|
+
(`manifest.json`, `wiki/index.md`, `wiki/log.md`, `raw/`) and recommends
|
|
13
|
+
a single next action. Stale-log surfaces as a 30-day idle hint after
|
|
14
|
+
the primary recommendation, not as the primary action itself.
|
|
15
|
+
- **Mode B — Catalog** (`/lumi-help skills` or `/lumi-help catalog`): parses
|
|
16
|
+
`_lumina/schema/lumi-help.csv` and renders the full skill list grouped by
|
|
17
|
+
pack. Only sections matching installed packs are rendered at install time.
|
|
18
|
+
- **Mode C — Framework Q&A** (`/lumi-help explain <question>`): answers
|
|
19
|
+
how-it-works questions by citing shipped schema docs (`README.md` schema
|
|
20
|
+
block, `page-templates.md`, `cross-reference-packs.md`, `graph-packs.md`,
|
|
21
|
+
and the relevant `SKILL.md`).
|
|
22
|
+
- `src/templates/_lumina/schema/lumi-help.csv` — pack-conditional skill
|
|
23
|
+
catalog (CSV, `{{#if pack_*}}` gates rendered at install time). Single
|
|
24
|
+
source of truth for skill names, menu strings, and prerequisite ordering.
|
|
25
|
+
- `src/templates/_lumina/schema/lumi-help-runbook.md` — procedural detail
|
|
26
|
+
(bash probes, decision ladder, output formats) separated from the SKILL.md
|
|
27
|
+
contract; loaded on demand.
|
|
28
|
+
- `cleanupObsoleteCatalog()` in `manifest.js` removes the pre-v1.4
|
|
29
|
+
`skills-catalog.md` and `_state/skills-manifest.json` on re-install —
|
|
30
|
+
best-effort, `ENOENT` is not an error.
|
|
31
|
+
- `scripts/verify-lumi-help.test.mjs` — integrity test: validates CSV header
|
|
32
|
+
contract, column counts, id/menu uniqueness, valid enum values, pack gating,
|
|
33
|
+
and cross-references for all four pack combinations.
|
|
34
|
+
- `test:catalog` script wired into `package.json` (`node --test scripts/verify-lumi-help.test.mjs`).
|
|
35
|
+
- User guides (EN/VI/ZH) gain a `/lumi-help` section and a "Meet /lumi-help"
|
|
36
|
+
opener in Quick Start.
|
|
37
|
+
|
|
38
|
+
### Fixed
|
|
39
|
+
|
|
40
|
+
- `--cwd` / `--directory` flag propagation regression: dropping the
|
|
41
|
+
program-level `process.cwd()` default unmasks user-supplied `--cwd` values
|
|
42
|
+
that were being short-circuited by commander's `??` chain. Pinned by new
|
|
43
|
+
tests in `bin/lumina.deprecations.test.js`.
|
|
44
|
+
|
|
45
|
+
## [1.3.0] - 2026-05-09
|
|
46
|
+
|
|
47
|
+
### Added — Local text-document ingestion (research pack)
|
|
48
|
+
|
|
49
|
+
- `prepare_source.py` (research pack tool) now supports `.docx`, `.rtf`, and
|
|
50
|
+
`.epub` in addition to the existing PDF / TeX / HTML / Markdown formats.
|
|
51
|
+
- Hardened against zip-bomb (raw size cap + decompressed total cap) and XXE
|
|
52
|
+
/ XML billion-laughs (`defusedxml.defuse_stdlib()`) for ZIP-of-XML formats
|
|
53
|
+
(`.docx`, `.epub`).
|
|
54
|
+
- DRM-protected EPUB detection: explicit error with hint instead of an
|
|
55
|
+
opaque parse crash. Lumina does not strip DRM.
|
|
56
|
+
|
|
57
|
+
### Requirements
|
|
58
|
+
|
|
59
|
+
- The new format support requires the **research pack**:
|
|
60
|
+
`lumina install --packs core,research`. After install run
|
|
61
|
+
`pip install -r _lumina/tools/requirements.txt` to fetch
|
|
62
|
+
`python-docx`, `striprtf`, `ebooklib`, `beautifulsoup4`, and `defusedxml`.
|
|
63
|
+
- Missing libs raise an actionable `ValueError` (CLI exit 2) with the
|
|
64
|
+
`pip install …` hint — no silent empty-text writes.
|
|
65
|
+
|
|
66
|
+
### Known Limitations
|
|
67
|
+
|
|
68
|
+
- `.docx`: shapes, text boxes, headers/footers, table cells not extracted.
|
|
69
|
+
- `.rtf`: table layout and embedded images discarded.
|
|
70
|
+
- `.epub`: images, CSS, footnotes, and cross-references discarded; chapter-
|
|
71
|
+
level segmentation is **not** emitted in v1 — it will land alongside
|
|
72
|
+
`/lumi-chapter-ingest` EPUB support in a future release.
|
|
73
|
+
- `.odt`, image (`.png`, `.jpg`) and scanned-PDF ingestion remain out of
|
|
74
|
+
scope. See the roadmap entry "Vision/OCR ingestion" for the follow-up.
|
|
75
|
+
|
|
6
76
|
## [1.2.0] - 2026-05-07
|
|
7
77
|
|
|
8
78
|
### Added
|
|
@@ -313,7 +383,11 @@ Format follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|
|
313
383
|
|
|
314
384
|
---
|
|
315
385
|
|
|
316
|
-
[Unreleased]: https://github.com/tronghieu/lumina-wiki/compare/v1.
|
|
386
|
+
[Unreleased]: https://github.com/tronghieu/lumina-wiki/compare/v1.4.0...HEAD
|
|
387
|
+
[1.4.0]: https://github.com/tronghieu/lumina-wiki/compare/v1.3.0...v1.4.0
|
|
388
|
+
[1.3.0]: https://github.com/tronghieu/lumina-wiki/compare/v1.2.0...v1.3.0
|
|
389
|
+
[1.2.0]: https://github.com/tronghieu/lumina-wiki/compare/v1.1.0...v1.2.0
|
|
390
|
+
[1.1.0]: https://github.com/tronghieu/lumina-wiki/compare/v1.0.0...v1.1.0
|
|
317
391
|
[1.0.0]: https://github.com/tronghieu/lumina-wiki/compare/v0.9.1...v1.0.0
|
|
318
392
|
[0.9.1]: https://github.com/tronghieu/lumina-wiki/compare/v0.9.0...v0.9.1
|
|
319
393
|
[0.9.0]: https://github.com/tronghieu/lumina-wiki/compare/v0.8.1...v0.9.0
|
package/README.md
CHANGED
|
@@ -177,6 +177,7 @@ These are the commands you can use when chatting with your AI agent.
|
|
|
177
177
|
| | `/lumi-check` | Check the wiki for errors, such as broken links. |
|
|
178
178
|
| | `/lumi-reset` | Safely reset parts of the wiki. |
|
|
179
179
|
| | `/lumi-verify` | Check that wiki notes match the sources they cite. Reports anything suspicious for your review; never edits notes for you. |
|
|
180
|
+
| | `/lumi-help` | Read your workspace state and recommend one next action. Pass `skills` to list every command, or `explain <topic>` to ask how Lumina itself works (e.g., `/lumi-help explain bidirectional links`). |
|
|
180
181
|
| **Research** | `/lumi-research-discover` | Discover and rank relevant research papers. |
|
|
181
182
|
| | `/lumi-research-watchlist` | Choose research topics for scheduled discovery with AI help. |
|
|
182
183
|
| | `/lumi-research-survey` | Create a survey or summary from existing knowledge. |
|
|
@@ -197,9 +198,10 @@ The scripts behind these skills live in `_lumina/scripts/` and `_lumina/tools/`;
|
|
|
197
198
|
Lumina-Wiki is evolving rapidly. Here is our user-facing roadmap:
|
|
198
199
|
|
|
199
200
|
**Near-term (Stability & New Ingestion)**
|
|
200
|
-
- [
|
|
201
|
+
- [x] **`/lumi-help` Skill:** A smart assistant that reads your workspace state and tells you the one thing to do next; `skills` shows every command, `explain <topic>` answers how Lumina itself works.
|
|
201
202
|
- [x] **Multilingual setup:** Choose English, Vietnamese, or Chinese as your primary language during install. *(shipped in v1.2)*
|
|
202
|
-
- [
|
|
203
|
+
- [x] **Native DOCX, RTF & EPUB ingestion:** Pull Word, Rich Text, and EPUB books straight into your wiki via the research pack. *(shipped in v1.3)*
|
|
204
|
+
- [ ] **Image OCR & Scanned PDFs:** Ingest screenshots and scanned PDFs into your wiki.
|
|
203
205
|
- [ ] **Advanced Paper Ranking:** See influence scores and quality signals for your research papers.
|
|
204
206
|
- [x] **Improved CI/CD:** Native support for Bun and Node 22 environments. *(shipped in v1.2)*
|
|
205
207
|
|
|
@@ -221,6 +223,10 @@ Lumina-Wiki is evolving rapidly. Here is our user-facing roadmap:
|
|
|
221
223
|
|
|
222
224
|
## 7. Contributing & License
|
|
223
225
|
|
|
226
|
+
### CLI Contract
|
|
227
|
+
|
|
228
|
+
CI scripts and integrations should reference [`docs/cli-contract.md`](./docs/cli-contract.md) for the v1.x stable flag list and exit code mapping. Anything not listed there is internal and may change without notice.
|
|
229
|
+
|
|
224
230
|
### Local Development (for contributors)
|
|
225
231
|
|
|
226
232
|
If you want to contribute to the `lumina-wiki` installer:
|
package/README.vi.md
CHANGED
|
@@ -177,6 +177,7 @@ Xem [Hướng dẫn Nâng cao](docs/user-guide/advanced-qmd.vi.md) để biết
|
|
|
177
177
|
| | `/lumi-check` | Kiểm tra lỗi trong wiki (liên kết hỏng, v.v.). |
|
|
178
178
|
| | `/lumi-reset` | Xóa các phần của wiki một cách an toàn. |
|
|
179
179
|
| | `/lumi-verify` | Kiểm tra xem các trang wiki có khớp với nguồn đã trích dẫn không. Báo cáo những điểm đáng ngờ để bạn xem xét; không tự sửa ghi chú giúp bạn. |
|
|
180
|
+
| | `/lumi-help` | Đọc trạng thái workspace và đề xuất một bước tiếp theo. Gõ `/lumi-help skills` để xem toàn bộ danh sách lệnh, hoặc `/lumi-help explain <chủ đề>` để hỏi Lumina hoạt động ra sao (ví dụ `/lumi-help explain bidirectional links`). |
|
|
180
181
|
| **Research**| `/lumi-research-discover` | Khám phá và xếp hạng các bài báo nghiên cứu liên quan. |
|
|
181
182
|
| | `/lumi-research-watchlist` | Giúp bạn chọn các chủ đề nghiên cứu để AI tìm định kỳ. |
|
|
182
183
|
| | `/lumi-research-survey` | Tạo một bài tổng quan/khảo sát từ kiến thức hiện có. |
|
|
@@ -197,9 +198,10 @@ Các script chạy nền nằm trong `_lumina/scripts/` và `_lumina/tools/`; th
|
|
|
197
198
|
Lumina-Wiki đang phát triển nhanh chóng. Dưới đây là lộ trình hướng tới người dùng của chúng tôi:
|
|
198
199
|
|
|
199
200
|
**Sắp tới (Ổn định & Mở rộng nạp tài liệu)**
|
|
200
|
-
- [
|
|
201
|
+
- [x] **Kỹ năng `/lumi-help`:** Trợ lý thông minh đọc trạng thái workspace và mách bạn bước tiếp theo; gõ `/lumi-help skills` để xem toàn bộ lệnh, hoặc `/lumi-help explain <chủ đề>` để hỏi Lumina hoạt động ra sao.
|
|
201
202
|
- [x] **Cài đặt đa ngôn ngữ:** Chọn Tiếng Anh, Tiếng Việt hoặc Tiếng Trung làm ngôn ngữ chính khi cài đặt. *(đã phát hành trong v1.2)*
|
|
202
|
-
- [
|
|
203
|
+
- [x] **Nạp DOCX, RTF & EPUB native:** Đưa thẳng file Word, Rich Text và sách EPUB vào wiki qua research pack. *(đã phát hành trong v1.3)*
|
|
204
|
+
- [ ] **OCR ảnh & PDF scan:** Nạp ảnh chụp màn hình và PDF dạng scan vào wiki.
|
|
203
205
|
- [ ] **Xếp hạng bài báo nâng cao:** Xem điểm số ảnh hưởng và tín hiệu chất lượng cho các nghiên cứu của bạn.
|
|
204
206
|
- [x] **Cải thiện CI/CD:** Hỗ trợ chính thức cho môi trường Bun và Node 22. *(đã phát hành trong v1.2)*
|
|
205
207
|
|
|
@@ -221,6 +223,10 @@ Lumina-Wiki đang phát triển nhanh chóng. Dưới đây là lộ trình hư
|
|
|
221
223
|
|
|
222
224
|
## 7. Đóng góp & Giấy phép
|
|
223
225
|
|
|
226
|
+
### Hợp đồng CLI
|
|
227
|
+
|
|
228
|
+
Script CI và tích hợp nên tham chiếu [`docs/cli-contract.md`](./docs/cli-contract.md) để biết danh sách cờ ổn định và mapping exit code cho v1.x. Bất cứ thứ gì không liệt kê trong đó đều là nội bộ và có thể đổi mà không báo trước.
|
|
229
|
+
|
|
224
230
|
### Phát triển cục bộ (dành cho người đóng góp)
|
|
225
231
|
|
|
226
232
|
Nếu bạn muốn đóng góp cho trình cài đặt `lumina-wiki`:
|
package/README.zh.md
CHANGED
|
@@ -178,6 +178,7 @@ npx skills add https://github.com/tobi/qmd --skill qmd
|
|
|
178
178
|
| | `/lumi-check` | 检查 wiki 中的问题(断链等)。 |
|
|
179
179
|
| | `/lumi-reset` | 安全地删除 wiki 的部分内容。 |
|
|
180
180
|
| | `/lumi-verify` | 核查 wiki 里的笔记是否与引用的来源相符。把可疑之处报告给你审阅;不会替你修改笔记。 |
|
|
181
|
+
| | `/lumi-help` | 读取工作区状态,给出下一步该做的一条建议。加参数 `/lumi-help skills` 可查看全部命令清单,或 `/lumi-help explain <主题>` 询问 Lumina 自己的工作原理(例如 `/lumi-help explain bidirectional links`)。 |
|
|
181
182
|
| **Research**| `/lumi-research-discover` | 发现并排序相关研究论文。 |
|
|
182
183
|
| | `/lumi-research-watchlist` | 帮你选择要定期查找的研究主题。 |
|
|
183
184
|
| | `/lumi-research-survey` | 从现有知识创建综述/调研。 |
|
|
@@ -198,9 +199,10 @@ npx skills add https://github.com/tobi/qmd --skill qmd
|
|
|
198
199
|
Lumina-Wiki 正在快速演进。这是我们的用户路线图:
|
|
199
200
|
|
|
200
201
|
**近期计划(稳定性与新导入支持)**
|
|
201
|
-
- [
|
|
202
|
+
- [x] **`/lumi-help` 技能:** 智能助手读取工作区状态并告诉你下一步该做什么;加参数 `/lumi-help skills` 可查看全部命令清单,或 `/lumi-help explain <主题>` 询问 Lumina 本身的工作原理。
|
|
202
203
|
- [x] **多语言安装:** 安装时可选英文、越南文或中文作为主语言。*(v1.2 已发布)*
|
|
203
|
-
- [
|
|
204
|
+
- [x] **原生 DOCX、RTF 与 EPUB 导入:** 通过 research pack 将 Word、Rich Text 与 EPUB 电子书直接导入维基。*(v1.3 已发布)*
|
|
205
|
+
- [ ] **图片 OCR 与扫描 PDF:** 将截图与扫描版 PDF 导入维基。
|
|
204
206
|
- [ ] **高级论文排名:** 查看研究论文的影响力评分和质量信号。
|
|
205
207
|
- [x] **改进的 CI/CD:** 正式支持 Bun 和 Node 22 环境。*(v1.2 已发布)*
|
|
206
208
|
|
|
@@ -223,6 +225,10 @@ Lumina-Wiki 正在快速演进。这是我们的用户路线图:
|
|
|
223
225
|
|
|
224
226
|
## 7. 贡献与许可
|
|
225
227
|
|
|
228
|
+
### CLI 契约
|
|
229
|
+
|
|
230
|
+
CI 脚本和集成应参考 [`docs/cli-contract.md`](./docs/cli-contract.md) 了解 v1.x 稳定标志列表和退出码映射。未在其中列出的任何内容均为内部,可能在不另行通知的情况下更改。
|
|
231
|
+
|
|
226
232
|
### 本地开发(贡献者)
|
|
227
233
|
|
|
228
234
|
如果您想为 `lumina-wiki` 安装器做贡献:
|
package/bin/lumina.js
CHANGED
|
@@ -12,14 +12,15 @@
|
|
|
12
12
|
*
|
|
13
13
|
* Flags (all commands):
|
|
14
14
|
* --directory <path> — installation directory (defaults to current directory)
|
|
15
|
-
* --cwd <path> —
|
|
15
|
+
* --cwd <path> — [deprecated] alias for --directory; removed in v2.0
|
|
16
16
|
* --yes, -y — accept all defaults (non-interactive / CI)
|
|
17
17
|
* --no-update — skip npm registry version check
|
|
18
18
|
* --re-link — recompute symlink/junction/copy strategy
|
|
19
19
|
* --packs <list> — comma-separated pack list for non-interactive install
|
|
20
20
|
* --ide-targets <list> — comma-separated IDE target list for non-interactive install
|
|
21
21
|
*
|
|
22
|
-
* Exit codes: 0 success, 1 user error, 2 filesystem
|
|
22
|
+
* Exit codes: 0 success, 1 user error, 2 filesystem/safety, 3 internal/network,
|
|
23
|
+
* 4 user cancelled (Ctrl-C in interactive prompt or declined confirm)
|
|
23
24
|
*/
|
|
24
25
|
|
|
25
26
|
import { createRequire } from 'node:module';
|
|
@@ -76,6 +77,39 @@ if (handledVersion) process.exit(0);
|
|
|
76
77
|
const { Command, Option } = await import('commander');
|
|
77
78
|
const program = new Command();
|
|
78
79
|
|
|
80
|
+
// Exit code contract (see docs/planning-artifacts/audits/cli-contract-audit.md
|
|
81
|
+
// and `--help` text below). Caught errors map as follows:
|
|
82
|
+
// - RangeError (from safePath) → 2 (path safety)
|
|
83
|
+
// - err.code in {EACCES, EPERM} → 2 (filesystem perms)
|
|
84
|
+
// - err.code === 2 / err.code === 3 → preserved
|
|
85
|
+
// - other string fs codes (E*) → 3 (internal/io: ENOENT, EBUSY, EIO,
|
|
86
|
+
// EROFS, ENOSPC, ENOTDIR, …)
|
|
87
|
+
// - everything else → 1 (user error)
|
|
88
|
+
// ---------------------------------------------------------------------------
|
|
89
|
+
function exitCodeFor(err, defaultCode = 1) {
|
|
90
|
+
if (err instanceof RangeError) return 2;
|
|
91
|
+
if (err.code === 'EACCES' || err.code === 'EPERM') return 2;
|
|
92
|
+
if (err.code === 2) return 2;
|
|
93
|
+
if (err.code === 3) return 3;
|
|
94
|
+
if (typeof err.code === 'string' && err.code.startsWith('E')) return 3;
|
|
95
|
+
return defaultCode;
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
// ---------------------------------------------------------------------------
|
|
99
|
+
// Deprecation warnings — emitted to stderr once per invocation.
|
|
100
|
+
// Source of truth: docs/cli-contract.md.
|
|
101
|
+
// ---------------------------------------------------------------------------
|
|
102
|
+
let _cwdWarned = false;
|
|
103
|
+
function warnDeprecatedCwdIfUsed(cmdOpts, globalOpts) {
|
|
104
|
+
if (_cwdWarned) return;
|
|
105
|
+
if (cmdOpts.cwd != null || globalOpts.cwd != null) {
|
|
106
|
+
process.stderr.write(
|
|
107
|
+
'[deprecated] --cwd is deprecated and will be removed in v2.0. Use --directory instead.\n'
|
|
108
|
+
);
|
|
109
|
+
_cwdWarned = true;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
112
|
+
|
|
79
113
|
program
|
|
80
114
|
.name('lumina')
|
|
81
115
|
.description('Lumina Wiki — domain-agnostic, multi-IDE wiki scaffolder')
|
|
@@ -84,8 +118,9 @@ program
|
|
|
84
118
|
Exit codes:
|
|
85
119
|
0 success
|
|
86
120
|
1 user error (bad flag, missing prereq)
|
|
87
|
-
2 filesystem
|
|
88
|
-
3
|
|
121
|
+
2 filesystem / safety (permission denied, path outside cwd, unknown pack slug)
|
|
122
|
+
3 internal / network (atomicWrite failure, 5xx, upgrade incompatibility, lint catastrophic)
|
|
123
|
+
4 user cancelled (Ctrl-C in interactive prompt or declined confirm)
|
|
89
124
|
|
|
90
125
|
Flags applicable to all commands:
|
|
91
126
|
--directory <path> installation directory (defaults to current directory)
|
|
@@ -110,13 +145,33 @@ Examples:
|
|
|
110
145
|
// ---------------------------------------------------------------------------
|
|
111
146
|
// Global options
|
|
112
147
|
// ---------------------------------------------------------------------------
|
|
148
|
+
//
|
|
149
|
+
// IMPORTANT: --directory has NO default here on purpose.
|
|
150
|
+
// If we set `process.cwd()` as the default, commander stores it on
|
|
151
|
+
// `globalOpts.directory` for every invocation — even when the user only
|
|
152
|
+
// passed `--cwd <path>` (which lands on globalOpts.cwd because commander
|
|
153
|
+
// hoists global-shaped flags up regardless of where in argv they appear).
|
|
154
|
+
// The merge expression in each subcommand uses
|
|
155
|
+
// cmdOpts.directory ?? cmdOpts.cwd ?? globalOpts.directory ?? globalOpts.cwd ?? process.cwd()
|
|
156
|
+
// and `??` short-circuits as soon as one of those is non-nullish — so a
|
|
157
|
+
// program-level default of process.cwd() would *always* win over the user's
|
|
158
|
+
// `--cwd` value. The trailing `?? process.cwd()` in the merge expression is
|
|
159
|
+
// the single source of truth for the no-flag default; do not duplicate it
|
|
160
|
+
// here. Regression: see test "install --cwd <tmp> writes into <tmp>, not cwd".
|
|
113
161
|
program
|
|
114
|
-
.option('--directory <path>', 'installation directory'
|
|
162
|
+
.option('--directory <path>', 'installation directory')
|
|
115
163
|
.addOption(new Option('--cwd <path>', 'alias for --directory').hideHelp())
|
|
116
164
|
.option('-y, --yes', 'accept all defaults (non-interactive)')
|
|
117
165
|
.option('--no-update', 'skip npm registry version check')
|
|
118
166
|
.option('--re-link', 'recompute symlink strategy from current platform capabilities');
|
|
119
167
|
|
|
168
|
+
// Single source of truth for --cwd deprecation: fires once before any
|
|
169
|
+
// subcommand action regardless of whether --cwd was passed globally or
|
|
170
|
+
// per-command. New subcommands inherit this for free.
|
|
171
|
+
program.hook('preAction', (_thisCommand, actionCommand) => {
|
|
172
|
+
warnDeprecatedCwdIfUsed(actionCommand.opts(), program.opts());
|
|
173
|
+
});
|
|
174
|
+
|
|
120
175
|
// ---------------------------------------------------------------------------
|
|
121
176
|
// --version / -v — print immediately then do async update check
|
|
122
177
|
// ---------------------------------------------------------------------------
|
|
@@ -169,11 +224,9 @@ program
|
|
|
169
224
|
} catch (err) {
|
|
170
225
|
// Top-level catch: locale may not be resolved yet (pre-loadLocale path).
|
|
171
226
|
// Error strings kept as EN literals — machine-readable, intentionally exempt.
|
|
172
|
-
const isPermError = err.code === 'EACCES' || err.code === 'EPERM';
|
|
173
|
-
const isRangeError = err instanceof RangeError;
|
|
174
227
|
console.error(`[error] ${err.message}`);
|
|
175
228
|
if (process.env.DEBUG) console.error(err.stack);
|
|
176
|
-
process.exit(
|
|
229
|
+
process.exit(exitCodeFor(err));
|
|
177
230
|
}
|
|
178
231
|
});
|
|
179
232
|
|
|
@@ -200,7 +253,7 @@ program
|
|
|
200
253
|
} catch (err) {
|
|
201
254
|
console.error(`[error] ${err.message}`);
|
|
202
255
|
if (process.env.DEBUG) console.error(err.stack);
|
|
203
|
-
process.exit(
|
|
256
|
+
process.exit(exitCodeFor(err));
|
|
204
257
|
}
|
|
205
258
|
});
|
|
206
259
|
|
|
@@ -235,7 +288,9 @@ discover
|
|
|
235
288
|
} catch (err) {
|
|
236
289
|
console.error(`[error] ${err.message}`);
|
|
237
290
|
if (process.env.DEBUG) console.error(err.stack);
|
|
238
|
-
|
|
291
|
+
// Unhandled exceptions from discover-runner are by definition not user
|
|
292
|
+
// errors (main() handles those), so default unknown → 3 (internal).
|
|
293
|
+
process.exit(exitCodeFor(err, 3));
|
|
239
294
|
}
|
|
240
295
|
});
|
|
241
296
|
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"$schema": "https://json.schemastore.org/package.json",
|
|
3
3
|
"name": "lumina-wiki",
|
|
4
|
-
"version": "1.
|
|
4
|
+
"version": "1.4.0",
|
|
5
5
|
"description": "Domain-agnostic, multi-IDE wiki scaffolder — Karpathy's LLM-Wiki vision, cross-platform and pack-based.",
|
|
6
6
|
"keywords": [
|
|
7
7
|
"llm-wiki",
|
|
@@ -83,10 +83,11 @@
|
|
|
83
83
|
"devDependencies": {},
|
|
84
84
|
"scripts": {
|
|
85
85
|
"test": "npm run test:installer",
|
|
86
|
-
"test:installer": "node --test src/installer/commands.test.js src/installer/fs.test.js src/installer/locales.test.js src/installer/manifest.test.js src/installer/prompts.test.js src/installer/readme-templates.test.js src/installer/template-engine.test.js src/installer/update-check.test.js",
|
|
86
|
+
"test:installer": "node --test bin/lumina.flags.test.js bin/lumina.deprecations.test.js bin/lumina.cancel.test.js src/installer/commands.test.js src/installer/fs.test.js src/installer/locales.test.js src/installer/manifest.test.js src/installer/prompts.test.js src/installer/readme-templates.test.js src/installer/template-engine.test.js src/installer/update-check.test.js",
|
|
87
87
|
"test:scripts": "node --test src/scripts/lint.test.mjs src/scripts/reset.test.mjs src/scripts/wiki.test.mjs src/scripts/discover-runner.test.mjs src/scripts/external-ids.test.mjs src/scripts/parse-ids.test.mjs src/scripts/merge-ids.test.mjs src/scripts/build-source.test.mjs src/scripts/wiki-yaml-object.test.mjs",
|
|
88
|
-
"test:python": "
|
|
88
|
+
"test:python": "node scripts/run-pytest.mjs",
|
|
89
89
|
"test:all": "npm run test:installer && npm run test:scripts && npm run test:python",
|
|
90
|
+
"test:catalog": "node --test scripts/verify-lumi-help.test.mjs",
|
|
90
91
|
"test:fs": "node --test src/installer/fs.test.js",
|
|
91
92
|
"test:manifest": "node --test src/installer/manifest.test.js",
|
|
92
93
|
"test:template": "node --test src/installer/template-engine.test.js",
|
package/src/installer/commands.js
CHANGED
|
@@ -37,6 +37,7 @@ import {
|
|
|
37
37
|
writeSkillsManifest,
|
|
38
38
|
readFilesManifest,
|
|
39
39
|
writeFilesManifest,
|
|
40
|
+
cleanupObsoleteCatalog,
|
|
40
41
|
MANIFEST_SCHEMA_VERSION,
|
|
41
42
|
} from './manifest.js';
|
|
42
43
|
import {
|
|
@@ -338,6 +339,10 @@ export async function installCommand(opts = {}) {
|
|
|
338
339
|
await writeManifest(projectRoot, manifest);
|
|
339
340
|
await writeSkillsManifest(projectRoot, skillRows);
|
|
340
341
|
await writeFilesManifest(projectRoot, fileRows);
|
|
342
|
+
// Remove pre-v1.4 catalog files (skills-catalog.md, _state/skills-manifest.json)
|
|
343
|
+
// if they linger from an earlier install. The canonical catalog is
|
|
344
|
+
// _lumina/schema/lumi-help.csv, rendered by renderSchemaDocs above.
|
|
345
|
+
await cleanupObsoleteCatalog(projectRoot);
|
|
341
346
|
|
|
342
347
|
// 17.5. Post-upgrade: spawn lint --summary, print banner if findings exist
|
|
343
348
|
if (isUpgrade && existingManifest.packageVersion !== PKG.version) {
|
|
@@ -1025,6 +1030,7 @@ function getSkillDefs(packs) {
|
|
|
1025
1030
|
{ name: 'reset', canonicalId: 'lumi-reset', displayName: '/lumi-reset' },
|
|
1026
1031
|
{ name: 'verify', canonicalId: 'lumi-verify', displayName: '/lumi-verify' },
|
|
1027
1032
|
{ name: 'migrate-legacy', canonicalId: 'lumi-migrate-legacy', displayName: '/lumi-migrate-legacy' },
|
|
1033
|
+
{ name: 'help', canonicalId: 'lumi-help', displayName: '/lumi-help' },
|
|
1028
1034
|
];
|
|
1029
1035
|
for (const s of coreSkills) {
|
|
1030
1036
|
defs.push({ ...s, pack: 'core', srcPackPath: 'core' });
|
|
@@ -1086,7 +1092,7 @@ async function copyTools(projectRoot, { research }) {
|
|
|
1086
1092
|
|
|
1087
1093
|
async function renderSchemaDocs(projectRoot, templateVars) {
|
|
1088
1094
|
const schemaDir = join(projectRoot, '_lumina', 'schema');
|
|
1089
|
-
const schemaDocs = ['page-templates.md', 'cross-reference-packs.md', 'graph-packs.md'];
|
|
1095
|
+
const schemaDocs = ['page-templates.md', 'cross-reference-packs.md', 'graph-packs.md', 'lumi-help.csv', 'lumi-help-runbook.md'];
|
|
1090
1096
|
|
|
1091
1097
|
for (const doc of schemaDocs) {
|
|
1092
1098
|
const templatePath = join(TEMPLATES_DIR, '_lumina', 'schema', doc);
|
|
@@ -1215,6 +1221,8 @@ async function buildFilesManifest(projectRoot, packs, pkgVersion) {
|
|
|
1215
1221
|
'_lumina/schema/page-templates.md',
|
|
1216
1222
|
'_lumina/schema/cross-reference-packs.md',
|
|
1217
1223
|
'_lumina/schema/graph-packs.md',
|
|
1224
|
+
'_lumina/schema/lumi-help.csv',
|
|
1225
|
+
'_lumina/schema/lumi-help-runbook.md',
|
|
1218
1226
|
'CLAUDE.md',
|
|
1219
1227
|
'AGENTS.md',
|
|
1220
1228
|
'GEMINI.md',
|
package/src/installer/manifest.js
CHANGED
|
@@ -3,10 +3,17 @@
|
|
|
3
3
|
* @description Reader/writer for the three Lumina installer state files.
|
|
4
4
|
*
|
|
5
5
|
* Three state files (single concern each, atomic write per file):
|
|
6
|
-
* 1. _lumina/manifest.json
|
|
7
|
-
* 2. _lumina/_state/skills-manifest.csv — skill inventory
|
|
6
|
+
* 1. _lumina/manifest.json — install state
|
|
7
|
+
* 2. _lumina/_state/skills-manifest.csv — skill inventory (paths/sha/version)
|
|
8
8
|
* 3. _lumina/_state/files-manifest.csv — hash tracking
|
|
9
9
|
*
|
|
10
|
+
* The workflow catalog is _lumina/schema/lumi-help.csv — it is the
|
|
11
|
+
* canonical source of truth for /lumi-help (read directly at runtime). No
|
|
12
|
+
* derived JSON mirror exists; commands.js renders the .csv template at
|
|
13
|
+
* install time and lumi-help reads it via Bash. Earlier versions of the
|
|
14
|
+
* installer wrote a derived _state/skills-manifest.json — that file is now
|
|
15
|
+
* obsolete and is cleaned up on re-install.
|
|
16
|
+
*
|
|
10
17
|
* All writes go through atomicWrite from fs.js.
|
|
11
18
|
* Reads are defensive: missing file → null; truncated CSV → empty rows + warning.
|
|
12
19
|
*/
|
|
@@ -355,12 +362,41 @@ export function migrateManifest(manifest, targetVersion) {
|
|
|
355
362
|
return m;
|
|
356
363
|
}
|
|
357
364
|
|
|
365
|
+
// ---------------------------------------------------------------------------
|
|
366
|
+
// Obsolete-file cleanup
|
|
367
|
+
// ---------------------------------------------------------------------------
|
|
368
|
+
|
|
369
|
+
/**
|
|
370
|
+
* Remove obsolete catalog files left behind by older installs.
|
|
371
|
+
*
|
|
372
|
+
* Pre-v1.4 installs wrote two files that are no longer used:
|
|
373
|
+
* - _lumina/schema/skills-catalog.md (replaced by lumi-help.csv)
|
|
374
|
+
* - _lumina/_state/skills-manifest.json (no longer derived)
|
|
375
|
+
*
|
|
376
|
+
* Best-effort for missing files only: ENOENT is ignored, any other unlink error is rethrown.
|
|
377
|
+
*
|
|
378
|
+
* @param {string} projectRoot
|
|
379
|
+
* @returns {Promise<void>}
|
|
380
|
+
*/
|
|
381
|
+
export async function cleanupObsoleteCatalog(projectRoot) {
|
|
382
|
+
const { unlink } = await import('node:fs/promises');
|
|
383
|
+
const candidates = [
|
|
384
|
+
join(projectRoot, '_lumina', 'schema', 'skills-catalog.md'),
|
|
385
|
+
join(projectRoot, '_lumina', '_state', 'skills-manifest.json'),
|
|
386
|
+
];
|
|
387
|
+
await Promise.all(candidates.map(async (p) => {
|
|
388
|
+
try { await unlink(p); } catch (err) {
|
|
389
|
+
if (err.code !== 'ENOENT') throw err;
|
|
390
|
+
}
|
|
391
|
+
}));
|
|
392
|
+
}
|
|
393
|
+
|
|
358
394
|
// ---------------------------------------------------------------------------
|
|
359
395
|
// State file paths helper
|
|
360
396
|
// ---------------------------------------------------------------------------
|
|
361
397
|
|
|
362
398
|
/**
|
|
363
|
-
* Return the canonical paths for all
|
|
399
|
+
* Return the canonical paths for all installer state files.
|
|
364
400
|
*
|
|
365
401
|
* @param {string} projectRoot
|
|
366
402
|
* @returns {{ manifestJson: string, skillsCsv: string, filesCsv: string }}
|
package/src/installer/prompts.js
CHANGED
|
@@ -143,7 +143,7 @@ export function buildPromptList(existingManifest, defaultLocale = 'en') {
|
|
|
143
143
|
/**
|
|
144
144
|
* Run the five interactive install prompts.
|
|
145
145
|
* Returns default answers immediately when `acceptDefaults` is true (--yes mode).
|
|
146
|
-
* Calls process.exit(
|
|
146
|
+
* Calls process.exit(4) if the user cancels (Ctrl-C) or declines a confirm prompt.
|
|
147
147
|
*
|
|
148
148
|
* @param {object} [opts]
|
|
149
149
|
* @param {boolean} [opts.acceptDefaults=false] - Skip prompts; return defaults.
|
|
@@ -174,7 +174,7 @@ export async function runInstallPrompts({ acceptDefaults = false, cwd = process.
|
|
|
174
174
|
if (isCancel(localeRaw)) {
|
|
175
175
|
// t may be EN or may not be loaded yet — use cancel string from t if available
|
|
176
176
|
cancel(t ? t('prompt.cancelled') : 'Installation cancelled.');
|
|
177
|
-
process.exit(
|
|
177
|
+
process.exit(4);
|
|
178
178
|
}
|
|
179
179
|
const locale = localeRaw;
|
|
180
180
|
const langDefault = LOCALE_LANGUAGE_NAME[locale] ?? 'English';
|
|
@@ -192,7 +192,7 @@ export async function runInstallPrompts({ acceptDefaults = false, cwd = process.
|
|
|
192
192
|
});
|
|
193
193
|
if (isCancel(proceed) || !proceed) {
|
|
194
194
|
cancel(t ? t('prompt.cancelled') : 'Installation cancelled.');
|
|
195
|
-
process.exit(
|
|
195
|
+
process.exit(4);
|
|
196
196
|
}
|
|
197
197
|
}
|
|
198
198
|
|
|
@@ -203,7 +203,7 @@ export async function runInstallPrompts({ acceptDefaults = false, cwd = process.
|
|
|
203
203
|
placeholder: cwdAbs,
|
|
204
204
|
defaultValue: cwdAbs,
|
|
205
205
|
});
|
|
206
|
-
if (isCancel(directoryRaw)) { cancel(t ? t('prompt.cancelled') : 'Installation cancelled.'); process.exit(
|
|
206
|
+
if (isCancel(directoryRaw)) { cancel(t ? t('prompt.cancelled') : 'Installation cancelled.'); process.exit(4); }
|
|
207
207
|
const directory = expandUserPath(directoryRaw, cwdAbs);
|
|
208
208
|
const projectName = defaultProjectName(directory);
|
|
209
209
|
|
|
@@ -212,7 +212,7 @@ export async function runInstallPrompts({ acceptDefaults = false, cwd = process.
|
|
|
212
212
|
message: t ? t('prompt.purpose.message') : 'Research purpose (optional — describe what this wiki is for)',
|
|
213
213
|
placeholder: t ? t('prompt.purpose.placeholder') : 'e.g. Track flash-attention variants for a survey',
|
|
214
214
|
});
|
|
215
|
-
if (isCancel(researchPurposeRaw)) { cancel(t ? t('prompt.cancelled') : 'Installation cancelled.'); process.exit(
|
|
215
|
+
if (isCancel(researchPurposeRaw)) { cancel(t ? t('prompt.cancelled') : 'Installation cancelled.'); process.exit(4); }
|
|
216
216
|
const researchPurpose = researchPurposeRaw || '';
|
|
217
217
|
|
|
218
218
|
// ── Prompt 3: IDE targets ────────────────────────────────────────────────
|
|
@@ -230,7 +230,7 @@ export async function runInstallPrompts({ acceptDefaults = false, cwd = process.
|
|
|
230
230
|
initialValues: ['claude_code'],
|
|
231
231
|
required: false,
|
|
232
232
|
});
|
|
233
|
-
if (isCancel(ideTargetsRaw)) { cancel(t ? t('prompt.cancelled') : 'Installation cancelled.'); process.exit(
|
|
233
|
+
if (isCancel(ideTargetsRaw)) { cancel(t ? t('prompt.cancelled') : 'Installation cancelled.'); process.exit(4); }
|
|
234
234
|
const ideTargets = Array.isArray(ideTargetsRaw) && ideTargetsRaw.length > 0
|
|
235
235
|
? ideTargetsRaw
|
|
236
236
|
: ['claude_code'];
|
|
@@ -244,7 +244,7 @@ export async function runInstallPrompts({ acceptDefaults = false, cwd = process.
|
|
|
244
244
|
],
|
|
245
245
|
required: false,
|
|
246
246
|
});
|
|
247
|
-
if (isCancel(packsRaw)) { cancel(t ? t('prompt.cancelled') : 'Installation cancelled.'); process.exit(
|
|
247
|
+
if (isCancel(packsRaw)) { cancel(t ? t('prompt.cancelled') : 'Installation cancelled.'); process.exit(4); }
|
|
248
248
|
const selectedPacks = Array.isArray(packsRaw) ? packsRaw : [];
|
|
249
249
|
const packs = ['core', ...selectedPacks.filter(p => p !== 'core')];
|
|
250
250
|
|
|
@@ -254,7 +254,7 @@ export async function runInstallPrompts({ acceptDefaults = false, cwd = process.
|
|
|
254
254
|
placeholder: langDefault,
|
|
255
255
|
defaultValue: langDefault,
|
|
256
256
|
});
|
|
257
|
-
if (isCancel(communicationLangRaw)) { cancel(t ? t('prompt.cancelled') : 'Installation cancelled.'); process.exit(
|
|
257
|
+
if (isCancel(communicationLangRaw)) { cancel(t ? t('prompt.cancelled') : 'Installation cancelled.'); process.exit(4); }
|
|
258
258
|
const communicationLang = communicationLangRaw || langDefault;
|
|
259
259
|
|
|
260
260
|
const documentOutputLangRaw = await text({
|
|
@@ -262,7 +262,7 @@ export async function runInstallPrompts({ acceptDefaults = false, cwd = process.
|
|
|
262
262
|
placeholder: langDefault,
|
|
263
263
|
defaultValue: langDefault,
|
|
264
264
|
});
|
|
265
|
-
if (isCancel(documentOutputLangRaw)) { cancel(t ? t('prompt.cancelled') : 'Installation cancelled.'); process.exit(
|
|
265
|
+
if (isCancel(documentOutputLangRaw)) { cancel(t ? t('prompt.cancelled') : 'Installation cancelled.'); process.exit(4); }
|
|
266
266
|
const documentOutputLang = documentOutputLangRaw || langDefault;
|
|
267
267
|
|
|
268
268
|
return {
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: lumi-help
|
|
3
|
+
description: >
|
|
4
|
+
Orient the user in their Lumina wiki workspace. Three modes:
|
|
5
|
+
Orientation (default — recommend ONE next action; offer to run),
|
|
6
|
+
Catalog (on `skills`/`catalog`/`list` arg or features question — render
|
|
7
|
+
lumi-help.csv grouped by pack), Framework Q&A (on `explain`
|
|
8
|
+
arg or how-it-works question — answer from local docs with
|
|
9
|
+
citations). Use when the user says "help", "what next", "I'm lost",
|
|
10
|
+
asks for orientation, or asks how Lumina works.
|
|
11
|
+
allowed-tools:
|
|
12
|
+
- Bash
|
|
13
|
+
- Read
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
# /lumi-help
|
|
17
|
+
|
|
18
|
+
Read `README.md` at the project root before this SKILL.md.
|
|
19
|
+
|
|
20
|
+
This file is the contract — it has everything you need for normal invocations.
|
|
21
|
+
For precision detail (exact Bash commands, full output templates, multilingual
|
|
22
|
+
keyword lists, fallback codes) consult `_lumina/schema/lumi-help-runbook.md`
|
|
23
|
+
**only when the relevant section explicitly points to it**. Don't load it
|
|
24
|
+
upfront — Mode B never needs it.
|
|
25
|
+
|
|
26
|
+
## Purpose
|
|
27
|
+
|
|
28
|
+
Help the user know:
|
|
29
|
+
|
|
30
|
+
1. **Where they are** — installed packs, what's done, what's pending.
|
|
31
|
+
2. **What to do next** — ONE recommended skill with a cited reason.
|
|
32
|
+
3. **How to invoke it** — name, args, language hint; offer to run for them.
|
|
33
|
+
4. **What's available** — full catalog grouped by pack on demand.
|
|
34
|
+
5. **How Lumina works** — framework questions answered from local docs with citations.
|
|
35
|
+
|
|
36
|
+
## Step 0 · Read languages, ALWAYS first
|
|
37
|
+
|
|
38
|
+
Before mode routing, read `_lumina/config/lumina.config.yaml` and bind:
|
|
39
|
+
|
|
40
|
+
- `COMM_LANG` ← `communication_language` — language of every word back to user.
|
|
41
|
+
- `DOC_LANG` ← `document_output_language` — surfaced when recommending a write-skill.
|
|
42
|
+
|
|
43
|
+
User never passes a language flag. Match input tone (casual ↔ formal).
|
|
44
|
+
|
|
45
|
+
## Three modes (router decides AFTER Step 0, BEFORE other reads)
|
|
46
|
+
|
|
47
|
+
| Trigger | Mode | Job |
|
|
48
|
+
|---|---|---|
|
|
49
|
+
| no arg, or "help / what next / I'm lost" | **A · Orientation** | recommend ONE next action; offer to run |
|
|
50
|
+
| `skills`/`catalog`/`list`, or features question | **B · Catalog** | render `lumi-help.csv` grouped by pack |
|
|
51
|
+
| `explain`/`docs`, or how-it-works question | **C · Q&A** | answer with doc citations |
|
|
52
|
+
|
|
53
|
+
Keyword detection is multilingual (EN + VI + ZH). Mode B takes precedence over
|
|
54
|
+
C. If the question is about wiki *content* (not the framework), bridge to
|
|
55
|
+
`/lumi-ask` instead of answering in Mode C.
|
|
56
|
+
|
|
57
|
+
> When the user's input language is not English, or when the trigger is borderline,
|
|
58
|
+
> read the full keyword lists at `_lumina/schema/lumi-help-runbook.md` § Router
|
|
59
|
+
> before deciding. English plain-text triggers can be matched from this table alone.
|
|
60
|
+
|
|
61
|
+
## Mode A — Orientation (5 steps: locate → detect → compute → ground → cite)
|
|
62
|
+
|
|
63
|
+
Decision ladder is **load-bearing** — pick first match in this order:
|
|
64
|
+
|
|
65
|
+
1. Manifest missing → `/lumi-init`.
|
|
66
|
+
2. Required skill with both gates satisfied (`after` AND `before`),
|
|
67
|
+
completed=false → that skill.
|
|
68
|
+
3. raw/ files not yet ingested → `/lumi-ingest`.
|
|
69
|
+
4. Default → `/lumi-ask`.
|
|
70
|
+
|
|
71
|
+
Output: skill recommendation + one-sentence reason in `COMM_LANG` + `→ Run`
|
|
72
|
+
line + (write-skill only) `DOC_LANG` note + citation arrow + **"Want me to run
|
|
73
|
+
it now? (yes / show me first / skip)"**. Skip the prompt for case (4). On
|
|
74
|
+
"yes" → invoke; otherwise don't.
|
|
75
|
+
|
|
76
|
+
> For the exact Bash reads at each step (locate / detect / ground), the full
|
|
77
|
+
> formal-and-casual output templates, the idle-wiki hint format, and fallback
|
|
78
|
+
> codes (`__NO_MANIFEST__`, `__NO_CATALOG__`, `__NO_GRAPH__`, `__NO_DATE__`),
|
|
79
|
+
> read `_lumina/schema/lumi-help-runbook.md` § Mode A before producing output.
|
|
80
|
+
|
|
81
|
+
## Mode B — Catalog
|
|
82
|
+
|
|
83
|
+
Parse `_lumina/schema/lumi-help.csv`. Group rows by `pack` in order
|
|
84
|
+
core → research → reading → other (alphabetical). Pack labels are hardcoded:
|
|
85
|
+
|
|
86
|
+
- `core` → "Core (always installed)"
|
|
87
|
+
- `research` → "Research pack"
|
|
88
|
+
- `reading` → "Reading pack"
|
|
89
|
+
- other → pack name with first letter capitalized
|
|
90
|
+
|
|
91
|
+
Each entry: `` `[<menu>]` `/<id>` <args if non-empty> — <description> ``. End with
|
|
92
|
+
two footer pointers to `/lumi-help` (orientation) and `/lumi-help explain <topic>`
|
|
93
|
+
(framework Q&A). **Mode B never needs the runbook.**
|
|
94
|
+
|
|
95
|
+
## Mode C — Framework Q&A (5 steps: same skeleton as A)
|
|
96
|
+
|
|
97
|
+
Doc paths are stable, all shipped to the workspace at install time:
|
|
98
|
+
|
|
99
|
+
| Doc | When |
|
|
100
|
+
|---|---|
|
|
101
|
+
| `README.md` (`<!-- lumina:schema -->` block) | core concepts: layout, page types, link syntax, cross-reference rules, constraints, skills overview |
|
|
102
|
+
| `_lumina/schema/page-templates.md` | page-type frontmatter + section structure |
|
|
103
|
+
| `_lumina/schema/cross-reference-packs.md` | bidirectional-link rules and pack extensions |
|
|
104
|
+
| `_lumina/schema/graph-packs.md` | edge type vocabulary for `wiki/graph/edges.jsonl` |
|
|
105
|
+
| `.agents/skills/<skill-id>/SKILL.md` | when the question is specifically about one skill's behavior |
|
|
106
|
+
|
|
107
|
+
Use the Read tool (not Bash). Read just the slice you need. Build a 1–4
|
|
108
|
+
sentence answer in `COMM_LANG` with a `**Source**:` line. If no doc covers
|
|
109
|
+
the question, say so and point at the closest.
|
|
110
|
+
|
|
111
|
+
> For the exact output templates (formal, casual, no-doc fallback) and the
|
|
112
|
+
> rules for when to append the optional "→ Try it" line, read
|
|
113
|
+
> `_lumina/schema/lumi-help-runbook.md` § Mode C before producing output.
|
|
114
|
+
|
|
115
|
+
## Data sources (read-only)
|
|
116
|
+
|
|
117
|
+
| Source | Read in |
|
|
118
|
+
|---|---|
|
|
119
|
+
| `_lumina/config/lumina.config.yaml` | Step 0 (every invocation) |
|
|
120
|
+
| `_lumina/manifest.json` | Mode A |
|
|
121
|
+
| `_lumina/schema/lumi-help.csv` | Mode A, B |
|
|
122
|
+
| `wiki.mjs list-entities`, `wiki/log.md`, `raw/`, `wiki/index.md` | Mode A |
|
|
123
|
+
| `README.md` schema block, `_lumina/schema/page-templates.md`, `cross-reference-packs.md`, `graph-packs.md`, target skill's `SKILL.md` | Mode C |
|
|
124
|
+
|
|
125
|
+
## Constraints
|
|
126
|
+
|
|
127
|
+
- Read only the sources above. Never write a file. Never call mutating
|
|
128
|
+
`wiki.mjs` subcommands. Read-only ones allowed: `list-entities`,
|
|
129
|
+
`read-meta`, `read-edges`, `read-citations`, `resolve-alias`.
|
|
130
|
+
- Respond in `COMM_LANG`. Surface `DOC_LANG` next to write-skills.
|
|
131
|
+
- Cite every non-trivial claim in Mode C — if a doc does not say it, don't assert it.
|
|
132
|
+
- Never read `wiki/` page bodies in Mode C, never `raw/`. Reading another skill's `SKILL.md` is allowed only when the user's question is specifically about that skill's behavior.
|
|
133
|
+
- "Want me to run it now?" is a soft prompt — invoke only on affirmative reply.
|
|
134
|
+
- Match the user's tone (casual ↔ formal).
|
|
135
|
+
- All Bash reads happen before reasoning; never infer state from prior conversation.
|
|
136
|
+
- When recommending a verification skill (`/lumi-check`, `/lumi-verify`) right after a write skill (`/lumi-ingest`, `/lumi-edit`, `/lumi-research-*`, `/lumi-reading-*`), suggest the user run it in a fresh context window or via a subagent — the writing context biases the check.
|
package/src/templates/README.md
CHANGED
|
@@ -187,6 +187,7 @@ Skills live in `.agents/skills/` and are invoked via slash commands. Active inst
|
|
|
187
187
|
| `/lumi-check` | manual/weekly | Lint: broken links, orphans, missing reverse links |
|
|
188
188
|
| `/lumi-reset` | manual | Scoped destructive cleanup |
|
|
189
189
|
| `/lumi-verify` | manual | Check that wiki pages match the sources they cite; reports suspicious statements for the user to review; never auto-edits |
|
|
190
|
+
| `/lumi-help` | manual | Read workspace state and recommend the single next action; offer to run it. Argument `skills`/`catalog`/`list` (or a "what's available" question) switches to catalog mode and renders `_lumina/schema/lumi-help.csv` grouped by pack. Argument `explain <topic>` (or a how-it-works question) answers from the shipped schema docs with citations. |
|
|
190
191
|
|
|
191
192
|
{{#if pack_research}}### Pack: research
|
|
192
193
|
|
|
@@ -0,0 +1,220 @@
|
|
|
1
|
+
# /lumi-help — runbook
|
|
2
|
+
|
|
3
|
+
Procedural detail for the `lumi-help` skill: exact Bash commands, output
|
|
4
|
+
templates, multilingual keyword lists, and fallback codes. The skill reads this
|
|
5
|
+
file when it needs precision; `SKILL.md` is the contract.
|
|
6
|
+
|
|
7
|
+
## Step 0 · Read languages (always first)
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
sed -n 's/^communication_language:[[:space:]]*"\?\([^"]*\)"\?.*/\1/p' \
|
|
11
|
+
_lumina/config/lumina.config.yaml
|
|
12
|
+
sed -n 's/^document_output_language:[[:space:]]*"\?\([^"]*\)"\?.*/\1/p' \
|
|
13
|
+
_lumina/config/lumina.config.yaml
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
Bind two locals:
|
|
17
|
+
|
|
18
|
+
- `COMM_LANG` — every word back to user. If config missing or empty, fall back
|
|
19
|
+
to the language detected from the user's most recent message.
|
|
20
|
+
- `DOC_LANG` — surface this whenever recommending a write-skill (`/lumi-init`,
|
|
21
|
+
`/lumi-ingest`, `/lumi-edit`, `/lumi-research-*`, `/lumi-reading-*`).
|
|
22
|
+
|
|
23
|
+
Also snap tone from the input: casual → casual response register; formal →
|
|
24
|
+
templated form.
|
|
25
|
+
|
|
26
|
+
## Router · multilingual keyword lists
|
|
27
|
+
|
|
28
|
+
**Mode B — Catalog** (priority over C):
|
|
29
|
+
`skills`, `catalog`, `list`, `features`, `available`, `commands`,
|
|
30
|
+
`capabilities`, `tính năng`, `khả năng`, `lệnh`, `liệt kê`, `có gì`,
|
|
31
|
+
`có những gì`, `什么命令`, `有什么`, `命令`.
|
|
32
|
+
|
|
33
|
+
**Mode C — Framework Q&A** requires BOTH:
|
|
34
|
+
|
|
35
|
+
- *Question form*: `how does`, `how do`, `what is`, `what's the difference`,
|
|
36
|
+
`why does`, `why do`, `explain`, `tell me about`, `cách nào`, `như thế nào`,
|
|
37
|
+
`tại sao`, `giải thích`, `nghĩa là gì`, `怎么`, `如何`, `为什么`, `解释`, `什么是`.
|
|
38
|
+
- *Plus Lumina noun*: `lumi-`, `wiki`, `raw`, `foundations`, `outputs`,
|
|
39
|
+
`summary`, `concepts`, `sources`, `ingest`, `bidirectional`, `link`, `edge`,
|
|
40
|
+
`graph`, `frontmatter`, `slug`, `pack`, `lint`, `manifest`, `Lumina`.
|
|
41
|
+
|
|
42
|
+
If the question is about wiki *content* (not framework), bridge to `/lumi-ask`
|
|
43
|
+
instead of Mode C.
|
|
44
|
+
|
|
45
|
+
## Mode A · Bash reads
|
|
46
|
+
|
|
47
|
+
### Step a · Locate
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
cat _lumina/manifest.json 2>/dev/null || echo "__NO_MANIFEST__"
|
|
51
|
+
cat _lumina/schema/lumi-help.csv 2>/dev/null || echo "__NO_CATALOG__"
|
|
52
|
+
date +%Y-%m-%d 2>/dev/null || echo "__NO_DATE__"
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
`__NO_MANIFEST__` → recommend `/lumi-init`, stop.
|
|
56
|
+
`__NO_CATALOG__` → recommend re-running `npx lumina-wiki install`, stop.
|
|
57
|
+
|
|
58
|
+
CSV header: `id,menu,pack,phase,after,before,required,args,outputs,description`.
|
|
59
|
+
Multi-value fields (`after`, `before`, `outputs`) are semicolon-separated.
|
|
60
|
+
|
|
61
|
+
### Step b · Detect state
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
node _lumina/scripts/wiki.mjs list-entities 2>/dev/null || echo "__NO_GRAPH__"
|
|
65
|
+
grep -E "^## \[[0-9]{4}-[0-9]{2}-[0-9]{2}\] " "wiki/log.md" 2>/dev/null | tail -n 30
|
|
66
|
+
find "raw/" -maxdepth 1 -type f ! -name ".*" ! -name ".gitkeep" 2>/dev/null | sort
|
|
67
|
+
sed -n '/^<!-- lumina:index -->/,/^<!-- \/lumina:index -->/p' "wiki/index.md" 2>/dev/null | head -200
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Substitute `raw/`, `wiki/log.md`, `wiki/index.md` with paths from
|
|
71
|
+
`manifest.resolvedPaths` if relocated. `__NO_GRAPH__` → entity counts = 0.
|
|
72
|
+
|
|
73
|
+
The index extract (the `sed` read of `wiki/index.md` above) is supplementary context only — `list-entities` is the source
|
|
74
|
+
of truth for ingested-entity coverage. Detect raw/ orphans by diffing the
|
|
75
|
+
`find raw/` output against `list-entities`, not against the index block.
|
|
76
|
+
|
|
77
|
+
### Step c · Compute next (DAG over CSV)
|
|
78
|
+
|
|
79
|
+
For every row S:
|
|
80
|
+
|
|
81
|
+
1. Pack gating — already done by installer (rendered CSV is in-scope).
|
|
82
|
+
2. Completion — true if any `S.outputs` glob matches a live entity OR `S.id`
|
|
83
|
+
appears in parsed log entries.
|
|
84
|
+
3. Upstream — every id in `S.after` must be completed.
|
|
85
|
+
4. Downstream — for every other row T, if `T.before` contains `S.id`, S depends
|
|
86
|
+
on T: don't pick S before T has run.
|
|
87
|
+
5. Phase order: `1-bootstrap → 2-ingest → 3-query → anytime`.
|
|
88
|
+
|
|
89
|
+
Pick (first match wins):
|
|
90
|
+
|
|
91
|
+
1. Manifest missing → `lumi-init`. *Reason: workspace not initialized.*
|
|
92
|
+
2. Required skill, both gates satisfied, completed=false (phase order) →
|
|
93
|
+
that skill. *Reason: this required step is the next gate.*
|
|
94
|
+
3. raw/ orphans exist → `lumi-ingest`. *Reason: N file(s) in `raw/` not yet
|
|
95
|
+
ingested. Include filenames when N ≤ 3.*
|
|
96
|
+
4. Default → `lumi-ask`. *Reason: wiki is healthy — query the knowledge base.*
|
|
97
|
+
|
|
98
|
+
Idle hint (additive, never replaces primary): if last `## [YYYY-MM-DD]` log
|
|
99
|
+
heading is more than 30 days before today, append:
|
|
100
|
+
|
|
101
|
+
> Hint: No wiki activity in N days — `/lumi-check` runs a graph-health audit when you're ready.
|
|
102
|
+
|
|
103
|
+
### Step d · Ground
|
|
104
|
+
|
|
105
|
+
For S.id `lumi-X`, citation in priority order:
|
|
106
|
+
|
|
107
|
+
1. `node _lumina/scripts/wiki.mjs resolve-alias "X"` → if returns
|
|
108
|
+
`foundations/<slug>`, use it.
|
|
109
|
+
2. Else: `.agents/skills/lumi-X/SKILL.md` "<section heading>".
|
|
110
|
+
3. Else: `README.md` "Available Skills".
|
|
111
|
+
|
|
112
|
+
One `resolve-alias` call. No retry. Omit citation arrow when none found.
|
|
113
|
+
|
|
114
|
+
### Step e · Output template (formal register)
|
|
115
|
+
|
|
116
|
+
```
|
|
117
|
+
## Lumina — Next action
|
|
118
|
+
|
|
119
|
+
**`[<menu>]` /<skill-name>** — <display name>
|
|
120
|
+
[Reason — one sentence in COMM_LANG]
|
|
121
|
+
|
|
122
|
+
→ Run: `/<skill-name>` [<args>]
|
|
123
|
+
[if write-skill: "Wiki pages will be written in <DOC_LANG>."]
|
|
124
|
+
|
|
125
|
+
↳ <citation path> ← only if step d found one
|
|
126
|
+
[Idle-wiki hint, if applicable]
|
|
127
|
+
|
|
128
|
+
Want me to run it now? (yes / show me first / skip)
|
|
129
|
+
|
|
130
|
+
To see every available skill: `/lumi-help skills`
|
|
131
|
+
For how Lumina works: `/lumi-help explain <topic>`
|
|
132
|
+
```
|
|
133
|
+
|
|
134
|
+
Skip the "Want me to run it now?" prompt for case (4) — `/lumi-ask` is the
|
|
135
|
+
default-healthy state, no offer needed.
|
|
136
|
+
|
|
137
|
+
If user replies "yes" → invoke the recommended skill in this conversation.
|
|
138
|
+
Anything non-affirmative ("show me first", "skip", silence) → don't invoke.
|
|
139
|
+
|
|
140
|
+
### Step e · Output template (casual register)
|
|
141
|
+
|
|
142
|
+
When input is casual ("hi bro", "chào", "嗨", emojis, slang), drop the `##`
|
|
143
|
+
heading and the trailing two-line footer. Lead with a one-sentence answer in
|
|
144
|
+
`COMM_LANG`, then the bullet, then "Want me to run it now?". Keep the citation
|
|
145
|
+
arrow `↳` if step d found one.
|
|
146
|
+
|
|
147
|
+
## Mode B · Catalog rendering
|
|
148
|
+
|
|
149
|
+
```bash
|
|
150
|
+
cat _lumina/schema/lumi-help.csv 2>/dev/null || echo "__NO_CATALOG__"
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
Parse with the canonical header. Group rows by `pack` in order:
|
|
154
|
+
core → research → reading → other (alphabetical). Within each group, preserve
|
|
155
|
+
the row order in the CSV.
|
|
156
|
+
|
|
157
|
+
Pack labels (hardcoded):
|
|
158
|
+
|
|
159
|
+
- `core` → "Core (always installed)"
|
|
160
|
+
- `research` → "Research pack"
|
|
161
|
+
- `reading` → "Reading pack"
|
|
162
|
+
- other → pack name with first letter capitalized
|
|
163
|
+
|
|
164
|
+
Output:
|
|
165
|
+
|
|
166
|
+
```
|
|
167
|
+
## Lumina — Skills catalog
|
|
168
|
+
|
|
169
|
+
### <Pack label>
|
|
170
|
+
|
|
171
|
+
- `[<menu>]` `/<id>` <args if non-empty> — <description>
|
|
172
|
+
- `[<menu>]` `/<id>` <args if non-empty> — <description>
|
|
173
|
+
|
|
174
|
+
### <Pack label>
|
|
175
|
+
|
|
176
|
+
- ...
|
|
177
|
+
|
|
178
|
+
→ For a recommendation based on your current state: `/lumi-help`
|
|
179
|
+
→ For a how-it-works question about Lumina: `/lumi-help explain <topic>`
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
Render `args` directly after the id with a single space, e.g.
|
|
183
|
+
`` `/lumi-ingest` [path/to/file] — read a source ... ``.
|
|
184
|
+
|
|
185
|
+
`__NO_CATALOG__` → fall back to Mode A with a one-line note that the catalog
|
|
186
|
+
file is missing and re-running `npx lumina-wiki install` is needed. Never
|
|
187
|
+
invent a skill list from memory.
|
|
188
|
+
|
|
189
|
+
## Mode C · Output templates
|
|
190
|
+
|
|
191
|
+
### Formal register
|
|
192
|
+
|
|
193
|
+
```
|
|
194
|
+
## Lumina — <topic phrased as a noun>
|
|
195
|
+
|
|
196
|
+
<direct answer, 1–4 sentences in COMM_LANG>
|
|
197
|
+
|
|
198
|
+
**Source**: `<path>` § <section heading>
|
|
199
|
+
[Optional 2nd source line if claim spans multiple docs]
|
|
200
|
+
|
|
201
|
+
→ Try it: `/<skill-name>` [<args>] — <one-line nudge>
|
|
202
|
+
[if Try-it points at a write-skill: "Wiki pages will be written in <DOC_LANG>."]
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
The "Try it" line is optional — include only when an obviously-relevant next
|
|
206
|
+
skill exists (e.g. an "explain ingest" answer naturally points at
|
|
207
|
+
`/lumi-ingest`).
|
|
208
|
+
|
|
209
|
+
### No-doc fallback
|
|
210
|
+
|
|
211
|
+
```
|
|
212
|
+
## Lumina — <topic>
|
|
213
|
+
|
|
214
|
+
The local docs don't cover this directly. The closest reference is `<path>`. You can also open an issue at the lumina-wiki repository if this is a real gap.
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
### Casual register
|
|
218
|
+
|
|
219
|
+
Same content, but drop the `## Lumina — <topic>` heading and lead with the
|
|
220
|
+
answer directly. Keep the `**Source**:` line — citations are non-negotiable.
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
id,menu,pack,phase,after,before,required,args,outputs,description
|
|
2
|
+
lumi-init,IN,core,1-bootstrap,,lumi-ingest,true,,_lumina/manifest.json;wiki/index.md,bootstrap a wiki from existing raw/ content
|
|
3
|
+
lumi-ingest,IG,core,2-ingest,lumi-init,lumi-ask,true,[path/to/file],wiki/sources/**;wiki/log.md,"read a source and write a wiki page (drafts shown for review, then continues unless judgment is needed)"
|
|
4
|
+
lumi-ask,AS,core,3-query,lumi-ingest,,false,[your question],wiki/outputs/**;wiki/summary/**,"query the wiki, synthesize an answer, optionally file a page"
|
|
5
|
+
lumi-edit,ED,core,anytime,,,false,[path/to/wiki/page],,"add, remove, or revise wiki content"
|
|
6
|
+
lumi-check,CH,core,anytime,,,false,,,"lint — broken links, orphans, missing reverse links"
|
|
7
|
+
lumi-reset,RS,core,anytime,,,false,,,scoped destructive cleanup (never touches raw/)
|
|
8
|
+
lumi-verify,VR,core,anytime,,,false,,,flag wiki claims that diverge from cited sources (never auto-edits)
|
|
9
|
+
lumi-help,HP,core,anytime,,,false,[skills | explain <topic>],,"orient yourself; recommend the next action; answer questions about Lumina itself"
|
|
10
|
+
{{#if pack_research}}
|
|
11
|
+
lumi-research-prefill,RP,research,1-bootstrap,lumi-init,lumi-ingest,false,,wiki/foundations/**,seed foundations/ to prevent concept duplication
|
|
12
|
+
lumi-research-discover,RD,research,2-ingest,,lumi-ingest,false,,raw/discovered/**,ranked candidate shortlist of new sources
|
|
13
|
+
lumi-research-watchlist,RW,research,anytime,,lumi-research-discover,false,,_lumina/config/watchlist.yml,choose topics for scheduled discovery
|
|
14
|
+
lumi-research-survey,RV,research,3-query,lumi-ingest,,false,,wiki/summary/**,narrative synthesis across a topic's sources
|
|
15
|
+
lumi-research-topic,RT,research,3-query,lumi-ingest,,false,,wiki/topics/**,cluster concepts and sources into a thematic topic page
|
|
16
|
+
lumi-research-setup,RSS,research,anytime,,,false,,.env,interactive API key configuration
|
|
17
|
+
{{/if}}
|
|
18
|
+
{{#if pack_reading}}
|
|
19
|
+
lumi-reading-chapter-ingest,RCI,reading,2-ingest,lumi-init,,false,[chapter],wiki/chapters/**;wiki/characters/**;wiki/themes/**;wiki/plot/**,file a chapter; update characters/themes/plot pages
|
|
20
|
+
lumi-reading-character-track,RCT,reading,3-query,lumi-reading-chapter-ingest,,false,,wiki/characters/**,build or refresh a character profile across chapters
|
|
21
|
+
lumi-reading-theme-map,RTM,reading,3-query,lumi-reading-chapter-ingest,,false,,wiki/themes/**,trace a theme across chapters with citations
|
|
22
|
+
lumi-reading-plot-recap,RPR,reading,3-query,lumi-reading-chapter-ingest,,false,,wiki/plot/**,spoiler-bounded plot summary up to a chapter
|
|
23
|
+
{{/if}}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
"""
|
|
2
2
|
prepare_source.py — Normalize a local file into an ingest-ready package.
|
|
3
3
|
|
|
4
|
-
Accepts one local file (PDF, .tex, .html, .md
|
|
5
|
-
output package at raw/tmp/<slug>/ containing:
|
|
4
|
+
Accepts one local file (PDF, .tex, .html, .md, .txt, .docx, .rtf, .epub)
|
|
5
|
+
and produces a deterministic output package at raw/tmp/<slug>/ containing:
|
|
6
6
|
source.<ext> — original file (hard-link or copy)
|
|
7
7
|
meta.json — extracted metadata (title, type, sha256, ext, slug, size)
|
|
8
8
|
text.txt — extracted plain text
|
|
@@ -61,10 +61,19 @@ from typing import Any
|
|
|
61
61
|
# Constants
|
|
62
62
|
# ---------------------------------------------------------------------------
|
|
63
63
|
|
|
64
|
-
SUPPORTED_EXTENSIONS = {".pdf", ".tex", ".html", ".htm", ".md", ".txt"}
|
|
64
|
+
SUPPORTED_EXTENSIONS = {".pdf", ".tex", ".html", ".htm", ".md", ".txt", ".docx", ".rtf", ".epub"}
|
|
65
65
|
# Slug is the first 16 hex chars of the file's SHA256 — enough uniqueness.
|
|
66
66
|
SLUG_LENGTH = 16
|
|
67
67
|
|
|
68
|
+
# Zip-bomb defense thresholds for ZIP-of-XML formats (.docx, .epub).
|
|
69
|
+
# Raw caps reject oversized files outright; decompressed caps reject ratio
|
|
70
|
+
# attacks. Pre-flight only — sums the sizes declared in the archive's headers; does not stream the archive.
|
|
71
|
+
MAX_DOCX_BYTES = 50_000_000 # 50 MB raw .docx
|
|
72
|
+
MAX_DOCX_EXTRACTED_BYTES = 200_000_000 # 200 MB total uncompressed
|
|
73
|
+
MAX_EPUB_BYTES = 100_000_000 # 100 MB raw .epub (long novels + images)
|
|
74
|
+
MAX_EPUB_EXTRACTED_BYTES = 500_000_000 # 500 MB total uncompressed
|
|
75
|
+
EPUB_SIZE_HINT_BYTES = 1_000_000 # extracted-text threshold for stderr note
|
|
76
|
+
|
|
68
77
|
|
|
69
78
|
# ---------------------------------------------------------------------------
|
|
70
79
|
# Helpers
|
|
@@ -243,6 +252,136 @@ def _extract_html_text(path: Path) -> str:
|
|
|
243
252
|
return extractor.get_text()
|
|
244
253
|
|
|
245
254
|
|
|
255
|
+
def _check_zip_safety(path: Path, max_bytes: int, max_extracted: int) -> None:
|
|
256
|
+
"""Pre-flight zip-bomb defense: cap raw size + sum of uncompressed sizes."""
|
|
257
|
+
import zipfile
|
|
258
|
+
|
|
259
|
+
raw_size = path.stat().st_size
|
|
260
|
+
if raw_size > max_bytes:
|
|
261
|
+
raise ValueError(
|
|
262
|
+
f"File too large for safe extraction: {raw_size} bytes "
|
|
263
|
+
f"> {max_bytes}. Refusing to ingest."
|
|
264
|
+
)
|
|
265
|
+
try:
|
|
266
|
+
with zipfile.ZipFile(path) as zf:
|
|
267
|
+
total = sum(info.file_size for info in zf.infolist())
|
|
268
|
+
if total > max_extracted:
|
|
269
|
+
raise ValueError(
|
|
270
|
+
f"Decompressed size {total} bytes > {max_extracted}. "
|
|
271
|
+
f"Suspected zip-bomb; refusing to ingest."
|
|
272
|
+
)
|
|
273
|
+
except zipfile.BadZipFile as exc:
|
|
274
|
+
raise ValueError(f"Not a valid zip-based document: {exc}") from exc
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
def _extract_docx_text(path: Path) -> str:
    """Return the text of a .docx file, one line per body paragraph.

    Only ``doc.paragraphs`` is read; text held in shapes/textboxes is not
    included. The archive passes the zip-safety pre-flight before it is
    opened.

    Raises:
        ValueError: If python-docx/defusedxml are missing, the safety check
            rejects the file, or the document cannot be parsed.
    """
    try:
        import defusedxml  # type: ignore[import-untyped]
        defusedxml.defuse_stdlib()
        from docx import Document  # type: ignore[import-untyped]
    except ImportError as import_err:
        install_hint = (
            "python-docx and defusedxml required for .docx ingestion. "
            "Install: pip install python-docx defusedxml. "
            f"(Underlying: {import_err})"
        )
        raise ValueError(install_hint) from import_err

    _check_zip_safety(path, MAX_DOCX_BYTES, MAX_DOCX_EXTRACTED_BYTES)

    try:
        document = Document(str(path))
    except Exception as parse_err:  # noqa: BLE001 — python-docx raises wide; surface as ValueError
        raise ValueError(f"Cannot parse .docx (corrupt or DRM): {parse_err}") from parse_err

    paragraph_texts = [paragraph.text for paragraph in document.paragraphs]
    return "\n".join(paragraph_texts)
|
|
298
|
+
|
|
299
|
+
|
|
300
|
+
def _extract_rtf_text(path: Path) -> str:
|
|
301
|
+
"""Extract plain text from .rtf using striprtf."""
|
|
302
|
+
try:
|
|
303
|
+
from striprtf.striprtf import rtf_to_text # type: ignore[import-untyped]
|
|
304
|
+
except ImportError as exc:
|
|
305
|
+
raise ValueError(
|
|
306
|
+
"striprtf required for .rtf ingestion. "
|
|
307
|
+
"Install: pip install striprtf. "
|
|
308
|
+
f"(Underlying: {exc})"
|
|
309
|
+
) from exc
|
|
310
|
+
try:
|
|
311
|
+
src = path.read_text(encoding="utf-8", errors="replace")
|
|
312
|
+
except OSError as exc:
|
|
313
|
+
raise ValueError(f"Cannot read .rtf file: {exc}") from exc
|
|
314
|
+
try:
|
|
315
|
+
return rtf_to_text(src)
|
|
316
|
+
except Exception as exc: # noqa: BLE001 — match docx/epub: surface as ValueError exit 2
|
|
317
|
+
raise ValueError(f"Cannot parse .rtf: {exc}") from exc
|
|
318
|
+
|
|
319
|
+
|
|
320
|
+
def _epub_is_drm_protected(path: Path) -> bool:
|
|
321
|
+
"""Detect DRM by presence of META-INF/encryption.xml in the archive."""
|
|
322
|
+
import zipfile
|
|
323
|
+
|
|
324
|
+
try:
|
|
325
|
+
with zipfile.ZipFile(path) as zf:
|
|
326
|
+
return "META-INF/encryption.xml" in zf.namelist()
|
|
327
|
+
except zipfile.BadZipFile:
|
|
328
|
+
return False
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
def _extract_epub_text(path: Path) -> str:
    """Extract flat text from .epub by walking the spine. v1 = flat text only.

    The archive passes the zip-safety pre-flight and the DRM check before it
    is parsed. Each spine entry that resolves to a document item is rendered
    to text with BeautifulSoup; non-empty results are joined with blank
    lines. When the extracted text exceeds ``EPUB_SIZE_HINT_BYTES`` a note is
    written through ``_err``.

    Raises:
        ValueError: If ebooklib/beautifulsoup4/defusedxml are missing, the
            safety check rejects the file, the book is DRM-protected, or the
            EPUB cannot be parsed.
    """
    try:
        import warnings
        import defusedxml  # type: ignore[import-untyped]
        defusedxml.defuse_stdlib()
        import ebooklib  # type: ignore[import-untyped]
        from ebooklib import epub  # type: ignore[import-untyped]
        from bs4 import BeautifulSoup  # type: ignore[import-untyped]
    except ImportError as exc:
        raise ValueError(
            "ebooklib, beautifulsoup4, and defusedxml required for .epub ingestion. "
            "Install: pip install ebooklib beautifulsoup4 defusedxml. "
            f"(Underlying: {exc})"
        ) from exc

    _check_zip_safety(path, MAX_EPUB_BYTES, MAX_EPUB_EXTRACTED_BYTES)

    if _epub_is_drm_protected(path):
        raise ValueError(
            "EPUB is DRM-protected (META-INF/encryption.xml present). "
            "DRM removal is the user's responsibility; Lumina does not strip DRM."
        )

    try:
        book = epub.read_epub(str(path))
    except Exception as exc:  # noqa: BLE001 — ebooklib raises wide on bad XML
        raise ValueError(f"Cannot parse .epub (corrupt or unsupported): {exc}") from exc

    parts: list[str] = []
    with warnings.catch_warnings():
        # Silence bs4 parser warnings for the duration of the walk only.
        warnings.filterwarnings("ignore", module="bs4")
        for spine_entry in book.spine:
            # Best-effort: a spine entry that cannot be resolved is skipped
            # rather than failing the whole book.
            try:
                doc = book.get_item_with_id(spine_entry[0])
            except Exception:  # noqa: BLE001
                continue
            if doc is None or doc.get_type() != ebooklib.ITEM_DOCUMENT:
                continue
            soup = BeautifulSoup(doc.get_content(), "html.parser")
            text = soup.get_text(separator="\n").strip()
            if text:
                parts.append(text)

    full = "\n\n".join(parts)
    size_bytes = len(full.encode("utf-8"))
    if size_bytes > EPUB_SIZE_HINT_BYTES:
        # The skill is named as it appears in the skills catalog
        # (`lumi-reading-chapter-ingest`), so the hint can be typed as shown.
        _err(
            f"Note: extracted EPUB text {size_bytes:,} bytes (> 1 MB). "
            "Future: /lumi-reading-chapter-ingest may help once EPUB support lands."
        )
    return full
|
|
383
|
+
|
|
384
|
+
|
|
246
385
|
def _extract_text(path: Path) -> str:
|
|
247
386
|
"""Dispatch text extraction by file extension."""
|
|
248
387
|
ext = path.suffix.lower()
|
|
@@ -252,6 +391,12 @@ def _extract_text(path: Path) -> str:
|
|
|
252
391
|
return _extract_tex_text(path)
|
|
253
392
|
if ext in (".html", ".htm"):
|
|
254
393
|
return _extract_html_text(path)
|
|
394
|
+
if ext == ".docx":
|
|
395
|
+
return _extract_docx_text(path)
|
|
396
|
+
if ext == ".rtf":
|
|
397
|
+
return _extract_rtf_text(path)
|
|
398
|
+
if ext == ".epub":
|
|
399
|
+
return _extract_epub_text(path)
|
|
255
400
|
# .md, .txt, and other text files — read as UTF-8
|
|
256
401
|
try:
|
|
257
402
|
return path.read_text(encoding="utf-8", errors="replace")
|
|
@@ -366,6 +511,9 @@ def _guess_type(ext: str) -> str:
|
|
|
366
511
|
".htm": "webpage",
|
|
367
512
|
".md": "markdown",
|
|
368
513
|
".txt": "text",
|
|
514
|
+
".docx": "docx",
|
|
515
|
+
".rtf": "rtf",
|
|
516
|
+
".epub": "epub",
|
|
369
517
|
}.get(ext, "unknown")
|
|
370
518
|
|
|
371
519
|
|
|
@@ -377,8 +525,9 @@ def main(argv: list[str] | None = None) -> None:
|
|
|
377
525
|
parser = argparse.ArgumentParser(
|
|
378
526
|
prog="prepare_source.py",
|
|
379
527
|
description=(
|
|
380
|
-
"Normalize a local file (PDF, .tex, .html, .md
|
|
381
|
-
"package under raw/tmp/<slug>/.
|
|
528
|
+
"Normalize a local file (PDF, .tex, .html, .md, .txt, .docx, .rtf, "
|
|
529
|
+
".epub) into an ingest-ready package under raw/tmp/<slug>/. "
|
|
530
|
+
"Deterministic: same input -> same output."
|
|
382
531
|
),
|
|
383
532
|
)
|
|
384
533
|
parser.add_argument("file", help="Path to the source file to prepare.")
|
|
@@ -13,6 +13,14 @@ pypdf>=4.0.0
|
|
|
13
13
|
# fetch_arxiv.py, fetch_s2.py, fetch_wikipedia.py, fetch_deepxiv.py, discover.py
|
|
14
14
|
requests>=2.31.0
|
|
15
15
|
|
|
16
|
+
# ─── Local text-document ingestion (research pack) ──────────────────────────
|
|
17
|
+
# Needed by the prepare_source.py extractors for .docx, .rtf, and .epub files
|
|
18
|
+
python-docx>=1.1.0
|
|
19
|
+
striprtf>=0.0.26
|
|
20
|
+
ebooklib>=0.18
|
|
21
|
+
beautifulsoup4>=4.12.0
|
|
22
|
+
defusedxml>=0.7.1
|
|
23
|
+
|
|
16
24
|
# ─── Development ─────────────────────────────────────────────────────────────
|
|
17
25
|
pytest>=7.0.0
|
|
18
26
|
pytest-cov>=4.0.0
|