oh-my-llmwikimode 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +494 -0
- package/bin/llmwiki.js +1493 -0
- package/docs/INSTALLATION.md +228 -0
- package/docs/SCOPE_LOCK.md +79 -0
- package/docs/STAGE1_GUIDE.md +265 -0
- package/docs/STAGE2_AGENT_TEAM_GUIDE.md +141 -0
- package/docs/STAGE3_CONVERSATIONAL_GROWTH_GUIDE.md +50 -0
- package/docs/TEST_WORKSHEET.md +120 -0
- package/docs/github-private-bootstrap.md +53 -0
- package/docs/release.md +79 -0
- package/docs/stage4-slice1-manual-test.md +259 -0
- package/docs/stage4-slice1-user-guide.md +269 -0
- package/docs/user-guide-ko.md +452 -0
- package/package.json +76 -0
- package/scripts/install-llmwiki.ps1 +229 -0
- package/src/config.js +74 -0
- package/src/curator/browser-data.js +134 -0
- package/src/curator/queue.js +324 -0
- package/src/curator/schema.js +237 -0
- package/src/curator/scoring.js +83 -0
- package/src/hooks.js +199 -0
- package/src/librarian/schema.js +218 -0
- package/src/librarian/weekly-digest.js +478 -0
- package/src/security.js +127 -0
- package/src/server.js +860 -0
- package/src/stage4/graph-reasoning/analyzer.js +255 -0
- package/src/stage4/graph-reasoning/browser-data.js +130 -0
- package/src/stage4/graph-reasoning/index.js +35 -0
- package/src/stage4/graph-reasoning/loader.js +122 -0
- package/src/stage4/graph-reasoning/queue.js +154 -0
- package/src/stage4/graph-reasoning/schema.js +190 -0
- package/src/team/browser-data.js +142 -0
- package/src/team/capabilities.js +79 -0
- package/src/team/dispatch.js +108 -0
- package/src/team/queue.js +290 -0
- package/src/team/schema.js +225 -0
- package/src/team/shared-memory.js +183 -0
- package/src/todo/browser-data.js +71 -0
- package/src/todo/queue.js +159 -0
- package/src/todo/schema.js +90 -0
- package/src/utils/embedding-model.js +111 -0
- package/src/wiki/alias-suggestions.js +180 -0
- package/src/wiki/browser-data.js +284 -0
- package/src/wiki/doctor.js +218 -0
- package/src/wiki/entry-normalizer.js +139 -0
- package/src/wiki/ingest.js +443 -0
- package/src/wiki/lesson-proposal-analyzer.js +463 -0
- package/src/wiki/lesson-proposal-manager.js +331 -0
- package/src/wiki/lesson-template.js +182 -0
- package/src/wiki/lint.js +294 -0
- package/src/wiki/notebooklm-adapter.js +264 -0
- package/src/wiki/query.js +304 -0
- package/src/wiki/raw-manager.js +400 -0
- package/src/wiki/search-feedback.js +211 -0
- package/src/wiki/semantic-index.js +333 -0
- package/src/wiki/semantic-search.js +170 -0
- package/src/wiki/source-ledger.js +370 -0
- package/src/wiki/store.js +1329 -0
- package/src/wiki/usage-events.js +144 -0
package/src/wiki/lint.js
ADDED
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Runtime lint engine for oh-my-llmwikimode wiki entries.
|
|
3
|
+
*
|
|
4
|
+
* This module is intentionally read-only: it reports validation findings but
|
|
5
|
+
* never rewrites, merges, deletes, or indexes wiki files.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import crypto from "node:crypto";
|
|
9
|
+
import fs from "node:fs";
|
|
10
|
+
import path from "node:path";
|
|
11
|
+
import { containsPromptInjection, redactSecrets } from "../security.js";
|
|
12
|
+
import {
|
|
13
|
+
parseFrontmatter,
|
|
14
|
+
VALID_CONFIDENCES,
|
|
15
|
+
VALID_SOURCES,
|
|
16
|
+
VALID_STATUSES,
|
|
17
|
+
} from "./store.js";
|
|
18
|
+
|
|
19
|
+
/**
 * Wiki sub-directories that are scanned for entries. `segments` is the path
 * below the wiki root; `category` is the label attached to each scanned entry.
 */
const ENTRY_ROOTS = [
  { category: "inbox", segments: ["inbox"] },
  { category: "problems", segments: ["problems"] },
  { category: "lessons", segments: ["editorial", "lessons"] },
];

/** Frontmatter keys that every entry must provide with a non-empty value. */
const REQUIRED_FIELDS = ["title", "status", "source"];

/** Lower-cased status values that cause an entry to be reported with `excluded: true`. */
const EXCLUDED_STATUSES = new Set(["rejected", "superseded", "private", "needs-clarification"]);
|
|
27
|
+
|
|
28
|
+
/**
 * Turn any value into a trimmed, single-line string.
 *
 * `null`/`undefined` become the empty string; everything else is stringified.
 * Line breaks are replaced by spaces and runs of whitespace collapse to one space.
 *
 * @param {unknown} value
 * @returns {string}
 */
function normalizeScalar(value) {
  const text = value == null ? "" : String(value);
  const singleLine = text.replace(/\r?\n/g, " ");
  return singleLine.replace(/\s+/g, " ").trim();
}
|
|
34
|
+
|
|
35
|
+
/**
 * Produce display text for a frontmatter value.
 *
 * Arrays are flattened recursively: each element is normalized, empty results
 * are dropped, and the rest are joined with single spaces. Any other value is
 * handed to `normalizeScalar`.
 *
 * @param {unknown} value
 * @returns {string}
 */
function normalizeDisplayText(value) {
  if (!Array.isArray(value)) {
    return normalizeScalar(value);
  }

  const parts = [];
  for (const item of value) {
    const text = normalizeDisplayText(item);
    if (text) parts.push(text);
  }
  return parts.join(" ");
}
|
|
41
|
+
|
|
42
|
+
/**
 * Build the comparison key used to detect duplicate titles.
 *
 * The display text is Unicode-normalized (NFKC) before lower-casing so that
 * visually identical titles compare equal regardless of how they were encoded
 * (for example precomposed vs. decomposed accents or Hangul, or full-width
 * letters). Without this step, combining marks of a decomposed title are
 * stripped by the filter below while the precomposed letter is kept, and the
 * two spellings end up with different keys.
 *
 * Everything that is not a Unicode letter or number is then removed.
 *
 * @param {unknown} value - Raw frontmatter title (string, array, or anything else).
 * @returns {string} Key string; empty when the title has no letters or digits.
 */
function normalizeTitleKey(value) {
  return normalizeDisplayText(value)
    .normalize("NFKC")
    .toLowerCase()
    .replace(/[^\p{L}\p{N}]/gu, "");
}
|
|
45
|
+
|
|
46
|
+
/**
 * Short fingerprint of some content: the first 16 hex characters of its
 * SHA-256 digest.
 *
 * @param {string|Buffer} content
 * @returns {string} 16 lower-case hex characters.
 */
function contentHash(content) {
  const hasher = crypto.createHash("sha256");
  hasher.update(content);
  return hasher.digest("hex").slice(0, 16);
}
|
|
49
|
+
|
|
50
|
+
/**
 * Express `fullPath` relative to `wikiRoot`, always using forward slashes so
 * reported paths look the same on every platform.
 *
 * @param {string} wikiRoot
 * @param {string} fullPath
 * @returns {string}
 */
function toRelativePath(wikiRoot, fullPath) {
  const relative = path.relative(wikiRoot, fullPath);
  return relative.replace(/\\/g, "/");
}
|
|
53
|
+
|
|
54
|
+
/**
 * Recursively list every `.md` file below `rootDir`.
 *
 * Directory entries are visited in `localeCompare` name order, descending into
 * sub-directories as they are encountered, so the result order is stable for a
 * given tree. Only entries reported as regular files or directories are
 * considered.
 *
 * @param {string} rootDir - Directory to scan.
 * @returns {string[]} Full paths of the markdown files found; empty when
 *   `rootDir` is missing or is not a directory.
 * @throws If a directory inside the tree cannot be read.
 */
function collectMarkdownFiles(rootDir) {
  // A bare existence check is not enough: a regular file at this path would
  // pass it and then make readdirSync throw ENOTDIR. Any stat failure is
  // treated as "nothing to scan", as a failed existence check was before.
  let rootIsDirectory = false;
  try {
    rootIsDirectory = fs.statSync(rootDir).isDirectory();
  } catch {
    rootIsDirectory = false;
  }
  if (!rootIsDirectory) return [];

  const files = [];

  function walk(dir) {
    const entries = fs
      .readdirSync(dir, { withFileTypes: true })
      .sort((a, b) => a.name.localeCompare(b.name));

    for (const entry of entries) {
      const fullPath = path.join(dir, entry.name);
      if (entry.isDirectory()) {
        walk(fullPath);
      } else if (entry.isFile() && entry.name.endsWith(".md")) {
        files.push(fullPath);
      }
    }
  }

  walk(rootDir);
  return files;
}
|
|
73
|
+
|
|
74
|
+
/**
 * Build a lint finding record.
 *
 * @param {string} pathName - Wiki-relative path of the entry the finding is about.
 * @param {string} code - Machine-readable finding code.
 * @param {string} field - Frontmatter field (or "content") the finding refers to.
 * @param {string} message - Human-readable explanation.
 * @param {unknown} [value] - Offending value; the `value` key is omitted when this is `undefined`.
 * @returns {{path: string, code: string, field: string, message: string, value?: unknown}}
 */
function createFinding(pathName, code, field, message, value) {
  const base = { path: pathName, code, field, message };
  return value === undefined ? base : { ...base, value };
}
|
|
79
|
+
|
|
80
|
+
/**
 * Decide whether a required frontmatter value counts as absent.
 *
 * Missing means: `null`/`undefined`, a string that is empty after trimming, or
 * an array that is empty or whose display text is empty. Any other value
 * (numbers, booleans, objects) counts as present.
 *
 * @param {unknown} value
 * @returns {boolean}
 */
function isMissingRequiredValue(value) {
  if (value == null) return true;
  if (Array.isArray(value)) {
    return value.length === 0 || normalizeDisplayText(value) === "";
  }
  return typeof value === "string" && value.trim() === "";
}
|
|
86
|
+
|
|
87
|
+
/**
 * Append a `missing_required_field` error for each entry of `REQUIRED_FIELDS`
 * whose frontmatter value is missing.
 *
 * @param {object} frontmatter - Parsed frontmatter of the entry.
 * @param {string} relPath - Wiki-relative path used in the finding.
 * @param {object[]} errors - Error list that findings are pushed onto.
 * @returns {void}
 */
function validateRequiredFields(frontmatter, relPath, errors) {
  REQUIRED_FIELDS
    .filter((field) => isMissingRequiredValue(frontmatter[field]))
    .forEach((field) => {
      errors.push(createFinding(relPath, "missing_required_field", field, `${field} is required`));
    });
}
|
|
94
|
+
|
|
95
|
+
/**
 * Report a title that is present but is not a string.
 *
 * A missing title is not reported here; an `invalid_field_type` error is only
 * pushed for a non-missing, non-string value.
 *
 * @param {object} frontmatter - Parsed frontmatter of the entry.
 * @param {string} relPath - Wiki-relative path used in the finding.
 * @param {object[]} errors - Error list that findings are pushed onto.
 * @returns {void}
 */
function validateTitle(frontmatter, relPath, errors) {
  const { title } = frontmatter;
  if (typeof title === "string" || isMissingRequiredValue(title)) {
    return;
  }
  errors.push(createFinding(relPath, "invalid_field_type", "title", "title must be a string", title));
}
|
|
101
|
+
|
|
102
|
+
/**
 * Check that an optional frontmatter field holds one of a fixed set of values.
 *
 * Nothing is reported when the field is `undefined`, `null`, or the empty
 * string. A non-string value yields `invalid_field_type`; a string whose
 * normalized form is not in `validValues` yields `invalid_enum_value`. The
 * finding always carries the original, un-normalized value.
 *
 * @param {object} frontmatter - Parsed frontmatter of the entry.
 * @param {string} relPath - Wiki-relative path used in the finding.
 * @param {object[]} errors - Error list that findings are pushed onto.
 * @param {string} field - Frontmatter key to check.
 * @param {string[]} validValues - Accepted values, in normalized form.
 * @param {(value: string) => string} normalizeValue - Maps the raw string to the form compared against `validValues`.
 * @returns {void}
 */
function validateEnum(frontmatter, relPath, errors, field, validValues, normalizeValue) {
  const raw = frontmatter[field];
  const isBlank = raw === undefined || raw === null || raw === "";
  if (isBlank) return;

  if (typeof raw !== "string") {
    errors.push(createFinding(relPath, "invalid_field_type", field, `${field} must be a string`, raw));
    return;
  }

  if (validValues.includes(normalizeValue(raw))) return;

  const allowed = validValues.join(", ");
  errors.push(createFinding(relPath, "invalid_enum_value", field, `${field} must be one of: ${allowed}`, raw));
}
|
|
124
|
+
|
|
125
|
+
/**
 * Decide whether a path string is unsafe to treat as wiki-relative.
 *
 * A path is unsafe when it is absolute under either POSIX or Windows rules, or
 * when any of its segments is `..`. Both rule sets are checked explicitly
 * because the platform-default check on a POSIX host does not recognise
 * Windows drive or UNC paths (`C:\...`, `C:/...`, `\\server\share`), and wiki
 * files may be authored on one platform and linted on another.
 *
 * @param {unknown} value - Candidate path.
 * @returns {boolean} `true` when unsafe; `false` for safe paths and for
 *   non-string or blank input (those are reported by other checks).
 */
function isUnsafeRelativePath(value) {
  if (typeof value !== "string" || value.trim() === "") return false;

  // Treat backslashes as separators so traversal cannot hide behind them.
  const normalizedPath = value.replace(/\\/g, "/");
  if (path.posix.isAbsolute(normalizedPath) || path.win32.isAbsolute(value)) return true;

  return normalizedPath.split("/").includes("..");
}
|
|
131
|
+
|
|
132
|
+
/**
 * Validate the optional `raw_path` frontmatter field.
 *
 * Nothing is reported when the field is `undefined`. A non-string value
 * (including `null`) yields `invalid_field_type`; a string flagged by
 * `isUnsafeRelativePath` yields `unsafe_raw_path`.
 *
 * @param {object} frontmatter - Parsed frontmatter of the entry.
 * @param {string} relPath - Wiki-relative path used in the finding.
 * @param {object[]} errors - Error list that findings are pushed onto.
 * @returns {void}
 */
function validateRawPath(frontmatter, relPath, errors) {
  const { raw_path: rawPath } = frontmatter;
  if (rawPath === undefined) return;

  const isString = typeof rawPath === "string";
  if (isString && !isUnsafeRelativePath(rawPath)) return;

  const finding = isString
    ? createFinding(
        relPath,
        "unsafe_raw_path",
        "raw_path",
        "raw_path must be relative and must not contain path traversal",
        rawPath
      )
    : createFinding(relPath, "invalid_field_type", "raw_path", "raw_path must be a relative string", rawPath);
  errors.push(finding);
}
|
|
152
|
+
|
|
153
|
+
/**
 * Scan the raw entry text for privacy and safety problems.
 *
 * Pushes `secret_detected` when `redactSecrets` would change the text, and
 * `prompt_injection_detected` when `containsPromptInjection` reports a match.
 * Both are warnings; the text itself is never modified.
 *
 * @param {string} content - Full file content, including frontmatter.
 * @param {string} relPath - Wiki-relative path used in the finding.
 * @param {object[]} warnings - Warning list that findings are pushed onto.
 * @returns {void}
 */
function validatePrivacy(content, relPath, warnings) {
  const redacted = redactSecrets(content);
  if (redacted !== content) {
    warnings.push(
      createFinding(relPath, "secret_detected", "content", "entry contains text that matches a secret pattern")
    );
  }

  const looksLikeInjection = containsPromptInjection(content);
  if (looksLikeInjection) {
    warnings.push(
      createFinding(relPath, "prompt_injection_detected", "content", "entry contains a prompt-injection-like pattern")
    );
  }
}
|
|
162
|
+
|
|
163
|
+
/**
 * Record duplicate groups and emit one warning per group.
 *
 * Groups with fewer than two entries are ignored. Each remaining group is
 * pushed onto `duplicates` unchanged, and a warning describing it is pushed
 * onto `warnings`. Groups whose `type` is "title" produce `duplicate_title`;
 * every other type produces `duplicate_content_hash`.
 *
 * @param {Iterable<{type: string, entries: string[], normalized_title?: string, hash?: string}>} groups
 * @param {object[]} warnings - Warning list to append to.
 * @param {object[]} duplicates - Duplicate-group list to append to.
 * @returns {void}
 */
function addDuplicateWarnings(groups, warnings, duplicates) {
  for (const group of groups) {
    if (group.entries.length >= 2) {
      const isTitleGroup = group.type === "title";

      duplicates.push(group);

      let code;
      let message;
      if (isTitleGroup) {
        code = "duplicate_title";
        message = `Duplicate normalized title: ${group.normalized_title}`;
      } else {
        code = "duplicate_content_hash";
        message = `Duplicate content hash: ${group.hash}`;
      }

      warnings.push({ code, type: group.type, message, entries: group.entries });
    }
  }
}
|
|
178
|
+
|
|
179
|
+
/**
 * Turn the title and content-hash indexes into duplicate groups.
 *
 * Only keys shared by more than one entry become groups. Title groups are
 * reported first, then content-hash groups; for each, a warning is appended
 * via `addDuplicateWarnings`.
 *
 * @param {Map<string, string[]>} titleMap - Normalized title key -> entry paths.
 * @param {Map<string, string[]>} hashMap - Content hash -> entry paths.
 * @param {object[]} warnings - Warning list to append to.
 * @returns {object[]} The duplicate groups, title groups before content groups.
 */
function buildDuplicateFindings(titleMap, hashMap, warnings) {
  const titleGroups = [];
  for (const [normalizedTitle, entries] of titleMap.entries()) {
    if (entries.length > 1) {
      titleGroups.push({ type: "title", normalized_title: normalizedTitle, entries });
    }
  }

  const hashGroups = [];
  for (const [hash, entries] of hashMap.entries()) {
    if (entries.length > 1) {
      hashGroups.push({ type: "content", hash, entries });
    }
  }

  const duplicates = [];
  addDuplicateWarnings(titleGroups, warnings, duplicates);
  addDuplicateWarnings(hashGroups, warnings, duplicates);
  return duplicates;
}
|
|
200
|
+
|
|
201
|
+
/**
 * Read one entry file, run every per-entry check, and index it for duplicate
 * detection.
 *
 * Validation findings are pushed onto `errors` / `warnings`. The entry's path
 * is registered in `titleMap` under its normalized title key (when that key is
 * non-empty) and in `hashMap` under the hash of the full file content.
 *
 * @param {object} args
 * @param {string} args.wikiRoot - Wiki root used to compute the relative path.
 * @param {string} args.fullPath - Path of the markdown file to read.
 * @param {string} args.category - Category label copied into the summary.
 * @param {object[]} args.errors - Error list to append to.
 * @param {object[]} args.warnings - Warning list to append to.
 * @param {Map<string, string[]>} args.titleMap - Title key -> entry paths index.
 * @param {Map<string, string[]>} args.hashMap - Content hash -> entry paths index.
 * @returns {object} Entry summary: path, category, display title, case-folded
 *   status/source/confidence (left as-is when not strings), `excluded` flag,
 *   body and content hashes, and the raw `raw_path` value.
 * @throws Whatever reading or parsing the file throws.
 */
function scanEntryFile({ wikiRoot, fullPath, category, errors, warnings, titleMap, hashMap }) {
  const relPath = toRelativePath(wikiRoot, fullPath);
  const content = fs.readFileSync(fullPath, "utf-8");
  const { frontmatter, body } = parseFrontmatter(content);

  // Case-fold string values for reporting; non-strings pass through untouched.
  const foldCase = (value, method) => (typeof value === "string" ? value[method]() : value);

  const title = normalizeDisplayText(frontmatter.title);
  const status = foldCase(frontmatter.status, "toLowerCase");
  const source = foldCase(frontmatter.source, "toLowerCase");
  const confidence = foldCase(frontmatter.confidence, "toUpperCase");
  const excluded = typeof status === "string" && EXCLUDED_STATUSES.has(status);

  const toLower = (value) => value.toLowerCase();
  const toUpper = (value) => value.toUpperCase();

  validateRequiredFields(frontmatter, relPath, errors);
  validateTitle(frontmatter, relPath, errors);
  validateEnum(frontmatter, relPath, errors, "status", VALID_STATUSES, toLower);
  validateEnum(frontmatter, relPath, errors, "source", VALID_SOURCES, toLower);
  validateEnum(frontmatter, relPath, errors, "confidence", VALID_CONFIDENCES, toUpper);
  validateRawPath(frontmatter, relPath, errors);
  validatePrivacy(content, relPath, warnings);

  // Append this entry's path to the list stored under `key`.
  const register = (index, key) => {
    const bucket = index.get(key) || [];
    bucket.push(relPath);
    index.set(key, bucket);
  };

  const titleKey = normalizeTitleKey(frontmatter.title);
  if (titleKey) {
    register(titleMap, titleKey);
  }

  const hash = contentHash(content);
  register(hashMap, hash);

  return {
    path: relPath,
    category,
    title,
    status,
    source,
    confidence,
    excluded,
    body_hash: contentHash(body),
    content_hash: hash,
    raw_path: frontmatter.raw_path,
  };
}
|
|
244
|
+
|
|
245
|
+
/**
 * Lint every entry below the wiki root and return a report.
 *
 * The directories listed in `ENTRY_ROOTS` are scanned for markdown files. Each
 * file is validated individually, then duplicate titles and duplicate contents
 * are detected across all scanned files. Nothing on disk is modified.
 *
 * Failures are reported as findings rather than thrown: a file that cannot be
 * read or parsed yields `unreadable_entry`, and a directory tree that cannot
 * be listed yields `unreadable_directory` (its files are skipped, the other
 * roots are still scanned).
 *
 * @param {string} wikiRoot - Path to the wiki root; resolved to an absolute path.
 * @returns {{ok: boolean, errors: object[], warnings: object[], entries: object[], duplicates: object[]}}
 *   `ok` is `true` only when no errors were recorded.
 */
export function lintWiki(wikiRoot) {
  const rootPath = path.resolve(wikiRoot);
  const errors = [];
  const warnings = [];
  const entries = [];
  const titleMap = new Map();
  const hashMap = new Map();

  if (!fs.existsSync(rootPath)) {
    errors.push({ code: "wiki_root_missing", path: rootPath, message: "wiki root does not exist" });
    return { ok: false, errors, warnings, entries, duplicates: [] };
  }

  for (const { category, segments } of ENTRY_ROOTS) {
    const dir = path.join(rootPath, ...segments);

    // Listing the tree can itself fail (e.g. a sub-directory without read
    // permission); report that instead of aborting the whole lint run.
    let files;
    try {
      files = collectMarkdownFiles(dir);
    } catch (error) {
      errors.push({
        path: toRelativePath(rootPath, dir),
        code: "unreadable_directory",
        message: `directory could not be read: ${error.message}`,
      });
      continue;
    }

    for (const fullPath of files) {
      try {
        entries.push(
          scanEntryFile({
            wikiRoot: rootPath,
            fullPath,
            category,
            errors,
            warnings,
            titleMap,
            hashMap,
          })
        );
      } catch (error) {
        errors.push({
          path: toRelativePath(rootPath, fullPath),
          code: "unreadable_entry",
          message: `entry could not be read: ${error.message}`,
        });
      }
    }
  }

  const duplicates = buildDuplicateFindings(titleMap, hashMap, warnings);

  return {
    ok: errors.length === 0,
    errors,
    warnings,
    entries,
    duplicates,
  };
}
|
|
293
|
+
|
|
294
|
+
export { EXCLUDED_STATUSES };
|
|
@@ -0,0 +1,264 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* NotebookLM artifact adapter for oh-my-llmwikimode
|
|
3
|
+
*
|
|
4
|
+
* Imports local NotebookLM export files into raw storage and creates
|
|
5
|
+
* searchable candidate wiki entries.
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import fs from "node:fs";
|
|
9
|
+
import path from "node:path";
|
|
10
|
+
import crypto from "node:crypto";
|
|
11
|
+
import {
|
|
12
|
+
getWikiPaths,
|
|
13
|
+
ensureWikiStructure,
|
|
14
|
+
stringifyFrontmatter,
|
|
15
|
+
storeKnowledge,
|
|
16
|
+
} from "./store.js";
|
|
17
|
+
import { redactSecrets } from "../security.js";
|
|
18
|
+
|
|
19
|
+
/**
 * Derive a run id from a query and a date.
 *
 * The id is the first 16 hex characters of SHA-256 over `"<query>:<date>"`.
 * When `dateStr` is falsy, today's date in UTC (`YYYY-MM-DD`) is used, so the
 * same query yields the same id for the whole UTC day.
 *
 * @param {string} query
 * @param {string} [dateStr] - Date component; defaults to the current UTC date.
 * @returns {string} 16 lower-case hex characters.
 */
export function generateRunId(query, dateStr) {
  const day = dateStr || new Date().toISOString().slice(0, 10);
  const hasher = crypto.createHash("sha256");
  hasher.update(`${query}:${day}`);
  return hasher.digest("hex").slice(0, 16);
}
|
|
26
|
+
|
|
27
|
+
/**
 * Compute the SHA-256 digest of a file's bytes.
 *
 * @param {string} filePath - File to read (synchronously, in full).
 * @returns {string} 64-character lower-case hex digest.
 * @throws If the file cannot be read.
 */
export function hashFile(filePath) {
  const hasher = crypto.createHash("sha256");
  hasher.update(fs.readFileSync(filePath));
  return hasher.digest("hex");
}
|
|
34
|
+
|
|
35
|
+
/**
 * Check that the import source is an existing, real directory.
 *
 * Checks run in this order: the path must be a non-empty string, must exist,
 * must not itself be a symbolic link, and must be a directory.
 *
 * @param {unknown} inputPath
 * @returns {{success: true} | {success: false, error: string}}
 */
function validateInputPath(inputPath) {
  const reject = (error) => ({ success: false, error });

  if (typeof inputPath !== "string" || !inputPath) {
    return reject("Input path is required");
  }
  if (!fs.existsSync(inputPath)) {
    return reject("Input path does not exist");
  }

  // lstat (not stat) so that a link is seen as a link rather than its target.
  const info = fs.lstatSync(inputPath);
  if (info.isSymbolicLink()) {
    return reject("Symbolic link directories are not allowed");
  }
  return info.isDirectory() ? { success: true } : reject("Input path must be a directory");
}
|
|
58
|
+
|
|
59
|
+
/** Lower-case file extensions (with leading dot) that the importer copies; other files are skipped. */
const ALLOWED_EXTENSIONS = new Set([".md", ".txt", ".json", ".yaml", ".yml", ".csv"]);

/** Largest file the importer copies, in bytes (10 MiB); larger files are skipped. */
const MAX_FILE_SIZE = 10 * 1024 * 1024;

/** Maximum number of directory entries allowed in an import source. */
const MAX_FILE_COUNT = 50;
|
|
62
|
+
|
|
63
|
+
/**
 * Import NotebookLM artifacts from a local export directory.
 *
 * Eligible files (regular files with an allowed extension, within the size
 * limit) are copied into `raw/notebooklm/imports/<runId>` under the wiki root
 * after secret redaction, and a `manifest.json` describing the copies is
 * written next to them. When the export contains `report.md`, a candidate
 * wiki entry is created from it and annotated with provenance metadata.
 *
 * @param {object} options
 * @param {string} options.inputPath - Export directory to import from.
 * @param {string} options.wikiRoot - Wiki root passed to the store helpers.
 * @param {string} [options.originalQuery=""] - Query that produced the export.
 * @param {string} [options.title] - Title for the candidate entry; defaults to
 *   the first 80 characters of the query, or "NotebookLM Import".
 * @param {string[]} [options.tags=[]] - Extra tags for the candidate entry.
 * @param {string} [options.runId] - Explicit run id; derived from the query
 *   and current date when omitted. Must be a single path segment.
 * @returns {{success: false, error: string} | {success: true, runId: string,
 *   rawPath: string, manifestPath: string, candidatePaths: string[], artifactCount: number}}
 * @throws If copying or writing fails; the partially written run directory is
 *   removed first so the same run id can be retried.
 */
export function importNotebookLmArtifacts(options) {
  const {
    inputPath,
    wikiRoot,
    originalQuery = "",
    title,
    tags = [],
    runId: providedRunId,
  } = options;

  // Name reserved for the manifest this function writes into the run directory.
  const manifestName = "manifest.json";

  const pathValidation = validateInputPath(inputPath);
  if (!pathValidation.success) {
    return pathValidation;
  }

  const paths = getWikiPaths(wikiRoot);
  ensureWikiStructure(paths);

  const runId = providedRunId || generateRunId(originalQuery);

  // The run id becomes a directory name, so it must not be able to climb out
  // of the imports directory or address a nested location.
  if (typeof runId !== "string" || runId === "." || runId === ".." || /[\\/\0]/.test(runId)) {
    return { success: false, error: "Run ID must be a single path segment" };
  }

  const rawDir = path.join(paths.root, "raw", "notebooklm", "imports", runId);

  // Check for run-id collision
  if (fs.existsSync(rawDir)) {
    return { success: false, error: `Run ID collision: ${runId} already exists` };
  }

  // Enforce the entry limit BEFORE creating the run directory; otherwise a
  // rejected import would leave an empty directory behind and every retry
  // would fail with a run-id collision.
  const files = fs.readdirSync(inputPath);
  if (files.length > MAX_FILE_COUNT) {
    return { success: false, error: `Too many files: ${files.length} > ${MAX_FILE_COUNT}` };
  }

  fs.mkdirSync(rawDir, { recursive: true });

  // Best-effort removal of the run directory after a failed import.
  const removeRawDir = () => {
    try {
      fs.rmSync(rawDir, { recursive: true, force: true });
    } catch {
      // ignore cleanup error
    }
  };

  // Collect artifacts with validation
  const artifacts = [];
  try {
    for (const file of files) {
      // The generated manifest would overwrite an imported file of the same
      // name while the manifest still listed the imported file's hash.
      if (file.toLowerCase() === manifestName) {
        continue;
      }

      const sourcePath = path.join(inputPath, file);

      // lstat does not follow links, so this single check skips symlinks as
      // well as directories and other non-regular files.
      const sourceStat = fs.lstatSync(sourcePath);
      if (!sourceStat.isFile()) {
        continue;
      }

      // Check extension
      const ext = path.extname(file).toLowerCase();
      if (!ALLOWED_EXTENSIONS.has(ext)) {
        continue;
      }

      // Check size
      if (sourceStat.size > MAX_FILE_SIZE) {
        continue;
      }

      const destPath = path.join(rawDir, file);

      // Read and redact before writing
      const content = fs.readFileSync(sourcePath, "utf-8");
      const redactedContent = redactSecrets(content);
      fs.writeFileSync(destPath, redactedContent);

      // Hash and size describe the stored (redacted) copy, not the source.
      artifacts.push({
        relative_path: file,
        sha256: hashFile(destPath),
        bytes: fs.statSync(destPath).size,
      });
    }
  } catch (error) {
    removeRawDir();
    throw error;
  }

  if (artifacts.length === 0) {
    // Clean up empty raw dir
    removeRawDir();
    return { success: false, error: "No valid artifacts found in input path" };
  }

  // Write manifest
  const manifest = {
    source: "notebooklm",
    run_id: runId,
    original_query: originalQuery,
    created_at: new Date().toISOString(),
    files: artifacts,
  };

  const manifestPath = path.join(rawDir, manifestName);
  fs.writeFileSync(manifestPath, JSON.stringify(manifest, null, 2));

  // Create candidate entry
  const reportFile = artifacts.find((a) => a.relative_path === "report.md");
  const candidatePaths = [];

  if (reportFile) {
    const reportPath = path.join(rawDir, "report.md");
    const reportContent = fs.readFileSync(reportPath, "utf-8");
    const safeContent = redactSecrets(reportContent);

    const candidateTitle = title || (originalQuery ? originalQuery.slice(0, 80) : "NotebookLM Import");
    const candidateTags = [...tags, "notebooklm", "research", "candidate"];

    const candidatePath = storeKnowledge(wikiRoot, {
      summary: candidateTitle,
      details: safeContent,
      tags: candidateTags,
      status: "candidate",
      source: "notebooklm",
    });

    if (candidatePath) {
      // Append provenance metadata to the candidate file
      const candidateContent = fs.readFileSync(candidatePath, "utf-8");
      const { frontmatter, body } = parseCandidateFrontmatter(candidateContent);

      frontmatter.original_query = originalQuery;
      frontmatter.run_id = runId;
      // Relative to the same root rawDir was built from, so the stored value
      // never contains ".." segments even when wikiRoot is spelled differently.
      frontmatter.raw_path = path.relative(paths.root, rawDir).replace(/\\/g, "/");
      frontmatter.confidence = "AMBIGUOUS";
      frontmatter.imported_at = new Date().toISOString();

      const updatedContent = stringifyFrontmatter(frontmatter) + "\n" + body;
      fs.writeFileSync(candidatePath, updatedContent);
      candidatePaths.push(candidatePath);
    }
  }

  return {
    success: true,
    runId,
    rawPath: rawDir,
    manifestPath,
    candidatePaths,
    artifactCount: artifacts.length,
  };
}
|
|
209
|
+
|
|
210
|
+
/**
 * Parse the frontmatter of a candidate file (lightweight, line-based).
 *
 * Supported forms, one per top-level `key:` line:
 *   - double-quoted scalar: decoded as a JSON string (quotes stripped if that fails)
 *   - inline array `[a, "b, c"]`: parsed as JSON when it is a JSON array of
 *     strings, otherwise split on commas with quoted items unquoted and empty
 *     items dropped (so `[]` is an empty array)
 *   - plain scalar: kept as the trimmed text
 *   - empty value followed by `- item` lines: collected into an array
 * A key with an empty value and no following list items is omitted.
 *
 * Both LF and CRLF line endings are accepted, and the closing `---` may be the
 * last line of the file.
 *
 * @param {string} content - Full file content.
 * @returns {{frontmatter: object, body: string}} Parsed keys and the trimmed
 *   body. When no frontmatter block is found, `frontmatter` is empty and
 *   `body` is the unmodified content.
 */
function parseCandidateFrontmatter(content) {
  const match = content.match(/^---\s*\n([\s\S]*?)\n---\s*(?:\n([\s\S]*))?$/);
  if (!match) return { frontmatter: {}, body: content };

  // Decode a double-quoted token; anything else is returned unchanged.
  const unquote = (text) => {
    if (!(text.startsWith('"') && text.endsWith('"'))) return text;
    try {
      return JSON.parse(text);
    } catch {
      return text.slice(1, -1);
    }
  };

  // Parse `[ ... ]`, preferring JSON so commas inside quoted items survive.
  const parseInlineArray = (text) => {
    try {
      const parsed = JSON.parse(text);
      if (Array.isArray(parsed) && parsed.every((item) => typeof item === "string")) {
        return parsed;
      }
    } catch {
      // Not valid JSON (e.g. unquoted items); fall back to comma splitting.
    }
    return text
      .slice(1, -1)
      .split(",")
      .map((item) => item.trim())
      .filter((item) => item !== "")
      .map(unquote);
  };

  const frontmatter = {};
  let currentKey = null;
  let currentArray = null;

  for (const rawLine of match[1].split("\n")) {
    // Drop the CR of a CRLF ending; `.` in the key regex would not match it.
    const line = rawLine.replace(/\r$/, "");
    const trimmedLine = line.trim();
    const isListItem = trimmedLine.startsWith("- ");

    if (isListItem && currentArray) {
      currentArray.push(unquote(trimmedLine.slice(2).trim()));
      continue;
    }

    // Matched against the untrimmed line so only top-level keys qualify.
    const keyValueMatch = line.match(/^(\w+):\s*(.*)$/);
    if (keyValueMatch) {
      const [, key, value] = keyValueMatch;
      const trimmedValue = value.trim();

      if (trimmedValue.startsWith('"') && trimmedValue.endsWith('"')) {
        frontmatter[key] = unquote(trimmedValue);
      } else if (trimmedValue.startsWith("[") && trimmedValue.endsWith("]")) {
        frontmatter[key] = parseInlineArray(trimmedValue);
      } else if (trimmedValue) {
        frontmatter[key] = trimmedValue;
      }
      // An empty value assigns nothing yet: a following list may fill it in.
      currentKey = key;
      currentArray = null;
    } else if (isListItem) {
      // First item of a block list belonging to the most recent key.
      currentArray = [];
      if (currentKey) {
        frontmatter[currentKey] = currentArray;
      }
      currentArray.push(unquote(trimmedLine.slice(2).trim()));
    }
  }

  return { frontmatter, body: (match[2] ?? "").trim() };
}
|