oh-my-llmwikimode 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +494 -0
- package/bin/llmwiki.js +1493 -0
- package/docs/INSTALLATION.md +228 -0
- package/docs/SCOPE_LOCK.md +79 -0
- package/docs/STAGE1_GUIDE.md +265 -0
- package/docs/STAGE2_AGENT_TEAM_GUIDE.md +141 -0
- package/docs/STAGE3_CONVERSATIONAL_GROWTH_GUIDE.md +50 -0
- package/docs/TEST_WORKSHEET.md +120 -0
- package/docs/github-private-bootstrap.md +53 -0
- package/docs/release.md +79 -0
- package/docs/stage4-slice1-manual-test.md +259 -0
- package/docs/stage4-slice1-user-guide.md +269 -0
- package/docs/user-guide-ko.md +452 -0
- package/package.json +76 -0
- package/scripts/install-llmwiki.ps1 +229 -0
- package/src/config.js +74 -0
- package/src/curator/browser-data.js +134 -0
- package/src/curator/queue.js +324 -0
- package/src/curator/schema.js +237 -0
- package/src/curator/scoring.js +83 -0
- package/src/hooks.js +199 -0
- package/src/librarian/schema.js +218 -0
- package/src/librarian/weekly-digest.js +478 -0
- package/src/security.js +127 -0
- package/src/server.js +860 -0
- package/src/stage4/graph-reasoning/analyzer.js +255 -0
- package/src/stage4/graph-reasoning/browser-data.js +130 -0
- package/src/stage4/graph-reasoning/index.js +35 -0
- package/src/stage4/graph-reasoning/loader.js +122 -0
- package/src/stage4/graph-reasoning/queue.js +154 -0
- package/src/stage4/graph-reasoning/schema.js +190 -0
- package/src/team/browser-data.js +142 -0
- package/src/team/capabilities.js +79 -0
- package/src/team/dispatch.js +108 -0
- package/src/team/queue.js +290 -0
- package/src/team/schema.js +225 -0
- package/src/team/shared-memory.js +183 -0
- package/src/todo/browser-data.js +71 -0
- package/src/todo/queue.js +159 -0
- package/src/todo/schema.js +90 -0
- package/src/utils/embedding-model.js +111 -0
- package/src/wiki/alias-suggestions.js +180 -0
- package/src/wiki/browser-data.js +284 -0
- package/src/wiki/doctor.js +218 -0
- package/src/wiki/entry-normalizer.js +139 -0
- package/src/wiki/ingest.js +443 -0
- package/src/wiki/lesson-proposal-analyzer.js +463 -0
- package/src/wiki/lesson-proposal-manager.js +331 -0
- package/src/wiki/lesson-template.js +182 -0
- package/src/wiki/lint.js +294 -0
- package/src/wiki/notebooklm-adapter.js +264 -0
- package/src/wiki/query.js +304 -0
- package/src/wiki/raw-manager.js +400 -0
- package/src/wiki/search-feedback.js +211 -0
- package/src/wiki/semantic-index.js +333 -0
- package/src/wiki/semantic-search.js +170 -0
- package/src/wiki/source-ledger.js +370 -0
- package/src/wiki/store.js +1329 -0
- package/src/wiki/usage-events.js +144 -0
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
import fs from "node:fs";
|
|
2
|
+
import path from "node:path";
|
|
3
|
+
import { wrapWithBoundary } from "../security.js";
|
|
4
|
+
import { buildIndex, getWikiPaths, search } from "./store.js";
|
|
5
|
+
import {
|
|
6
|
+
normalizeConfidenceScore,
|
|
7
|
+
normalizeEntryStatus,
|
|
8
|
+
normalizeEntryTags,
|
|
9
|
+
normalizeEntryText,
|
|
10
|
+
normalizeIndexEntry,
|
|
11
|
+
normalizeWikiIndex,
|
|
12
|
+
} from "./entry-normalizer.js";
|
|
13
|
+
|
|
14
|
+
// Default number of direct search hits returned by queryWiki.
const DEFAULT_MAX_RESULTS = 3;
// Default lower bound for the one-hop neighbor limit used by queryWiki.
const DEFAULT_MAX_NEIGHBORS = 6;

// Entries in these statuses are not expected to have a node in graph.json.
const EXCLUDED_GRAPH_STATUSES = new Set(["rejected", "superseded", "private"]);

// Entries in these statuses are never returned from a query. With the current
// values this is the graph exclusion list plus "needs-clarification".
const EXCLUDED_QUERY_STATUSES = new Set([...EXCLUDED_GRAPH_STATUSES, "needs-clarification"]);
|
|
23
|
+
|
|
24
|
+
/**
 * Normalize an entry status by delegating to normalizeEntryStatus from
 * entry-normalizer.js. Kept as a local alias so the rest of this module has a
 * single place to call.
 *
 * @param {*} status - Raw status value taken from an index entry or graph node.
 * @returns {*} Whatever normalizeEntryStatus returns for the value.
 */
function normalizeStatus(status) {
  const normalized = normalizeEntryStatus(status);
  return normalized;
}
|
|
27
|
+
|
|
28
|
+
/**
 * Decide whether an entry may be returned from a query.
 *
 * @param {object|null|undefined} entry - Index entry (only `status` is read).
 * @returns {*} The falsy `entry` itself when no entry was given; otherwise
 *   `true` unless the normalized status is in EXCLUDED_QUERY_STATUSES.
 */
function isQueryable(entry) {
  // Preserve the short-circuit result: a falsy entry is returned unchanged.
  if (!entry) return entry;

  const status = normalizeStatus(entry.status);
  return !EXCLUDED_QUERY_STATUSES.has(status);
}
|
|
31
|
+
|
|
32
|
+
/**
 * Read a UTF-8 file and parse it as JSON.
 *
 * @param {string} filePath - File to read.
 * @returns {*} The parsed JSON value.
 * @throws If the file cannot be read or its content is not valid JSON.
 */
function readJson(filePath) {
  const text = fs.readFileSync(filePath, "utf-8");
  return JSON.parse(text);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Best-effort JSON loader: read and parse a file, yielding `null` instead of
 * throwing.
 *
 * @param {string} filePath - File to read.
 * @returns {*} The parsed JSON value, or `null` when the file does not exist,
 *   cannot be read, or does not contain valid JSON.
 */
function tryReadJson(filePath) {
  if (!fs.existsSync(filePath)) return null;

  try {
    return JSON.parse(fs.readFileSync(filePath, "utf-8"));
  } catch {
    // Deliberate best-effort: callers treat unreadable artifacts as absent.
    return null;
  }
}
|
|
44
|
+
|
|
45
|
+
/**
 * Collect the paths of all index entries whose normalized status is not in
 * EXCLUDED_GRAPH_STATUSES. isGraphStale compares this set against the graph's
 * node ids.
 *
 * @param {{entries?: Array<{path: *, status: *}>}} index - Wiki index.
 * @returns {Set<*>} Set of entry paths.
 */
function expectedGraphNodeIds(index) {
  const ids = new Set();
  for (const entry of index.entries || []) {
    const status = normalizeStatus(entry.status);
    if (EXCLUDED_GRAPH_STATUSES.has(status)) continue;
    ids.add(entry.path);
  }
  return ids;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Report whether the stored graph no longer matches the index.
 *
 * The graph is considered stale when the index or graph is missing or
 * malformed, when the graph version is not 1, or when the set of graph node
 * ids differs from the set of paths expected from the index.
 *
 * @param {object|null} index - Wiki index with an `entries` array.
 * @param {object|null} graph - Graph with `version`, `nodes` and `edges`.
 * @returns {boolean} `true` when the graph should be rebuilt.
 */
function isGraphStale(index, graph) {
  const indexUsable = Boolean(index) && Array.isArray(index.entries);
  if (!indexUsable) return true;

  const graphUsable =
    Boolean(graph) &&
    graph.version === 1 &&
    Array.isArray(graph.nodes) &&
    Array.isArray(graph.edges);
  if (!graphUsable) return true;

  const expected = expectedGraphNodeIds(index);
  const actual = new Set(graph.nodes.map((node) => node.id));

  // Equal sizes plus "every expected id is present" means the sets are equal.
  if (expected.size !== actual.size) return true;
  return [...expected].some((id) => !actual.has(id));
}
|
|
67
|
+
|
|
68
|
+
/**
 * Load the index and graph used to answer a query, rebuilding when needed.
 *
 * The index file and `graph.json` (under the wiki's system directory) are read
 * best-effort. If the index is missing or the graph is stale relative to it,
 * the index is rebuilt via buildIndex and the graph file is read again.
 *
 * @param {string} wikiRoot - Wiki root passed to getWikiPaths / buildIndex.
 * @returns {{index: object, graph: object, rebuilt: boolean,
 *   indexPath: string, graphPath: string}} Loaded artifacts; `graph` falls
 *   back to an empty version-1 graph when none could be read.
 */
function loadQueryArtifacts(wikiRoot) {
  const paths = getWikiPaths(wikiRoot);
  const graphPath = path.join(paths.system, "graph.json");

  const storedIndex = tryReadJson(paths.indexFile);
  const storedGraph = tryReadJson(graphPath);
  const normalizedStored = storedIndex ? normalizeWikiIndex(storedIndex) : storedIndex;

  const rebuilt = !normalizedStored || isGraphStale(normalizedStored, storedGraph);

  let index = normalizedStored;
  let graph = storedGraph;
  if (rebuilt) {
    index = normalizeWikiIndex(buildIndex(wikiRoot));
    // NOTE(review): buildIndex presumably regenerates graph.json as a side
    // effect, which is why the file is re-read here — confirm in store.js.
    graph = tryReadJson(graphPath);
  }

  return {
    index,
    graph: graph || { version: 1, nodes: [], edges: [], communities: [] },
    rebuilt,
    indexPath: paths.indexFile,
    graphPath,
  };
}
|
|
97
|
+
|
|
98
|
+
/**
 * Index entries by their `path` for constant-time lookup.
 *
 * @param {Iterable<{path: *}>|null|undefined} entries - Index entries.
 * @returns {Map<*, object>} Map from path to entry; when two entries share a
 *   path the later one wins.
 */
function mapEntriesByPath(entries) {
  const pairs = [...(entries || [])].map((entry) => [entry.path, entry]);
  return new Map(pairs);
}
|
|
105
|
+
|
|
106
|
+
/**
 * Index graph nodes by their `id` for constant-time lookup.
 *
 * @param {Iterable<{id: *}>|null|undefined} nodes - Graph nodes.
 * @returns {Map<*, object>} Map from id to node; when two nodes share an id
 *   the later one wins.
 */
function mapNodesById(nodes) {
  const pairs = [...(nodes || [])].map((node) => [node.id, node]);
  return new Map(pairs);
}
|
|
113
|
+
|
|
114
|
+
/**
 * Normalize a confidence score by delegating to normalizeConfidenceScore from
 * entry-normalizer.js.
 *
 * @param {*} value - Raw confidence score.
 * @param {number} [fallback=1.0] - Fallback handed to the normalizer.
 * @returns {*} Whatever normalizeConfidenceScore returns.
 */
function confidenceScore(value, fallback = 1.0) {
  const score = normalizeConfidenceScore(value, fallback);
  return score;
}
|
|
117
|
+
|
|
118
|
+
/**
 * Build the public result record for one entry, combining the index entry
 * with its graph node (when one exists).
 *
 * Index data wins for path, title, category, tags and status; the node is used
 * as a fallback. For `confidence` and `confidence_score` the node value is
 * preferred over the index value.
 *
 * @param {object} entry - Index entry or search hit; `score` is copied only
 *   when it is a number.
 * @param {object} [node] - Matching graph node, if any.
 * @param {object} [extras={}] - Extra fields spread last, so they override
 *   any computed field of the same name.
 * @returns {object} Normalized entry record.
 */
function normalizeEntry(entry, node, extras = {}) {
  const fallbackPath = node?.path || node?.id || "";
  const base = normalizeIndexEntry(entry, fallbackPath);
  const titleFromNode = normalizeEntryText(node?.label || node?.title);
  const tags = base.tags.length > 0 ? base.tags : normalizeEntryTags(node?.tags);

  const title = base.title || titleFromNode || base.path;
  const category = base.category || normalizeEntryText(node?.category) || "wiki";
  const status = normalizeStatus(base.status || node?.status);
  const score = typeof entry.score === "number" ? entry.score : undefined;
  const confidence = normalizeEntryText(node?.confidence || base.confidence || "EXTRACTED");
  const confidence_score = confidenceScore(node?.confidence_score ?? base.confidence_score);

  return {
    path: base.path,
    title,
    category,
    tags,
    status,
    score,
    confidence,
    confidence_score,
    ...extras,
  };
}
|
|
135
|
+
|
|
136
|
+
/**
 * Run the text search and turn queryable hits into normalized result records.
 *
 * @param {string} query - Search text.
 * @param {object} index - Wiki index handed to `search`.
 * @param {Map<*, object>} nodesById - Graph nodes keyed by id (entry path).
 * @param {number} maxResults - Limit handed to `search`.
 * @returns {object[]} Normalized direct matches.
 */
function buildDirectMatches(query, index, nodesById, maxResults) {
  const hits = search(query, index, maxResults);
  // NOTE(review): the limit is applied by `search` before the status filter,
  // so fewer than maxResults entries can come back when hits are excluded.
  const visibleHits = hits.filter((hit) => isQueryable(hit));
  return visibleHits.map((hit) => normalizeEntry(hit, nodesById.get(hit.path)));
}
|
|
141
|
+
|
|
142
|
+
/**
 * Register a neighbor in the accumulator map, merging repeated sightings.
 *
 * The first neighbor seen for a path is stored as-is. Later sightings only
 * append their relation to the stored neighbor and recompute its de-duplicated
 * `relation_labels`; all other fields keep the first sighting's values.
 *
 * @param {Map<*, object>} neighborsByPath - Accumulator keyed by neighbor path.
 * @param {object} neighbor - Candidate neighbor record (must have `path`).
 * @param {{relation_label: *}} relation - Relation that produced this sighting.
 * @returns {void}
 */
function addNeighbor(neighborsByPath, neighbor, relation) {
  const known = neighborsByPath.get(neighbor.path);

  if (known) {
    known.relations.push(relation);
    const labels = new Set();
    for (const item of known.relations) {
      labels.add(item.relation_label);
    }
    known.relation_labels = Array.from(labels);
  } else {
    neighborsByPath.set(neighbor.path, neighbor);
  }
}
|
|
152
|
+
|
|
153
|
+
/**
 * Expand direct matches by one hop along graph edges.
 *
 * Every edge touching a direct match contributes the entry at its other end,
 * provided that entry exists in the index and is queryable. Repeated
 * sightings of the same neighbor are merged by addNeighbor. The result is
 * ordered by descending confidence score, then by title, and cut to
 * `maxNeighbors`.
 *
 * @param {Array<{path: *}>} directMatches - Normalized direct matches.
 * @param {{edges?: object[]}} graph - Graph whose edges carry `source`,
 *   `target` and optionally `relation`, `confidence`, `confidence_score`.
 * @param {Map<*, object>} entriesByPath - Index entries keyed by path.
 * @param {Map<*, object>} nodesById - Graph nodes keyed by id.
 * @param {number} maxNeighbors - Maximum number of neighbors to return.
 * @returns {object[]} Neighbor records, each with relation details attached.
 */
function buildGraphNeighbors(directMatches, graph, entriesByPath, nodesById, maxNeighbors) {
  const directPaths = new Set(directMatches.map((match) => match.path));
  const collected = new Map();

  for (const edge of graph.edges || []) {
    const matchedAtSource = directPaths.has(edge.source);
    const matchedAtTarget = directPaths.has(edge.target);
    if (!(matchedAtSource || matchedAtTarget)) continue;

    // When both ends are direct matches the source side is treated as the
    // match and the target as the neighbor.
    // NOTE(review): that means a direct match can also be listed as a
    // neighbor — confirm this is intended.
    const viaPath = matchedAtSource ? edge.source : edge.target;
    const otherPath = matchedAtSource ? edge.target : edge.source;
    if (otherPath === viaPath) continue; // self-loop

    const entry = entriesByPath.get(otherPath);
    const node = nodesById.get(otherPath);
    // isQueryable is falsy for a missing entry as well as for excluded ones.
    if (!isQueryable(entry)) continue;

    const relation = {
      relation_label: edge.relation || "related",
      source_path: edge.source,
      target_path: edge.target,
      via_path: viaPath,
      confidence: edge.confidence || "EXTRACTED",
      confidence_score: confidenceScore(edge.confidence_score),
    };

    // The neighbor's confidence fields describe the edge, not the entry.
    const extras = {
      relation_label: relation.relation_label,
      relation_labels: [relation.relation_label],
      relation_source_path: relation.source_path,
      relation_target_path: relation.target_path,
      via_path: relation.via_path,
      confidence: relation.confidence,
      confidence_score: relation.confidence_score,
      relations: [relation],
    };

    addNeighbor(collected, normalizeEntry(entry, node, extras), relation);
  }

  const ranked = Array.from(collected.values());
  ranked.sort((a, b) => {
    if (b.confidence_score !== a.confidence_score) {
      return b.confidence_score - a.confidence_score;
    }
    const leftTitle = String(a.title ?? "");
    const rightTitle = String(b.title ?? "");
    return leftTitle.localeCompare(rightTitle);
  });
  return ranked.slice(0, maxNeighbors);
}
|
|
200
|
+
|
|
201
|
+
/**
 * Keep only the duplicate warnings that mention at least one returned entry.
 *
 * @param {Array<{entries: *}>|null|undefined} duplicates - Duplicate warnings
 *   from the index; warnings whose `entries` is not an array are dropped.
 * @param {Set<*>} returnedPaths - Paths of all entries in the query result.
 * @returns {object[]} Copies of the relevant warnings, each extended with
 *   `relevant_entries` (the subset of `entries` that was returned).
 */
function buildDuplicateWarnings(duplicates, returnedPaths) {
  const relevantWarnings = [];

  for (const warning of duplicates || []) {
    if (!Array.isArray(warning.entries)) continue;

    const relevant = warning.entries.filter((entryPath) => returnedPaths.has(entryPath));
    if (relevant.length === 0) continue;

    relevantWarnings.push({ ...warning, relevant_entries: relevant });
  }

  return relevantWarnings;
}
|
|
209
|
+
|
|
210
|
+
/**
 * Render one result entry as a single " | "-separated bullet line.
 *
 * @param {object} entry - Normalized entry; `relation_label` and `via_path`
 *   are included only when truthy.
 * @returns {string} e.g. "- Title | path: a.md | status: active | confidence: EXTRACTED 1".
 */
function formatEntryLine(entry) {
  const segments = [
    `- ${entry.title}`,
    `path: ${entry.path}`,
    `status: ${entry.status}`,
    entry.relation_label ? `relation: ${entry.relation_label}` : null,
    entry.via_path ? `via: ${entry.via_path}` : null,
    `confidence: ${entry.confidence} ${entry.confidence_score}`,
  ];
  return segments.filter((segment) => segment !== null).join(" | ");
}
|
|
217
|
+
|
|
218
|
+
// Character budget used when the caller supplies none (or an unusable one).
const DEFAULT_CONTEXT_BUDGET = 2500;

/**
 * Render a query result as a plain-text context block.
 *
 * The text lists direct matches, one-hop graph neighbors and (when present)
 * duplicate warnings, is truncated to roughly `budget` characters, and is then
 * passed through wrapWithBoundary from security.js.
 *
 * @param {{query: string, direct_matches: object[], graph_neighbors: object[],
 *   duplicate_warnings: Array<{type: *, entries: Array}>}} result - Result
 *   object as produced by queryWiki.
 * @param {number} [budget=2500] - Maximum content length in characters before
 *   truncation. `undefined`, `null` and non-numeric values fall back to the
 *   default; `Infinity` disables truncation.
 * @returns {*} Whatever wrapWithBoundary returns for the rendered text.
 */
export function formatQueryForContext(result, budget = DEFAULT_CONTEXT_BUDGET) {
  // The default parameter only covers `undefined`. queryWiki forwards
  // options.contextBudget verbatim, so a `null` (e.g. from JSON) used to be
  // coerced to 0 and wiped the whole context down to the truncation marker.
  const requestedBudget = budget === null ? Number.NaN : Number(budget);
  const limit = Number.isNaN(requestedBudget) ? DEFAULT_CONTEXT_BUDGET : requestedBudget;

  // Append a titled list of entries, or "- None" when the list is empty.
  const entryLines = (entries) =>
    entries.length === 0 ? ["- None"] : entries.map((entry) => formatEntryLine(entry));

  const lines = [
    "--- Graph-Expanded Wiki Query ---",
    `Query: ${result.query}`,
    "",
    "## Direct matches",
    ...entryLines(result.direct_matches),
    "",
    "## One-hop graph neighbors",
    ...entryLines(result.graph_neighbors),
  ];

  if (result.duplicate_warnings.length > 0) {
    lines.push("", "## Duplicate warnings");
    for (const warning of result.duplicate_warnings) {
      lines.push(`- ${warning.type}: ${warning.entries.join(", ")}`);
    }
  }

  let content = lines.join("\n");
  if (content.length > limit) {
    // Leave headroom (32 chars) for the truncation marker appended below.
    content = `${content.slice(0, Math.max(0, limit - 32))}\n... [truncated]`;
  }

  return wrapWithBoundary(content);
}
|
|
257
|
+
|
|
258
|
+
/**
 * Resolve the neighbor limit requested by the caller.
 *
 * An explicit zero (`0` or a numeric string equal to zero) disables graph
 * expansion. Any other falsy or non-numeric request uses `fallback`. The
 * result is clamped to the range 0..40.
 *
 * @param {*} requested - Raw `options.maxNeighbors` value.
 * @param {number} fallback - Limit used when no usable value was requested.
 * @returns {number} Neighbor limit between 0 and 40.
 */
function resolveNeighborLimit(requested, fallback) {
  const parsed = Number(requested);
  // `Number(x) || fallback` alone cannot tell "0" from "not provided":
  // null, "" and undefined must still fall back, but a real zero must not.
  const isExplicitZero =
    requested === 0 ||
    (typeof requested === "string" && requested.trim() !== "" && parsed === 0);
  const limit = isExplicitZero ? 0 : parsed || fallback;
  return Math.max(0, Math.min(limit, 40));
}

/**
 * Answer a wiki query with direct search matches plus one-hop graph neighbors.
 *
 * @param {string} wikiRoot - Wiki root directory.
 * @param {*} query - Search text; coerced to a string and trimmed.
 * @param {object} [options={}] - Query options.
 * @param {number|string} [options.maxResults] - Direct match limit, clamped to
 *   1..20 (default DEFAULT_MAX_RESULTS).
 * @param {number|string} [options.maxNeighbors] - Neighbor limit, clamped to
 *   0..40; `0` disables graph expansion. Defaults to the larger of the
 *   resolved maxResults and DEFAULT_MAX_NEIGHBORS.
 * @param {string} [options.format] - When "context", the result also carries
 *   a rendered `context` string, which replaces `message`.
 * @param {number} [options.contextBudget] - Budget passed to
 *   formatQueryForContext.
 * @returns {object} `{ success: false, error }` for an empty query; otherwise
 *   the result object with `direct_matches`, `graph_neighbors`,
 *   `duplicate_warnings`, `meta` and `message`.
 */
export function queryWiki(wikiRoot, query, options = {}) {
  const searchText = String(query || "").trim();
  if (!searchText) {
    return { success: false, error: "query requires search text" };
  }

  const maxResults = Math.max(1, Math.min(Number(options.maxResults) || DEFAULT_MAX_RESULTS, 20));
  const maxNeighbors = resolveNeighborLimit(
    options.maxNeighbors,
    Math.max(maxResults, DEFAULT_MAX_NEIGHBORS)
  );

  const artifacts = loadQueryArtifacts(wikiRoot);
  const entriesByPath = mapEntriesByPath(artifacts.index.entries);
  const nodesById = mapNodesById(artifacts.graph.nodes);
  const directMatches = buildDirectMatches(searchText, artifacts.index, nodesById, maxResults);
  const graphNeighbors = buildGraphNeighbors(
    directMatches,
    artifacts.graph,
    entriesByPath,
    nodesById,
    maxNeighbors
  );

  // Duplicate warnings are only relevant for entries that are being returned.
  const returnedPaths = new Set([
    ...directMatches.map((entry) => entry.path),
    ...graphNeighbors.map((entry) => entry.path),
  ]);
  const duplicateWarnings = buildDuplicateWarnings(artifacts.index.duplicates, returnedPaths);

  const result = {
    success: true,
    query: searchText,
    wikiRoot,
    rebuilt: artifacts.rebuilt,
    direct_matches: directMatches,
    graph_neighbors: graphNeighbors,
    duplicate_warnings: duplicateWarnings,
    meta: {
      indexPath: artifacts.indexPath,
      graphPath: artifacts.graphPath,
      direct_count: directMatches.length,
      neighbor_count: graphNeighbors.length,
    },
    message: `Found ${directMatches.length} direct match(es) and ${graphNeighbors.length} graph neighbor(s).`,
  };

  if (options.format === "context") {
    result.format = "context";
    result.context = formatQueryForContext(result, options.contextBudget);
    result.message = result.context;
  }

  return result;
}
|
|
@@ -0,0 +1,400 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Raw folder manager for oh-my-llmwikimode
|
|
3
|
+
*
|
|
4
|
+
* Unified management of raw/ subsystems: sources, notebooklm, packs.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import fs from "node:fs";
|
|
8
|
+
import path from "node:path";
|
|
9
|
+
import { getWikiPaths } from "./store.js";
|
|
10
|
+
import { readSourceRecords } from "./source-ledger.js";
|
|
11
|
+
|
|
12
|
+
/**
 * Locale-aware string comparator that tolerates missing values.
 *
 * @param {*} left - Left value; `null`/`undefined` compare as "".
 * @param {*} right - Right value; `null`/`undefined` compare as "".
 * @returns {number} Negative, zero or positive, as returned by
 *   String.prototype.localeCompare.
 */
function compareStrings(left, right) {
  const leftText = String(left ?? "");
  const rightText = String(right ?? "");
  return leftText.localeCompare(rightText);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Stat a path without throwing.
 *
 * @param {string} filePath - Path to stat.
 * @returns {fs.Stats|null} The stats object, or `null` when fs.statSync
 *   throws for any reason (missing path, permissions, ...).
 */
function statOrNull(filePath) {
  let stats = null;
  try {
    stats = fs.statSync(filePath);
  } catch {
    // Best-effort: an unreadable path is reported as "no stats".
    stats = null;
  }
  return stats;
}
|
|
23
|
+
|
|
24
|
+
/**
 * Read and parse a UTF-8 JSON file without throwing.
 *
 * @param {string} filePath - File to read.
 * @returns {*} The parsed JSON value, or `null` when the file cannot be read
 *   or does not contain valid JSON.
 */
function readJsonFile(filePath) {
  let parsed = null;
  try {
    const text = fs.readFileSync(filePath, "utf-8");
    parsed = JSON.parse(text);
  } catch {
    // Best-effort: callers fall back to scanning the directory instead.
    parsed = null;
  }
  return parsed;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Create the raw/ base directories if they do not exist yet: the raw root and
 * its sources, notebooklm and packs subdirectories (as resolved by
 * getWikiPaths).
 *
 * Does NOT create inbox/, problems/, editorial/lessons/.
 *
 * @param {string} wikiRoot - Wiki root directory.
 * @returns {void}
 */
function ensureRawStructure(wikiRoot) {
  const { rawRoot, rawSources, rawNotebooklm, rawPacks } = getWikiPaths(wikiRoot);

  [rawRoot, rawSources, rawNotebooklm, rawPacks].forEach((dir) => {
    // Anything already present at the path (even a non-directory) is left alone.
    if (fs.existsSync(dir)) return;
    fs.mkdirSync(dir, { recursive: true });
  });
}
|
|
45
|
+
|
|
46
|
+
/**
 * List all raw contents categorized by subsystem.
 *
 * - `sources` come from the source ledger (readSourceRecords).
 * - `notebooklm` has one item per directory under raw/notebooklm/imports/.
 * - `packs` has one item per raw/packs/{pack_id}/{version}/ directory.
 *
 * A readable `manifest.json` supplies the metadata of a run or pack version;
 * without one, directory names and a direct file count are used instead.
 * This function only reads from the filesystem.
 *
 * @param {string} wikiRoot - Wiki root directory.
 * @returns {{sources: object[], notebooklm: object[], packs: object[]}}
 *   Items per category, sorted by source_id, run_id, and pack_id + version.
 */
export function listRawContents(wikiRoot) {
  const paths = getWikiPaths(wikiRoot);

  // Path relative to the wiki root, always with forward slashes.
  const toWikiRelative = (absolutePath) =>
    path.relative(wikiRoot, absolutePath).replace(/\\/g, "/");

  // Names of the immediate subdirectories of `dir`.
  const childDirNames = (dir) =>
    fs
      .readdirSync(dir, { withFileTypes: true })
      .filter((dirent) => dirent.isDirectory())
      .map((dirent) => dirent.name);

  // Number of regular files directly inside `dir` that satisfy `accept`.
  const countFiles = (dir, accept) =>
    fs
      .readdirSync(dir, { withFileTypes: true })
      .filter((dirent) => dirent.isFile() && accept(dirent.name)).length;

  // Sources: read from ledger
  const ledger = readSourceRecords(wikiRoot);
  const sources =
    ledger.success && Array.isArray(ledger.records)
      ? ledger.records.map((record) => ({
          source_id: record.source_id,
          title: record.title,
          kind: record.kind,
          imported_at: record.imported_at,
          stored_path: record.stored_path,
          redaction: record.redaction,
        }))
      : [];

  // NotebookLM: scan imports directory
  const notebooklm = [];
  const importsDir = path.join(paths.rawNotebooklm, "imports");
  if (fs.existsSync(importsDir)) {
    for (const runId of childDirNames(importsDir)) {
      const runDir = path.join(importsDir, runId);
      const manifest = readJsonFile(path.join(runDir, "manifest.json"));

      let fileCount;
      if (manifest) {
        fileCount = Array.isArray(manifest.files) ? manifest.files.length : 0;
      } else {
        // Fallback: count files directly
        fileCount = countFiles(runDir, () => true);
      }

      notebooklm.push({
        run_id: manifest?.run_id || runId,
        original_query: manifest?.original_query || "",
        created_at: manifest?.created_at || "",
        file_count: fileCount,
        raw_path: toWikiRelative(runDir),
      });
    }
  }

  // Packs: scan packs directory for {pack_id}/{version}/ structure
  const packs = [];
  if (fs.existsSync(paths.rawPacks)) {
    for (const packId of childDirNames(paths.rawPacks)) {
      const packDir = path.join(paths.rawPacks, packId);

      for (const version of childDirNames(packDir)) {
        const versionDir = path.join(packDir, version);
        const manifest = readJsonFile(path.join(versionDir, "manifest.json"));

        let entryCount;
        if (manifest) {
          entryCount = Array.isArray(manifest.entries) ? manifest.entries.length : 0;
        } else {
          // Fallback: count .md files in version dir
          entryCount = countFiles(versionDir, (name) => name.endsWith(".md"));
        }

        packs.push({
          pack_id: manifest?.id || packId,
          version: manifest?.version || version,
          title: manifest?.title || packId,
          entry_count: entryCount,
          raw_path: toWikiRelative(versionDir),
        });
      }
    }
  }

  // Sort each category
  sources.sort((a, b) => compareStrings(a.source_id, b.source_id));
  notebooklm.sort((a, b) => compareStrings(a.run_id, b.run_id));
  packs.sort(
    (a, b) => compareStrings(a.pack_id, b.pack_id) || compareStrings(a.version, b.version)
  );

  return { sources, notebooklm, packs };
}
|
|
159
|
+
|
|
160
|
+
/**
 * Get summary statistics for the raw/ folder.
 *
 * Per category the summary reports an item count, the total size in bytes of
 * the files found, and the latest timestamp seen:
 * - sources: one item per ledger record; size of each `stored_path`;
 *   timestamp from `imported_at`.
 * - notebooklm: one item per run directory under raw/notebooklm/imports/;
 *   size of the files directly inside each run directory; timestamp from the
 *   manifest's `created_at`.
 * - packs: one item per raw/packs/{pack_id}/{version}/ directory; size of all
 *   files below it (recursive); timestamp from the manifest's `updated_at`
 *   or `created_at`.
 *
 * This function only reads from the filesystem. Paths that cannot be statted
 * are skipped rather than aborting the summary.
 *
 * @param {string} wikiRoot - Wiki root directory.
 * @returns {{wiki_root: string, raw_root: string, categories: object,
 *   total_files: number, total_bytes: number}} Summary. Note that
 *   `total_files` is the sum of the per-category item counts (records, runs,
 *   pack versions), not a count of individual files.
 */
export function getRawSummary(wikiRoot) {
  const paths = getWikiPaths(wikiRoot);

  const categories = {
    sources: { count: 0, total_bytes: 0, last_updated: null },
    notebooklm: { count: 0, total_bytes: 0, last_updated: null },
    packs: { count: 0, total_bytes: 0, last_updated: null },
  };

  // Keep the greatest timestamp per category. Values are compared as-is, so
  // this assumes consistently formatted (e.g. ISO 8601) strings — TODO confirm.
  const noteTimestamp = (category, timestamp) => {
    if (!timestamp) return;
    if (!category.last_updated || timestamp > category.last_updated) {
      category.last_updated = timestamp;
    }
  };

  // Sources
  const ledgerResult = readSourceRecords(wikiRoot);
  if (ledgerResult.success && Array.isArray(ledgerResult.records)) {
    categories.sources.count = ledgerResult.records.length;
    for (const record of ledgerResult.records) {
      if (record.stored_path) {
        const st = statOrNull(path.join(wikiRoot, record.stored_path));
        if (st) {
          categories.sources.total_bytes += st.size;
        }
      }
      noteTimestamp(categories.sources, record.imported_at);
    }
  }

  // NotebookLM
  const notebooklmImportsDir = path.join(paths.rawNotebooklm, "imports");
  if (fs.existsSync(notebooklmImportsDir)) {
    const runDirs = fs
      .readdirSync(notebooklmImportsDir, { withFileTypes: true })
      .filter((d) => d.isDirectory());
    categories.notebooklm.count = runDirs.length;

    for (const runDir of runDirs) {
      const runPath = path.join(notebooklmImportsDir, runDir.name);
      const files = fs.readdirSync(runPath, { withFileTypes: true }).filter((e) => e.isFile());
      for (const file of files) {
        const st = statOrNull(path.join(runPath, file.name));
        if (st) {
          categories.notebooklm.total_bytes += st.size;
        }
      }
      // Check manifest for created_at
      const manifest = readJsonFile(path.join(runPath, "manifest.json"));
      noteTimestamp(categories.notebooklm, manifest?.created_at);
    }
  }

  // Packs: {pack_id}/{version}/ structure
  if (fs.existsSync(paths.rawPacks)) {
    const packIdDirs = fs
      .readdirSync(paths.rawPacks, { withFileTypes: true })
      .filter((d) => d.isDirectory());

    for (const packIdDir of packIdDirs) {
      const packIdPath = path.join(paths.rawPacks, packIdDir.name);
      const versionDirs = fs
        .readdirSync(packIdPath, { withFileTypes: true })
        .filter((d) => d.isDirectory());

      categories.packs.count += versionDirs.length;

      for (const versionDir of versionDirs) {
        const versionPath = path.join(packIdPath, versionDir.name);
        // Use string paths (not Dirent) when recursive: true, because Dirent.name is just basename
        const relPaths = fs.readdirSync(versionPath, { recursive: true });
        for (const relPath of relPaths) {
          // One guarded stat per entry: an unguarded statSync here used to
          // throw on a dangling symlink or a file removed mid-scan.
          const st = statOrNull(path.join(versionPath, relPath));
          if (st && st.isFile()) {
            categories.packs.total_bytes += st.size;
          }
        }
        // Check manifest for updated_at or created_at
        const manifest = readJsonFile(path.join(versionPath, "manifest.json"));
        noteTimestamp(categories.packs, manifest?.updated_at || manifest?.created_at || null);
      }
    }
  }

  const totalFiles =
    categories.sources.count + categories.notebooklm.count + categories.packs.count;
  const totalBytes =
    categories.sources.total_bytes +
    categories.notebooklm.total_bytes +
    categories.packs.total_bytes;

  return {
    wiki_root: wikiRoot,
    raw_root: "raw/",
    categories,
    total_files: totalFiles,
    total_bytes: totalBytes,
  };
}
|
|
267
|
+
|
|
268
|
+
/**
 * Classify what exists at a path.
 * Does NOT modify the filesystem.
 *
 * @param {string} filePath - Path to inspect.
 * @returns {"directory"|"file"|"other"|"missing"} "missing" when the path
 *   cannot be statted; "other" when it is neither a directory nor a regular
 *   file.
 */
function pathType(filePath) {
  let stats;
  try {
    stats = fs.statSync(filePath);
  } catch {
    return "missing";
  }

  if (stats.isDirectory()) return "directory";
  return stats.isFile() ? "file" : "other";
}
|
|
282
|
+
|
|
283
|
+
/**
 * Validate the raw/ directory structure.
 * Does NOT modify the filesystem.
 *
 * Checks that raw/ exists as a directory (required), reports on the optional
 * sources/, notebooklm/ and packs/ subdirectories, and inspects the
 * raw/packs/{pack_id}/{version}/ layout: stray non-directories, pack
 * directories without a version subdirectory, and versions without a
 * manifest.json file each produce a warning.
 *
 * @param {string} wikiRoot - Wiki root directory.
 * @returns {{valid: boolean, wiki_root: string, checks: object[],
 *   warnings: number}} `valid` is true only when every required path exists
 *   and no warning was raised.
 */
export function validateRawStructure(wikiRoot) {
  const paths = getWikiPaths(wikiRoot);
  const checks = [];
  let warnings = 0;

  // Record a warning entry and bump the warning counter.
  const addWarning = (checkedPath, exists, message) => {
    checks.push({
      path: checkedPath,
      exists,
      required: false,
      message,
      warning: true,
    });
    warnings += 1;
  };

  // Record the state of one expected directory; returns whether it exists as
  // a directory. A missing path is not counted as a warning here — required
  // paths are enforced by the final `requiredOk` check instead.
  const checkPath = (label, filePath, required) => {
    const type = pathType(filePath);
    const exists = type === "directory";
    let message = "OK";

    switch (type) {
      case "directory":
        break;
      case "file":
        message = `ERROR: ${label} is a file, expected directory`;
        warnings += 1;
        break;
      case "missing":
        message = required ? `Missing ${label}` : `No ${label} yet`;
        break;
      default:
        message = `ERROR: ${label} has unexpected type`;
        warnings += 1;
    }

    checks.push({ path: label, exists, required, message, type });
    return exists;
  };

  // Top-level layout: raw/ is required, its subsystems are optional.
  checkPath("raw/", paths.rawRoot, true);
  checkPath("raw/sources/", paths.rawSources, false);
  checkPath("raw/notebooklm/", paths.rawNotebooklm, false);
  const packsExists = checkPath("raw/packs/", paths.rawPacks, false);

  // Validate pack substructure if packs exist as directory
  if (packsExists) {
    const packChildren = fs.readdirSync(paths.rawPacks, { withFileTypes: true });

    // Check for non-directory children in raw/packs/
    for (const child of packChildren) {
      if (child.isDirectory()) continue;
      const found = child.isFile() ? "file" : "other";
      addWarning(`raw/packs/${child.name}`, true, `WARNING: Expected pack directory, found ${found}`);
    }

    for (const packIdDir of packChildren.filter((child) => child.isDirectory())) {
      const packLabel = `raw/packs/${packIdDir.name}/`;
      const packIdPath = path.join(paths.rawPacks, packIdDir.name);
      const versionDirs = fs
        .readdirSync(packIdPath, { withFileTypes: true })
        .filter((child) => child.isDirectory());

      if (versionDirs.length === 0) {
        addWarning(packLabel, true, "WARNING: No version subdirectory");
        continue;
      }

      for (const versionDir of versionDirs) {
        const manifestPath = path.join(packIdPath, versionDir.name, "manifest.json");
        const manifestType = pathType(manifestPath);
        if (manifestType === "file") continue;

        addWarning(
          `${packLabel}${versionDir.name}/manifest.json`,
          false,
          manifestType === "directory"
            ? "WARNING: manifest.json is a directory"
            : "WARNING: Missing manifest.json"
        );
      }

      // Informational summary for the pack, recorded after its version checks.
      checks.push({
        path: packLabel,
        exists: true,
        required: false,
        message: `${versionDirs.length} version subdirectory(s)`,
      });
    }
  }

  const requiredOk = checks.every((check) => !check.required || check.exists);

  return {
    valid: requiredOk && warnings === 0,
    wiki_root: wikiRoot,
    checks,
    warnings,
  };
}
|
|
399
|
+
|
|
400
|
+
export { ensureRawStructure };
|