@nusoft/nuos-build-catalogue 0.10.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.d.ts +13 -0
- package/dist/cli.js +472 -0
- package/dist/commands/create.d.ts +70 -0
- package/dist/commands/create.js +341 -0
- package/dist/commands/format.d.ts +19 -0
- package/dist/commands/format.js +89 -0
- package/dist/commands/handlers.d.ts +35 -0
- package/dist/commands/handlers.js +132 -0
- package/dist/commands/init.d.ts +41 -0
- package/dist/commands/init.js +289 -0
- package/dist/commands/prompt.d.ts +44 -0
- package/dist/commands/prompt.js +100 -0
- package/dist/commands/write.d.ts +39 -0
- package/dist/commands/write.js +247 -0
- package/dist/embedder/ollama.d.ts +54 -0
- package/dist/embedder/ollama.js +164 -0
- package/dist/embedder/openai.d.ts +21 -0
- package/dist/embedder/openai.js +56 -0
- package/dist/embedder/select.d.ts +9 -0
- package/dist/embedder/select.js +27 -0
- package/dist/embedder/stub.d.ts +15 -0
- package/dist/embedder/stub.js +40 -0
- package/dist/embedder/types.d.ts +21 -0
- package/dist/embedder/types.js +6 -0
- package/dist/embedder/vertex.d.ts +41 -0
- package/dist/embedder/vertex.js +94 -0
- package/dist/indexer/chunk.d.ts +20 -0
- package/dist/indexer/chunk.js +196 -0
- package/dist/indexer/crawl.d.ts +20 -0
- package/dist/indexer/crawl.js +66 -0
- package/dist/indexer/metadata.d.ts +21 -0
- package/dist/indexer/metadata.js +126 -0
- package/dist/indexer/upsert.d.ts +26 -0
- package/dist/indexer/upsert.js +152 -0
- package/dist/migrate/parsers.d.ts +17 -0
- package/dist/migrate/parsers.js +123 -0
- package/dist/migrate/run.d.ts +22 -0
- package/dist/migrate/run.js +142 -0
- package/dist/migrate/store.d.ts +20 -0
- package/dist/migrate/store.js +52 -0
- package/dist/migrate/types.d.ts +57 -0
- package/dist/migrate/types.js +13 -0
- package/dist/regenerate/check.d.ts +11 -0
- package/dist/regenerate/check.js +97 -0
- package/dist/regenerate/diff.d.ts +18 -0
- package/dist/regenerate/diff.js +38 -0
- package/dist/regenerate/types.d.ts +52 -0
- package/dist/regenerate/types.js +14 -0
- package/dist/runtime/ac-parse.d.ts +63 -0
- package/dist/runtime/ac-parse.js +196 -0
- package/dist/runtime/markdown-edit.d.ts +53 -0
- package/dist/runtime/markdown-edit.js +101 -0
- package/dist/runtime/markdown-render.d.ts +27 -0
- package/dist/runtime/markdown-render.js +209 -0
- package/dist/runtime/mis-adapter.d.ts +35 -0
- package/dist/runtime/mis-adapter.js +364 -0
- package/dist/runtime/runtime.d.ts +20 -0
- package/dist/runtime/runtime.js +39 -0
- package/dist/search/format.d.ts +6 -0
- package/dist/search/format.js +23 -0
- package/dist/search/query.d.ts +29 -0
- package/dist/search/query.js +71 -0
- package/dist/store/open.d.ts +14 -0
- package/dist/store/open.js +16 -0
- package/package.json +3 -2
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Index orchestrator — crawl + chunk + extract metadata + embed + upsert.
|
|
3
|
+
*
|
|
4
|
+
* Hash-based incremental: a separate `.nuos-catalogue/hashes.json` tracks
|
|
5
|
+
* the last-indexed content hash per file. Unchanged files are skipped.
|
|
6
|
+
* Deleted files are removed from the index.
|
|
7
|
+
*/
|
|
8
|
+
import { readFile, mkdir, writeFile } from 'node:fs/promises';
|
|
9
|
+
import { existsSync } from 'node:fs';
|
|
10
|
+
import { createHash } from 'node:crypto';
|
|
11
|
+
import path from 'node:path';
|
|
12
|
+
import { crawl } from './crawl.js';
|
|
13
|
+
import { chunkMarkdown } from './chunk.js';
|
|
14
|
+
import { extractMetadata } from './metadata.js';
|
|
15
|
+
import { TENANT } from '../store/open.js';
|
|
16
|
+
/**
 * Index a catalogue: crawl it, chunk + embed every new or changed file,
 * upsert the resulting chunks into the store, and prune chunks that no
 * longer exist.
 *
 * Incremental behaviour is driven by the hash table loaded from
 * `config.hashFilePath` (relative path → `{ sha, chunkIds }`):
 *   - a file whose content hash matches the stored one is skipped unless
 *     `config.force` is set;
 *   - a changed file has chunk ids that disappeared deleted from the store;
 *   - a file that is in the table but no longer crawled has all of its
 *     chunk ids deleted.
 *
 * With `config.dryRun` set, nothing is embedded, upserted, deleted, or
 * written back to the hash file; only the counts are computed.
 *
 * @param config Reads `catalogueRoot`, `hashFilePath`, `embedder.embed(texts)`,
 *   `store.upsert(...)` / `store.delete(...)`, and the optional flags
 *   `force` and `dryRun`.
 * @returns `{ indexed, updated, deleted, unchanged, chunks, durationMs }`.
 * @throws Error when the embedder returns a different number of embeddings
 *   than the number of chunk texts it was given.
 */
export async function runIndex(config) {
    const startedAt = Date.now();
    const files = await crawl({ catalogueRoot: config.catalogueRoot });
    const previous = await loadHashes(config.hashFilePath);
    const next = {};
    let indexed = 0;
    let updated = 0;
    let unchanged = 0;
    let totalChunks = 0;
    // Collect chunks for files that need (re)indexing
    const pending = [];
    for (const file of files) {
        const content = await readFile(file.absolutePath, 'utf8');
        const sha = sha256(content);
        const prev = previous[file.relativePath];
        if (!config.force && prev && prev.sha === sha) {
            unchanged += 1;
            next[file.relativePath] = prev;
            continue;
        }
        const meta = await extractMetadata(file.absolutePath, file.relativePath, content);
        const chunks = chunkMarkdown(file.relativePath, content);
        pending.push({ file, chunks, meta, sha });
        totalChunks += chunks.length;
        if (prev)
            updated += 1;
        else
            indexed += 1;
    }
    // Embed and upsert pending chunks in batches
    if (pending.length > 0 && !config.dryRun) {
        const allUpserts = [];
        for (const item of pending) {
            const texts = item.chunks.map((c) => c.text);
            // A file can legitimately produce zero chunks; don't hand the
            // embedder an empty batch.
            const embeddings = texts.length > 0 ? await config.embedder.embed(texts) : [];
            // Fail loudly rather than upserting chunks with `embedding: undefined`.
            if (embeddings?.length !== texts.length) {
                throw new Error(`runIndex: embedder returned ${embeddings?.length ?? 'no'} embeddings for ${texts.length} chunks of ${item.file.relativePath}`);
            }
            item.chunks.forEach((c, i) => {
                allUpserts.push({
                    chunkId: c.id,
                    text: c.text,
                    embedding: embeddings[i],
                    fileMeta: item.meta,
                    headings: c.headings,
                    startLine: c.startLine,
                    endLine: c.endLine,
                });
            });
            // Remove any old chunk ids that are no longer present
            const oldIds = new Set(previous[item.file.relativePath]?.chunkIds ?? []);
            const currentIds = new Set(item.chunks.map((c) => c.id));
            for (const stale of oldIds) {
                if (!currentIds.has(stale)) {
                    await safeDelete(config.store, stale);
                }
            }
            next[item.file.relativePath] = {
                sha: item.sha,
                chunkIds: item.chunks.map((c) => c.id),
            };
        }
        for (const u of allUpserts) {
            // Indexed as document_chunk because that's what these are: chunks
            // of standalone markdown documents, not NuWiki articles with
            // section/citation/graph structure. Search uses retrieveContext()
            // (not searchKnowledge, which is the NuWiki four-layer entry point).
            await config.store.upsert({
                id: u.chunkId,
                kind: 'document_chunk',
                embedding: u.embedding,
                text: u.text,
                tenant: TENANT,
                metadata: {
                    path: u.fileMeta.path,
                    file_kind: u.fileMeta.kind,
                    id_in_kind: u.fileMeta.idInKind ?? '',
                    status: u.fileMeta.status ?? '',
                    date: u.fileMeta.date ?? '',
                    headings: u.headings.join(' / '),
                    start_line: u.startLine,
                    end_line: u.endLine,
                    cross_refs: u.fileMeta.crossRefs.join(','),
                },
            });
        }
    }
    // Detect and remove files that vanished. A Set keeps this linear in the
    // number of files instead of scanning `files` once per previous entry.
    const crawledPaths = new Set(files.map((f) => f.relativePath));
    let deleted = 0;
    for (const oldRelPath of Object.keys(previous)) {
        if (crawledPaths.has(oldRelPath))
            continue;
        deleted += 1;
        if (!config.dryRun) {
            // Tolerate a hash entry without `chunkIds` (hand-edited or older file).
            for (const id of previous[oldRelPath]?.chunkIds ?? []) {
                await safeDelete(config.store, id);
            }
        }
    }
    if (!config.dryRun) {
        await saveHashes(config.hashFilePath, next);
    }
    return {
        indexed,
        updated,
        deleted,
        unchanged,
        chunks: totalChunks,
        durationMs: Date.now() - startedAt,
    };
}
|
|
124
|
+
/**
 * Hex-encoded SHA-256 digest of `content`.
 */
function sha256(content) {
    const hasher = createHash('sha256');
    hasher.update(content);
    return hasher.digest('hex');
}
|
|
127
|
+
/**
 * Load the hash table written by a previous index run.
 *
 * Best-effort by design: a missing, unreadable, or unparsable file yields
 * an empty table. The same applies to valid JSON that is not a plain
 * object (`null`, an array, a number, a string); returning such a value
 * would make the caller crash when it looks paths up in the table.
 *
 * @param filePath Location of the hash file.
 * @returns The parsed table, or `{}` when no usable table exists.
 */
async function loadHashes(filePath) {
    if (!existsSync(filePath))
        return {};
    try {
        const parsed = JSON.parse(await readFile(filePath, 'utf8'));
        const isTable = parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
        return isTable ? parsed : {};
    }
    catch {
        return {};
    }
}
|
|
138
|
+
/**
 * Write the hash table to `filePath` as pretty-printed JSON (2-space
 * indent, trailing newline), creating parent directories as needed.
 */
async function saveHashes(filePath, table) {
    const parentDir = path.dirname(filePath);
    await mkdir(parentDir, { recursive: true });
    const serialised = `${JSON.stringify(table, null, 2)}\n`;
    await writeFile(filePath, serialised, 'utf8');
}
|
|
142
|
+
/**
 * Best-effort removal of a single chunk id from the store under the
 * module's tenant. Any failure is swallowed on purpose so a delete problem
 * never aborts an index run.
 */
async function safeDelete(store, id) {
    try {
        const request = { ids: [id], tenant: TENANT };
        await store.delete(request);
    }
    catch {
        // NuVector v0.1.0 delete API shape may vary; failure here is non-fatal
    }
}
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-register markdown parsers.
|
|
3
|
+
*
|
|
4
|
+
* Each parser tolerates the pre-D046 shape — fields that may or may not
|
|
5
|
+
* be present default to null. The shared shape extracted is title +
|
|
6
|
+
* status; the rest of the file body is preserved verbatim in
|
|
7
|
+
* `rawMarkdown`.
|
|
8
|
+
*/
|
|
9
|
+
import type { MigratedRecord, Register } from './types.js';
|
|
10
|
+
/**
 * Everything `parseFile` needs to turn one artefact file into a record.
 */
export interface ParseFileInput {
    /** Register the file belongs to; selects the filename pattern and handle format. */
    register: Register;
    /**
     * Path relative to the catalogue root. Its basename is matched against
     * the register's filename pattern, and the path is stored as the
     * record's `sourcePath`.
     */
    relativePath: string;
    /** Path on disk; used to read the file's modification time. */
    absolutePath: string;
    /** Full markdown text of the file; title and status are extracted from it. */
    content: string;
}
|
|
16
|
+
/**
 * Parse one artefact file into a `MigratedRecord`.
 *
 * Rejects with an `Error` when the filename looks like an index/template
 * file, does not match the register's filename pattern, or carries a
 * number below 1.
 */
export declare function parseFile(input: ParseFileInput): Promise<MigratedRecord>;
|
|
17
|
+
/**
 * Map a catalogue-relative path to its register based on the top-level
 * directory (`work-units`, `decisions`, `open-questions`, `personas`).
 * Returns `null` for any other top-level directory.
 */
export declare function registerForRelativePath(relativePath: string): Register | null;
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-register markdown parsers.
|
|
3
|
+
*
|
|
4
|
+
* Each parser tolerates the pre-D046 shape — fields that may or may not
|
|
5
|
+
* be present default to null. The shared shape extracted is title +
|
|
6
|
+
* status; the rest of the file body is preserved verbatim in
|
|
7
|
+
* `rawMarkdown`.
|
|
8
|
+
*/
|
|
9
|
+
import { stat } from 'node:fs/promises';
|
|
10
|
+
import path from 'node:path';
|
|
11
|
+
/**
 * Filename pattern per register. Every pattern exposes the named groups
 * `num` (digits) and `slug` (everything between the first '-' and '.md').
 *
 * Work-unit filenames additionally expose `suffix`: an optional single
 * lowercase letter marking sub-WUs that share a parent number
 * (e.g. 030g-..., 072a-..., 072b-...). The integer `number` is the digit
 * portion only; the `handle` keeps the suffix verbatim so wu-030 and
 * wu-030g remain distinct records.
 */
const FILENAME_PATTERNS = {
    // 1-4 digits, optional sub-WU letter: 111-foo.md, 030g-foo.md
    work_unit: /^(?<num>\d{1,4})(?<suffix>[a-z]?)-(?<slug>.+)\.md$/,
    // 'D' + at least three digits: D046-foo.md
    decision: /^D(?<num>\d{3,})-(?<slug>.+)\.md$/,
    // 'Q' + at least three digits: Q012-foo.md
    open_question: /^Q(?<num>\d{3,})-(?<slug>.+)\.md$/,
    // 'P' + at least three digits: P003-foo.md
    persona: /^P(?<num>\d{3,})-(?<slug>.+)\.md$/,
};
|
|
21
|
+
/**
 * Parse one catalogue artefact file into a migrated record.
 *
 * The handle and number come from the filename (matched against the
 * register's entry in `FILENAME_PATTERNS`). The title is the first H1 of
 * the content, falling back to a title derived from the filename slug.
 * The status is whatever `extractStatus` finds, or null. The whole file
 * body is kept verbatim in `rawMarkdown`.
 *
 * @param input `{ absolutePath, relativePath, content, register }`.
 * @returns The record, with `fileModifiedAt` taken from the file's mtime
 *   (or the current time when the file cannot be stat'ed) and `migratedAt`
 *   set to the current time.
 * @throws Error when the filename looks like an index/template file, when
 *   `register` has no filename pattern, when the filename does not match
 *   the pattern, or when the numeric prefix is below 1.
 */
export async function parseFile(input) {
    const { absolutePath, relativePath, content, register } = input;
    const filename = path.basename(relativePath);
    // Skip _index.md and template files in the parent walker — but if they
    // ever reach here, fail loudly so we don't silently migrate noise.
    // The template check ignores case so it agrees with the walker's filter.
    if (filename.startsWith('_') || filename.toLowerCase().includes('template')) {
        throw new Error(`parseFile: ${relativePath} looks like a non-artefact file (index/template); the walker should have skipped it`);
    }
    // Own-property lookup so an unexpected register fails with a clear
    // message instead of a TypeError on `pattern.exec`.
    const pattern = Object.prototype.hasOwnProperty.call(FILENAME_PATTERNS, register)
        ? FILENAME_PATTERNS[register]
        : undefined;
    if (!pattern) {
        throw new Error(`parseFile: ${relativePath} has unknown register ${String(register)}`);
    }
    const match = pattern.exec(filename);
    if (!match || !match.groups) {
        throw new Error(`parseFile: ${relativePath} does not match the ${register} filename pattern ${pattern}`);
    }
    const number = parseInt(match.groups.num, 10);
    if (!Number.isInteger(number) || number < 1) {
        throw new Error(`parseFile: ${relativePath} produced invalid number ${match.groups.num}`);
    }
    const slug = match.groups.slug;
    // Only the work_unit pattern defines `suffix`; other registers get ''.
    const suffix = match.groups.suffix ?? '';
    const handle = formatHandle(register, number, suffix);
    const title = extractTitle(content) ?? slugToTitle(slug);
    const status = extractStatus(content);
    let fileModifiedAt;
    try {
        const s = await stat(absolutePath);
        fileModifiedAt = s.mtime.toISOString();
    }
    catch {
        // The mtime is informational; fall back to "now" rather than failing.
        fileModifiedAt = new Date().toISOString();
    }
    const record = {
        handle,
        number,
        register,
        title,
        status,
        slug,
        sourcePath: relativePath,
        rawMarkdown: content,
        fileModifiedAt,
        migratedAt: new Date().toISOString(),
        migratedFrom: 'markdown',
    };
    return record;
}
|
|
66
|
+
/**
 * Build the handle for a record: the register prefix followed by the
 * number zero-padded to at least three digits. Only work units append
 * `suffix` (the sub-WU letter). Returns undefined for an unrecognised
 * register.
 */
function formatHandle(register, n, suffix = '') {
    const zeroPadded = () => String(n).padStart(3, '0');
    if (register === 'work_unit') {
        return `wu-${zeroPadded()}${suffix}`;
    }
    if (register === 'decision') {
        return `D${zeroPadded()}`;
    }
    if (register === 'open_question') {
        return `Q${zeroPadded()}`;
    }
    if (register === 'persona') {
        return `P${zeroPadded()}`;
    }
    return undefined;
}
|
|
78
|
+
/**
 * Return the text of the first H1 (`# Title`) line in `content`, trimmed,
 * or null when there is none.
 *
 * Only spaces/tabs are accepted between the `#` and the title, and the
 * title must start with a non-whitespace character. A generic `\s` here
 * would also match line breaks, so an empty `#` heading would wrongly
 * capture the following line as the title.
 *
 * @param content Markdown text.
 * @returns The heading text, or null.
 */
function extractTitle(content) {
    // First H1 wins. Leading whitespace and trailing whitespace trimmed.
    const m = /^#[ \t]+(\S.*?)[ \t]*$/m.exec(content);
    if (!m)
        return null;
    return m[1].trim();
}
|
|
85
|
+
/**
 * Extract the raw status string from a markdown document.
 *
 * Recognises both "**Status:** ..." (decisions, sessions) and
 * "| Status | ... |" pipe-table rows (some WUs use a metadata table).
 * The bold form takes precedence when both are present. The value is
 * returned as written (trimmed) for downstream interpretation; it is not
 * normalised to a typed enum here because the pre-D046 shape includes
 * states like "🟢 ready" with emoji prefixes.
 *
 * Whitespace inside the patterns is limited to spaces and tabs, and the
 * value must start with a non-whitespace character. A generic `\s` would
 * also match line breaks, so a bare "**Status:**" line or a header-only
 * table row would wrongly capture text from the following line.
 *
 * @param content Markdown text.
 * @returns The status text, or null when no non-empty status is found.
 */
function extractStatus(content) {
    const bold = /^\*\*Status:\*\*[ \t]*(\S.*?)[ \t]*$/m.exec(content);
    if (bold)
        return bold[1].trim();
    const table = /^\|[ \t]*Status[ \t]*\|[ \t]*(\S.*?)[ \t]*\|[ \t]*$/m.exec(content);
    if (table)
        return table[1].trim();
    return null;
}
|
|
99
|
+
/**
 * Turn a kebab-case slug into a display title: split on '-', upper-case
 * the first character of each non-empty part, join with single spaces.
 */
function slugToTitle(slug) {
    const words = [];
    for (const segment of slug.split('-')) {
        if (segment.length === 0) {
            words.push(segment);
        }
        else {
            words.push(segment.charAt(0).toUpperCase() + segment.slice(1));
        }
    }
    return words.join(' ');
}
|
|
105
|
+
/**
 * Infer the register of a catalogue-relative path from its top-level
 * directory. Backslashes are treated as path separators. Subdirectories
 * such as work-units/done/ and decisions/superseded/ map to the parent
 * register. Returns null when the top-level directory is not a register
 * directory.
 */
export function registerForRelativePath(relativePath) {
    const [topLevelDir] = relativePath.replace(/\\/g, '/').split('/');
    if (topLevelDir === 'work-units') {
        return 'work_unit';
    }
    if (topLevelDir === 'decisions') {
        return 'decision';
    }
    if (topLevelDir === 'open-questions') {
        return 'open_question';
    }
    if (topLevelDir === 'personas') {
        return 'persona';
    }
    return null;
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Migration runner.
|
|
3
|
+
*
|
|
4
|
+
* Walks the four register directories under the catalogue root, parses
|
|
5
|
+
* each artefact file, and writes a `MigratedRecord` to the JSON-backed
|
|
6
|
+
* workflow store. Idempotent: re-running on a clean catalogue produces
|
|
7
|
+
* zero new records on the second pass.
|
|
8
|
+
*
|
|
9
|
+
* Per the WU 111 spec, this runner does NOT use the NuFlow runtime
|
|
10
|
+
* lifecycle. Migration is bulk back-fill, not a series of build-
|
|
11
|
+
* maintainer decisions; using the runtime would force every legacy
|
|
12
|
+
* artefact through propose → confirm → approve → commit, which is
|
|
13
|
+
* neither honest nor scalable.
|
|
14
|
+
*/
|
|
15
|
+
import type { MigrationReport } from './types.js';
|
|
16
|
+
import type { WorkflowStore } from './store.js';
|
|
17
|
+
/**
 * Options for `runMigrate`.
 */
export interface RunMigrateConfig {
    /**
     * Directory containing the register directories (`work-units`,
     * `decisions`, `open-questions`, `personas`).
     */
    catalogueRoot: string;
    /** Workflow store that receives the migrated records. */
    store: WorkflowStore;
    /**
     * When true, files are scanned and counted but the store is neither
     * written to nor flushed.
     */
    dryRun?: boolean;
}
|
|
22
|
+
/**
 * Walk the register directories under `config.catalogueRoot`, parse every
 * artefact file, and add records for handles the store does not hold yet.
 * Resolves to a report with overall and per-register counts plus any
 * handle conflicts found.
 */
export declare function runMigrate(config: RunMigrateConfig): Promise<MigrationReport>;
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Migration runner.
|
|
3
|
+
*
|
|
4
|
+
* Walks the four register directories under the catalogue root, parses
|
|
5
|
+
* each artefact file, and writes a `MigratedRecord` to the JSON-backed
|
|
6
|
+
* workflow store. Idempotent: re-running on a clean catalogue produces
|
|
7
|
+
* zero new records on the second pass.
|
|
8
|
+
*
|
|
9
|
+
* Per the WU 111 spec, this runner does NOT use the NuFlow runtime
|
|
10
|
+
* lifecycle. Migration is bulk back-fill, not a series of build-
|
|
11
|
+
* maintainer decisions; using the runtime would force every legacy
|
|
12
|
+
* artefact through propose → confirm → approve → commit, which is
|
|
13
|
+
* neither honest nor scalable.
|
|
14
|
+
*/
|
|
15
|
+
import { readdir, readFile } from 'node:fs/promises';
|
|
16
|
+
import path from 'node:path';
|
|
17
|
+
import { parseFile, registerForRelativePath } from './parsers.js';
|
|
18
|
+
// Register key → name of its directory under the catalogue root.
const REGISTER_TO_DIRNAME = {
    work_unit: 'work-units',
    decision: 'decisions',
    open_question: 'open-questions',
    persona: 'personas',
};
// Registers in the order the migration walks them. Despite the name these
// are register keys, not directory names; the directory comes from
// REGISTER_TO_DIRNAME.
const REGISTER_DIRS = Object.keys(REGISTER_TO_DIRNAME);
|
|
25
|
+
/**
 * Migrate every artefact file under the four register directories into
 * the workflow store.
 *
 * For each file: parse it, then skip it when its handle is already taken,
 * either by a record in the store or by a file accepted earlier in this
 * pass. When the holder of the handle came from a different source path,
 * the file is also reported in `conflicts`. Otherwise the record is put
 * into the store. The store is flushed once at the end.
 *
 * With `config.dryRun` set, the store is never written or flushed.
 * Handles accepted during the pass are tracked locally so the dry-run
 * report (counts and conflicts) matches what a real run would produce.
 *
 * @param config `{ catalogueRoot, store, dryRun? }`.
 * @returns `{ scanned, migrated, skipped, conflicts, byRegister, durationMs }`.
 * @throws Error when a collected path maps to a different register than
 *   the one being walked; errors from `parseFile` propagate unchanged.
 */
export async function runMigrate(config) {
    const startedAt = Date.now();
    const byRegister = {
        work_unit: { scanned: 0, migrated: 0, skipped: 0 },
        decision: { scanned: 0, migrated: 0, skipped: 0 },
        open_question: { scanned: 0, migrated: 0, skipped: 0 },
        persona: { scanned: 0, migrated: 0, skipped: 0 },
    };
    let scanned = 0;
    let migrated = 0;
    let skipped = 0;
    const conflicts = [];
    // handle → record accepted during this pass. In a real run the store
    // already answers `has()` for these; in a dry run nothing is put, so
    // this map is the only way to notice two files claiming one handle.
    const acceptedThisPass = new Map();
    for (const register of REGISTER_DIRS) {
        const dirName = REGISTER_TO_DIRNAME[register];
        const baseDir = path.join(config.catalogueRoot, dirName);
        const files = await collectArtefactFiles(baseDir, dirName);
        for (const relPath of files) {
            const inferredRegister = registerForRelativePath(relPath);
            if (inferredRegister !== register) {
                // Defensive: shouldn't happen given the walker scoping above.
                throw new Error(`runMigrate: register mismatch for ${relPath} (expected ${register}, got ${inferredRegister})`);
            }
            const absolutePath = path.join(config.catalogueRoot, relPath);
            const content = await readFile(absolutePath, 'utf8');
            const record = await parseFile({
                absolutePath,
                relativePath: relPath,
                content,
                register,
            });
            scanned += 1;
            byRegister[register].scanned += 1;
            const inStore = config.store.has(record.handle);
            if (inStore || acceptedThisPass.has(record.handle)) {
                const existing = inStore
                    ? config.store.get(record.handle)
                    : acceptedThisPass.get(record.handle);
                if (existing && existing.sourcePath !== record.sourcePath) {
                    // A different file claimed this handle. This is a real
                    // catalogue-discipline issue (e.g. two WUs sharing the same
                    // number prefix); surface it rather than silently dropping.
                    conflicts.push({
                        handle: record.handle,
                        winnerSourcePath: existing.sourcePath,
                        loserSourcePath: record.sourcePath,
                    });
                }
                skipped += 1;
                byRegister[register].skipped += 1;
                continue;
            }
            acceptedThisPass.set(record.handle, record);
            if (!config.dryRun) {
                config.store.put(record);
            }
            migrated += 1;
            byRegister[register].migrated += 1;
        }
    }
    if (!config.dryRun) {
        await config.store.flush();
    }
    return {
        scanned,
        migrated,
        skipped,
        conflicts,
        byRegister,
        durationMs: Date.now() - startedAt,
    };
}
|
|
92
|
+
/**
 * Collect markdown artefact files under a register directory, including
 * one level of subdirectory (e.g. work-units/done, decisions/superseded).
 * Skips index files (`_index.md`), templates (filename includes
 * 'template'), and any non-.md files.
 *
 * @param baseDir Directory to read (the register directory on disk).
 * @param registerDirName Name used as the first segment of every returned path.
 * @returns '/'-separated paths of the form `<registerDirName>/<file>` or
 *   `<registerDirName>/<subdir>/<file>`, sorted by code unit so that the
 *   processing order does not depend on the platform's directory listing
 *   order. An unreadable `baseDir` yields an empty list.
 */
async function collectArtefactFiles(baseDir, registerDirName) {
    let entries;
    try {
        entries = (await readdir(baseDir, { withFileTypes: true }));
    }
    catch {
        // Register directory may not exist (e.g. personas/ before any
        // persona is authored). Treat as empty.
        return [];
    }
    const files = [];
    for (const entry of entries) {
        const entryName = entry.name;
        if (entry.isFile() && isArtefactFile(entryName)) {
            files.push(`${registerDirName}/${entryName}`);
        }
        else if (entry.isDirectory()) {
            // Recurse one level for done/, superseded/, etc.
            const subPath = path.join(baseDir, entryName);
            const subEntries = (await readdir(subPath, { withFileTypes: true }));
            for (const sub of subEntries) {
                const subName = sub.name;
                if (sub.isFile() && isArtefactFile(subName)) {
                    files.push(`${registerDirName}/${entryName}/${subName}`);
                }
            }
        }
    }
    // Directory listing order is not guaranteed; a stable order keeps the
    // caller's first-seen-wins handling of duplicate handles reproducible.
    files.sort();
    return files;
}
|
|
132
|
+
/**
 * Decide whether a directory entry name is a migratable artefact file:
 * it must end in '.md' (case-sensitive), must not start with '_', and
 * must not contain 'template' in any letter case.
 */
function isArtefactFile(filename) {
    if (!filename.endsWith('.md'))
        return false;
    if (filename.startsWith('_'))
        return false; // _index.md, _template, etc.
    // The substring test already covers names ending in '-template.md',
    // so no separate suffix check is needed.
    return !filename.toLowerCase().includes('template');
}
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON-backed workflow record store.
|
|
3
|
+
*
|
|
4
|
+
* Phase G uses a flat JSON file at `.nuos-catalogue/workflows.json` for
|
|
5
|
+
* the migrated workflow records. Simple, inspectable, and sets up
|
|
6
|
+
* Phase I cleanly (markdown regeneration reads from the same file).
|
|
7
|
+
*
|
|
8
|
+
* NuVector cutover is a deliberate follow-up. The store interface is
|
|
9
|
+
* intentionally narrow (read by handle, write, list) so a NuVector
|
|
10
|
+
* adapter can be substituted later without changing call sites.
|
|
11
|
+
*/
|
|
12
|
+
import type { MigratedRecord } from './types.js';
|
|
13
|
+
/**
 * Narrow record store keyed by handle. Mutations are in-memory until
 * `flush()` is called.
 */
export interface WorkflowStore {
    /** Whether a record with this handle exists. */
    has(handle: string): boolean;
    /** The record stored under this handle, or null when there is none. */
    get(handle: string): MigratedRecord | null;
    /** Store the record under `record.handle`, replacing any existing one. */
    put(record: MigratedRecord): void;
    /** All records currently held. */
    list(): MigratedRecord[];
    /** Persist the current contents. */
    flush(): Promise<void>;
}
|
|
20
|
+
/**
 * Open the JSON-backed workflow store at `filePath`. A missing or empty
 * file yields an empty store; a file with an unrecognised shape causes a
 * rejection.
 */
export declare function openWorkflowStore(filePath: string): Promise<WorkflowStore>;
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* JSON-backed workflow record store.
|
|
3
|
+
*
|
|
4
|
+
* Phase G uses a flat JSON file at `.nuos-catalogue/workflows.json` for
|
|
5
|
+
* the migrated workflow records. Simple, inspectable, and sets up
|
|
6
|
+
* Phase I cleanly (markdown regeneration reads from the same file).
|
|
7
|
+
*
|
|
8
|
+
* NuVector cutover is a deliberate follow-up. The store interface is
|
|
9
|
+
* intentionally narrow (read by handle, write, list) so a NuVector
|
|
10
|
+
* adapter can be substituted later without changing call sites.
|
|
11
|
+
*/
|
|
12
|
+
import { readFile, writeFile, mkdir } from 'node:fs/promises';
|
|
13
|
+
import { existsSync } from 'node:fs';
|
|
14
|
+
import path from 'node:path';
|
|
15
|
+
/**
 * Open the JSON-backed workflow store at `filePath`.
 *
 * The file is read once up front; `put` only changes the in-memory copy
 * and `flush` writes the whole store back to `filePath`.
 *
 * @param filePath Location of the store file.
 * @returns An object with `has`, `get`, `put`, `list`, and `flush`.
 */
export async function openWorkflowStore(filePath) {
    const data = await load(filePath);
    const owns = (handle) => Object.prototype.hasOwnProperty.call(data.records, handle);
    return {
        has(handle) {
            return owns(handle);
        },
        get(handle) {
            // Own-property lookup, consistent with `has`: a handle such as
            // 'constructor' must not resolve to something inherited from
            // Object.prototype.
            return owns(handle) ? (data.records[handle] ?? null) : null;
        },
        put(record) {
            data.records[record.handle] = record;
        },
        list() {
            return Object.values(data.records);
        },
        async flush() {
            await persist(filePath, data);
        },
    };
}
|
|
35
|
+
/**
 * Read and validate the store file.
 *
 * A missing file or a file containing only whitespace yields a fresh,
 * empty store. Otherwise the content must be a JSON object with
 * `schemaVersion: 1` and a `records` property that is a plain object.
 * `null` and arrays are rejected explicitly, because `typeof` reports
 * both as 'object' and the store's accessors would then fail or misbehave.
 *
 * @param filePath Location of the store file.
 * @returns `{ schemaVersion: 1, records }`.
 * @throws Error when the parsed content has an unrecognised shape;
 *   SyntaxError from `JSON.parse` when the file is not valid JSON.
 */
async function load(filePath) {
    if (!existsSync(filePath)) {
        return { schemaVersion: 1, records: {} };
    }
    const raw = await readFile(filePath, 'utf8');
    if (raw.trim().length === 0) {
        return { schemaVersion: 1, records: {} };
    }
    const parsed = JSON.parse(raw);
    const isPlainObject = (value) => value !== null && typeof value === 'object' && !Array.isArray(value);
    if (!isPlainObject(parsed) || parsed.schemaVersion !== 1 || !isPlainObject(parsed.records)) {
        throw new Error(`openWorkflowStore: ${filePath} has unrecognised shape (expected { schemaVersion: 1, records: {} })`);
    }
    return parsed;
}
|
|
49
|
+
/**
 * Write the store to `filePath` as pretty-printed JSON (2-space indent,
 * trailing newline), creating parent directories as needed.
 */
async function persist(filePath, data) {
    const directory = path.dirname(filePath);
    await mkdir(directory, { recursive: true });
    const payload = `${JSON.stringify(data, null, 2)}\n`;
    await writeFile(filePath, payload, 'utf8');
}
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minimal shapes the migrate runner produces. Phase G ships count-parity
|
|
3
|
+
* across the four registers (work_unit, decision, open_question,
|
|
4
|
+
* persona); rich field-level fidelity is deferred to the post-cutover
|
|
5
|
+
* authoring path (workflows write the new shape directly).
|
|
6
|
+
*
|
|
7
|
+
* The migration is a back-fill: the live catalogue's pre-D046 WUs and
|
|
8
|
+
* decisions don't have the new fields in their markdown, so the parser
|
|
9
|
+
* preserves the title, handle, number, status, slug, source path, raw
|
|
10
|
+
* markdown, and file mtime. Future authoring-via-workflow operations
|
|
11
|
+
* fill in the richer shape organically.
|
|
12
|
+
*/
|
|
13
|
+
/** The four catalogue registers a migrated record can belong to. */
export type Register = 'work_unit' | 'decision' | 'open_question' | 'persona';
|
|
14
|
+
/**
 * One artefact file as captured by the migration.
 */
export interface MigratedRecord {
    /**
     * Record identifier: `wu-NNN` for work units (optionally followed by a
     * single lowercase sub-WU letter, e.g. `wu-030g`), `D###` for
     * decisions, `Q###` for open questions, `P###` for personas.
     */
    handle: string;
    /**
     * Numeric portion of the handle (e.g. 111 from wu-111). A sub-WU
     * letter is not part of the number.
     */
    number: number;
    /** Which register this record belongs to. */
    register: Register;
    /** First H1 heading from the file, or a title derived from the filename slug. */
    title: string;
    /** Status string parsed from the file as written, or null if not surfaced. */
    status: string | null;
    /** Filename slug (the kebab-cased portion after the number prefix). */
    slug: string;
    /** Path relative to the catalogue root (e.g. work-units/done/111-...). */
    sourcePath: string;
    /** Full markdown body, preserved for future re-parsing. */
    rawMarkdown: string;
    /** ISO timestamp — file mtime; preserves "original timestamp" per spec. */
    fileModifiedAt: string;
    /** ISO timestamp — when this record was migrated. */
    migratedAt: string;
    /** Always 'markdown' for Phase G; future phases may add other origins. */
    migratedFrom: 'markdown';
}
|
|
38
|
+
/**
 * Two different source files that resolved to the same handle.
 */
export interface HandleConflict {
    /** The contested handle. */
    handle: string;
    /** Source path of the file that won (already in the store, or scanned first). */
    winnerSourcePath: string;
    /** Source path of the file that was dropped because its handle was already taken. */
    loserSourcePath: string;
}
|
|
45
|
+
/**
 * Summary returned by a migration run.
 */
export interface MigrationReport {
    /** Number of artefact files parsed. */
    scanned: number;
    /** Number of files accepted as new records. */
    migrated: number;
    /** Number of files skipped because their handle was already taken. */
    skipped: number;
    /** Files dropped because a different file already claimed the same handle. */
    conflicts: HandleConflict[];
    /** The same three counters, broken down per register. */
    byRegister: Record<Register, { scanned: number; migrated: number; skipped: number }>;
    /** Wall-clock duration of the run in milliseconds. */
    durationMs: number;
}
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Minimal shapes the migrate runner produces. Phase G ships count-parity
|
|
3
|
+
* across the four registers (work_unit, decision, open_question,
|
|
4
|
+
* persona); rich field-level fidelity is deferred to the post-cutover
|
|
5
|
+
* authoring path (workflows write the new shape directly).
|
|
6
|
+
*
|
|
7
|
+
* The migration is a back-fill: the live catalogue's pre-D046 WUs and
|
|
8
|
+
* decisions don't have the new fields in their markdown, so the parser
|
|
9
|
+
* preserves the title, handle, number, status, slug, source path, raw
|
|
10
|
+
* markdown, and file mtime. Future authoring-via-workflow operations
|
|
11
|
+
* fill in the richer shape organically.
|
|
12
|
+
*/
|
|
13
|
+
export {};
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Drift detection: walk the workflow store, compare each record's
|
|
3
|
+
* stored `rawMarkdown` to its source file, report differences.
|
|
4
|
+
*/
|
|
5
|
+
import type { WorkflowStore } from '../migrate/store.js';
|
|
6
|
+
import type { DriftReport, RegenerateConfig } from './types.js';
|
|
7
|
+
/**
 * Options for the drift check, on top of the base `RegenerateConfig`.
 */
export interface CheckRegenerateConfig extends RegenerateConfig {
    /** Workflow store whose records are compared against their source files. */
    store: WorkflowStore;
    /**
     * Catalogue root directory.
     * NOTE(review): presumably the base against which each record's
     * source path is resolved — confirm against the implementation.
     */
    catalogueRoot: string;
}
|
|
11
|
+
/**
 * Run the drift check described in this module's header and resolve to a
 * `DriftReport`.
 */
export declare function runRegenerate(config: CheckRegenerateConfig): Promise<DriftReport>;
|