rigjs 4.0.7 → 4.0.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/skills/rig-wiki/SKILL.md +23 -1
- package/RIG_CREW_SKILL.md +31 -8
- package/RIG_WIKI_SKILL.md +23 -1
- package/built/index.js +186 -224
- package/lib/crew/board.ts +8 -3
- package/lib/crew/doctor.ts +7 -3
- package/lib/crew/index.ts +11 -1
- package/lib/crew/project.ts +88 -2
- package/lib/crew/role.ts +6 -6
- package/lib/crew/task.ts +59 -10
- package/lib/crew/vault.ts +67 -17
- package/lib/wiki/config.ts +5 -1
- package/lib/wiki/db.ts +29 -0
- package/lib/wiki/fileTypes.ts +52 -0
- package/lib/wiki/index.ts +10 -0
- package/lib/wiki/ingest.ts +53 -9
- package/lib/wiki/init.ts +46 -5
- package/lib/wiki/scan.ts +0 -0
- package/lib/wiki/survey.ts +300 -0
- package/package.json +2 -2
package/lib/wiki/ingest.ts
CHANGED
|
@@ -18,7 +18,7 @@ import crypto from 'crypto';
|
|
|
18
18
|
import print from '../print';
|
|
19
19
|
import { requireVault, loadRigConfig, WikiEntry } from './config';
|
|
20
20
|
import { paths } from './paths';
|
|
21
|
-
import { recordLastRun } from './db';
|
|
21
|
+
import { recordLastRun, upsertSourceSha } from './db';
|
|
22
22
|
import { qmdEmbed } from './qmd';
|
|
23
23
|
import { adapters } from './agent/registry';
|
|
24
24
|
import { guardPath, refusalMessage } from './pathGuard';
|
|
@@ -118,6 +118,18 @@ export default async function wikiIngest(source: string, opts: IngestOpts): Prom
|
|
|
118
118
|
print.warn('your wiki content is committed to disk; only the vector index is stale.');
|
|
119
119
|
}
|
|
120
120
|
|
|
121
|
+
// Baseline this source into state.db.source_sha so future `rig wiki scan`
|
|
122
|
+
// can detect MODIFIED on this exact file. Key by root-relative path to
|
|
123
|
+
// match what scan uses for lookups.
|
|
124
|
+
try {
|
|
125
|
+
const stat = fs.statSync(absSource);
|
|
126
|
+
const sha = crypto.createHash('sha256').update(fs.readFileSync(absSource)).digest('hex');
|
|
127
|
+
const relFromRoot = path.relative(target.root, absSource);
|
|
128
|
+
upsertSourceSha(target.name, relFromRoot, sha, stat.mtimeMs);
|
|
129
|
+
} catch (e) {
|
|
130
|
+
print.warn(`source_sha upsert failed: ${(e as Error).message}. Future scans may misreport this file as NEW.`);
|
|
131
|
+
}
|
|
132
|
+
|
|
121
133
|
if (opts.json) {
|
|
122
134
|
// eslint-disable-next-line no-console
|
|
123
135
|
console.log(JSON.stringify({ ok: true, code: 0, data: { source: relSource, applied, rejected } }, null, 2));
|
|
@@ -223,21 +235,43 @@ function appendLog(wiki: WikiEntry, relSource: string, applied: string[], dryRun
|
|
|
223
235
|
// ----------------------------------------------------------------------
|
|
224
236
|
|
|
225
237
|
function buildPrompt(wiki: WikiEntry, sourceAbs: string): string {
|
|
226
|
-
const sourceRel = path.relative(wiki.path, sourceAbs);
|
|
227
238
|
const sourceSha = crypto.createHash('sha256').update(fs.readFileSync(sourceAbs)).digest('hex');
|
|
228
239
|
const today = new Date().toISOString();
|
|
229
240
|
|
|
241
|
+
// The Obsidian vault root is the parent of the rig-wiki/ metadata dir.
|
|
242
|
+
// The Obsidian vault NAME defaults to that dir's basename. All source-path
|
|
243
|
+
// references in generated pages use obsidian:// URLs so that links survive
|
|
244
|
+
// moves of the wiki dir and are clickable from inside Obsidian.
|
|
245
|
+
const obsidianRoot = path.dirname(wiki.path);
|
|
246
|
+
const obsidianVaultName = path.basename(obsidianRoot);
|
|
247
|
+
const sourceFromObsidianRoot = path.relative(obsidianRoot, sourceAbs);
|
|
248
|
+
const sourceObsidianUrl = obsidianUrl(obsidianVaultName, sourceFromObsidianRoot);
|
|
249
|
+
const ext = path.extname(sourceAbs).toLowerCase();
|
|
250
|
+
const isImage = ['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp'].includes(ext);
|
|
251
|
+
const isPdf = ext === '.pdf';
|
|
252
|
+
const isSpreadsheet = ['.xlsx', '.xls', '.ods', '.numbers'].includes(ext);
|
|
253
|
+
|
|
230
254
|
return [
|
|
231
255
|
`You are running INGEST for the rig wiki at \`${wiki.path}\`.`,
|
|
256
|
+
`This wiki lives inside an Obsidian vault rooted at \`${obsidianRoot}\` (Obsidian vault name: "${obsidianVaultName}").`,
|
|
257
|
+
`Use obsidian:// URLs — never raw file paths — to reference sources or any file in the vault.`,
|
|
258
|
+
``,
|
|
259
|
+
`Source for this ingest: ${sourceObsidianUrl}`,
|
|
260
|
+
`Source filesystem path (for your Read tool only): ${sourceAbs}`,
|
|
232
261
|
``,
|
|
233
262
|
`Step 1 — ANALYSIS (do NOT write files yet):`,
|
|
234
|
-
` - Read \`purpose.md\`, \`schema.md\`, \`overview.md\`, \`index.md
|
|
235
|
-
` - Read the source
|
|
263
|
+
` - Read \`purpose.md\`, \`schema.md\`, \`overview.md\`, \`index.md\` from the wiki.`,
|
|
264
|
+
` - Read the source. Notes by type:`,
|
|
265
|
+
` · text / markdown / json / code → use the Read tool normally`,
|
|
266
|
+
isImage ? ` · this source is an IMAGE (${ext}) — Read it; you'll receive it as a visual input. Describe what's depicted, plus any visible text / numbers / structures.` : '',
|
|
267
|
+
isPdf ? ` · this source is a PDF — Read it; if it is >10 pages, read in chunks with the \`pages\` parameter.` : '',
|
|
268
|
+
isSpreadsheet ? ` · this source is a SPREADSHEET (${ext}) — the Read tool does NOT natively decode spreadsheets in v1. Try Read first; if it fails, write a stub source page with the obsidian:// link + filename + last-modified date, and append a \`reviews.md\` bullet asking the user to export it as CSV / JSON for re-ingest. Do NOT invent contents.` : '',
|
|
236
269
|
` - In your head, list: entities mentioned, concepts touched, contradictions vs existing pages, items that need human review.`,
|
|
237
270
|
``,
|
|
238
271
|
`Step 2 — GENERATION (write files):`,
|
|
239
|
-
` -
|
|
240
|
-
` -
|
|
272
|
+
` - Write the source summary to \`sources/<slug>.md\`. \`<slug>\` = source basename minus YYYY-MM-DD prefix and extension, kebab-case.`,
|
|
273
|
+
` - **If \`sources/<slug>.md\` already exists, UPDATE IT IN PLACE.** Do NOT create a sibling like \`<slug>-2.md\` or \`<slug>-updated.md\`. Re-ingest of the same source MUST overwrite its existing source page so any [[wikilinks]] pointing at \`[[<slug>]]\` keep working. Refresh \`last-updated\` and \`source-sha\` in the frontmatter; preserve \`ingested-at\` (the original first-ingest timestamp).`,
|
|
274
|
+
` - For each new or affected entity / concept / synthesis page, create or UPDATE the corresponding file under \`entities/\`, \`concepts/\`, \`synthesis/\` (at the vault root — there is no \`wiki/\` subdir). Same in-place rule: never create \`-2.md\` siblings.`,
|
|
241
275
|
` - Update \`index.md\` and \`overview.md\` to reflect the new content.`,
|
|
242
276
|
` - If anything is unclear or contradictory, append a bullet to \`reviews.md\`. Do NOT silently merge contradictions.`,
|
|
243
277
|
``,
|
|
@@ -251,16 +285,26 @@ function buildPrompt(wiki: WikiEntry, sourceAbs: string): string {
|
|
|
251
285
|
`Source pages additionally need:`,
|
|
252
286
|
'```yaml',
|
|
253
287
|
`source-sha: ${sourceSha}`,
|
|
254
|
-
`source-path: ${
|
|
288
|
+
`source-path: ${sourceObsidianUrl}`,
|
|
255
289
|
'```',
|
|
256
290
|
``,
|
|
291
|
+
`Reference rules — IMPORTANT:`,
|
|
292
|
+
` - To reference the original source file inside markdown body, use the obsidian:// URL: ${sourceObsidianUrl}`,
|
|
293
|
+
` - To reference OTHER files in the same Obsidian vault, build URLs the same way: \`obsidian://open?vault=${obsidianVaultName}&file=<vault-relative-path>\`. URL-encode spaces / specials with encodeURI, keep forward slashes.`,
|
|
294
|
+
` - To reference other wiki pages, use [[wikilink]] (slug only).`,
|
|
295
|
+
``,
|
|
257
296
|
`Hard rules — the host will REJECT any patch that violates these:`,
|
|
258
297
|
` - DO NOT modify \`raw/\`, \`purpose.md\`, or \`schema.md\`.`,
|
|
259
298
|
` - Use kebab-case slugs; no spaces; no date prefixes in page filenames.`,
|
|
260
|
-
` -
|
|
299
|
+
` - Every wiki page should link to ≥1 other page (via [[wikilink]] or obsidian:// URL).`,
|
|
261
300
|
` - For contradictions, write inline: \`> Contradiction: A vs B (see [[page-A]], [[page-B]])\`.`,
|
|
262
301
|
``,
|
|
263
302
|
`Output: stdout is for status only. All content goes to files via the Write/Edit tools.`,
|
|
264
303
|
`When done, print a single line: \`INGEST DONE: <slug>\`.`,
|
|
265
|
-
].join('\n');
|
|
304
|
+
].filter(Boolean).join('\n');
|
|
305
|
+
}
|
|
306
|
+
|
|
307
|
+
/**
 * Build an `obsidian://open` URL for a file inside a vault.
 *
 * Both values travel as query parameters, so they are percent-encoded with
 * `encodeURIComponent`. `encodeURI` is not sufficient for the file part: it
 * leaves `&`, `#`, `?`, `+` and `=` untouched, so a name such as
 * `R&D/plan #1.md` would be cut off at the `&` or `#`. The path is encoded
 * one segment at a time so the `/` separators stay readable.
 *
 * @param vaultName Obsidian vault name.
 * @param fileRel   Vault-relative path using `/` separators.
 * @returns The `obsidian://open?vault=…&file=…` URL.
 */
function obsidianUrl(vaultName: string, fileRel: string): string {
  // NOTE(review): backslashes (e.g. Windows path.relative output) are encoded
  // as %5C, exactly as before — confirm whether they should be normalised to `/`.
  const encodedFile = fileRel
    .split('/')
    .map(segment => encodeURIComponent(segment))
    .join('/');
  return `obsidian://open?vault=${encodeURIComponent(vaultName)}&file=${encodedFile}`;
}
|
package/lib/wiki/init.ts
CHANGED
|
@@ -76,6 +76,34 @@ const SCHEMA_TMPL = `# Schema
|
|
|
76
76
|
- never edit raw/, purpose.md, schema.md
|
|
77
77
|
- raw/ file sha drift = error, not a re-ingest trigger
|
|
78
78
|
- living-doc paths (in include[]) sha drift = MODIFIED, propose re-ingest
|
|
79
|
+
|
|
80
|
+
## Ingestion policy
|
|
81
|
+
|
|
82
|
+
This section is consumed by \`rig wiki survey\` to decide which files under
|
|
83
|
+
the scan root are eligible to ingest. Edit the "Custom rules" subsection
|
|
84
|
+
to add wiki-specific filters; the default rules below cover the obvious
|
|
85
|
+
cases.
|
|
86
|
+
|
|
87
|
+
### Default — INGESTIBLE
|
|
88
|
+
- markdown / plain text: \`.md\` \`.markdown\` \`.txt\` \`.rst\`
|
|
89
|
+
- documents: \`.pdf\` (Claude reads natively)
|
|
90
|
+
- images of receipts / whiteboards / diagrams: \`.png\` \`.jpg\` \`.jpeg\` \`.webp\` \`.gif\`
|
|
91
|
+
- structured text: \`.csv\` \`.tsv\` \`.json\` \`.yaml\` \`.yml\` \`.toml\` \`.html\` \`.xml\`
|
|
92
|
+
|
|
93
|
+
### Default — NOT INGESTIBLE
|
|
94
|
+
- compressed archives: \`.zip\` \`.tar\` \`.tar.gz\` \`.tgz\` \`.gz\` \`.bz2\` \`.xz\` \`.7z\` \`.rar\` \`.dmg\` \`.iso\`
|
|
95
|
+
- binaries / native: \`.exe\` \`.dll\` \`.so\` \`.dylib\` \`.bin\` \`.o\` \`.a\` \`.lib\` \`.class\` \`.jar\` \`.pyc\` \`.node\` \`.wasm\`
|
|
96
|
+
- AV: \`.mp4\` \`.mov\` \`.mkv\` \`.avi\` \`.webm\` \`.mp3\` \`.wav\` \`.flac\` \`.aac\` \`.ogg\`
|
|
97
|
+
- design / proprietary: \`.psd\` \`.ai\` \`.fig\` \`.sketch\` \`.fla\` \`.indd\`
|
|
98
|
+
- lockfiles + build artifacts: \`yarn.lock\` \`package-lock.json\` \`pnpm-lock.yaml\` \`*.lock\` \`*.min.js\` \`*.map\`
|
|
99
|
+
- model weights / embeddings: \`.gguf\` \`.safetensors\` \`.bin\` \`.pt\` \`.onnx\` \`.h5\` \`.pkl\`
|
|
100
|
+
- anything in hidden dirs (segment starts with \`.\`) or .gitignored — refused by the path guard
|
|
101
|
+
|
|
102
|
+
### Custom rules (edit me)
|
|
103
|
+
|
|
104
|
+
- (e.g.) skip files in \`personal/work/archive/\`
|
|
105
|
+
- (e.g.) only English-language content
|
|
106
|
+
- (e.g.) skip files larger than 5MB
|
|
79
107
|
`;
|
|
80
108
|
|
|
81
109
|
const SUBDIRS = ['sources', 'entities', 'concepts', 'synthesis', 'queries'];
|
|
@@ -98,17 +126,30 @@ proposals/
|
|
|
98
126
|
* Defaults for a freshly-scoped vault. The user can edit
|
|
99
127
|
* `<vault>/.rig/config.yml` afterwards.
|
|
100
128
|
*
|
|
101
|
-
*
|
|
102
|
-
*
|
|
129
|
+
* `include` defaults to `**` (everything) — rig wiki is multimodal: Claude
|
|
130
|
+
* Read tool handles markdown / code / json natively, images and PDFs are
|
|
131
|
+
* read as visual / document inputs. The user can tighten this per-vault.
|
|
132
|
+
*
|
|
133
|
+
* `exclude` defaults to common binary-archive extensions whose contents
|
|
134
|
+
* can't be ingested without unpacking. Hidden directories (segments starting
|
|
135
|
+
* with `.`) and `.gitignore`'d files are skipped automatically by the
|
|
136
|
+
* scanner — no need to list them.
|
|
103
137
|
*/
|
|
104
138
|
function defaultVaultConfig(scope: string, rootRel: string): VaultConfig {
|
|
105
139
|
return {
|
|
106
140
|
name: scope,
|
|
107
141
|
root: rootRel,
|
|
108
|
-
include: ['
|
|
109
|
-
exclude: [
|
|
142
|
+
include: ['**'],
|
|
143
|
+
exclude: [
|
|
144
|
+
'*.zip', '**/*.zip',
|
|
145
|
+
'*.tar', '**/*.tar',
|
|
146
|
+
'*.tar.gz', '**/*.tar.gz',
|
|
147
|
+
'*.tgz', '**/*.tgz',
|
|
148
|
+
'*.7z', '**/*.7z',
|
|
149
|
+
'*.rar', '**/*.rar',
|
|
150
|
+
],
|
|
110
151
|
schedule: { scan: '0 */6 * * *', lint: '0 3 * * *', ingest: null },
|
|
111
|
-
ingestRules: [{ match: 'raw
|
|
152
|
+
ingestRules: [{ match: 'raw/**/*.*', mode: 'auto-on-new' }],
|
|
112
153
|
};
|
|
113
154
|
}
|
|
114
155
|
|
package/lib/wiki/scan.ts
CHANGED
|
Binary file
|
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
// `rig wiki survey` — schema-driven ingestion candidate triage.
|
|
2
|
+
//
|
|
3
|
+
// Walks the vault's scan root, skips obvious non-sources (hidden /
|
|
4
|
+
// gitignored / binary extensions), then asks the configured agent
|
|
5
|
+
// (Claude by default) to classify each remaining candidate against the
|
|
6
|
+
// wiki's schema.md "Ingestion policy" section.
|
|
7
|
+
//
|
|
8
|
+
// Output:
|
|
9
|
+
// - default: human-readable table path | decision | reason
|
|
10
|
+
// - --json: { ok, code, data: { wiki, decisions: [{path, decision, reason}] } }
|
|
11
|
+
//
|
|
12
|
+
// --apply iterates over `decision === 'ingest'` and runs the same code
|
|
13
|
+
// path as `rig wiki ingest <path>` for each, in series.
|
|
14
|
+
|
|
15
|
+
import fs from 'fs';
|
|
16
|
+
import path from 'path';
|
|
17
|
+
import { spawnSync } from 'child_process';
|
|
18
|
+
import print from '../print';
|
|
19
|
+
import { requireVault, loadRigConfig, WikiEntry } from './config';
|
|
20
|
+
import { isBinaryExtension } from './fileTypes';
|
|
21
|
+
import { adapters } from './agent/registry';
|
|
22
|
+
import { default as wikiIngest } from './ingest';
|
|
23
|
+
|
|
24
|
+
interface SurveyOpts {
|
|
25
|
+
apply?: boolean;
|
|
26
|
+
json?: boolean;
|
|
27
|
+
limit?: number; // cap candidates passed to the agent (cost/latency safety)
|
|
28
|
+
noAgent?: boolean; // skip Claude classification — local rules only
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
type Decision = 'ingest' | 'skip' | 'unclear';
|
|
32
|
+
|
|
33
|
+
interface SurveyRow {
|
|
34
|
+
path: string; // root-relative
|
|
35
|
+
decision: Decision;
|
|
36
|
+
reason: string;
|
|
37
|
+
size: number;
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
const DEFAULT_LIMIT = 500;
|
|
41
|
+
const AGENT_TIMEOUT_MS = 5 * 60 * 1000;
|
|
42
|
+
|
|
43
|
+
/**
 * Entry point for `rig wiki survey`.
 *
 * Collects candidate files under the vault's scan root, caps the list at
 * `opts.limit` (default `DEFAULT_LIMIT`), classifies each candidate (locally
 * when `opts.noAgent` is set, otherwise through the configured agent), prints
 * the result as a table or as JSON, and finally ingests the "ingest" rows
 * when `opts.apply` is set.
 *
 * @param opts Survey options (`apply`, `json`, `limit`, `noAgent`).
 */
export default async function wikiSurvey(opts: SurveyOpts): Promise<void> {
  const target = requireVault();
  const candidates = collectCandidates(target);

  if (candidates.length === 0) {
    if (opts.json) {
      // Keep the --json contract: callers always get the same envelope,
      // here with an empty decision list.
      // eslint-disable-next-line no-console
      console.log(JSON.stringify({
        ok: true, code: 0,
        data: { wiki: target.name, decisions: [] },
      }, null, 2));
    } else {
      print.info('no candidates under scan root.');
    }
    return;
  }

  // A NaN limit (e.g. an unparsable --limit value) would make slice(0, NaN)
  // return nothing and silently drop every candidate; use the default instead.
  const requested = opts.limit ?? DEFAULT_LIMIT;
  const limit = Math.max(1, Number.isNaN(requested) ? DEFAULT_LIMIT : requested);
  if (candidates.length > limit) {
    print.warn(`${candidates.length} candidates found; capping to first ${limit}. Pass --limit <n> to override.`);
  }
  const truncated = candidates.slice(0, limit);

  // Classify
  let rows: SurveyRow[];
  if (opts.noAgent) {
    rows = truncated.map(c => ({ path: c.rel, decision: 'ingest' as Decision, reason: 'no-agent mode — accepts every non-binary candidate', size: c.size }));
  } else {
    rows = await classifyWithAgent(target, truncated);
  }

  if (opts.json) {
    // eslint-disable-next-line no-console
    console.log(JSON.stringify({
      ok: true, code: 0,
      data: { wiki: target.name, decisions: rows },
    }, null, 2));
  } else {
    printTable(target, rows);
  }

  if (opts.apply) await applyIngest(target, rows);
}
|
|
78
|
+
|
|
79
|
+
interface Candidate { abs: string; rel: string; size: number; }
|
|
80
|
+
|
|
81
|
+
/**
 * Walk the scan root and return every file that could be a wiki source.
 *
 * Skipped while walking: dot-prefixed names, `node_modules`, the wiki
 * directory itself, anything that is not a regular file, and files whose
 * extension `isBinaryExtension` rejects. The surviving files are then
 * filtered through `batchGitignored`.
 *
 * @param entry Wiki entry providing `root` (scan root) and `path` (wiki dir).
 * @returns Candidates with absolute path, root-relative path and byte size.
 */
function collectCandidates(entry: WikiEntry): Candidate[] {
  const scanRoot = entry.root;
  const wikiRel = path.relative(scanRoot, entry.path) || path.basename(entry.path);
  const wikiPrefix = wikiRel + path.sep;

  const found: Candidate[] = [];
  const pending: string[] = [scanRoot];

  let dir: string | undefined;
  while ((dir = pending.pop()) !== undefined) {
    let children: fs.Dirent[];
    try {
      children = fs.readdirSync(dir, { withFileTypes: true });
    } catch {
      // Unreadable directory — move on to the next one.
      continue;
    }

    for (const child of children) {
      const name = child.name;
      // Hidden entries and node_modules are never wiki sources.
      if (name.startsWith('.') || name === 'node_modules') continue;

      const abs = path.join(dir, name);
      const rel = path.relative(scanRoot, abs);
      // We don't ingest our own wiki pages.
      if (rel === wikiRel || rel.startsWith(wikiPrefix)) continue;

      if (child.isDirectory()) {
        pending.push(abs);
        continue;
      }
      if (!child.isFile() || isBinaryExtension(abs)) continue;

      try {
        found.push({ abs, rel, size: fs.statSync(abs).size });
      } catch { /* unreadable — skip */ }
    }
  }

  // Best-effort gitignore filter; yields an empty set outside a git repo.
  const ignored = batchGitignored(scanRoot, found.map(c => c.abs));
  return found.filter(c => !ignored.has(c.abs));
}
|
|
113
|
+
|
|
114
|
+
/**
 * Ask git which of the given paths are ignored, in a single batch call to
 * `git check-ignore --stdin -z`.
 *
 * Best-effort: if git cannot be run, exits with anything other than 0
 * (1 = nothing ignored, 128 = fatal, e.g. not a repository) or produces no
 * output, an empty set is returned and nothing is filtered.
 *
 * @param root Directory git is run in; relative output is resolved against it.
 * @param abs  Paths to test.
 * @returns The subset of paths git reports as ignored, resolved to absolute.
 */
function batchGitignored(root: string, abs: string[]): Set<string> {
  const ignored = new Set<string>();
  if (abs.length === 0) return ignored;

  const input = Buffer.from(abs.join('\0') + '\0');
  const r = spawnSync('git', ['check-ignore', '--stdin', '-z'], {
    cwd: root,
    input,
    // git echoes back at most the paths it was given, so size the buffer from
    // the input. The default maxBuffer would truncate the ignored list on
    // large trees and leave a cut-off path at the end.
    maxBuffer: Math.max(1024 * 1024, input.length + 1024),
  });
  // r.error covers "git not installed" and buffer overflow; partial output
  // must not be trusted, so treat every failure as "nothing ignored".
  if (r.error || r.status !== 0 || !r.stdout || r.stdout.length === 0) return ignored;

  const text = Buffer.isBuffer(r.stdout) ? r.stdout.toString('utf8') : String(r.stdout);
  for (const entry of text.split('\0')) {
    if (entry) ignored.add(path.resolve(root, entry));
  }
  return ignored;
}
|
|
128
|
+
|
|
129
|
+
/**
 * Classify candidates with the configured agent (`rig.wiki.defaultAgent`,
 * "claude" when unset).
 *
 * Outcomes:
 *  - agent missing or not installed → every candidate is accepted ("ingest")
 *    under local rules;
 *  - agent run fails, or its output cannot be parsed → every candidate is
 *    marked "unclear" so nothing is ingested without a human look;
 *  - otherwise → the per-candidate decisions parsed from the agent's stdout.
 *
 * @param target     Wiki entry; `target.path` is used as the agent's cwd.
 * @param candidates Candidates to classify, in prompt order.
 * @returns One row per candidate, in the same order.
 */
async function classifyWithAgent(target: WikiEntry, candidates: Candidate[]): Promise<SurveyRow[]> {
  const rig = loadRigConfig();
  const which = rig.wiki?.defaultAgent || 'claude';
  const adapter = adapters.find(a => a.name === which);
  const detect = adapter ? await adapter.detect() : { installed: false };
  if (!adapter || !detect.installed) {
    print.warn(`${which} not available — falling back to local rules (every non-binary candidate accepted).`);
    return candidates.map(c => ({ path: c.rel, decision: 'ingest', reason: `local-rules (${which} unavailable)`, size: c.size }));
  }

  const policy = readPolicySection(target);
  const prompt = buildPrompt(policy, candidates);

  print.start(`${which} survey (${candidates.length} candidates)`);
  // Classification only: the agent is run without write access or tools.
  const res = await adapter.run({
    prompt,
    cwd: target.path,
    allowWrite: false,
    tools: [],
    timeoutMs: AGENT_TIMEOUT_MS,
  });

  if (!res.ok) {
    // The message must match what actually happens: rows become "unclear",
    // they are NOT accepted by local rules.
    print.warn(`${which} survey failed (code ${res.exitCode}) — marking every candidate "unclear" for manual review.`);
    return candidates.map(c => ({ path: c.rel, decision: 'unclear', reason: 'agent-failed', size: c.size }));
  }

  const parsed = parseJsonDecisions(res.stdout, candidates);
  if (!parsed) {
    print.warn(`could not parse ${which}'s JSON response — marking every candidate "unclear" for manual review.`);
    return candidates.map(c => ({ path: c.rel, decision: 'unclear', reason: 'parse-failed', size: c.size }));
  }
  return parsed;
}
|
|
163
|
+
|
|
164
|
+
/**
 * Return the "## Ingestion policy" section of the wiki's `schema.md`.
 *
 * The section runs from its H2 heading up to (not including) the newline
 * before the next H2, or to end-of-file minus one trailing newline. When
 * `schema.md` is missing or has no such section, a built-in default policy
 * is returned instead.
 *
 * @param target Wiki entry; `schema.md` is read from `target.path`.
 * @returns The policy text, starting with its `##` heading line.
 */
function readPolicySection(target: WikiEntry): string {
  const schemaPath = path.join(target.path, 'schema.md');
  let body = '';
  try { body = fs.readFileSync(schemaPath, 'utf8'); } catch { /* missing schema — use the default policy below */ }

  // The heading is anchored to the start of a line (`^` with the m flag) so
  // that `### Ingestion policy …` subheadings or inline mentions of
  // "## Ingestion policy" are not mistaken for the section start.
  // `(?![\s\S])` means end-of-input; plain `$` would match every line end
  // once the m flag is on.
  const m = body.match(/^##[ \t]+Ingestion policy[\s\S]*?(?=\n##\s|\n?(?![\s\S]))/im);
  if (m) return m[0];

  // Fallback: hand-rolled default if the schema doesn't have the section.
  return [
    '## Ingestion policy (default — schema.md has no explicit section)',
    'Ingest: markdown, plain text, PDF, images of documents/receipts,',
    'structured text (csv/json/yaml). Skip: archives, binaries, AV, design,',
    'model weights, lockfiles, anything under hidden/gitignored paths.',
  ].join('\n');
}
|
|
179
|
+
|
|
180
|
+
/**
 * Assemble the triage prompt sent to the agent.
 *
 * The prompt contains the trimmed ingestion policy inside a fenced block,
 * the three allowed decisions, the required JSON output shape, and a
 * 1-based numbered list of candidates (root-relative path plus
 * human-readable size). The 1-based numbers are the `i` values the agent is
 * asked to echo back.
 *
 * @param policy     Policy text to embed.
 * @param candidates Candidates to list, in order.
 * @returns The full prompt as a single newline-joined string.
 */
function buildPrompt(policy: string, candidates: Candidate[]): string {
  const fence = `\`\`\``;

  const numbered: string[] = [];
  candidates.forEach((candidate, position) => {
    numbered.push(`${position + 1}. ${candidate.rel} (${humanSize(candidate.size)})`);
  });

  const intro = [
    `You are triaging files for a rig wiki ingestion run.`,
    ``,
    `Below is the wiki's ingestion policy (extracted from schema.md):`,
    ``,
    fence,
    policy.trim(),
    fence,
    ``,
  ];

  const instructions = [
    `Below is the list of candidate files (already filtered for hidden /`,
    `gitignored / known-binary extensions). For each candidate, decide:`,
    ``,
    ` - "ingest" — matches the policy, should become a wiki source`,
    ` - "skip" — should not be ingested per the policy`,
    ` - "unclear" — needs a human look (e.g. ambiguous filename)`,
    ``,
    `Output ONE JSON array. Each element MUST be:`,
    ``,
    ` {"i": <1-based-index>, "decision": "ingest"|"skip"|"unclear", "reason": "<≤80 chars>"}`,
    ``,
    `Output ONLY the JSON array. No prose, no markdown fences.`,
    ``,
  ];

  return [...intro, ...instructions, `Candidates:`, numbered.join('\n')].join('\n');
}
|
|
210
|
+
|
|
211
|
+
/**
 * Parse the agent's stdout into one survey row per candidate.
 *
 * The model may wrap the JSON array in stray prose, so the array is located
 * by bracket search: it must end at the last `]`, and each `[` is tried in
 * turn as its start until one yields a valid JSON array. Trying only the
 * first `[` would reject a response like `Sure [see below]: [ … ]`.
 *
 * Array items are matched to candidates by their 1-based `i`. Items with a
 * non-numeric `i` or an unknown decision are ignored, and candidates without
 * a usable item are reported as "unclear".
 *
 * @param stdout     Raw agent output.
 * @param candidates Candidates in the order they were listed in the prompt.
 * @returns Rows in candidate order, or `null` when no JSON array was found.
 */
function parseJsonDecisions(stdout: string, candidates: Candidate[]): SurveyRow[] | null {
  if (!stdout) return null;
  const end = stdout.lastIndexOf(']');
  if (end < 0) return null;

  let items: unknown[] | null = null;
  for (
    let start = stdout.indexOf('[');
    start >= 0 && start < end;
    start = stdout.indexOf('[', start + 1)
  ) {
    try {
      const value: unknown = JSON.parse(stdout.slice(start, end + 1));
      if (Array.isArray(value)) {
        items = value;
        break;
      }
    } catch { /* not valid JSON from this bracket — try the next one */ }
  }
  if (!items) return null;

  const byIndex = new Map<number, { decision: Decision; reason: string }>();
  for (const item of items) {
    if (!item || typeof item !== 'object') continue;
    const o = item as Record<string, unknown>;
    const i = typeof o.i === 'number' ? o.i : NaN;
    const d = typeof o.decision === 'string' ? o.decision.toLowerCase() : '';
    if (!Number.isFinite(i)) continue;
    if (d !== 'ingest' && d !== 'skip' && d !== 'unclear') continue;
    byIndex.set(i, { decision: d, reason: typeof o.reason === 'string' ? o.reason : '' });
  }

  return candidates.map((c, idx): SurveyRow => {
    const r = byIndex.get(idx + 1);
    return r
      ? { path: c.rel, decision: r.decision, reason: r.reason, size: c.size }
      : { path: c.rel, decision: 'unclear', reason: 'no-decision-from-agent', size: c.size };
  });
}
|
|
238
|
+
|
|
239
|
+
/**
 * Print the survey result as a human-readable table: a per-decision count
 * line, a DECISION / SIZE / PATH table, and a hint about `--apply`.
 * For rows that are not "ingest", the reason is printed on a continuation
 * line beneath the row.
 *
 * @param target Wiki entry; only `target.name` is printed.
 * @param rows   Classified rows to display.
 */
function printTable(target: WikiEntry, rows: SurveyRow[]): void {
  print.info(`survey: ${target.name} (${rows.length} candidate${rows.length === 1 ? '' : 's'})`);
  const counts = rows.reduce((acc, r) => { acc[r.decision] = (acc[r.decision] || 0) + 1; return acc; }, {} as Record<string, number>);
  // eslint-disable-next-line no-console
  console.log(` ingest ${counts.ingest || 0} skip ${counts.skip || 0} unclear ${counts.unclear || 0}\n`);

  const widths = {
    // Wide enough for the header itself: 'DECISION' is 8 characters, one more
    // than the longest decision value, so a width of 7 shifted the header
    // relative to every row.
    decision: 'DECISION'.length,
    path: Math.min(60, Math.max(4, ...rows.map(r => r.path.length))),
    size: Math.max(4, ...rows.map(r => humanSize(r.size).length)),
  };
  // eslint-disable-next-line no-console
  console.log(` ${'DECISION'.padEnd(widths.decision)} ${'SIZE'.padStart(widths.size)} PATH`);
  // eslint-disable-next-line no-console
  console.log(` ${'-'.repeat(widths.decision)} ${'-'.repeat(widths.size)} ${'-'.repeat(widths.path)}`);
  for (const r of rows) {
    // eslint-disable-next-line no-console
    console.log(` ${r.decision.padEnd(widths.decision)} ${humanSize(r.size).padStart(widths.size)} ${r.path}`);
    if (r.decision !== 'ingest' && r.reason) {
      // eslint-disable-next-line no-console
      console.log(` ${''.padEnd(widths.decision)} ${''.padStart(widths.size)} ↳ ${r.reason}`);
    }
  }
  // eslint-disable-next-line no-console
  console.log('');
  if (!counts.ingest) {
    print.info(`nothing tagged "ingest". Edit schema.md's "Ingestion policy" if this is wrong.`);
  } else {
    print.info(`re-run with --apply to ingest the ${counts.ingest} "ingest" candidate${counts.ingest === 1 ? '' : 's'}.`);
  }
}
|
|
270
|
+
|
|
271
|
+
/**
 * Ingest every row whose decision is "ingest", one after another, and print
 * an ok / failed tally at the end.
 *
 * @param target Wiki entry; row paths are resolved against `target.root`.
 * @param rows   Classified survey rows.
 */
async function applyIngest(target: WikiEntry, rows: SurveyRow[]): Promise<void> {
  const accepted = rows.filter(row => row.decision === 'ingest');
  const total = accepted.length;
  if (total === 0) {
    print.info('nothing to apply (no "ingest" decisions).');
    return;
  }

  print.info(`applying ${total} ingest${total === 1 ? '' : 's'} (in series)…`);

  const tally = { ok: 0, failed: 0 };
  for (const row of accepted) {
    const sourceAbs = path.resolve(target.root, row.path);
    print.start(`ingest ${row.path}`);
    try {
      // wikiIngest resolves the vault from the CWD, so it picks up the same
      // target. Per the original author's note it may call process.exit on
      // error, in which case this catch never runs.
      await wikiIngest(sourceAbs, { dryRun: false });
      tally.ok += 1;
    } catch (e) {
      tally.failed += 1;
      print.error(`ingest ${row.path} failed: ${(e as Error).message}`);
    }
  }

  print.succeed(`survey --apply done: ${tally.ok} ok, ${tally.failed} failed.`);
}
|
|
294
|
+
|
|
295
|
+
/**
 * Format a byte count for display: plain bytes below 1024 (`512B`),
 * otherwise one decimal place in 1024-based units `K`, `M` or `G`
 * (`1.5K`). Values of 1024G and above stay in `G`.
 *
 * @param bytes Size in bytes.
 * @returns The compact size string.
 */
function humanSize(bytes: number): string {
  if (bytes < 1024) return `${bytes}B`;

  const units = ['K', 'M', 'G'];
  let value = bytes / 1024;
  let unit = 0;
  // Decide on the *rounded* value: 1048575 bytes is 1023.999K, which would
  // otherwise print as "1024.0K" instead of "1.0M".
  while (unit < units.length - 1 && Number(value.toFixed(1)) >= 1024) {
    value /= 1024;
    unit += 1;
  }
  return `${value.toFixed(1)}${units[unit]}`;
}
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "rigjs",
|
|
3
|
-
"version": "4.0.
|
|
4
|
-
"versionCode":
|
|
3
|
+
"version": "4.0.10",
|
|
4
|
+
"versionCode": 26052415,
|
|
5
5
|
"description": "A multi-repos dev tool based on yarn and git.Rigjs is intended to be the simplest way to develop,share and deliver codes between different developers or different projects.",
|
|
6
6
|
"keywords": [
|
|
7
7
|
"modular",
|