luca 3.0.2 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,561 @@
1
/** Tally keyed by tool name; values are how many times that tool was used. */
type ToolCount = { [tool: string]: number }
2
+
3
/** A repository whose Claude session transcripts should be mined. */
type RepoConfig = {
  /** Short label; used in output file names and as `SessionRecord.repo`. */
  name: string
  /** Absolute path of the repository checkout. */
  repoPath: string
}
7
+
8
/** Everything extracted from a single session transcript, plus its heuristic labels. */
type SessionRecord = {
  // --- identity -----------------------------------------------------------
  /** Transcript file name without the `.jsonl` extension. */
  sessionId: string
  /** Path of the transcript file this record was parsed from. */
  sessionPath: string
  /** `RepoConfig.name` of the owning repository. */
  repo: string
  /** `RepoConfig.repoPath` of the owning repository. */
  repoPath: string
  /** First `gitBranch` value seen in the transcript, if any. */
  branch?: string
  /** Text of the first user turn that carried any text. */
  firstInstruction?: string

  // --- heuristic labels ---------------------------------------------------
  /** Label produced by `classifyTask`. */
  taskType: string
  /** Tier produced by `qualityTier`. */
  qualityTier: 'gold' | 'silver' | 'bronze'

  // --- raw counts ---------------------------------------------------------
  userTurns: number
  assistantTurns: number
  /** Total number of `tool_use` parts seen in assistant messages. */
  toolUses: number
  /** Per-tool breakdown of `toolUses`. */
  toolUsage: ToolCount

  // --- extracted evidence -------------------------------------------------
  /** Repo-relative paths detected in the transcript (deduplicated, sorted). */
  changedFiles: string[]
  /** Canonical command labels returned by `detectCommands` (deduplicated). */
  commandsRun: string[]
  /** Raw shell commands that were recognised as test runs. */
  testsRun: string[]
  relevantHelpers: string[]
  policyTrace: string[]
  notes: string[]
  /** Up to eight short excerpts from the transcript. */
  snippets: string[]

  /** Ranking score produced by `scoreSession`. */
  reviewScore: number
}
30
+
31
/** Per-repository roll-up written to `<repo>-session-curation-summary.json`. */
type RepoSummary = {
  repo: string
  repoPath: string
  /** ISO-8601 timestamp of when the summary was built. */
  generatedAt: string
  /** Directory that was scanned for `.jsonl` transcripts. */
  sessionRoot: string
  counts: {
    /** Transcripts outside any `/subagents/` directory. */
    mainSessions: number
    /** Transcripts inside a `/subagents/` directory. */
    subagentSessions: number
    gold: number
    silver: number
    bronze: number
  }
  /** Presence flags (0/1) and file counts for well-known repo locations. */
  sourceInventory: Record<string, number>
  /** Most-used tools across all parsed sessions. */
  topTools: ToolCount
  /** Most frequently touched files across all parsed sessions. */
  topChangedFiles: { path: string, count: number }[]
}
47
+
48
/**
 * One row of the human review queue. Field names are snake_case because the
 * rows are serialised as-is into the review JSONL files.
 */
type ReviewQueueEntry = {
  /** 1-based position in the queue this entry was emitted into. */
  rank: number

  // --- provenance ---------------------------------------------------------
  session_id: string
  source_repo: string
  /** Path of the transcript under review. */
  candidate_path: string
  repo_path: string
  branch?: string
  first_instruction?: string

  // --- heuristic guesses for the reviewer to confirm or override ----------
  task_type: string
  quality_tier: 'gold' | 'silver' | 'bronze'
  canonicality_guess: 'canonical' | 'acceptable' | 'off-policy'
  bucket_guess: 'canonical-policy' | 'strong-implementation' | 'planning' | 'rejects'
  suggested_disposition: 'keep' | 'rewrite' | 'reject'
  review_score: number
  /** One-line human-readable digest of the session. */
  summary: string

  // --- supporting evidence ------------------------------------------------
  changed_files: string[]
  commands_run: string[]
  tests_run: string[]
  relevant_helpers: string[]
  policy_signals: string[]
  verification_signals: string[]
  positive_signals: string[]
  negative_signals: string[]
  snippets: string[]
  notes: string[]
}
74
+
75
/**
 * Repositories scanned on every run.
 * NOTE(review): these are absolute paths on one specific machine; the script
 * only finds transcripts where the same checkouts exist.
 */
const DEFAULT_REPOS: RepoConfig[] = [
  {
    name: 'luca',
    repoPath: '/Users/jonathansoeder/@soederpop/projects/luca',
  },
  {
    name: 'agentic-loop',
    repoPath: '/Users/jonathansoeder/@agentic-loop',
  },
]

/** Output directory: first CLI argument when given, otherwise `<cwd>/datasets/lora`. */
const OUTPUT_DIR = process.argv[2] ? process.argv[2] : `${process.cwd()}/datasets/lora`
81
+
82
/**
 * Work out which home directory holds the `.claude` session store.
 *
 * `$HOME` is used when it is set and does not point inside a
 * `/.hermes/profiles/` directory. Otherwise the home directory is derived from
 * the leading `/Users/<name>` (macOS) or `/home/<name>` (Linux) segment of
 * `repoPath`. If neither applies, whatever `$HOME` contained (possibly the
 * empty string) is returned.
 *
 * @param repoPath Absolute path of the repository being scanned.
 * @returns The inferred home directory, or `''` when nothing could be inferred.
 */
function inferHome(repoPath: string) {
  const envHome = process.env.HOME || ''
  const insideHermesProfile = envHome.includes('/.hermes/profiles/')
  if (envHome && !insideHermesProfile) return envHome

  // Recover the real owner's home from the repo path itself.
  const ownerHome = repoPath.match(/^\/(?:Users|home)\/[^/]+/)
  return ownerHome ? ownerHome[0] : envHome
}
89
+
90
/**
 * Turn a working-directory path into the directory name used under
 * `.claude/projects/`: every character that is not an ASCII letter or digit
 * becomes `-`.
 */
function encodeClaudeProjectPath(cwd: string) {
  const nonAlphanumeric = /[^A-Za-z0-9]/g
  const encoded = cwd.replace(nonAlphanumeric, '-')
  return encoded
}
93
+
94
/** Remove duplicates while keeping the first occurrence of each item in place. */
function uniq<T>(items: T[]) {
  const seen = new Set<T>(items)
  return [...seen]
}
97
+
98
/**
 * Add `amount` to the counter stored under `key`, creating it at zero first.
 *
 * Only the map's own properties count as existing entries. Reading `map[key]`
 * directly would pick up inherited members for keys such as `toString` or
 * `constructor` and turn the addition into string concatenation.
 *
 * @param map    Counter object, mutated in place.
 * @param key    Counter name.
 * @param amount Increment to apply (defaults to 1).
 */
function inc(map: Record<string, number>, key: string, amount = 1) {
  const hasOwnEntry = Object.prototype.hasOwnProperty.call(map, key)
  const current = hasOwnEntry ? map[key] || 0 : 0
  map[key] = current + amount
}
101
+
102
/**
 * Express `maybeAbsolute` relative to `repoPath` when it lies inside the repo.
 *
 * The match is made on a directory boundary, so a sibling such as
 * `/a/bc/file` is not mistaken for something inside `/a/b`. Trailing slashes
 * on `repoPath` are ignored.
 *
 * @param repoPath      Repository root.
 * @param maybeAbsolute Path that may or may not live under `repoPath`.
 * @returns The repo-relative path, `''` when the path is the root itself, or
 *          `maybeAbsolute` unchanged when it is outside the repo.
 */
function relToRepo(repoPath: string, maybeAbsolute: string) {
  const root = repoPath.replace(/\/+$/, '')
  if (maybeAbsolute === root) return ''

  const rootWithSeparator = `${root}/`
  return maybeAbsolute.startsWith(rootWithSeparator)
    ? maybeAbsolute.slice(rootWithSeparator.length)
    : maybeAbsolute
}
107
+
108
/**
 * Clean up a path harvested from transcript text: drop everything from the
 * first `:` onward (e.g. a `:line:col` suffix), strip trailing backticks,
 * commas, closing brackets and whitespace, then trim.
 */
function normalizeChangedPath(path: string) {
  const withoutLocation = path.replace(/[:].*$/, '')
  const withoutTrailingNoise = withoutLocation.replace(/[`,)\]\s]+$/g, '')
  return withoutTrailingNoise.trim()
}
114
+
115
/**
 * Guess what kind of work a session was from its opening instruction and the
 * files it touched. Rules are checked in priority order and the first hit wins;
 * `'investigation'` is the fallback.
 *
 * A session counts as `'docs'` by file evidence only when it touched at least
 * one file and every touched file is Markdown. The file list is checked per
 * file so the result does not depend on which path happens to sort last.
 *
 * @param firstInstruction Text of the first user turn (case-insensitive match).
 * @param changedFiles     Repo-relative paths the session touched.
 * @returns One of `'architecture'`, `'docs'`, `'bugfix'`, `'refactor'`,
 *          `'test-fix'`, `'workflow'`, `'feature-add'`, `'investigation'`.
 */
function classifyTask(firstInstruction = '', changedFiles: string[]) {
  const text = firstInstruction.toLowerCase()
  const onlyMarkdownChanged = changedFiles.length > 0 && changedFiles.every(file => /\.md$/i.test(file))

  if (/what would it take|could i in theory|look at all of the methods|can we make equivalents|take a look|architect/.test(text)) return 'architecture'
  if (/readme|docs|marketing website|tutorial|document|philosophy/.test(text) || onlyMarkdownChanged) return 'docs'
  if (/failing|bug|fix|error|broken|regression|should pass|green/.test(text)) return 'bugfix'
  if (/refactor|rename all references|generated.*version control|migration/.test(text)) return 'refactor'
  if (/test/.test(text) && changedFiles.some(file => file.startsWith('test/'))) return 'test-fix'
  if (/workflow/.test(text)) return 'workflow'
  if (/option|helper|implement|add|need|track|support|default|expose|bind|reload/.test(text)) return 'feature-add'

  return 'investigation'
}
129
+
130
/**
 * Report which of the tracked CLI invocations appear in a shell command.
 * Labels come back in a fixed order; `bun run test` is reported as `bun test`.
 */
function detectCommands(command: string) {
  const probes: Array<[RegExp, string]> = [
    [/\bluca describe\b/, 'luca describe'],
    [/\bluca eval\b/, 'luca eval'],
    [/\bbun test\b|\bbun run test\b/, 'bun test'],
    [/\bgit commit\b/, 'git commit'],
    [/\bluca scaffold\b/, 'luca scaffold'],
    [/\bluca workflow\b/, 'luca workflow'],
  ]

  const hits: string[] = []
  for (const [pattern, label] of probes) {
    if (pattern.test(command)) hits.push(label)
  }
  return hits
}
140
+
141
/**
 * Guess which Luca helpers a session revolved around.
 *
 * `describe` / `eval` are inferred from the commands that were run; every other
 * helper is inferred from keyword matches in the lower-cased first instruction
 * and changed-file paths. The result is deduplicated and keeps rule order.
 */
function inferRelevantHelpers(commandHits: string[], changedFiles: string[], firstInstruction = '') {
  const haystack = `${firstInstruction} ${changedFiles.join(' ')}`.toLowerCase()

  const commandRules: Array<[string, string]> = [
    ['luca describe', 'describe'],
    ['luca eval', 'eval'],
  ]
  const keywordRules: Array<[RegExp, string]> = [
    [/assistant/, 'assistant'],
    [/conversation/, 'conversation'],
    [/contentdb|content-db/, 'contentDb'],
    [/claude/, 'claudeCode'],
    [/python/, 'python'],
    [/mcp/, 'mcpBridge'],
    [/file-tools|filetools/, 'fileTools'],
    [/secure-shell|ssh/, 'secureShell'],
    [/repl|websocket/, 'repl'],
    [/openai/, 'openai'],
    [/workflow/, 'workflow'],
    [/express|endpoint/, 'express'],
  ]

  const matched = [
    ...commandRules.filter(([command]) => commandHits.includes(command)).map(([, helper]) => helper),
    ...keywordRules.filter(([keyword]) => keyword.test(haystack)).map(([, helper]) => helper),
  ]
  return Array.from(new Set(matched))
}
161
+
162
/**
 * List the policy-relevant behaviours a session exhibited, in a fixed order.
 * When none of the signals fire, the trace holds the single default entry
 * `'inspect before changing code'`.
 */
function buildPolicyTrace(commandHits: string[], hasEdits: boolean, hasTests: boolean, firstInstruction = '') {
  const instruction = firstInstruction.toLowerCase()
  const mentionsContainer = /container\.feature|container\.client|container\.server/.test(instruction)

  const candidates: Array<[boolean, string]> = [
    [commandHits.includes('luca describe'), 'discover helper surface with luca describe'],
    [commandHits.includes('luca eval'), 'test assumptions with luca eval'],
    [commandHits.includes('luca scaffold'), 'lean on luca scaffold before manual boilerplate'],
    [mentionsContainer, 'compose with Luca container primitives'],
    [hasEdits, 'apply small reviewable edits'],
    [hasTests, 'verify with bun tests'],
  ]

  const trace = candidates.filter(([observed]) => observed).map(([, label]) => label)
  return trace.length > 0 ? trace : ['inspect before changing code']
}
174
+
175
/**
 * Assign a coarse quality tier to a session.
 *
 * - `gold`: edits backed by a test run plus either runtime introspection
 *   (`luca describe` / `luca eval`) or at least two relevant helpers; or an
 *   architecture-style instruction combined with introspection.
 * - `silver`: any one of edits, introspection, or two-plus relevant helpers.
 * - `bronze`: none of the above.
 *
 * The architecture check classifies the instruction text alone (no file list).
 */
function qualityTier(options: { commandHits: string[], hasEdits: boolean, hasTests: boolean, firstInstruction?: string, relevantHelpers: string[] }) {
  const introspected = ['luca describe', 'luca eval'].some(command => options.commandHits.includes(command))
  const helperRich = options.relevantHelpers.length >= 2
  const isArchitecture = classifyTask(options.firstInstruction, []) === 'architecture'

  const verifiedEdit = options.hasEdits && options.hasTests && (introspected || helperRich)
  if (verifiedEdit || (isArchitecture && introspected)) return 'gold'

  const anySignal = options.hasEdits || introspected || helperRich
  return anySignal ? 'silver' : 'bronze'
}
185
+
186
/**
 * Compute the review-priority score for a session as a sum of fixed bonuses
 * and penalties. Higher means "review sooner".
 *
 * The tier sets the base (gold 40, silver 20, bronze 0). Tool usage, helper
 * coverage and task type adjust it, and the changed-file count rewards small
 * focused edits while penalising none or very many.
 */
function scoreSession(session: Omit<SessionRecord, 'reviewScore'>) {
  const { commandsRun, relevantHelpers, taskType, qualityTier: tier } = session
  const changedCount = session.changedFiles.length
  const orchestrationHelpers = ['assistant', 'workflow', 'conversation', 'mcpBridge', 'express']

  const base = tier === 'gold' ? 40 : tier === 'silver' ? 20 : 0

  const adjustments: Array<[boolean, number]> = [
    // Luca-native tooling
    [commandsRun.includes('luca describe'), 12],
    [commandsRun.includes('luca eval'), 14],
    [commandsRun.includes('luca scaffold'), 6],
    [session.testsRun.length > 0 || commandsRun.includes('bun test'), 12],
    // helper coverage
    [relevantHelpers.includes('describe'), 4],
    [relevantHelpers.includes('eval'), 4],
    [relevantHelpers.some(name => orchestrationHelpers.includes(name)), 6],
    // task type
    [taskType === 'feature-add', 8],
    [taskType === 'bugfix', 7],
    [taskType === 'architecture', 5],
    [taskType === 'docs', -8],
    // edit footprint
    [changedCount === 0, -10],
    [changedCount >= 1 && changedCount <= 5, 6],
    [changedCount > 12, -8],
    // unverified edits
    [session.notes.some(note => note.includes('manual verification')), -3],
  ]

  return adjustments.reduce((total, [applies, delta]) => (applies ? total + delta : total), base)
}
211
+
212
/**
 * Guess how closely a session follows the preferred Luca workflow.
 *
 * `canonical` requires runtime introspection (`luca describe` / `luca eval`)
 * together with either container-style composition or a test run. Anything
 * else is `acceptable` unless the session is bronze tier, which is `off-policy`.
 */
function guessCanonicality(session: SessionRecord): 'canonical' | 'acceptable' | 'off-policy' {
  const ran = (command: string) => session.commandsRun.includes(command)

  if (ran('luca describe') || ran('luca eval')) {
    const containerish =
      session.relevantHelpers.length >= 2 ||
      /container\.(feature|client|server)/i.test(session.firstInstruction || '')
    const tested = session.testsRun.length > 0 || ran('bun test')
    if (containerish || tested) return 'canonical'
  }

  return session.qualityTier === 'bronze' ? 'off-policy' : 'acceptable'
}
220
+
221
/**
 * Pick the dataset bucket a session most likely belongs in. Checks run in
 * priority order: bronze sessions with no edits are rejected, architecture and
 * investigation tasks are planning material, canonical sessions feed the
 * policy bucket, and everything else is a strong implementation example.
 */
function guessBucket(session: SessionRecord): 'canonical-policy' | 'strong-implementation' | 'planning' | 'rejects' {
  const nothingChanged = session.changedFiles.length === 0
  if (nothingChanged && session.qualityTier === 'bronze') return 'rejects'

  switch (session.taskType) {
    case 'architecture':
    case 'investigation':
      return 'planning'
  }

  return guessCanonicality(session) === 'canonical' ? 'canonical-policy' : 'strong-implementation'
}
227
+
228
/**
 * Recommend what a reviewer should do with a session, based on its bucket:
 * rejects are dropped, policy and planning examples are rewritten, and strong
 * implementations are kept as-is.
 */
function suggestedDisposition(session: SessionRecord): 'keep' | 'rewrite' | 'reject' {
  switch (guessBucket(session)) {
    case 'rejects':
      return 'reject'
    case 'canonical-policy':
    case 'planning':
      return 'rewrite'
    default:
      return 'keep'
  }
}
234
+
235
/**
 * Build a one-line digest of a session, with sections joined by ` | `.
 * The instruction is truncated to 140 characters. The instruction and helper
 * sections are left out when empty.
 */
function summarizeSession(session: SessionRecord) {
  const sections: string[] = [`${session.taskType} session from ${session.repo}`]

  if (session.firstInstruction) {
    sections.push(`task starts: ${session.firstInstruction.slice(0, 140)}`)
  }

  const changed = session.changedFiles.length
  sections.push(changed ? `changed ${changed} file(s)` : 'no changed files detected')

  const commands = session.commandsRun
  sections.push(commands.length ? `commands: ${commands.join(', ')}` : 'no key commands detected')

  if (session.relevantHelpers.length) {
    sections.push(`helpers: ${session.relevantHelpers.join(', ')}`)
  }

  return sections.join(' | ')
}
245
+
246
/**
 * Collect the absolute paths of all regular files under `root` that match the
 * glob `pattern`, sorted in default string order.
 */
async function listFiles(root: string, pattern: string) {
  const scanner = new Bun.Glob(pattern).scan({ cwd: root, absolute: true, onlyFiles: true })

  const found: string[] = []
  for await (const entry of scanner) {
    found.push(entry)
  }

  found.sort()
  return found
}
252
+
253
/** Resolve to `true` when a file exists at `path`, as reported by `Bun.file(path).exists()`. */
async function fileExists(path: string) {
  const present = await Bun.file(path).exists()
  return present
}
256
+
257
/**
 * Pull every plain-text fragment out of a message `content` value.
 *
 * A string is returned as a single fragment. For an array, each part
 * contributes its string `text` and then its string `content`, in that order.
 * Any other input yields an empty list.
 */
function extractTextBlocks(content: any): string[] {
  if (typeof content === 'string') return [content]
  if (!Array.isArray(content)) return []

  const isString = (value: unknown): value is string => typeof value === 'string'
  return content.flatMap(part => [part?.text, part?.content].filter(isString))
}
267
+
268
/**
 * Parse one session transcript (JSONL, one event per line) into a scored
 * `SessionRecord`.
 *
 * What is extracted:
 * - turn and tool-use counts, plus a per-tool tally;
 * - the first `gitBranch` seen and the first user turn containing text;
 * - tracked CLI invocations found in `tool_use` inputs, and the raw commands
 *   that were test runs;
 * - "changed" files: `toolUseResult.filePath` values inside the repo, plus any
 *   repo-rooted path mentioned in message text. The second source is a
 *   heuristic, so a path that was merely read or discussed is counted too;
 * - up to eight short snippets (at most the first two slots go to user turns).
 *
 * Lines that are not valid JSON are skipped.
 *
 * @param sessionPath Path of the `.jsonl` transcript.
 * @param repo        Repository the transcript belongs to.
 * @returns The populated record. The current implementation never resolves to
 *          `null`; the wider return type is kept for callers that filter on it.
 */
async function parseSession(sessionPath: string, repo: RepoConfig): Promise<SessionRecord | null> {
  const raw = await Bun.file(sessionPath).text()
  const lines = raw.split('\n').filter(Boolean)

  let userTurns = 0
  let assistantTurns = 0
  let toolUses = 0
  let branch: string | undefined
  let firstInstruction: string | undefined
  const toolUsage: ToolCount = {}
  const changedFiles: string[] = []
  const commandHits: string[] = []
  const testsRun: string[] = []
  const notes: string[] = []
  const snippets: string[] = []

  // Loop-invariant matchers, built once rather than once per text block.
  // The trailing slash makes the prefix test respect directory boundaries, so
  // a sibling checkout such as `<repo>-old/...` is not treated as in-repo.
  const repoPrefix = `${repo.repoPath}/`
  const escapedRepo = repo.repoPath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  const repoPathMention = new RegExp(`${escapedRepo}/[^\\s\`"']+`, 'g')
  const assistantSnippetPattern = /luca describe|luca eval|bun test|container\.(feature|client|server)|commands\/|features\/|endpoints\//i
  const toolSnippetPattern = /luca describe|luca eval|bun test|luca scaffold|luca workflow/i

  for (const line of lines) {
    // Transcript events are untyped JSON; every access below is guarded.
    let event: any
    try {
      event = JSON.parse(line)
    } catch {
      // Tolerate truncated or corrupt lines; the rest of the transcript is still usable.
      continue
    }

    if (event?.gitBranch && !branch) branch = event.gitBranch

    const content = event?.message?.content
    const blocks = extractTextBlocks(content)

    if (event?.type === 'user' && blocks.length) {
      userTurns += 1
      const textBlock = blocks.join(' ').trim()
      if (!firstInstruction && textBlock) firstInstruction = textBlock
      if (snippets.length < 2 && textBlock) snippets.push(`user: ${textBlock.slice(0, 220)}`)
    }

    if (event?.type === 'assistant') {
      assistantTurns += 1

      for (const block of blocks) {
        if (snippets.length >= 8) break
        if (assistantSnippetPattern.test(block)) {
          snippets.push(`assistant: ${block.replace(/\s+/g, ' ').slice(0, 220)}`)
        }
      }

      if (Array.isArray(content)) {
        for (const part of content) {
          if (part?.type !== 'tool_use') continue

          toolUses += 1
          inc(toolUsage, part.name || 'unknown')

          const command = part?.input?.command
          if (typeof command !== 'string') continue

          const hits = detectCommands(command)
          commandHits.push(...hits)
          if (hits.includes('bun test')) testsRun.push(command)
          if (snippets.length < 8 && toolSnippetPattern.test(command)) {
            snippets.push(`tool: ${command.replace(/\s+/g, ' ').slice(0, 220)}`)
          }
        }
      }
    }

    // Source 1: structured tool results that name the file they touched.
    const filePath = event?.toolUseResult?.filePath
    if (typeof filePath === 'string' && filePath.startsWith(repoPrefix)) {
      changedFiles.push(relToRepo(repo.repoPath, filePath))
    }

    // Source 2: repo-rooted paths mentioned anywhere in the message text.
    for (const block of blocks) {
      const mentions = block.match(repoPathMention) || []
      for (const mention of mentions) changedFiles.push(relToRepo(repo.repoPath, mention))
    }
  }

  const dedupedChangedFiles = uniq(
    changedFiles
      .map(normalizeChangedPath)
      .filter(Boolean)
      .filter(path => !path.includes('node_modules/'))
      .filter(path => !path.startsWith('dist/'))
      .filter(path => !path.startsWith('.git/'))
  ).sort()

  const dedupedCommandHits = uniq(commandHits)
  const relevantHelpers = inferRelevantHelpers(dedupedCommandHits, dedupedChangedFiles, firstInstruction)
  const hasEdits = dedupedChangedFiles.length > 0
  const hasTests = testsRun.length > 0 || dedupedCommandHits.includes('bun test')
  const taskType = classifyTask(firstInstruction, dedupedChangedFiles)
  const policyTrace = buildPolicyTrace(dedupedCommandHits, hasEdits, hasTests, firstInstruction)

  if (hasEdits && !hasTests) notes.push('candidate may need manual verification because no explicit bun test run was detected')
  if (dedupedCommandHits.includes('luca describe') || dedupedCommandHits.includes('luca eval')) notes.push('shows Luca-native runtime introspection')
  if (taskType === 'architecture' || taskType === 'investigation') notes.push('good candidate for planning/policy bucket if reasoning is clean')

  // Strip only a trailing extension so ids containing ".jsonl" elsewhere survive intact.
  const fileName = sessionPath.split('/').pop() ?? sessionPath
  const sessionId = fileName.replace(/\.jsonl$/, '')

  const partial: Omit<SessionRecord, 'reviewScore'> = {
    sessionId,
    sessionPath,
    repo: repo.name,
    repoPath: repo.repoPath,
    branch,
    firstInstruction,
    taskType,
    qualityTier: qualityTier({ commandHits: dedupedCommandHits, hasEdits, hasTests, firstInstruction, relevantHelpers }),
    userTurns,
    assistantTurns,
    toolUses,
    toolUsage,
    changedFiles: dedupedChangedFiles,
    commandsRun: dedupedCommandHits,
    testsRun: uniq(testsRun),
    relevantHelpers,
    policyTrace,
    notes,
    snippets: uniq(snippets).slice(0, 8),
  }

  return {
    ...partial,
    reviewScore: scoreSession(partial),
  }
}
392
+
393
/**
 * Count the files under `root` matching `pattern`. Any failure while listing
 * is reported as zero matches.
 */
async function countMatchingFiles(root: string, pattern: string) {
  let matches: string[]
  try {
    matches = await listFiles(root, pattern)
  } catch {
    return 0
  }
  return matches.length
}
400
+
401
/**
 * Assemble the per-repository summary: session counts by tier, an inventory of
 * well-known documentation and source locations, the 15 most-used tools and
 * the 20 most frequently touched files.
 *
 * @param repo             Repository being summarised.
 * @param sessions         Parsed main-session records for the repository.
 * @param mainSessions     Paths of the main transcripts (only counted).
 * @param subagentSessions Paths of the subagent transcripts (only counted).
 * @param sessionRoot      Directory the transcripts were read from.
 */
async function buildRepoSummary(repo: RepoConfig, sessions: SessionRecord[], mainSessions: string[], subagentSessions: string[], sessionRoot: string): Promise<RepoSummary> {
  const sourceInventory: Record<string, number> = {}

  // Presence flags for top-level guidance documents.
  for (const doc of ['README.md', 'CLAUDE.md', 'AGENTS.md']) {
    sourceInventory[doc] = (await fileExists(`${repo.repoPath}/${doc}`)) ? 1 : 0
  }

  // File counts for documentation and source directories.
  const countedDirs: Array<[string, string]> = [
    ['docs/apis', '**/*.md'],
    ['docs/examples', '**/*.md'],
    ['docs/tutorials', '**/*.md'],
    ['test', '**/*.ts'],
    ['test-integration', '**/*.ts'],
    ['src/commands', '**/*.ts'],
  ]
  for (const [dir, pattern] of countedDirs) {
    sourceInventory[dir] = await countMatchingFiles(`${repo.repoPath}/${dir}`, pattern)
  }

  const toolTotals: ToolCount = {}
  const changedFileCounts: Record<string, number> = {}
  const tierTotals = { gold: 0, silver: 0, bronze: 0 }
  for (const session of sessions) {
    tierTotals[session.qualityTier] += 1
    Object.entries(session.toolUsage).forEach(([tool, count]) => inc(toolTotals, tool, count))
    session.changedFiles.forEach(file => inc(changedFileCounts, file))
  }

  const byCountDescending = (a: [string, number], b: [string, number]) => b[1] - a[1]
  const topTools = Object.fromEntries(Object.entries(toolTotals).sort(byCountDescending).slice(0, 15))
  const topChangedFiles = Object.entries(changedFileCounts)
    .sort(byCountDescending)
    .slice(0, 20)
    .map(([path, count]) => ({ path, count }))

  return {
    repo: repo.name,
    repoPath: repo.repoPath,
    generatedAt: new Date().toISOString(),
    sessionRoot,
    counts: {
      mainSessions: mainSessions.length,
      subagentSessions: subagentSessions.length,
      gold: tierTotals.gold,
      silver: tierTotals.silver,
      bronze: tierTotals.bronze,
    },
    sourceInventory,
    topTools,
    topChangedFiles,
  }
}
441
+
442
/**
 * Convert a parsed session into a review-queue row: copies the record's fields
 * under snake_case names and attaches the heuristic guesses plus the
 * positive, negative and verification signals that explain them.
 *
 * @param session Parsed session.
 * @param rank    1-based position to record on the row.
 */
function toReviewEntry(session: SessionRecord, rank: number): ReviewQueueEntry {
  const ran = (command: string) => session.commandsRun.includes(command)
  const tested = session.testsRun.length > 0 || ran('bun test')
  const changedCount = session.changedFiles.length
  const touchesConventionFolder = session.changedFiles.some(path =>
    /^(commands|features|endpoints|clients|servers)\//.test(path),
  )

  const verificationSignals: string[] = [
    tested ? 'explicit bun test detected' : 'no explicit bun test detected',
  ]

  const positives: string[] = [...session.policyTrace]
  if (ran('luca describe')) positives.push('uses luca describe')
  if (ran('luca eval')) positives.push('uses luca eval')
  if (touchesConventionFolder) positives.push('edits Luca convention folders')

  const negatives: string[] = []
  if (changedCount === 0) negatives.push('no changed files detected')
  if (!tested) negatives.push('no explicit test verification')
  if (changedCount > 12) negatives.push('large changed-file fan-out')

  return {
    rank,
    session_id: session.sessionId,
    source_repo: session.repo,
    candidate_path: session.sessionPath,
    repo_path: session.repoPath,
    branch: session.branch,
    first_instruction: session.firstInstruction,
    task_type: session.taskType,
    quality_tier: session.qualityTier,
    canonicality_guess: guessCanonicality(session),
    bucket_guess: guessBucket(session),
    suggested_disposition: suggestedDisposition(session),
    review_score: session.reviewScore,
    summary: summarizeSession(session),
    changed_files: session.changedFiles,
    commands_run: session.commandsRun,
    tests_run: session.testsRun,
    relevant_helpers: session.relevantHelpers,
    policy_signals: session.policyTrace,
    verification_signals: verificationSignals,
    positive_signals: uniq(positives),
    negative_signals: uniq(negatives),
    snippets: session.snippets,
    notes: session.notes,
  }
}
489
+
490
/**
 * Script entry point.
 *
 * For every configured repository: locate its session directory, parse the
 * main (non-subagent) transcripts, and write a summary JSON plus a candidates
 * JSONL. Then rank all sessions together and write the full review queue, a
 * first review batch (up to 10 `luca`, 15 `agentic-loop` and 5 rejected
 * entries) and a manifest, and print a short JSON report to stdout.
 */
async function main() {
  await Bun.$`mkdir -p ${OUTPUT_DIR}`

  const toJsonl = (rows: unknown[]) => rows.map(row => JSON.stringify(row)).join('\n') + '\n'
  const toPrettyJson = (value: unknown) => `${JSON.stringify(value, null, 2)}\n`

  const allSessions: SessionRecord[] = []
  const summaries: RepoSummary[] = []

  for (const repo of DEFAULT_REPOS) {
    const home = inferHome(repo.repoPath)
    const sessionRoot = `${home}/.claude/projects/${encodeClaudeProjectPath(repo.repoPath)}`

    // Split transcripts into top-level sessions and subagent runs.
    const mainSessions: string[] = []
    const subagentSessions: string[] = []
    for (const path of await listFiles(sessionRoot, '**/*.jsonl')) {
      if (path.includes('/subagents/')) subagentSessions.push(path)
      else mainSessions.push(path)
    }

    const parsed = await Promise.all(mainSessions.map(path => parseSession(path, repo)))
    const sessions = parsed
      .filter((session): session is SessionRecord => Boolean(session))
      .sort((a, b) => b.reviewScore - a.reviewScore)
    allSessions.push(...sessions)

    const summary = await buildRepoSummary(repo, sessions, mainSessions, subagentSessions, sessionRoot)
    summaries.push(summary)

    await Bun.write(`${OUTPUT_DIR}/${repo.name}-session-curation-summary.json`, toPrettyJson(summary))
    await Bun.write(`${OUTPUT_DIR}/${repo.name}-session-candidates.jsonl`, toJsonl(sessions))
  }

  // Global ranking across repositories, best score first.
  allSessions.sort((a, b) => b.reviewScore - a.reviewScore)
  const ranked = allSessions.map((session, index) => toReviewEntry(session, index + 1))

  const isReject = (item: ReviewQueueEntry) => item.bucket_guess === 'rejects'
  const acceptedFrom = (repoName: string) => (item: ReviewQueueEntry) =>
    item.source_repo === repoName && !isReject(item)

  // The first batch is re-ranked from 1 within the batch.
  const firstBatch = [
    ...ranked.filter(acceptedFrom('luca')).slice(0, 10),
    ...ranked.filter(acceptedFrom('agentic-loop')).slice(0, 15),
    ...ranked.filter(isReject).slice(0, 5),
  ].map((item, index) => ({ ...item, rank: index + 1 }))

  const manifest = {
    generatedAt: new Date().toISOString(),
    repos: summaries.map(({ repo, repoPath, counts, sessionRoot }) => ({ repo, repoPath, counts, sessionRoot })),
    totalCandidates: ranked.length,
    recommendedFirstBatch: {
      total: firstBatch.length,
      lucaCanonicalAndStrong: firstBatch.filter(acceptedFrom('luca')).length,
      agenticLoopCanonicalAndStrong: firstBatch.filter(acceptedFrom('agentic-loop')).length,
      rejects: firstBatch.filter(isReject).length,
    },
    recommendations: [
      'Have the Luca author label the first batch using datasets/lora/review-schema.json.',
      'Prefer rewrite over raw keep for canonical-policy and planning examples.',
      'Treat docs and API references as retrieval sources, not direct LoRA rows.',
    ],
  }

  await Bun.write(`${OUTPUT_DIR}/review-queue.jsonl`, toJsonl(ranked))
  await Bun.write(`${OUTPUT_DIR}/review-batch-1.jsonl`, toJsonl(firstBatch))
  await Bun.write(`${OUTPUT_DIR}/review-manifest.json`, toPrettyJson(manifest))

  const report = {
    outputDir: OUTPUT_DIR,
    totalCandidates: ranked.length,
    firstBatch: firstBatch.length,
    repos: manifest.repos,
  }
  console.log(JSON.stringify(report, null, 2))
}
557
+
558
// Run the script. On failure, print the most informative description of the
// error that is available and exit with a non-zero status.
main().catch((error) => {
  const description = error?.stack || error?.message || String(error)
  console.error(description)
  process.exit(1)
})
@@ -1,4 +1,4 @@
1
1
  // Generated at compile time — do not edit manually
2
- export const BUILD_SHA = '6a44967'
2
+ export const BUILD_SHA = 'b06dca0'
3
3
  export const BUILD_BRANCH = 'main'
4
- export const BUILD_DATE = '2026-05-09T04:02:58Z'
4
+ export const BUILD_DATE = '2026-05-23T07:32:17Z'