luca 3.0.2 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bun.lock +45 -0
- package/commands/social.ts +137 -0
- package/datasets/lora/agentic-loop-session-candidates.jsonl +91 -0
- package/datasets/lora/agentic-loop-session-curation-summary.json +123 -0
- package/datasets/lora/luca-session-candidates.jsonl +29 -0
- package/datasets/lora/luca-session-curation-summary.json +121 -0
- package/datasets/lora/review-batch-1.jsonl +30 -0
- package/datasets/lora/review-manifest.json +41 -0
- package/datasets/lora/review-queue.jsonl +120 -0
- package/datasets/lora/review-schema.json +134 -0
- package/datasets/lora/review-template.jsonl +2 -0
- package/datasets/lora/review-ui.html +725 -0
- package/features/cipher-social.ts +493 -0
- package/package.json +6 -1
- package/scripts/curate-claude-sessions.ts +561 -0
- package/src/cli/build-info.ts +2 -2
- package/src/introspection/generated.agi.ts +13140 -12190
- package/src/introspection/generated.node.ts +3087 -2137
- package/src/node/container.ts +8 -0
- package/src/node/features/helpers.ts +12 -0
- package/src/node/features/socket-repl.ts +336 -0
- package/src/node/features/telnyx-assistant-connector.ts +1206 -0
- package/src/node/features/vm.ts +17 -0
- package/index.ts +0 -1
|
@@ -0,0 +1,561 @@
|
|
|
1
|
+
/** Invocation counts keyed by tool name. */
type ToolCount = { [toolName: string]: number };
|
|
2
|
+
|
|
3
|
+
/** A repository whose recorded sessions are scanned by this script. */
type RepoConfig = {
  /** Short label; used in output file names and as the `repo` field of records. */
  name: string;
  /** Path of the repository checkout; session paths are made relative to it. */
  repoPath: string;
};
|
|
7
|
+
|
|
8
|
+
/** Everything extracted from one session log, plus the derived labels and score. */
type SessionRecord = {
  /** File name of the session log without its `.jsonl` extension. */
  sessionId: string;
  /** Path of the session log that was parsed. */
  sessionPath: string;
  /** Name of the repository configuration the session belongs to. */
  repo: string;
  /** Checkout path of that repository. */
  repoPath: string;
  /** First `gitBranch` value seen in the log, if any. */
  branch?: string;
  /** Text of the first user message that carried any text. */
  firstInstruction?: string;
  /** Heuristic task label (e.g. `bugfix`, `docs`, `investigation`). */
  taskType: string;
  /** Heuristic quality bucket. */
  qualityTier: 'gold' | 'silver' | 'bronze';
  /** Number of user events that contained at least one text block. */
  userTurns: number;
  /** Number of assistant events. */
  assistantTurns: number;
  /** Total number of `tool_use` parts across assistant messages. */
  toolUses: number;
  /** Per-tool breakdown of `toolUses`. */
  toolUsage: ToolCount;
  /** De-duplicated, sorted repo paths seen in tool results or message text. */
  changedFiles: string[];
  /** De-duplicated labels of the key commands that were detected. */
  commandsRun: string[];
  /** De-duplicated raw command strings that invoked the test runner. */
  testsRun: string[];
  /** Helper names inferred from commands, paths and the first instruction. */
  relevantHelpers: string[];
  /** Human-readable description of the workflow steps that were observed. */
  policyTrace: string[];
  /** Reviewer-facing remarks generated while parsing. */
  notes: string[];
  /** Up to eight short excerpts from the conversation. */
  snippets: string[];
  /** Numeric ranking score; higher sorts first. */
  reviewScore: number;
};
|
|
30
|
+
|
|
31
|
+
/** Aggregate statistics written once per repository. */
type RepoSummary = {
  /** Name of the repository configuration. */
  repo: string;
  /** Checkout path of the repository. */
  repoPath: string;
  /** ISO-8601 timestamp taken when the summary was built. */
  generatedAt: string;
  /** Directory that was scanned for session logs. */
  sessionRoot: string;
  /** Session totals, overall and per quality tier. */
  counts: {
    mainSessions: number;
    subagentSessions: number;
    gold: number;
    silver: number;
    bronze: number;
  };
  /** Presence flags / file counts for well-known docs, test and source locations. */
  sourceInventory: Record<string, number>;
  /** Most frequently used tools with their summed counts. */
  topTools: ToolCount;
  /** Most frequently touched paths with the number of sessions touching each. */
  topChangedFiles: Array<{ path: string; count: number }>;
};
|
|
47
|
+
|
|
48
|
+
/** One row of the review queue: a session record plus the script's guesses. */
type ReviewQueueEntry = {
  /** 1-based position in the queue the entry is written to. */
  rank: number;
  session_id: string;
  source_repo: string;
  /** Path of the session log the entry was derived from. */
  candidate_path: string;
  repo_path: string;
  branch?: string;
  first_instruction?: string;
  task_type: string;
  quality_tier: 'gold' | 'silver' | 'bronze';
  /** Heuristic guess of how closely the session follows the preferred workflow. */
  canonicality_guess: 'canonical' | 'acceptable' | 'off-policy';
  /** Heuristic guess of the dataset bucket the session belongs in. */
  bucket_guess: 'canonical-policy' | 'strong-implementation' | 'planning' | 'rejects';
  /** Suggested reviewer action derived from the bucket guess. */
  suggested_disposition: 'keep' | 'rewrite' | 'reject';
  review_score: number;
  /** One-line description assembled from the session's key facts. */
  summary: string;
  changed_files: string[];
  commands_run: string[];
  tests_run: string[];
  relevant_helpers: string[];
  policy_signals: string[];
  /** Whether an explicit test run was detected. */
  verification_signals: string[];
  positive_signals: string[];
  negative_signals: string[];
  snippets: string[];
  notes: string[];
};
|
|
74
|
+
|
|
75
|
+
/**
 * Repositories whose session logs are curated on every run.
 *
 * NOTE(review): these are absolute paths on a single developer machine; on any
 * other machine the derived session directories will most likely not exist.
 */
const DEFAULT_REPOS: RepoConfig[] = [
  {
    name: 'luca',
    repoPath: '/Users/jonathansoeder/@soederpop/projects/luca',
  },
  {
    name: 'agentic-loop',
    repoPath: '/Users/jonathansoeder/@agentic-loop',
  },
]

/** Destination directory: first CLI argument, else `datasets/lora` under the working directory. */
const OUTPUT_DIR = process.argv[2] || process.cwd() + '/datasets/lora'
|
|
81
|
+
|
|
82
|
+
/**
 * Chooses the home directory used to locate the session-log folder.
 *
 * `$HOME` wins unless it is empty or points inside a `/.hermes/profiles/`
 * directory; in that case the `/Users/<name>` prefix of `repoPath` is used
 * when present, and `$HOME` (possibly empty) is the last resort.
 */
function inferHome(repoPath: string) {
  const envHome = process.env.HOME || ''
  const isProfileHome = envHome.includes('/.hermes/profiles/')
  if (envHome && !isProfileHome) {
    return envHome
  }

  const userHome = repoPath.match(/^\/Users\/[^/]+/)
  return userHome ? userHome[0] : envHome
}
|
|
89
|
+
|
|
90
|
+
/**
 * Turns a working-directory path into the folder name used for its session
 * logs by replacing every character that is not an ASCII letter or digit
 * with `-`.
 */
function encodeClaudeProjectPath(cwd: string) {
  const nonAlphanumeric = /[^A-Za-z0-9]/g
  return cwd.replace(nonAlphanumeric, '-')
}
|
|
93
|
+
|
|
94
|
+
/** Returns the distinct items of `items`, keeping first-occurrence order. */
function uniq<T>(items: T[]) {
  const seen = new Set<T>(items)
  return [...seen]
}
|
|
97
|
+
|
|
98
|
+
/**
 * Adds `amount` (default 1) to the counter stored under `key`, treating a
 * missing entry as 0. Mutates `map` in place.
 */
function inc(map: Record<string, number>, key: string, amount = 1) {
  const previous = map[key] || 0
  map[key] = previous + amount
}
|
|
101
|
+
|
|
102
|
+
/**
 * Makes `maybeAbsolute` relative to `repoPath` when it lies inside the repo.
 *
 * The prefix is only stripped at a path-segment boundary, so a sibling
 * directory that merely shares the repo's name as a prefix
 * (`/a/luca-old/x` vs `/a/luca`) is returned unchanged instead of being
 * mangled. A trailing slash on `repoPath` is tolerated.
 *
 * @param repoPath       Root directory of the repository.
 * @param maybeAbsolute  Path that may or may not be inside the repository.
 * @returns The repo-relative path, `''` when the path is the repo root
 *          itself, or `maybeAbsolute` untouched when it is outside the repo.
 */
function relToRepo(repoPath: string, maybeAbsolute: string) {
  // Normalise so '/repo' and '/repo/' behave identically.
  const root = repoPath.replace(/\/+$/, '')
  if (maybeAbsolute === root || maybeAbsolute === repoPath) return ''

  const prefix = `${root}/`
  return maybeAbsolute.startsWith(prefix)
    ? maybeAbsolute.slice(prefix.length)
    : maybeAbsolute
}
|
|
107
|
+
|
|
108
|
+
/**
 * Cleans a path captured from free text: drops everything from the first
 * `:` onwards (e.g. a `:line:col` suffix), strips trailing backticks,
 * commas, closing brackets and whitespace, then trims.
 */
function normalizeChangedPath(path: string) {
  const withoutColonSuffix = path.replace(/[:].*$/, '')
  const withoutTrailingNoise = withoutColonSuffix.replace(/[`,)\]\s]+$/g, '')
  return withoutTrailingNoise.trim()
}
|
|
114
|
+
|
|
115
|
+
/**
 * Assigns a coarse task label from keywords in the first instruction and,
 * for two of the rules, from the changed paths. Rules are evaluated in
 * order and the first match wins; `'investigation'` is the fallback.
 *
 * NOTE(review): the markdown check runs against the space-joined path list,
 * so `$` only anchors at the end of the last path — confirm that is intended.
 */
function classifyTask(firstInstruction = '', changedFiles: string[]) {
  const instruction = firstInstruction.toLowerCase()
  const joinedPaths = changedFiles.join(' ').toLowerCase()
  const mentions = (pattern: RegExp) => pattern.test(instruction)

  if (mentions(/what would it take|could i in theory|look at all of the methods|can we make equivalents|take a look|architect/)) {
    return 'architecture'
  }

  const endsWithMarkdown = /\.md$/.test(joinedPaths)
  if (mentions(/readme|docs|marketing website|tutorial|document|philosophy/) || endsWithMarkdown) {
    return 'docs'
  }

  if (mentions(/failing|bug|fix|error|broken|regression|should pass|green/)) {
    return 'bugfix'
  }

  if (mentions(/refactor|rename all references|generated.*version control|migration/)) {
    return 'refactor'
  }

  const touchesTestDir = changedFiles.some(file => file.startsWith('test/'))
  if (mentions(/test/) && touchesTestDir) {
    return 'test-fix'
  }

  if (mentions(/workflow/)) {
    return 'workflow'
  }

  if (mentions(/option|helper|implement|add|need|track|support|default|expose|bind|reload/)) {
    return 'feature-add'
  }

  return 'investigation'
}
|
|
129
|
+
|
|
130
|
+
/**
 * Reports which of the tracked commands appear in a shell command string.
 * Labels are returned in a fixed order, each at most once per call.
 */
function detectCommands(command: string) {
  const detectors: Array<[RegExp, string]> = [
    [/\bluca describe\b/, 'luca describe'],
    [/\bluca eval\b/, 'luca eval'],
    [/\bbun test\b|\bbun run test\b/, 'bun test'],
    [/\bgit commit\b/, 'git commit'],
    [/\bluca scaffold\b/, 'luca scaffold'],
    [/\bluca workflow\b/, 'luca workflow'],
  ]

  return detectors
    .filter(([pattern]) => pattern.test(command))
    .map(([, label]) => label)
}
|
|
140
|
+
|
|
141
|
+
/**
 * Guesses which helpers a session involved.
 *
 * `describe` / `eval` come from the detected commands; every other helper is
 * inferred from keywords found in the lower-cased first instruction and
 * changed paths. The result is de-duplicated.
 */
function inferRelevantHelpers(commandHits: string[], changedFiles: string[], firstInstruction = '') {
  const haystack = `${firstInstruction} ${changedFiles.join(' ')}`.toLowerCase()
  const found: string[] = []

  const commandRules: Array<[string, string]> = [
    ['luca describe', 'describe'],
    ['luca eval', 'eval'],
  ]
  for (const [command, helper] of commandRules) {
    if (commandHits.includes(command)) found.push(helper)
  }

  const keywordRules: Array<[RegExp, string]> = [
    [/assistant/, 'assistant'],
    [/conversation/, 'conversation'],
    [/contentdb|content-db/, 'contentDb'],
    [/claude/, 'claudeCode'],
    [/python/, 'python'],
    [/mcp/, 'mcpBridge'],
    [/file-tools|filetools/, 'fileTools'],
    [/secure-shell|ssh/, 'secureShell'],
    [/repl|websocket/, 'repl'],
    [/openai/, 'openai'],
    [/workflow/, 'workflow'],
    [/express|endpoint/, 'express'],
  ]
  for (const [pattern, helper] of keywordRules) {
    if (pattern.test(haystack)) found.push(helper)
  }

  return uniq(found)
}
|
|
161
|
+
|
|
162
|
+
/**
 * Describes, as an ordered list of phrases, which workflow steps were
 * observed in a session. When nothing was observed the list contains the
 * single entry `'inspect before changing code'`.
 */
function buildPolicyTrace(commandHits: string[], hasEdits: boolean, hasTests: boolean, firstInstruction = '') {
  const instruction = firstInstruction.toLowerCase()
  const ran = (label: string) => commandHits.includes(label)
  const mentionsContainer = /container\.feature|container\.client|container\.server/.test(instruction)

  const candidates: Array<[boolean, string]> = [
    [ran('luca describe'), 'discover helper surface with luca describe'],
    [ran('luca eval'), 'test assumptions with luca eval'],
    [ran('luca scaffold'), 'lean on luca scaffold before manual boilerplate'],
    [mentionsContainer, 'compose with Luca container primitives'],
    [hasEdits, 'apply small reviewable edits'],
    [hasTests, 'verify with bun tests'],
  ]

  const steps = candidates.filter(([observed]) => observed).map(([, phrase]) => phrase)
  if (steps.length === 0) {
    steps.push('inspect before changing code')
  }
  return steps
}
|
|
174
|
+
|
|
175
|
+
/**
 * Buckets a session into gold / silver / bronze.
 *
 * Gold: edits verified by tests together with introspection or at least two
 * relevant helpers, or an architecture-type task that used introspection.
 * Silver: any one of edits, introspection, or at least two helpers.
 * Bronze: everything else.
 */
function qualityTier(options: { commandHits: string[], hasEdits: boolean, hasTests: boolean, firstInstruction?: string, relevantHelpers: string[] }) {
  const { commandHits, hasEdits, hasTests, relevantHelpers } = options
  const instruction = options.firstInstruction === undefined ? '' : options.firstInstruction

  const introspected = ['luca describe', 'luca eval'].some(command => commandHits.includes(command))
  const isArchitecture = classifyTask(instruction, []) === 'architecture'
  const usesSeveralHelpers = relevantHelpers.length >= 2

  const verifiedEdit = hasEdits && hasTests && (introspected || usesSeveralHelpers)
  if (verifiedEdit || (isArchitecture && introspected)) {
    return 'gold'
  }
  if (hasEdits || introspected || usesSeveralHelpers) {
    return 'silver'
  }
  return 'bronze'
}
|
|
185
|
+
|
|
186
|
+
/**
 * Computes the ranking score of a session as a sum of fixed bonuses and
 * penalties for its tier, detected commands, helpers, task type, number of
 * changed files and notes.
 */
function scoreSession(session: Omit<SessionRecord, 'reviewScore'>) {
  const ran = (label: string) => session.commandsRun.includes(label)
  const usedHelper = (name: string) => session.relevantHelpers.includes(name)
  const fileCount = session.changedFiles.length
  const coreHelpers = ['assistant', 'workflow', 'conversation', 'mcpBridge', 'express']

  // Each pair is (does the rule apply?, points it contributes).
  const adjustments: Array<[boolean, number]> = [
    [session.qualityTier === 'gold', 40],
    [session.qualityTier === 'silver', 20],
    [ran('luca describe'), 12],
    [ran('luca eval'), 14],
    [ran('luca scaffold'), 6],
    [session.testsRun.length > 0 || ran('bun test'), 12],
    [usedHelper('describe'), 4],
    [usedHelper('eval'), 4],
    [session.relevantHelpers.some(name => coreHelpers.includes(name)), 6],
    [session.taskType === 'feature-add', 8],
    [session.taskType === 'bugfix', 7],
    [session.taskType === 'architecture', 5],
    [session.taskType === 'docs', -8],
    [fileCount === 0, -10],
    [fileCount >= 1 && fileCount <= 5, 6],
    [fileCount > 12, -8],
    [session.notes.some(note => note.includes('manual verification')), -3],
  ]

  let total = 0
  for (const [applies, points] of adjustments) {
    if (applies) total += points
  }
  return total
}
|
|
211
|
+
|
|
212
|
+
/**
 * Guesses how closely a session follows the preferred workflow.
 *
 * `canonical` requires introspection (`luca describe` / `luca eval`) plus
 * either container-style usage or a test run; any other non-bronze session
 * is `acceptable`; bronze sessions are `off-policy`.
 */
function guessCanonicality(session: SessionRecord): 'canonical' | 'acceptable' | 'off-policy' {
  const ran = (label: string) => session.commandsRun.includes(label)

  const introspected = ran('luca describe') || ran('luca eval')
  const mentionsContainer = /container\.(feature|client|server)/i.test(session.firstInstruction || '')
  const containerish = session.relevantHelpers.length >= 2 || mentionsContainer
  const tested = session.testsRun.length > 0 || ran('bun test')

  if (introspected && (containerish || tested)) {
    return 'canonical'
  }
  return session.qualityTier === 'bronze' ? 'off-policy' : 'acceptable'
}
|
|
220
|
+
|
|
221
|
+
/**
 * Guesses the dataset bucket for a session: bronze sessions without changed
 * files are rejects, architecture / investigation tasks are planning,
 * canonical sessions are canonical-policy, and the rest are
 * strong-implementation.
 */
function guessBucket(session: SessionRecord): 'canonical-policy' | 'strong-implementation' | 'planning' | 'rejects' {
  const nothingChanged = session.changedFiles.length === 0
  if (session.qualityTier === 'bronze' && nothingChanged) {
    return 'rejects'
  }

  const planningTypes = ['architecture', 'investigation']
  if (planningTypes.includes(session.taskType)) {
    return 'planning'
  }

  return guessCanonicality(session) === 'canonical'
    ? 'canonical-policy'
    : 'strong-implementation'
}
|
|
227
|
+
|
|
228
|
+
/**
 * Maps the guessed bucket to a reviewer action: rejects are rejected,
 * canonical-policy and planning examples are rewritten, everything else is
 * kept as is.
 */
function suggestedDisposition(session: SessionRecord): 'keep' | 'rewrite' | 'reject' {
  switch (guessBucket(session)) {
    case 'rejects':
      return 'reject'
    case 'canonical-policy':
    case 'planning':
      return 'rewrite'
    default:
      return 'keep'
  }
}
|
|
234
|
+
|
|
235
|
+
/**
 * Builds a one-line, ` | `-separated description of a session: task type and
 * repo, the first 140 characters of the first instruction (if any), the
 * changed-file count, the detected commands and the inferred helpers (if any).
 */
function summarizeSession(session: SessionRecord) {
  const { changedFiles, commandsRun, relevantHelpers, firstInstruction } = session
  const pieces: string[] = [`${session.taskType} session from ${session.repo}`]

  if (firstInstruction) {
    pieces.push(`task starts: ${firstInstruction.slice(0, 140)}`)
  }

  pieces.push(
    changedFiles.length > 0
      ? `changed ${changedFiles.length} file(s)`
      : 'no changed files detected',
  )
  pieces.push(
    commandsRun.length > 0
      ? `commands: ${commandsRun.join(', ')}`
      : 'no key commands detected',
  )

  if (relevantHelpers.length > 0) {
    pieces.push(`helpers: ${relevantHelpers.join(', ')}`)
  }

  return pieces.join(' | ')
}
|
|
245
|
+
|
|
246
|
+
/**
 * Collects the absolute paths of all files under `root` that match the glob
 * `pattern`, sorted with the default string ordering.
 */
async function listFiles(root: string, pattern: string) {
  const scanner = new Bun.Glob(pattern).scan({ cwd: root, absolute: true, onlyFiles: true })
  const matches: string[] = []
  for await (const match of scanner) {
    matches.push(match)
  }
  matches.sort()
  return matches
}
|
|
252
|
+
|
|
253
|
+
/** Resolves to whether a file exists at `path`, as reported by `Bun.file(path).exists()`. */
async function fileExists(path: string) {
  const handle = Bun.file(path)
  const present = await handle.exists()
  return present
}
|
|
256
|
+
|
|
257
|
+
/**
 * Pulls the text out of a message `content` value.
 *
 * A string is returned as a one-element list. For an array, each part
 * contributes its string `text` and/or string `content` property (in that
 * order). Any other value yields an empty list.
 */
function extractTextBlocks(content: any): string[] {
  if (typeof content === 'string') {
    return [content]
  }
  if (!Array.isArray(content)) {
    return []
  }

  return content.flatMap(part => {
    const pieces: string[] = []
    for (const candidate of [part?.text, part?.content]) {
      if (typeof candidate === 'string') pieces.push(candidate)
    }
    return pieces
  })
}
|
|
267
|
+
|
|
268
|
+
/**
 * Reads one JSONL session log and distils it into a `SessionRecord`.
 *
 * Each non-empty line is parsed as JSON; lines that fail to parse are
 * skipped. From the events the function collects turn and tool counts, the
 * first git branch and first user instruction, detected key commands, test
 * invocations, repo paths mentioned in tool results or message text, and up
 * to eight short snippets. The derived labels (task type, quality tier,
 * helpers, policy trace, notes, review score) are computed from those.
 *
 * @param sessionPath  Path of the `.jsonl` session log.
 * @param repo         Repository the session belongs to.
 * @returns The assembled record (this implementation never resolves to `null`).
 */
async function parseSession(sessionPath: string, repo: RepoConfig): Promise<SessionRecord | null> {
  const text = await Bun.file(sessionPath).text()
  const lines = text.split('\n').filter(Boolean)

  let userTurns = 0
  let assistantTurns = 0
  let toolUses = 0
  let branch: string | undefined
  let firstInstruction: string | undefined
  const toolUsage: ToolCount = {}
  const changedFiles: string[] = []
  const commandHits: string[] = []
  const testsRun: string[] = []
  const notes: string[] = []
  const snippets: string[] = []

  // Loop-invariant: built once instead of once per text block of every event.
  // Requiring the trailing '/' keeps sibling directories that merely share the
  // repo path as a prefix (e.g. `<repo>-old/`) out of the changed-file list.
  const repoPrefix = repo.repoPath.endsWith('/') ? repo.repoPath : `${repo.repoPath}/`
  const escapedRepo = repo.repoPath.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  const repoPathPattern = new RegExp(`${escapedRepo}/[^\\s\`"']+`, 'g')

  for (const line of lines) {
    let event: any
    try {
      event = JSON.parse(line)
    } catch {
      // Best effort: a malformed line is ignored rather than failing the session.
      continue
    }

    if (event?.gitBranch && !branch) branch = event.gitBranch

    // The text blocks are needed by several steps below; extract them once.
    const blocks = extractTextBlocks(event?.message?.content)

    if (event?.type === 'user') {
      if (blocks.length) {
        userTurns += 1
        const textBlock = blocks.join(' ').trim()
        if (!firstInstruction && textBlock) firstInstruction = textBlock
        // Only the earliest user messages are kept as snippets.
        if (snippets.length < 2 && textBlock) snippets.push(`user: ${textBlock.slice(0, 220)}`)
      }
    }

    if (event?.type === 'assistant') {
      assistantTurns += 1
      for (const block of blocks) {
        if (snippets.length >= 8) break
        if (/luca describe|luca eval|bun test|container\.(feature|client|server)|commands\/|features\/|endpoints\//i.test(block)) {
          snippets.push(`assistant: ${block.replace(/\s+/g, ' ').slice(0, 220)}`)
        }
      }
      if (Array.isArray(event?.message?.content)) {
        for (const part of event.message.content) {
          if (part?.type !== 'tool_use') continue
          toolUses += 1
          const name = part.name || 'unknown'
          inc(toolUsage, name)
          const command = part?.input?.command
          if (typeof command !== 'string') continue
          const hits = detectCommands(command)
          commandHits.push(...hits)
          if (hits.includes('bun test')) testsRun.push(command)
          if (snippets.length < 8 && /luca describe|luca eval|bun test|luca scaffold|luca workflow/i.test(command)) {
            snippets.push(`tool: ${command.replace(/\s+/g, ' ').slice(0, 220)}`)
          }
        }
      }
    }

    // Paths reported by tool results that live inside the repository.
    const filePath = event?.toolUseResult?.filePath
    if (typeof filePath === 'string' && filePath.startsWith(repoPrefix)) {
      changedFiles.push(relToRepo(repo.repoPath, filePath))
    }

    // Repository paths mentioned anywhere in the message text.
    for (const block of blocks) {
      // String.prototype.match with a global RegExp starts from index 0 each
      // time, so sharing one pattern instance across calls is safe.
      const matches = block.match(repoPathPattern) || []
      for (const match of matches) changedFiles.push(relToRepo(repo.repoPath, match))
    }
  }

  const dedupedChangedFiles = uniq(
    changedFiles
      .map(normalizeChangedPath)
      .filter(Boolean)
      .filter(path => !path.includes('node_modules/'))
      .filter(path => !path.startsWith('dist/'))
      .filter(path => !path.startsWith('.git/'))
  ).sort()

  const dedupedCommandHits = uniq(commandHits)
  const relevantHelpers = inferRelevantHelpers(dedupedCommandHits, dedupedChangedFiles, firstInstruction)
  const hasEdits = dedupedChangedFiles.length > 0
  const hasTests = testsRun.length > 0 || dedupedCommandHits.includes('bun test')
  const taskType = classifyTask(firstInstruction, dedupedChangedFiles)
  const policyTrace = buildPolicyTrace(dedupedCommandHits, hasEdits, hasTests, firstInstruction)

  if (hasEdits && !hasTests) notes.push('candidate may need manual verification because no explicit bun test run was detected')
  if (dedupedCommandHits.includes('luca describe') || dedupedCommandHits.includes('luca eval')) notes.push('shows Luca-native runtime introspection')
  if (taskType === 'architecture' || taskType === 'investigation') notes.push('good candidate for planning/policy bucket if reasoning is clean')

  // Strip the extension only when it is the actual suffix of the file name.
  const fileName = sessionPath.split('/').pop() ?? sessionPath

  const partial: Omit<SessionRecord, 'reviewScore'> = {
    sessionId: fileName.replace(/\.jsonl$/, ''),
    sessionPath,
    repo: repo.name,
    repoPath: repo.repoPath,
    branch,
    firstInstruction,
    taskType,
    qualityTier: qualityTier({ commandHits: dedupedCommandHits, hasEdits, hasTests, firstInstruction, relevantHelpers }),
    userTurns,
    assistantTurns,
    toolUses,
    toolUsage,
    changedFiles: dedupedChangedFiles,
    commandsRun: dedupedCommandHits,
    testsRun: uniq(testsRun),
    relevantHelpers,
    policyTrace,
    notes,
    snippets: uniq(snippets).slice(0, 8),
  }

  return {
    ...partial,
    reviewScore: scoreSession(partial),
  }
}
|
|
392
|
+
|
|
393
|
+
/**
 * Counts the files under `root` matching `pattern`. Any error raised while
 * listing (for example because `root` cannot be scanned) is treated as
 * "no files" and yields 0.
 */
async function countMatchingFiles(root: string, pattern: string) {
  let matches: string[]
  try {
    matches = await listFiles(root, pattern)
  } catch {
    return 0
  }
  return matches.length
}
|
|
400
|
+
|
|
401
|
+
/**
 * Builds the per-repository summary: session counts per tier, an inventory
 * of well-known docs / test / source locations in the checkout, the 15 most
 * used tools and the 20 most frequently touched paths.
 *
 * The inventory probes are independent of each other, so they are started
 * together and awaited with `Promise.all`; the key order of
 * `sourceInventory` is the order of the probe list.
 *
 * @param repo              Repository being summarised.
 * @param sessions          Parsed main sessions of that repository.
 * @param mainSessions      Paths of the main session logs (only counted).
 * @param subagentSessions  Paths of the sub-agent session logs (only counted).
 * @param sessionRoot       Directory the logs were read from.
 */
async function buildRepoSummary(repo: RepoConfig, sessions: SessionRecord[], mainSessions: string[], subagentSessions: string[], sessionRoot: string): Promise<RepoSummary> {
  const root = repo.repoPath
  const presence = async (relativePath: string) => ((await fileExists(`${root}/${relativePath}`)) ? 1 : 0)
  const countIn = (dir: string, pattern: string) => countMatchingFiles(`${root}/${dir}`, pattern)

  const probes: Array<[string, Promise<number>]> = [
    ['README.md', presence('README.md')],
    ['CLAUDE.md', presence('CLAUDE.md')],
    ['AGENTS.md', presence('AGENTS.md')],
    ['docs/apis', countIn('docs/apis', '**/*.md')],
    ['docs/examples', countIn('docs/examples', '**/*.md')],
    ['docs/tutorials', countIn('docs/tutorials', '**/*.md')],
    ['test', countIn('test', '**/*.ts')],
    ['test-integration', countIn('test-integration', '**/*.ts')],
    ['src/commands', countIn('src/commands', '**/*.ts')],
  ]
  const sourceInventory: Record<string, number> = Object.fromEntries(
    await Promise.all(
      probes.map(async ([label, pending]): Promise<[string, number]> => [label, await pending]),
    ),
  )

  // Sum tool usage and count how many sessions touched each path.
  const topTools: ToolCount = {}
  const changedFileCounts: Record<string, number> = {}
  for (const session of sessions) {
    for (const [tool, count] of Object.entries(session.toolUsage)) inc(topTools, tool, count)
    for (const file of session.changedFiles) inc(changedFileCounts, file)
  }

  const countTier = (tier: SessionRecord['qualityTier']) =>
    sessions.filter(session => session.qualityTier === tier).length

  return {
    repo: repo.name,
    repoPath: repo.repoPath,
    generatedAt: new Date().toISOString(),
    sessionRoot,
    counts: {
      mainSessions: mainSessions.length,
      subagentSessions: subagentSessions.length,
      gold: countTier('gold'),
      silver: countTier('silver'),
      bronze: countTier('bronze'),
    },
    sourceInventory,
    topTools: Object.fromEntries(Object.entries(topTools).sort((a, b) => b[1] - a[1]).slice(0, 15)),
    topChangedFiles: Object.entries(changedFileCounts)
      .sort((a, b) => b[1] - a[1])
      .slice(0, 20)
      .map(([path, count]) => ({ path, count })),
  }
}
|
|
441
|
+
|
|
442
|
+
/**
 * Converts a session record into a review-queue row: copies the session's
 * fields under snake_case names and adds the heuristic guesses, a summary
 * line and the verification / positive / negative signal lists.
 */
function toReviewEntry(session: SessionRecord, rank: number): ReviewQueueEntry {
  const ran = (label: string) => session.commandsRun.includes(label)
  const ranTests = session.testsRun.length > 0 || ran('bun test')
  const changedCount = session.changedFiles.length

  const verificationSignals: string[] = [
    ranTests ? 'explicit bun test detected' : 'no explicit bun test detected',
  ]

  const positives: string[] = [...session.policyTrace]
  if (ran('luca describe')) positives.push('uses luca describe')
  if (ran('luca eval')) positives.push('uses luca eval')
  const editsConventionFolder = session.changedFiles.some(path =>
    /^(commands|features|endpoints|clients|servers)\//.test(path),
  )
  if (editsConventionFolder) positives.push('edits Luca convention folders')

  const negatives: string[] = []
  if (changedCount === 0) negatives.push('no changed files detected')
  if (!ranTests) negatives.push('no explicit test verification')
  if (changedCount > 12) negatives.push('large changed-file fan-out')

  const entry: ReviewQueueEntry = {
    rank,
    session_id: session.sessionId,
    source_repo: session.repo,
    candidate_path: session.sessionPath,
    repo_path: session.repoPath,
    branch: session.branch,
    first_instruction: session.firstInstruction,
    task_type: session.taskType,
    quality_tier: session.qualityTier,
    canonicality_guess: guessCanonicality(session),
    bucket_guess: guessBucket(session),
    suggested_disposition: suggestedDisposition(session),
    review_score: session.reviewScore,
    summary: summarizeSession(session),
    changed_files: session.changedFiles,
    commands_run: session.commandsRun,
    tests_run: session.testsRun,
    relevant_helpers: session.relevantHelpers,
    policy_signals: session.policyTrace,
    verification_signals: verificationSignals,
    positive_signals: uniq(positives),
    negative_signals: uniq(negatives),
    snippets: session.snippets,
    notes: session.notes,
  }
  return entry
}
|
|
489
|
+
|
|
490
|
+
/**
 * Script entry point.
 *
 * For every configured repository: locate its session-log directory, parse
 * the main (non-subagent) sessions, and write a summary JSON plus a
 * candidates JSONL file. Afterwards rank all sessions together, write the
 * full review queue, a first review batch (up to 10 luca, 15 agentic-loop
 * and 5 reject entries) and a manifest, then print a short report to stdout.
 */
async function main() {
  await Bun.$`mkdir -p ${OUTPUT_DIR}`

  const toJsonl = (rows: unknown[]) => rows.map(row => JSON.stringify(row)).join('\n') + '\n'
  const toPrettyJson = (value: unknown) => `${JSON.stringify(value, null, 2)}\n`
  const byScoreDescending = (a: SessionRecord, b: SessionRecord) => b.reviewScore - a.reviewScore

  const allSessions: SessionRecord[] = []
  const summaries: RepoSummary[] = []

  for (const repo of DEFAULT_REPOS) {
    const home = inferHome(repo.repoPath)
    const sessionRoot = `${home}/.claude/projects/${encodeClaudeProjectPath(repo.repoPath)}`
    const allJsonl = await listFiles(sessionRoot, '**/*.jsonl')

    // Logs under a `subagents` directory are counted but not parsed.
    const mainSessions: string[] = []
    const subagentSessions: string[] = []
    for (const path of allJsonl) {
      (path.includes('/subagents/') ? subagentSessions : mainSessions).push(path)
    }

    const parsed = await Promise.all(mainSessions.map(path => parseSession(path, repo)))
    const sessions = parsed
      .filter((session): session is SessionRecord => Boolean(session))
      .sort(byScoreDescending)

    allSessions.push(...sessions)
    const summary = await buildRepoSummary(repo, sessions, mainSessions, subagentSessions, sessionRoot)
    summaries.push(summary)

    await Bun.write(`${OUTPUT_DIR}/${repo.name}-session-curation-summary.json`, toPrettyJson(summary))
    await Bun.write(`${OUTPUT_DIR}/${repo.name}-session-candidates.jsonl`, toJsonl(sessions))
  }

  const ranked = allSessions
    .sort(byScoreDescending)
    .map((session, index) => toReviewEntry(session, index + 1))

  const isReject = (item: ReviewQueueEntry) => item.bucket_guess === 'rejects'
  const keptFrom = (repoName: string) => (item: ReviewQueueEntry) => item.source_repo === repoName && !isReject(item)

  // The batch is re-ranked 1..n in the order it is assembled.
  const firstBatch = [
    ...ranked.filter(keptFrom('luca')).slice(0, 10),
    ...ranked.filter(keptFrom('agentic-loop')).slice(0, 15),
    ...ranked.filter(isReject).slice(0, 5),
  ].map((item, index) => ({ ...item, rank: index + 1 }))

  const manifest = {
    generatedAt: new Date().toISOString(),
    repos: summaries.map(({ repo, repoPath, counts, sessionRoot }) => ({
      repo,
      repoPath,
      counts,
      sessionRoot,
    })),
    totalCandidates: ranked.length,
    recommendedFirstBatch: {
      total: firstBatch.length,
      lucaCanonicalAndStrong: firstBatch.filter(keptFrom('luca')).length,
      agenticLoopCanonicalAndStrong: firstBatch.filter(keptFrom('agentic-loop')).length,
      rejects: firstBatch.filter(isReject).length,
    },
    recommendations: [
      'Have the Luca author label the first batch using datasets/lora/review-schema.json.',
      'Prefer rewrite over raw keep for canonical-policy and planning examples.',
      'Treat docs and API references as retrieval sources, not direct LoRA rows.',
    ],
  }

  await Bun.write(`${OUTPUT_DIR}/review-queue.jsonl`, toJsonl(ranked))
  await Bun.write(`${OUTPUT_DIR}/review-batch-1.jsonl`, toJsonl(firstBatch))
  await Bun.write(`${OUTPUT_DIR}/review-manifest.json`, toPrettyJson(manifest))

  const report = {
    outputDir: OUTPUT_DIR,
    totalCandidates: ranked.length,
    firstBatch: firstBatch.length,
    repos: manifest.repos,
  }
  console.log(JSON.stringify(report, null, 2))
}
|
|
557
|
+
|
|
558
|
+
// Run the script; on failure print the most detailed description available
// (stack, then message, then the stringified value) and exit with status 1.
main().catch(error => {
  const detail = error?.stack || error?.message || String(error)
  console.error(detail)
  process.exit(1)
})
|
package/src/cli/build-info.ts
CHANGED