superlab 0.1.9 → 0.1.11

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (32)
  1. package/README.md +58 -7
  2. package/README.zh-CN.md +58 -7
  3. package/bin/superlab.cjs +479 -4
  4. package/lib/auto.cjs +782 -0
  5. package/lib/context.cjs +240 -1
  6. package/lib/i18n.cjs +603 -199
  7. package/lib/install.cjs +33 -0
  8. package/package-assets/claude/commands/lab/auto.md +11 -0
  9. package/package-assets/claude/commands/lab/data.md +11 -0
  10. package/package-assets/claude/commands/lab.md +7 -0
  11. package/package-assets/codex/prompts/lab-auto.md +9 -0
  12. package/package-assets/codex/prompts/lab-data.md +9 -0
  13. package/package-assets/codex/prompts/lab.md +7 -0
  14. package/package-assets/shared/lab/.managed/templates/data.md +168 -0
  15. package/package-assets/shared/lab/config/workflow.json +4 -1
  16. package/package-assets/shared/lab/context/auto-mode.md +42 -0
  17. package/package-assets/shared/lab/context/auto-status.md +23 -0
  18. package/package-assets/shared/lab/context/data-decisions.md +59 -0
  19. package/package-assets/shared/lab/context/session-brief.md +3 -0
  20. package/package-assets/shared/lab/context/summary.md +4 -0
  21. package/package-assets/shared/lab/system/core.md +18 -1
  22. package/package-assets/shared/skills/lab/SKILL.md +45 -8
  23. package/package-assets/shared/skills/lab/references/workflow.md +3 -0
  24. package/package-assets/shared/skills/lab/stages/auto.md +59 -0
  25. package/package-assets/shared/skills/lab/stages/data.md +69 -0
  26. package/package-assets/shared/skills/lab/stages/iterate.md +9 -0
  27. package/package-assets/shared/skills/lab/stages/report.md +1 -0
  28. package/package-assets/shared/skills/lab/stages/review.md +3 -0
  29. package/package-assets/shared/skills/lab/stages/run.md +5 -0
  30. package/package-assets/shared/skills/lab/stages/spec.md +7 -0
  31. package/package-assets/shared/skills/lab/stages/write.md +5 -0
  32. package/package.json +1 -1
package/lib/context.cjs CHANGED
@@ -28,7 +28,7 @@ function readWorkflowLanguage(targetDir) {
28
28
  function extractValue(text, labels) {
29
29
  for (const label of labels) {
30
30
  const escaped = label.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
31
- const regex = new RegExp(`^\\s*(?:[-\\d.]+\\s*)?${escaped}:\\s*(.+?)\\s*$`, "im");
31
+ const regex = new RegExp(`^\\s*(?:[-\\d.]+\\s*)?${escaped}:[ \\t]*([^\\n\\r]+?)[ \\t]*$`, "im");
32
32
  const match = text.match(regex);
33
33
  if (match && match[1]) {
34
34
  return match[1].trim();
@@ -53,6 +53,10 @@ function extractOpenQuestion(text) {
53
53
  return "";
54
54
  }
55
55
 
56
// joinNonEmpty(parts, separator): drops every falsy entry of `parts` (for
// example "" or undefined) via filter(Boolean) and joins the remaining
// entries with `separator`, which defaults to "; ". Returns "" when nothing
// survives the filter.
+ function joinNonEmpty(parts, separator = "; ") {
57
+ return parts.filter(Boolean).join(separator);
58
+ }
59
+
56
60
  function extractClaim(text) {
57
61
  const blocks = text
58
62
  .split(/\n(?=\d+\.\s)/)
@@ -67,6 +71,10 @@ function extractClaim(text) {
67
71
  return "";
68
72
  }
69
73
 
74
// labelValue(text, englishLabels, chineseLabels): thin wrapper around
// extractValue. It concatenates the English labels followed by the Chinese
// labels (an empty list when omitted) into one label array and returns
// whatever extractValue(text, labels) returns for that combined list.
+ function labelValue(text, englishLabels, chineseLabels = []) {
75
+ return extractValue(text, [...englishLabels, ...chineseLabels]);
76
+ }
77
+
70
78
  function renderSummary(lang, data) {
71
79
  if (lang === "zh") {
72
80
  return `# 研究摘要
@@ -76,6 +84,18 @@ function renderSummary(lang, data) {
76
84
  - Mission: ${data.problem || "待补充"}
77
85
  - Approved direction: ${data.direction || "待补充"}
78
86
  - Active stage: ${data.stage || "待补充"}
87
+ - Auto mode: ${data.autoStatus || "未启用"}
88
+ - Auto objective: ${data.autoObjective || "待补充"}
89
+ - Auto decision: ${data.autoDecision || "待补充"}
90
+ - Dataset package: ${data.datasetPackage || "待补充"}
91
+ - Dataset years: ${data.datasetYears || "待补充"}
92
+ - Benchmark role: ${data.benchmarkRole || "待补充"}
93
+ - Benchmark rationale: ${data.benchmarkRationale || "待补充"}
94
+ - Comparison suite: ${data.comparisonMethods || "待补充"}
95
+ - Comparison rationale: ${data.comparisonRationale || "待补充"}
96
+ - Comparison evidence: ${data.comparisonEvidence || "待补充"}
97
+ - Source basis: ${data.datasetSources || "待补充"}
98
+ - Representative papers: ${data.datasetPapers || "待补充"}
79
99
 
80
100
  ## 最强证据
81
101
 
@@ -98,6 +118,18 @@ function renderSummary(lang, data) {
98
118
  - Mission: ${data.problem || "TBD"}
99
119
  - Approved direction: ${data.direction || "TBD"}
100
120
  - Active stage: ${data.stage || "TBD"}
121
+ - Auto mode: ${data.autoStatus || "inactive"}
122
+ - Auto objective: ${data.autoObjective || "TBD"}
123
+ - Auto decision: ${data.autoDecision || "TBD"}
124
+ - Dataset package: ${data.datasetPackage || "TBD"}
125
+ - Dataset years: ${data.datasetYears || "TBD"}
126
+ - Benchmark role: ${data.benchmarkRole || "TBD"}
127
+ - Benchmark rationale: ${data.benchmarkRationale || "TBD"}
128
+ - Comparison suite: ${data.comparisonMethods || "TBD"}
129
+ - Comparison rationale: ${data.comparisonRationale || "TBD"}
130
+ - Comparison evidence: ${data.comparisonEvidence || "TBD"}
131
+ - Source basis: ${data.datasetSources || "TBD"}
132
+ - Representative papers: ${data.datasetPapers || "TBD"}
101
133
 
102
134
  ## Strongest Evidence
103
135
 
@@ -175,6 +207,18 @@ ${data.problem || "待补充"}
175
207
 
176
208
  - Approved direction: ${data.direction || "待补充"}
177
209
  - Why this is the active path: ${data.why || "当前已批准方向"}
210
+ - Auto mode: ${data.autoStatus || "未启用"}
211
+ - Auto objective: ${data.autoObjective || "待补充"}
212
+ - Auto decision: ${data.autoDecision || "待补充"}
213
+ - Dataset package: ${data.datasetPackage || "待补充"}
214
+ - Dataset years: ${data.datasetYears || "待补充"}
215
+ - Benchmark role: ${data.benchmarkRole || "待补充"}
216
+ - Benchmark rationale: ${data.benchmarkRationale || "待补充"}
217
+ - Comparison suite: ${data.comparisonMethods || "待补充"}
218
+ - Comparison rationale: ${data.comparisonRationale || "待补充"}
219
+ - Comparison evidence: ${data.comparisonEvidence || "待补充"}
220
+ - Source basis: ${data.datasetSources || "待补充"}
221
+ - Representative papers: ${data.datasetPapers || "待补充"}
178
222
 
179
223
  ## 主要风险
180
224
 
@@ -208,6 +252,18 @@ ${data.problem || "TBD"}
208
252
 
209
253
  - Approved direction: ${data.direction || "TBD"}
210
254
  - Why this is the active path: ${data.why || "This is the approved direction"}
255
+ - Auto mode: ${data.autoStatus || "inactive"}
256
+ - Auto objective: ${data.autoObjective || "TBD"}
257
+ - Auto decision: ${data.autoDecision || "TBD"}
258
+ - Dataset package: ${data.datasetPackage || "TBD"}
259
+ - Dataset years: ${data.datasetYears || "TBD"}
260
+ - Benchmark role: ${data.benchmarkRole || "TBD"}
261
+ - Benchmark rationale: ${data.benchmarkRationale || "TBD"}
262
+ - Comparison suite: ${data.comparisonMethods || "TBD"}
263
+ - Comparison rationale: ${data.comparisonRationale || "TBD"}
264
+ - Comparison evidence: ${data.comparisonEvidence || "TBD"}
265
+ - Source basis: ${data.datasetSources || "TBD"}
266
+ - Representative papers: ${data.datasetPapers || "TBD"}
211
267
 
212
268
  ## Main Risk
213
269
 
@@ -232,6 +288,64 @@ function buildContextSnapshot(targetDir) {
232
288
  const state = readFileIfExists(contextFile(targetDir, "state.md"));
233
289
  const evidence = readFileIfExists(contextFile(targetDir, "evidence-index.md"));
234
290
  const questions = readFileIfExists(contextFile(targetDir, "open-questions.md"));
291
+ const dataDecisions = readFileIfExists(contextFile(targetDir, "data-decisions.md"));
292
+ const autoMode = readFileIfExists(contextFile(targetDir, "auto-mode.md"));
293
+ const autoStatus = readFileIfExists(contextFile(targetDir, "auto-status.md"));
294
+ const classicBenchmarks = labelValue(
295
+ dataDecisions,
296
+ ["Classic public benchmarks", "Classic benchmarks"],
297
+ ["经典公开 benchmark", "经典 benchmark"]
298
+ );
299
+ const recentBenchmarks = labelValue(
300
+ dataDecisions,
301
+ ["Recent strong public benchmarks", "Latest benchmarks"],
302
+ ["近期强公开 benchmark", "最新 benchmark"]
303
+ );
304
+ const claimSpecificBenchmarks = labelValue(
305
+ dataDecisions,
306
+ ["Claim-specific benchmarks"],
307
+ ["专项 benchmark"]
308
+ );
309
+ const canonicalComparisons = labelValue(
310
+ dataDecisions,
311
+ ["Canonical baselines", "Classic comparison methods"],
312
+ ["规范基线", "经典对比方法"]
313
+ );
314
+ const strongHistoricalComparisons = labelValue(
315
+ dataDecisions,
316
+ ["Strong historical baselines"],
317
+ ["强历史基线"]
318
+ );
319
+ const recentComparisons = labelValue(
320
+ dataDecisions,
321
+ ["Recent strong public methods", "Recent strong or SOTA comparison methods"],
322
+ ["近期强公开方法", "近期强基线或 SOTA 对比方法"]
323
+ );
324
+ const closestPriorWork = labelValue(
325
+ dataDecisions,
326
+ ["Closest prior work"],
327
+ ["最接近的前作"]
328
+ );
329
+ const canonicalComparisonPapers = labelValue(
330
+ dataDecisions,
331
+ ["Representative papers for canonical baselines"],
332
+ ["规范基线的代表论文"]
333
+ );
334
+ const strongHistoricalComparisonPapers = labelValue(
335
+ dataDecisions,
336
+ ["Representative papers for strong historical baselines"],
337
+ ["强历史基线的代表论文"]
338
+ );
339
+ const recentComparisonPapers = labelValue(
340
+ dataDecisions,
341
+ ["Representative papers for recent strong public methods"],
342
+ ["近期强公开方法的代表论文"]
343
+ );
344
+ const closestPriorPapers = labelValue(
345
+ dataDecisions,
346
+ ["Representative papers for closest prior work"],
347
+ ["最接近前作的代表论文"]
348
+ );
235
349
 
236
350
  return {
237
351
  problem: extractValue(mission, ["One-sentence problem", "一句话问题"]),
@@ -246,6 +360,131 @@ function buildContextSnapshot(targetDir) {
246
360
  claim: extractClaim(evidence),
247
361
  question: extractOpenQuestion(questions),
248
362
  risk: extractValue(questions, ["Why it matters", "为什么重要"]) || extractValue(state, ["Current blocker", "当前阻塞"]),
363
+ datasetPackage:
364
+ extractValue(dataDecisions, ["Approved dataset package", "已批准数据集包"]) ||
365
+ extractValue(dataDecisions, ["Approved datasets", "已批准数据集"]),
366
+ datasetYears: extractValue(dataDecisions, ["Dataset years", "数据集年份"]),
367
+ datasetPapers: extractValue(dataDecisions, ["Papers that used the approved datasets", "使用过已批准数据集的论文", "使用过该数据集的论文"]),
368
+ datasetSources: joinNonEmpty(
369
+ [
370
+ labelValue(dataDecisions, ["Why the selected download sources are trusted"], ["为什么这些下载来源可信"]),
371
+ labelValue(dataDecisions, ["Official benchmark or dataset pages"], ["官方 benchmark 或数据集页面"]),
372
+ ],
373
+ " | "
374
+ ),
375
+ benchmarkRationale: joinNonEmpty(
376
+ [
377
+ (() => {
378
+ const value = labelValue(
379
+ dataDecisions,
380
+ ["Why classic public benchmarks are included", "Why classic benchmarks are included"],
381
+ ["为什么纳入经典公开 benchmark", "为什么纳入经典 benchmark"]
382
+ );
383
+ return value ? `Classic public: ${value}` : "";
384
+ })(),
385
+ (() => {
386
+ const value = labelValue(
387
+ dataDecisions,
388
+ ["Why recent strong public benchmarks are included", "Why latest benchmarks are included"],
389
+ ["为什么纳入近期强公开 benchmark", "为什么纳入最新 benchmark"]
390
+ );
391
+ return value ? `Recent strong public: ${value}` : "";
392
+ })(),
393
+ (() => {
394
+ const value = labelValue(
395
+ dataDecisions,
396
+ ["Why claim-specific benchmarks are included or why none are needed"],
397
+ ["为什么纳入专项 benchmark 或为什么不需要"]
398
+ );
399
+ return value ? `Claim-specific: ${value}` : "";
400
+ })(),
401
+ (() => {
402
+ const value = labelValue(
403
+ dataDecisions,
404
+ ["Why this benchmark mix is representative"],
405
+ ["为什么这个 benchmark 组合具有代表性"]
406
+ );
407
+ return value ? `Mix: ${value}` : "";
408
+ })(),
409
+ ],
410
+ " | "
411
+ ),
412
+ comparisonMethods: joinNonEmpty(
413
+ [
414
+ canonicalComparisons ? `Canonical: ${canonicalComparisons}` : "",
415
+ strongHistoricalComparisons ? `Strong historical: ${strongHistoricalComparisons}` : "",
416
+ recentComparisons ? `Recent strong public: ${recentComparisons}` : "",
417
+ closestPriorWork ? `Closest prior: ${closestPriorWork}` : "",
418
+ ],
419
+ "; "
420
+ ),
421
+ comparisonEvidence: joinNonEmpty(
422
+ [
423
+ canonicalComparisonPapers ? `Canonical baselines: ${canonicalComparisonPapers}` : "",
424
+ strongHistoricalComparisonPapers
425
+ ? `Strong historical baselines: ${strongHistoricalComparisonPapers}`
426
+ : "",
427
+ recentComparisonPapers ? `Recent strong public methods: ${recentComparisonPapers}` : "",
428
+ closestPriorPapers ? `Closest prior work: ${closestPriorPapers}` : "",
429
+ ],
430
+ " | "
431
+ ),
432
+ comparisonRationale: joinNonEmpty(
433
+ [
434
+ (() => {
435
+ const value = labelValue(
436
+ dataDecisions,
437
+ ["Why these canonical baselines are included", "Why these classic baselines are included"],
438
+ ["为什么纳入这些规范基线", "为什么纳入这些经典基线"]
439
+ );
440
+ return value ? `Canonical baselines: ${value}` : "";
441
+ })(),
442
+ (() => {
443
+ const value = labelValue(
444
+ dataDecisions,
445
+ ["Why these strong historical baselines are included"],
446
+ ["为什么纳入这些强历史基线"]
447
+ );
448
+ return value ? `Strong historical baselines: ${value}` : "";
449
+ })(),
450
+ (() => {
451
+ const value = labelValue(
452
+ dataDecisions,
453
+ ["Why these recent strong public methods are included", "Why these recent or SOTA baselines are included"],
454
+ ["为什么纳入这些近期强公开方法", "为什么纳入这些近期或 SOTA 基线"]
455
+ );
456
+ return value ? `Recent strong public methods: ${value}` : "";
457
+ })(),
458
+ (() => {
459
+ const value = labelValue(
460
+ dataDecisions,
461
+ ["Why this closest prior work is included or why none qualifies"],
462
+ ["为什么纳入这个最接近的前作或为什么没有合格项"]
463
+ );
464
+ return value ? `Closest prior work: ${value}` : "";
465
+ })(),
466
+ (() => {
467
+ const value = labelValue(
468
+ dataDecisions,
469
+ ["Why these comparison methods are fair"],
470
+ ["为什么这些对比方法是公平的"]
471
+ );
472
+ return value ? `Fairness: ${value}` : "";
473
+ })(),
474
+ ],
475
+ " | "
476
+ ),
477
+ benchmarkRole: joinNonEmpty(
478
+ [
479
+ classicBenchmarks ? `Classic public: ${classicBenchmarks}` : "",
480
+ recentBenchmarks ? `Recent strong public: ${recentBenchmarks}` : "",
481
+ claimSpecificBenchmarks ? `Claim-specific: ${claimSpecificBenchmarks}` : "",
482
+ ],
483
+ "; "
484
+ ),
485
+ autoObjective: extractValue(autoMode, ["Objective", "目标"]),
486
+ autoStatus: extractValue(autoStatus, ["Status", "状态"]),
487
+ autoDecision: extractValue(autoStatus, ["Current decision", "当前决策"]),
249
488
  };
250
489
  }
251
490