lumencode 1.2.0 → 1.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/git.js CHANGED
@@ -2,6 +2,14 @@ import { execSync, exec as execCb } from 'child_process';
2
2
  import { existsSync, readFileSync } from 'fs';
3
3
  import { join } from 'path';
4
4
  import { aggregateAttribution, classifyAttribution } from './attribution.js';
5
+ import {
6
+ normalizeCommitFilePath,
7
+ normalizePathForGit,
8
+ projectMatches as projectMatchesFromGitPaths,
9
+ toRepoRelativePath,
10
+ } from './git-paths.js';
11
+ import { resolveAttributionOptions } from './git-attribution-options.js';
12
+ import { scoreSessionCandidate } from './git-attribution-candidates.js';
5
13
 
6
14
  // ── helpers ──
7
15
 
@@ -157,13 +165,6 @@ const AI_CONFIDENCE = {
157
165
  HIGH: 'high',
158
166
  };
159
167
 
160
- const CONFIDENCE_WEIGHTS = {
161
- [AI_CONFIDENCE.HIGH]: 1.0,
162
- [AI_CONFIDENCE.MEDIUM]: 0.7,
163
- [AI_CONFIDENCE.LOW]: 0.2,
164
- [AI_CONFIDENCE.NONE]: 0,
165
- };
166
-
167
168
  function isCountedAIConfidence(confidence) {
168
169
  return confidence === AI_CONFIDENCE.HIGH || confidence === AI_CONFIDENCE.MEDIUM;
169
170
  }
@@ -222,6 +223,21 @@ function buildEvidenceDetails({
222
223
  };
223
224
  }
224
225
 
226
/**
 * Structural scan used by isSafeRegex: reports whether any group that
 * contains a quantifier (`+`, `*`, `{`) or an alternation (`|`) — directly or
 * inside a nested group — is itself followed by a quantifier.
 *
 * Escaped characters and the contents of character classes are skipped, so
 * `\(` or `[+*]` never open a group or count as a quantifier here.
 *
 * @param {string} pattern - Regex source text.
 * @returns {boolean} true when such a quantified group is found.
 */
function hasQuantifiedRiskyGroup(pattern) {
  const isQuantifier = (ch) => ch === '+' || ch === '*' || ch === '{';
  // One flag per open group: has a quantifier/alternation been seen inside it?
  const openGroups = [];
  let inCharClass = false;

  for (let i = 0; i < pattern.length; i++) {
    const ch = pattern[i];
    if (ch === '\\') {
      i++; // skip the escaped character
      continue;
    }
    if (inCharClass) {
      if (ch === ']') inCharClass = false;
      continue;
    }
    if (ch === '[') {
      inCharClass = true;
    } else if (ch === '(') {
      openGroups.push(false);
    } else if (ch === ')') {
      // An unbalanced ')' is left for the RegExp constructor to reject.
      if (openGroups.length === 0) continue;
      const risky = openGroups.pop();
      if (risky && isQuantifier(pattern[i + 1])) return true;
      // A risky inner group makes every enclosing group risky as well.
      if (risky && openGroups.length > 0) openGroups[openGroups.length - 1] = true;
    } else if ((isQuantifier(ch) || ch === '|') && openGroups.length > 0) {
      openGroups[openGroups.length - 1] = true;
    }
  }
  return false;
}

/**
 * Heuristic guard against regex sources that may cause catastrophic
 * backtracking (ReDoS). This is a conservative filter, not a proof of safety.
 *
 * A pattern is rejected when it:
 *  - is not a string, or is longer than 200 characters;
 *  - has a quantified group containing a quantifier, e.g. `(a+)+`, `(a*){2,}`;
 *  - has a quantified group containing an alternation, e.g. `(a|b)*`;
 *  - has a quantified character class while some class contains `*` or `+`;
 *  - has two quantifier characters in a row, e.g. `a**`;
 *  - hides either of the group cases behind nested parentheses, e.g.
 *    `((a+))+` — the single-level regex checks above cannot see through the
 *    inner `)`, so a structural scan covers them.
 *
 * @param {string} pattern - Regex source text (not a RegExp instance).
 * @returns {boolean} true when no risky construct was detected.
 */
function isSafeRegex(pattern) {
  if (typeof pattern !== 'string') return false;
  // Cap the maximum length.
  if (pattern.length > 200) return false;
  // Nested quantifiers: (a+)+, (a*){2,}, (a{1,3})+
  if (/\([^)]*[+*{][^)]*\)[+*{]/.test(pattern)) return false;
  // Alternation under a quantifier: (a|b)*, (foo|bar)+
  if (/\([^)]*\|[^)]*\)[+*{]/.test(pattern)) return false;
  // Quantified character class plus a class containing a quantifier character.
  if (/\[[^\]]+\][+*{]/.test(pattern) && /\[[^\]]*[*+]/.test(pattern)) return false;
  // Repeated quantifiers: a** or a++ (some engines throw; do not rely on that).
  if (/[+*{]\s*[+*{]/.test(pattern)) return false;
  // Same group checks, but aware of nesting, escapes and character classes.
  if (hasQuantifiedRiskyGroup(pattern)) return false;
  return true;
}
240
+
225
241
  function loadCustomPatterns() {
226
242
  try {
227
243
  const configPath = join(process.cwd(), 'ai-patterns.json');
@@ -229,6 +245,13 @@ function loadCustomPatterns() {
229
245
  const raw = JSON.parse(readFileSync(configPath, 'utf-8'));
230
246
  return raw
231
247
  .filter(p => typeof p.re === 'string' && typeof p.signal === 'string')
248
+ .filter(p => {
249
+ if (!isSafeRegex(p.re)) {
250
+ console.warn(`[git] 跳过不安全正则: ${p.re.slice(0, 50)}`);
251
+ return false;
252
+ }
253
+ return true;
254
+ })
232
255
  .map(p => ({ re: new RegExp(p.re, p.flags || 'i'), signal: p.signal }));
233
256
  }
234
257
  } catch { /* ignore */ }
@@ -392,57 +415,59 @@ function computeBaselineDeviation(commit, baseline) {
392
415
 
393
416
  // ── Composite continuous scoring ──
394
417
 
395
// Fallback weights, equal to the constants that were hard-coded before the
// weights became configurable. Penalty weights are negative because every
// weight is *added* to the score.
const DEFAULT_SCORE_WEIGHTS = Object.freeze({
  explicitSignature: 0.85,
  explicitAuthor: 0.80,
  genericAISignature: 0.70,
  sessionStrong: 0.40,
  sessionCrossDay: 0.25,
  sessionWeak: 0.15,
  sessionCrossDayWeak: 0.10,
  fileOverlap: 0.30,
  styleBulletList: 0.15,
  styleConventionalScope: 0.05,
  styleImperativeMood: 0.10,
  styleLongStructuredBody: 0.05,
  baselineDeviationHigh: 0.15,
  baselineDeviationMedium: 0.08,
  negativeMergeCommit: -0.50,
  negativeInformal: -0.20,
  negativeSmallScope: -0.15,
  negativeWIP: -0.15,
  humanBaselineMatch: -0.10,
});

// Explicit-signature signal groups, in evaluation order. Each group adds its
// weight at most once, however many of its signals are present.
// NOTE(review): 'coAuthorCodex' appears in two groups, so that signal alone
// adds explicitSignature twice. Kept as-is to preserve existing scores.
const EXPLICIT_SIGNAL_GROUPS = [
  [['coAuthor', 'generatedWith', 'assistedBy'], 'explicitSignature'],
  [['coAuthorCopilot', 'coAuthorCursor', 'coAuthorCodex'], 'explicitSignature'],
  [['robotEmoji', 'coAuthorOpencode'], 'explicitSignature'],
  [['authorClaude', 'authorBot'], 'explicitAuthor'],
  [['generatedWithAider', 'aiderTag'], 'explicitSignature'],
  [['generatedWithCodex', 'coAuthorCodex'], 'explicitSignature'],
  [['coAuthorWindsurf', 'coAuthorAugment', 'coAuthorCline'], 'explicitSignature'],
  [['aiGenerated', 'generatedByAI', 'viaAI', 'aiTag'], 'genericAISignature'],
];

// Build a complete weight table: each key takes the supplied value when it is
// a finite number, otherwise the default for that key.
function pickScoreWeights(attributionOptions) {
  const supplied = attributionOptions?.scoreWeights ?? {};
  const resolved = {};
  for (const [key, fallback] of Object.entries(DEFAULT_SCORE_WEIGHTS)) {
    resolved[key] = Number.isFinite(supplied[key]) ? supplied[key] : fallback;
  }
  return resolved;
}

/**
 * Combine a commit's AI signals, session attribution, file overlap, style
 * heuristics, baseline deviation and negative (human) signals into a single
 * score clamped to [0, 1].
 *
 * @param {object} commit - Reads `aiSignals`, `negativeSignals`,
 *   `sessionAttribution` and `aiEvidenceDetails.fileOverlapRatio`.
 * @param {object} [attributionOptions] - Options whose `scoreWeights` object
 *   overrides individual weights. When omitted, or when a key is missing or
 *   not a finite number, the default for that key is used, so the result can
 *   no longer become NaN or throw on a one-argument call.
 * @returns {number} Score in the range [0, 1].
 */
function computeContinuousScore(commit, attributionOptions) {
  const weights = pickScoreWeights(attributionOptions);
  const signals = new Set(commit.aiSignals || []);
  const negSignals = new Set(commit.negativeSignals || []);
  let score = 0;

  // Explicit signatures
  for (const [names, weightKey] of EXPLICIT_SIGNAL_GROUPS) {
    if (names.some((name) => signals.has(name))) score += weights[weightKey];
  }

  // Session signals (mutually exclusive)
  if (commit.sessionAttribution === 'strong') score += weights.sessionStrong;
  else if (commit.sessionAttribution === 'cross-day') score += weights.sessionCrossDay;
  else if (commit.sessionAttribution === 'weak') score += weights.sessionWeak;
  else if (commit.sessionAttribution === 'cross-day-weak') score += weights.sessionCrossDayWeak;

  // File overlap
  const overlap = commit.aiEvidenceDetails?.fileOverlapRatio || 0;
  score += overlap * weights.fileOverlap;

  // Style heuristic
  if (signals.has('styleBulletList')) score += weights.styleBulletList;
  if (signals.has('styleConventionalScope')) score += weights.styleConventionalScope;
  if (signals.has('styleImperativeMood')) score += weights.styleImperativeMood;
  if (signals.has('styleLongStructuredBody')) score += weights.styleLongStructuredBody;

  // Baseline deviation (high takes precedence over medium)
  if (signals.has('baselineDeviationHigh')) score += weights.baselineDeviationHigh;
  else if (signals.has('baselineDeviationMedium')) score += weights.baselineDeviationMedium;

  // Negative signals
  if (negSignals.has('humanMergeCommit')) score += weights.negativeMergeCommit;
  if (negSignals.has('humanInformal')) score += weights.negativeInformal;
  if (negSignals.has('humanSmallScope')) score += weights.negativeSmallScope;
  if (negSignals.has('humanWIP')) score += weights.negativeWIP;

  // Baseline match (human pattern)
  if (signals.has('humanBaselineMatch')) score += weights.humanBaselineMatch;

  return Math.max(0, Math.min(1, score));
}
441
465
 
442
// Fallback thresholds, equal to the constants that were hard-coded before the
// thresholds became configurable.
const DEFAULT_CONFIDENCE_THRESHOLDS = Object.freeze({ high: 0.75, medium: 0.45, low: 0.20 });

/**
 * Map a continuous score to an AI_CONFIDENCE level.
 *
 * @param {number} score - Score to classify.
 * @param {object} [attributionOptions] - Options whose `confidenceThresholds`
 *   object may supply `high`, `medium` and `low` cut-offs. When the options
 *   are omitted, or a cut-off is missing or not a finite number, the default
 *   for that level is used instead of throwing or silently disabling the level.
 * @returns {string} AI_CONFIDENCE.HIGH, MEDIUM, LOW or NONE.
 */
function scoreToConfidence(score, attributionOptions) {
  const supplied = attributionOptions?.confidenceThresholds ?? {};
  const cutoff = (level) => (
    Number.isFinite(supplied[level]) ? supplied[level] : DEFAULT_CONFIDENCE_THRESHOLDS[level]
  );
  if (score >= cutoff('high')) return AI_CONFIDENCE.HIGH;
  if (score >= cutoff('medium')) return AI_CONFIDENCE.MEDIUM;
  if (score >= cutoff('low')) return AI_CONFIDENCE.LOW;
  return AI_CONFIDENCE.NONE;
}
448
473
 
@@ -514,7 +539,7 @@ export function detectAICommit(subject = '', author = '', body = '') {
514
539
 
515
540
  // ── 聚合函数 ──
516
541
 
517
- export function computeAIContribution(commits, toolFilter = null) {
542
+ export function computeAIContribution(commits, toolFilter = null, options = {}) {
518
543
  let aiCommits = 0, aiLinesAdded = 0, aiLinesDeleted = 0;
519
544
  let possibleAICommits = 0, possibleAILinesAdded = 0, possibleAILinesDeleted = 0;
520
545
  let weightedAILinesAdded = 0, weightedAILinesDeleted = 0;
@@ -522,6 +547,8 @@ export function computeAIContribution(commits, toolFilter = null) {
522
547
  let aiFileLinesAdded = 0, aiFileLinesDeleted = 0;
523
548
  let highConfidenceCommits = 0, mediumConfidenceCommits = 0, lowConfidenceCommits = 0;
524
549
  let totalLinesAdded = 0, totalLinesDeleted = 0;
550
+ const attributionOptions = resolveAttributionOptions(options.attribution || options);
551
+ const confidenceWeights = attributionOptions.confidenceWeights;
525
552
  const allCommits = commits || [];
526
553
  for (const c of allCommits) {
527
554
  totalLinesAdded += c.linesAdded || 0;
@@ -552,6 +579,12 @@ export function computeAIContribution(commits, toolFilter = null) {
552
579
  fileDeleted = c.linesDeleted || 0;
553
580
  }
554
581
 
582
+ // Step blame override: use precise line-level attribution when available
583
+ if (c.lineBlame) {
584
+ fileAdded = c.lineBlame.aiLines || 0;
585
+ fileDeleted = c.lineBlame.aiDeletedLines || 0;
586
+ }
587
+
555
588
  if (isCountedAIConfidence(confidence)) {
556
589
  aiCommits++;
557
590
  aiCommitLinesAdded += c.linesAdded || 0;
@@ -565,7 +598,7 @@ export function computeAIContribution(commits, toolFilter = null) {
565
598
  }
566
599
 
567
600
  // 加权计算:所有归因的 commit 都参与(包括 LOW)
568
- const weight = CONFIDENCE_WEIGHTS[confidence] || 0;
601
+ const weight = confidenceWeights[confidence] || 0;
569
602
  if (weight > 0) {
570
603
  weightedAILinesAdded += fileAdded * weight;
571
604
  weightedAILinesDeleted += fileDeleted * weight;
@@ -652,7 +685,9 @@ export function parseGitLogOutput(output, repo = '') {
652
685
 
653
686
  const flush = () => {
654
687
  if (!current) return;
655
- const dateKey = current.date.slice(0, 10);
688
+ // 使用本地日期做 daily stats key(用户期望看到的日期),
689
+ // UTC 日期(current.date)仅用于与 session 时间戳比较
690
+ const dateKey = current.dateLocal || current.date.slice(0, 10);
656
691
  // 注入 conventional 类型 + AI 信号
657
692
  const conv = parseConventional(current.subject);
658
693
  const ai = detectAICommit(current.subject, current.author, current.body || '');
@@ -700,7 +735,9 @@ export function parseGitLogOutput(output, repo = '') {
700
735
  const parts = header.split('|');
701
736
  const hash = parts[0] || '';
702
737
  const dateRaw = (parts[1] || '');
703
- // Normalize to UTC ISO for consistent comparison with session dates
738
+ // 保留本地日期用于 commitsByDate(用户看到的日期)
739
+ const dateLocal = dateRaw.slice(0, 10) || '';
740
+ // Normalize to UTC ISO for consistent comparison with session timestamps
704
741
  const dateMs = Date.parse(dateRaw);
705
742
  const date = Number.isFinite(dateMs)
706
743
  ? new Date(dateMs).toISOString().slice(0, 19) + 'Z'
@@ -711,6 +748,7 @@ export function parseGitLogOutput(output, repo = '') {
711
748
  repo,
712
749
  hash,
713
750
  date,
751
+ dateLocal,
714
752
  author,
715
753
  subject,
716
754
  body: '',
@@ -769,15 +807,14 @@ function sanitizeArg(s) {
769
807
  return String(s || '').replace(/[`$"\\|;&<>!\n\r]/g, '');
770
808
  }
771
809
 
772
// Build the argument string appended to `git log`.
// `since` without a 'T' gets 'T00:00:00' appended; `since` and `until` are
// passed through sanitizeArg before being placed inside double quotes.
// The removed (-) version also took `author` and appended an --author filter;
// the added (+) version takes only `since` and `until` and emits no --author.
- function buildGitArgs(since, until, author) {
773
- const sinceFull = since.includes('T') ? since : since + 'T00:00:00';
774
- const safeSince = sanitizeArg(sinceFull);
775
- const safeUntil = sanitizeArg(until);
776
- const authorArg = author ? ` --author="${sanitizeArg(author)}"` : '';
777
- // Format: sentinel line (hash|date|email|subject) → body lines (may span several lines) → ENDBODY marker → numstat lines
778
- const pretty = `--pretty=format:"${COMMIT_SENTINEL}%H|%ad|%ae|%s%n%B${BODY_END}"`;
779
- return `--all --no-renames ${pretty} --date=iso-strict --numstat --since="${safeSince}" --until="${safeUntil}"${authorArg}`;
780
- }
810
+ function buildGitArgs(since, until) {
811
+ const sinceFull = since.includes('T') ? since : since + 'T00:00:00';
812
+ const safeSince = sanitizeArg(sinceFull);
813
+ const safeUntil = sanitizeArg(until);
814
+ // Format: sentinel line (hash|date|email|subject) → body lines (may span several lines) → ENDBODY marker → numstat lines
815
+ const pretty = `--pretty=format:"${COMMIT_SENTINEL}%H|%ad|%ae|%s%n%B${BODY_END}"`;
816
+ return `--all --no-renames ${pretty} --date=iso-strict --numstat --since="${safeSince}" --until="${safeUntil}"`;
817
+ }
781
818
 
782
819
  function mergeGitStats(target, source) {
783
820
  target.commits += source.commits;
@@ -800,37 +837,105 @@ function mergeGitStats(target, source) {
800
837
  // filesChanged 在 merge 完后由 finalize 重新计算(跨 repo 去重)
801
838
  }
802
839
 
803
// Recount the distinct changed files across every commit in stats.commitList
// and store the total in stats.filesChanged. Files are keyed as
// "<repo>::<path>", so the same path in two repos counts twice.
// The removed (-) and re-added (+) copies in this hunk are textually identical.
- function recomputeFilesChanged(stats) {
804
- const set = new Set();
805
- for (const c of stats.commitList || []) {
806
- for (const f of c.files || []) set.add((c.repo || '') + '::' + f.path);
807
- }
808
- stats.filesChanged = set.size;
809
- }
840
+ function recomputeFilesChanged(stats) {
841
+ const set = new Set();
842
+ for (const c of stats.commitList || []) {
843
+ for (const f of c.files || []) set.add((c.repo || '') + '::' + f.path);
844
+ }
845
+ stats.filesChanged = set.size;
846
+ }
847
+
848
// Rebuild the aggregate fields of `stats` in place from stats.commitList:
// commits, linesAdded, linesDeleted, commitsByDate, linesByDate and (via
// recomputeFilesChanged) filesChanged. All of them are reset first.
+ function recomputeStatsFromCommitList(stats) {
849
+ stats.commits = 0;
850
+ stats.filesChanged = 0;
851
+ stats.linesAdded = 0;
852
+ stats.linesDeleted = 0;
853
+ stats.commitsByDate = {};
854
+ stats.linesByDate = {};
855
+
856
+ for (const c of stats.commitList || []) {
// Per-day bucket key: the commit's dateLocal when set, otherwise the first
// 10 characters of its date string.
857
+ const dateKey = c.dateLocal || (c.date || '').slice(0, 10);
858
+ stats.commits++;
859
+ stats.commitsByDate[dateKey] = (stats.commitsByDate[dateKey] || 0) + 1;
860
+ if (!stats.linesByDate[dateKey]) stats.linesByDate[dateKey] = { added: 0, deleted: 0, files: 0 };
861
+ stats.linesByDate[dateKey].added += c.linesAdded || 0;
862
+ stats.linesByDate[dateKey].deleted += c.linesDeleted || 0;
// Per-day `files` sums each commit's file count; it is not de-duplicated.
863
+ stats.linesByDate[dateKey].files += (c.files || []).length;
864
+ stats.linesAdded += c.linesAdded || 0;
865
+ stats.linesDeleted += c.linesDeleted || 0;
866
+ }
// The overall filesChanged total is de-duplicated by repo and path.
867
+ recomputeFilesChanged(stats);
868
+ }
869
+
870
// Annotate every commit in stats.commitList with author-ownership fields:
//   expectedAuthor      - the given author, or null when it is falsy
//   authorMatchesConfig - case-insensitive equality of c.author with the
//                         expected author, or null when no author was given
// Commits are mutated in place; nothing is returned.
+ function markAuthorOwnership(stats, expectedAuthor) {
871
+ const normalizedExpected = (expectedAuthor || '').toLowerCase();
872
+ for (const c of stats.commitList || []) {
873
+ c.expectedAuthor = expectedAuthor || null;
874
+ c.authorMatchesConfig = normalizedExpected
875
+ ? (c.author || '').toLowerCase() === normalizedExpected
876
+ : null;
877
+ }
878
+ }
879
+
880
// Return true when the commit is tied to a session with concrete evidence:
// it must have a sessionId, and either its sessionAttribution is 'strong' or
// aiEvidenceDetails.matchedFileCount is greater than zero.
+ function hasLocalSessionEvidence(commit) {
881
+ if (!commit.sessionId) return false;
882
+ if (commit.sessionAttribution === 'strong') return true;
883
+ return (commit.aiEvidenceDetails?.matchedFileCount || 0) > 0;
884
+ }
885
+
886
// Set c.countedForUser on every commit and, when ownership metadata exists,
// drop the commits that are not counted and rebuild the aggregate stats.
+ function filterCommitsForUser(stats) {
887
+ const commits = stats.commitList || [];
// Metadata counts as present when any commit has an expectedAuthor or an
// authorMatchesConfig other than undefined. A null authorMatchesConfig
// therefore still counts as metadata.
888
+ const hasAuthorOwnershipMetadata = commits.some(c => c.expectedAuthor || c.authorMatchesConfig !== undefined);
889
+
890
+ for (const c of commits) {
// Counted when there is no metadata at all, when the author matched, or when
// the commit has local session evidence.
891
+ c.countedForUser = !hasAuthorOwnershipMetadata
892
+ || c.authorMatchesConfig === true
893
+ || hasLocalSessionEvidence(c);
894
+ }
895
+
// Without metadata every commit was marked counted, so nothing is filtered.
896
+ if (!hasAuthorOwnershipMetadata) return;
897
+ stats.commitList = commits.filter(c => c.countedForUser);
898
+ recomputeStatsFromCommitList(stats);
899
+ }
810
900
 
811
901
  // ── async versions (server) with cache ──
812
902
 
813
903
  // In-memory cache of parsed git stats; entries are stored as { stats, ts }.
  const gitCache = new Map();
904
// Upper bound on the number of cache entries, enforced by evictGitCache.
+ const GIT_CACHE_MAX = 500;
905
// Entry lifetime in milliseconds (60 seconds).
+ const GIT_CACHE_TTL = 60_000;
814
906
  // Embedded in cache keys.
  const CACHE_VERSION = 'v3';
815
907
 
816
- async function getGitStatsAsync(repoPath, since, until, author = null) {
817
- const cacheKey = `${repoPath}|${since}|${until}|${CACHE_VERSION}`;
818
- const cached = gitCache.get(cacheKey);
819
- if (cached && Date.now() - cached.ts < 60_000) return cached.stats;
908
// Prune gitCache: first delete every entry older than GIT_CACHE_TTL, then
// delete entries from the front of the Map until at most GIT_CACHE_MAX remain.
+ function evictGitCache() {
909
+ const now = Date.now();
910
+ for (const [key, val] of gitCache) {
911
+ if (now - val.ts > GIT_CACHE_TTL) gitCache.delete(key);
912
+ }
913
+ while (gitCache.size > GIT_CACHE_MAX) {
// A Map iterates in insertion order, so the first key is the entry whose key
// was inserted earliest. Re-setting an existing key does not move it.
914
+ const oldest = gitCache.keys().next().value;
915
+ gitCache.delete(oldest);
916
+ }
917
+ }
918
+
919
+ async function getGitStatsAsync(repoPath, since, until, author = null) {
920
+ const cacheKey = `${repoPath}|${since}|${until}|${CACHE_VERSION}`;
921
+ const cached = gitCache.get(cacheKey);
922
+ if (cached && Date.now() - cached.ts < GIT_CACHE_TTL) return cached.stats;
820
923
 
821
924
  try {
822
925
  await execAsync('git rev-parse --git-dir', { cwd: repoPath });
823
926
  } catch {
824
927
  return emptyResult();
825
928
  }
826
-
827
- try {
828
- const output = await execAsync(`git log ${buildGitArgs(since, until, author)}`, {
829
- cwd: repoPath, encoding: 'utf-8', maxBuffer: 50 * 1024 * 1024,
830
- });
831
- const stats = parseGitLogOutput(output, repoPath);
832
- gitCache.set(cacheKey, { stats, ts: Date.now() });
833
- return stats;
929
+
930
+ try {
931
+ const output = await execAsync(`git log ${buildGitArgs(since, until)}`, {
932
+ cwd: repoPath, encoding: 'utf-8', maxBuffer: 50 * 1024 * 1024,
933
+ });
934
+ const stats = parseGitLogOutput(output, repoPath);
935
+ markAuthorOwnership(stats, author);
936
+ gitCache.set(cacheKey, { stats, ts: Date.now() });
937
+ evictGitCache();
938
+ return stats;
834
939
  } catch {
835
940
  return emptyResult();
836
941
  }
@@ -867,31 +972,6 @@ export function invalidateGitCache() {
867
972
 
868
973
  // ── Session ↔ Commit 关联 ──
869
974
 
870
- function normalizePath(p) {
871
- if (!p) return '';
872
- return p.replace(/\\/g, '/').replace(/\/+/g, '/').replace(/\/$/, '').toLowerCase();
873
- }
874
-
875
- function toRelativeRepoPath(filePath, repoPath) {
876
- const fileN = normalizePath(filePath);
877
- const repoN = normalizePath(repoPath);
878
- if (!fileN) return '';
879
- if (!repoN) return normalizeCommitFilePath(fileN.replace(/^[a-z]:\//i, ''));
880
- if (fileN === repoN) return '';
881
- if (fileN.startsWith(repoN + '/')) return fileN.slice(repoN.length + 1);
882
- const repoTail = repoN.split('/').filter(Boolean).pop();
883
- if (repoTail) {
884
- const marker = `/${repoTail}/`;
885
- const idx = fileN.indexOf(marker);
886
- if (idx >= 0) return fileN.slice(idx + marker.length);
887
- }
888
- return fileN;
889
- }
890
-
891
- function normalizeCommitFilePath(filePath) {
892
- return normalizePath(filePath).replace(/^\.?\//, '');
893
- }
894
-
895
975
  function looksLikeFilePath(value) {
896
976
  if (typeof value !== 'string') return false;
897
977
  const v = value.trim();
@@ -984,21 +1064,43 @@ function extractTouchedFilesFromSession(session) {
984
1064
  const repoPath = session.project || '';
985
1065
  const files = new Set();
986
1066
  for (const tc of session.toolSequence || []) {
987
- // Write/Edit/NotebookEdit/MultiEdit 工具
988
- if (['Write', 'Edit', 'NotebookEdit', 'MultiEdit'].includes(tc.name)) {
989
- const rawPaths = collectFilePaths(tc.input);
1067
+ const name = tc.name || '';
1068
+ const input = tc.input || {};
1069
+
1070
+ // Claude 内置工具:Write/Edit/NotebookEdit/MultiEdit
1071
+ if (name === 'Write' || name === 'Edit' || name === 'NotebookEdit' || name === 'MultiEdit') {
1072
+ const rawPaths = collectFilePaths(input);
990
1073
  for (const rawPath of rawPaths) {
991
- const relative = normalizeCommitFilePath(toRelativeRepoPath(rawPath, repoPath));
1074
+ const relative = normalizeCommitFilePath(toRepoRelativePath(rawPath, repoPath));
992
1075
  if (relative) files.add(relative);
993
1076
  }
994
1077
  continue;
995
1078
  }
1079
+
1080
+ // MCP Serena 工具:replace_content, replace_symbol_body, insert_before/after_symbol 等
1081
+ // 以及其他带 relative_path/file_path 的 MCP 工具
1082
+ if (name.startsWith('mcp__serena') || name.startsWith('mcp__')) {
1083
+ // Serena 使用 relative_path,其他 MCP 工具可能使用 file_path/path
1084
+ const filePath = input.relative_path || input.file_path || input.path || '';
1085
+ if (filePath && typeof filePath === 'string') {
1086
+ const relative = normalizeCommitFilePath(toRepoRelativePath(filePath, repoPath));
1087
+ if (relative) files.add(relative);
1088
+ }
1089
+ // 部分 MCP 工具在 input 中嵌套了目标文件
1090
+ const rawPaths = collectFilePaths(input);
1091
+ for (const rawPath of rawPaths) {
1092
+ const relative = normalizeCommitFilePath(toRepoRelativePath(rawPath, repoPath));
1093
+ if (relative) files.add(relative);
1094
+ }
1095
+ continue;
1096
+ }
1097
+
996
1098
  // Bash 工具 — 从命令中提取文件路径
997
- if (tc.name === 'Bash') {
998
- const cmd = tc.input?.command || '';
1099
+ if (name === 'Bash') {
1100
+ const cmd = input.command || '';
999
1101
  const rawPaths = extractFilePathsFromBashCommand(cmd);
1000
1102
  for (const rawPath of rawPaths) {
1001
- const relative = normalizeCommitFilePath(toRelativeRepoPath(rawPath, repoPath));
1103
+ const relative = normalizeCommitFilePath(toRepoRelativePath(rawPath, repoPath));
1002
1104
  if (relative) files.add(relative);
1003
1105
  }
1004
1106
  }
@@ -1022,34 +1124,45 @@ function computeFileOverlap(sessionTouchedFiles, commitFiles) {
1022
1124
  });
1023
1125
  }
1024
1126
 
1025
- // 用于 commit.repo 与 session.project 之间宽松对齐:
1026
- // decodeProjectName `-` 解码为 `/`(D--foo-bar → D://foo/bar),
1027
- // 所以将 `-` `_` 统一转为 `/`,再以 `/` 为分隔符保留路径语义。
1028
- // 这样 d:/foo-bar 和 d:/foo/bar 匹配(同一项目的解码差异),
1029
- // 但 d:/foobar 和 d:/foo/bar 不匹配(不同项目)。
1030
- function projectKey(p) {
1031
- return normalizePath(p).replace(/[-_]/g, '/').replace(/\/+/g, '/').replace(/\/$/, '').replace(/[^a-z0-9/]/g, '');
1032
- }
1033
-
1034
- // 精确路径包含:parent 是 child 的前缀,且后面紧跟 '/' 或完全匹配
1035
- function pathContains(parent, child) {
1036
- if (parent === child) return true;
1037
- return child.startsWith(parent + '/');
1038
- }
1039
-
1040
- function projectMatches(commitRepoN, sessionProjectN) {
1041
- if (!commitRepoN || !sessionProjectN) return true;
1042
- // 精确路径匹配(双向:commit repo 可能是 session project 的子目录或反之)
1043
- if (pathContains(commitRepoN, sessionProjectN) || pathContains(sessionProjectN, commitRepoN)) return true;
1044
- // 宽松 key 对比(兜底:处理路径解码差异)
1045
- const a = projectKey(commitRepoN);
1046
- const b = projectKey(sessionProjectN);
1047
- return a && b && (a === b);
1127
// Sort attribution candidates best-first: higher score first, ties broken by
// smaller distanceMs. Array.prototype.sort works in place, so the input array
// itself is reordered and returned.
+ function sortAttributionCandidates(candidates) {
1128
+ return candidates.sort((a, b) => {
1129
+ if (b.score !== a.score) return b.score - a.score;
1130
+ return a.distanceMs - b.distanceMs;
1131
+ });
1048
1132
  }
1049
1133
 
1050
- const BASH_GIT_COMMIT_RE = /\bgit\s+commit\b/i;
1051
- const STRONG_WINDOW_BEFORE_MS = 30 * 1000; // 30s before bash invocation
1052
- const STRONG_WINDOW_AFTER_MS = 5 * 60 * 1000; // 5min after
1134
// Build a scored attribution candidate for a (commit, session) pair: compute
// the overlap between the session's touched files and the commit's files,
// then pass it with the time distance to scoreSessionCandidate.
+ function candidateFromSession(commit, session, distanceMs) {
1135
+ const overlap = computeFileOverlap(session.touchedFiles || [], commit.files || []);
1136
+ return scoreSessionCandidate(commit, session, {
1137
+ distanceMs,
1138
+ fileOverlapRatio: overlap.fileOverlapRatio,
1139
+ matchedFiles: overlap.matchedFiles,
// Always passed as true here.
// NOTE(review): presumably callers have already filtered by project — confirm.
1140
+ projectMatches: true,
1141
+ });
1142
+ }
1143
+
1144
// List the session-id spellings to try for a session: the raw id first, then
// "<origin>:<id>" variants. Returns [] for a falsy id, and only the raw id
// when it already contains ':'.
+ function getStepSessionIdCandidates(sessionId, session) {
1145
+ if (!sessionId) return [];
1146
+ const candidates = [sessionId];
1147
+ if (sessionId.includes(':')) return candidates;
1148
+
// Origin prefix used for each known session.primaryTool value.
1149
+ const originByTool = {
1150
+ claude: 'claude_code',
1151
+ codex: 'codex_cli',
1152
+ };
1153
+ const origin = originByTool[session?.primaryTool];
1154
+ if (origin) {
1155
+ candidates.push(`${origin}:${sessionId}`);
1156
+ } else {
// Unknown or missing tool: try both known origins.
1157
+ candidates.push(`claude_code:${sessionId}`, `codex_cli:${sessionId}`);
1158
+ }
1159
+
// De-duplicate while keeping insertion order.
1160
+ return [...new Set(candidates)];
1161
+ }
1162
+
1163
+ const BASH_GIT_COMMIT_RE = /\bgit\s+commit\b/i;
1164
+ const STRONG_WINDOW_BEFORE_MS = 30 * 1000; // 30s before bash invocation
1165
+ const STRONG_WINDOW_AFTER_MS = 5 * 60 * 1000; // 5min after
1053
1166
 
1054
1167
  function toMs(iso) {
1055
1168
  if (!iso) return NaN;
@@ -1071,18 +1184,22 @@ function extractCommitBashTimestamps(session) {
1071
1184
  return ts;
1072
1185
  }
1073
1186
 
1074
- export function attributeCommitsToSessions(commits, sessions, { bufferMs = 30 * 60 * 1000 } = {}) {
1187
+ export function attributeCommitsToSessions(commits, sessions, options = {}) {
1075
1188
  const result = { sessionCommitMap: {} };
1076
1189
  if (!commits?.length || !sessions?.length) return result;
1190
+ const attributionOptions = resolveAttributionOptions(options.attribution || options);
1191
+ const bufferMs = options.bufferMs ?? attributionOptions.windows.weakWindowMinutes * 60 * 1000;
1192
+ const crossDayMs = attributionOptions.windows.crossDayWindowDays * 24 * 3600 * 1000;
1077
1193
 
1078
1194
  // 预计算每个 session 的 ms 范围 + 项目归一化 + bash commit 时间戳
1079
1195
  const sIndex = sessions.map(s => ({
1080
1196
  id: s.id,
1081
- projectN: normalizePath(s.project || ''),
1197
+ projectN: normalizePathForGit(s.project || ''),
1082
1198
  startMs: toMs(s.startTime),
1083
1199
  endMs: toMs(s.endTime),
1084
1200
  bashTs: extractCommitBashTimestamps(s),
1085
1201
  touchedFiles: extractTouchedFilesFromSession(s),
1202
+ primaryTool: s.primaryTool,
1086
1203
  }));
1087
1204
 
1088
1205
  // 阶段 1:重置 + 强信号匹配(Bash git commit)
@@ -1090,13 +1207,13 @@ export function attributeCommitsToSessions(commits, sessions, { bufferMs = 30 *
1090
1207
  c.sessionId = null;
1091
1208
  c.sessionAttribution = null;
1092
1209
  const commitMs = toMs(c.date);
1093
- const commitRepoN = normalizePath(c.repo || '');
1210
+ const commitRepoN = normalizePathForGit(c.repo || '');
1094
1211
  if (!Number.isFinite(commitMs)) continue;
1095
1212
 
1096
1213
  let matched = null;
1097
1214
  for (const s of sIndex) {
1098
1215
  if (!s.bashTs.length) continue;
1099
- if (!projectMatches(commitRepoN, s.projectN)) continue;
1216
+ if (!projectMatchesFromGitPaths(commitRepoN, s.projectN)) continue;
1100
1217
  for (const bts of s.bashTs) {
1101
1218
  if (commitMs >= bts - STRONG_WINDOW_BEFORE_MS && commitMs <= bts + STRONG_WINDOW_AFTER_MS) {
1102
1219
  matched = s;
@@ -1129,14 +1246,13 @@ export function attributeCommitsToSessions(commits, sessions, { bufferMs = 30 *
1129
1246
  for (const c of commits) {
1130
1247
  if (c.sessionAttribution) continue;
1131
1248
  const commitMs = toMs(c.date);
1132
- const commitRepoN = normalizePath(c.repo || '');
1249
+ const commitRepoN = normalizePathForGit(c.repo || '');
1133
1250
  if (!Number.isFinite(commitMs)) continue;
1134
1251
 
1135
- let best = null;
1136
- let bestDist = Infinity;
1252
+ const candidates = [];
1137
1253
  for (const s of sIndex) {
1138
1254
  if (!Number.isFinite(s.startMs) || !Number.isFinite(s.endMs)) continue;
1139
- if (!projectMatches(commitRepoN, s.projectN)) continue;
1255
+ if (!projectMatchesFromGitPaths(commitRepoN, s.projectN)) continue;
1140
1256
 
1141
1257
  // author 一致性校验:session 有已知 author 时,commit author 必须匹配
1142
1258
  const knownAuthors = sessionAuthors.get(s.id);
@@ -1147,13 +1263,13 @@ export function attributeCommitsToSessions(commits, sessions, { bufferMs = 30 *
1147
1263
  if (commitMs < lo || commitMs > hi) continue;
1148
1264
  const mid = (s.startMs + s.endMs) / 2;
1149
1265
  const dist = Math.abs(commitMs - mid);
1150
- if (dist < bestDist) {
1151
- best = s;
1152
- bestDist = dist;
1153
- }
1266
+ candidates.push(candidateFromSession(c, s, dist));
1154
1267
  }
1155
1268
 
1156
- if (best) {
1269
+ if (candidates.length) {
1270
+ const ranked = sortAttributionCandidates(candidates);
1271
+ const best = sIndex.find(s => s.id === ranked[0].sessionId);
1272
+ c.attributionCandidates = ranked.slice(0, 3);
1157
1273
  c.sessionId = best.id;
1158
1274
  c.sessionAttribution = 'weak';
1159
1275
  if (!result.sessionCommitMap[best.id]) result.sessionCommitMap[best.id] = [];
@@ -1166,29 +1282,28 @@ export function attributeCommitsToSessions(commits, sessions, { bufferMs = 30 *
1166
1282
  for (const c of commits) {
1167
1283
  if (c.sessionAttribution) continue;
1168
1284
  const commitMs = toMs(c.date);
1169
- const commitRepoN = normalizePath(c.repo || '');
1285
+ const commitRepoN = normalizePathForGit(c.repo || '');
1170
1286
  if (!Number.isFinite(commitMs)) continue;
1171
1287
 
1172
- let best = null;
1173
- let bestDist = Infinity;
1288
+ const candidates = [];
1174
1289
  for (const s of sIndex) {
1175
- if (!projectMatches(commitRepoN, s.projectN)) continue;
1290
+ if (!projectMatchesFromGitPaths(commitRepoN, s.projectN)) continue;
1176
1291
  if (!Number.isFinite(s.endMs)) continue;
1177
1292
  // commit 必须在 session 结束之后(不能是之前漏掉的)
1178
1293
  if (commitMs < s.endMs) continue;
1179
1294
  const dist = commitMs - s.endMs;
1180
1295
  // 最多跨 3 天
1181
- if (dist > 3 * 24 * 3600 * 1000) continue;
1296
+ if (dist > crossDayMs) continue;
1182
1297
  // author 校验:session 有已知 author 时,commit author 必须匹配
1183
1298
  const knownAuthors = sessionAuthors.get(s.id);
1184
1299
  if (knownAuthors?.size && c.author && !knownAuthors.has(c.author.toLowerCase())) continue;
1185
- if (dist < bestDist) {
1186
- best = s;
1187
- bestDist = dist;
1188
- }
1300
+ candidates.push(candidateFromSession(c, s, dist));
1189
1301
  }
1190
1302
 
1191
- if (best) {
1303
+ if (candidates.length) {
1304
+ const ranked = sortAttributionCandidates(candidates);
1305
+ const best = sIndex.find(s => s.id === ranked[0].sessionId);
1306
+ c.attributionCandidates = ranked.slice(0, 3);
1192
1307
  // 文件交集前置检查:无交集时标记为 cross-day-weak
1193
1308
  const commitFiles = (c.files || []).map(f => (f.path || '').replace(/\\/g, '/'));
1194
1309
  const sessionFiles = best.touchedFiles || [];
@@ -1231,15 +1346,17 @@ export function attachCommitsToSessions(sessions, commitList) {
1231
1346
  }
1232
1347
 
1233
1348
  // 一次性收尾:跑 attribution + 三个聚合
1234
- export function finalizeGitStats(merged, sessions = [], options = {}) {
1235
- if (!merged) return merged;
1236
- const fileOverrides = loadAttributionOverrides();
1349
+ export async function finalizeGitStats(merged, sessions = [], options = {}) {
1350
+ if (!merged) return merged;
1351
+ const attributionOptions = resolveAttributionOptions(options.attribution || options);
1352
+ const stepTrackingOptions = options.stepTracking || {};
1353
+ const fileOverrides = loadAttributionOverrides();
1237
1354
  const inputOverrides = options.overrides || {};
1238
1355
  const mergedOverrides = {
1239
1356
  commits: { ...fileOverrides.commits, ...(inputOverrides.commits || {}) },
1240
1357
  files: { ...fileOverrides.files, ...(inputOverrides.files || {}) },
1241
1358
  };
1242
- const { sessionCommitMap } = attributeCommitsToSessions(merged.commitList, sessions);
1359
+ const { sessionCommitMap } = attributeCommitsToSessions(merged.commitList, sessions, { attribution: attributionOptions });
1243
1360
  merged.sessionCommitMap = sessionCommitMap;
1244
1361
  const sessionsById = new Map((sessions || []).map(s => [s.id, s]));
1245
1362
  for (const s of sessions || []) {
@@ -1261,7 +1378,71 @@ export function finalizeGitStats(merged, sessions = [], options = {}) {
1261
1378
  }
1262
1379
  }
1263
1380
 
1264
- // Step 1.5: compute developer behavioral baselines
1381
+ // Step 1.5: Enrich commits with line-level step blame when available
1382
+ const stepTrackers = new Map();
1383
+ if (stepTrackingOptions.enabled !== false) try {
1384
+ const { StepTracker } = await import('./step-tracker.js');
1385
+ const projectRoots = [...new Set((sessions || []).map(s => s.project).filter(Boolean))];
1386
+ // Also check repo paths from commits
1387
+ for (const c of merged.commitList || []) {
1388
+ if (c.repo) projectRoots.push(c.repo);
1389
+ }
1390
+ for (const root of [...new Set(projectRoots.map(normalizePathForGit))]) {
1391
+ if (!root) continue;
1392
+ const tracker = new StepTracker(root, {
1393
+ dbPath: stepTrackingOptions.dbPath,
1394
+ maxFileSize: stepTrackingOptions.maxFileSize,
1395
+ });
1396
+ if (await tracker.isAvailableAsync()) {
1397
+ await tracker.open();
1398
+ stepTrackers.set(root, tracker);
1399
+ }
1400
+ }
1401
+ } catch {
1402
+ for (const tracker of stepTrackers.values()) tracker.close();
1403
+ stepTrackers.clear();
1404
+ }
1405
+
1406
+ if (stepTrackers.size > 0) {
1407
+ for (const c of merged.commitList || []) {
1408
+ if (!c.sessionId) continue;
1409
+ const candidateRoots = [
1410
+ c.repo,
1411
+ sessionsById.get(c.sessionId)?.project,
1412
+ ].filter(Boolean);
1413
+ let stepTracker = null;
1414
+ for (const candidateRoot of candidateRoots) {
1415
+ const normalizedCandidate = normalizePathForGit(candidateRoot);
1416
+ for (const [root, tracker] of stepTrackers.entries()) {
1417
+ if (projectMatchesFromGitPaths(root, normalizedCandidate)) {
1418
+ stepTracker = tracker;
1419
+ break;
1420
+ }
1421
+ }
1422
+ if (stepTracker) break;
1423
+ }
1424
+ if (!stepTracker && stepTrackers.size === 1) {
1425
+ stepTracker = stepTrackers.values().next().value;
1426
+ }
1427
+ if (!stepTracker) continue;
1428
+ try {
1429
+ const session = sessionsById.get(c.sessionId);
1430
+ for (const stepSessionId of getStepSessionIdCandidates(c.sessionId, session)) {
1431
+ const lineBlame = stepTracker.getLineAttributionForCommit({
1432
+ ...c,
1433
+ sessionId: stepSessionId,
1434
+ });
1435
+ if (lineBlame) {
1436
+ c.lineBlame = lineBlame;
1437
+ break;
1438
+ }
1439
+ }
1440
+ } catch { /* best effort */ }
1441
+ }
1442
+ for (const tracker of stepTrackers.values()) tracker.close();
1443
+ }
1444
+
1445
+ // Step 1.6: compute developer behavioral baselines
1265
1446
  const authorBaselines = computeAuthorBaseline(merged.commitList);
1266
1447
 
1267
1448
  // Step 2: 信心度评估(保持现有逻辑)
@@ -1367,19 +1548,21 @@ export function finalizeGitStats(merged, sessions = [], options = {}) {
1367
1548
  }
1368
1549
 
1369
1550
  // Step 2.5: composite continuous scoring for all commits
1370
- for (const c of merged.commitList || []) {
1371
- c.aiScore = computeContinuousScore(c);
1372
- const mappedConfidence = scoreToConfidence(c.aiScore);
1373
- // Only override if no explicit signature and continuous score disagrees
1374
- if (c.attributionType !== 'explicit') {
1551
+ for (const c of merged.commitList || []) {
1552
+ c.aiScore = computeContinuousScore(c, attributionOptions);
1553
+ const mappedConfidence = scoreToConfidence(c.aiScore, attributionOptions);
1554
+ // Only override if no explicit signature and continuous score disagrees
1555
+ if (c.attributionType !== 'explicit') {
1375
1556
  c.aiConfidence = pickHigherConfidence(c.aiConfidence, mappedConfidence);
1376
1557
  c.isAI = isCountedAIConfidence(c.aiConfidence);
1377
- c.aiAssisted = c.aiConfidence !== AI_CONFIDENCE.NONE;
1378
- }
1379
- }
1380
-
1381
- const attributionItems = [];
1382
- for (const c of merged.commitList || []) {
1558
+ c.aiAssisted = c.aiConfidence !== AI_CONFIDENCE.NONE;
1559
+ }
1560
+ }
1561
+
1562
+ filterCommitsForUser(merged);
1563
+
1564
+ const attributionItems = [];
1565
+ for (const c of merged.commitList || []) {
1383
1566
  const commitOverride = mergedOverrides.commits[c.hash] || null;
1384
1567
  const fileOverride = (c.files || []).find(f => mergedOverrides.files[`${c.hash}:${f.path}`]);
1385
1568
  const fileOverrideValue = fileOverride ? mergedOverrides.files[`${c.hash}:${fileOverride.path}`] : null;
@@ -1409,7 +1592,7 @@ export function finalizeGitStats(merged, sessions = [], options = {}) {
1409
1592
  }
1410
1593
 
1411
1594
  // Step 3: 全局 + 按工具聚合
1412
- merged.aiContribution = computeAIContribution(merged.commitList);
1595
+ merged.aiContribution = computeAIContribution(merged.commitList, null, attributionOptions);
1413
1596
  // 动态收集所有出现的 attributedTool,确保新工具自动覆盖
1414
1597
  const toolSet = new Set();
1415
1598
  for (const c of merged.commitList || []) {
@@ -1419,7 +1602,7 @@ export function finalizeGitStats(merged, sessions = [], options = {}) {
1419
1602
  for (const t of ['claude', 'codex', 'opencode', 'generic-ai']) toolSet.add(t);
1420
1603
  merged.aiContributionByTool = {};
1421
1604
  for (const tool of toolSet) {
1422
- merged.aiContributionByTool[tool] = computeAIContribution(merged.commitList, tool);
1605
+ merged.aiContributionByTool[tool] = computeAIContribution(merged.commitList, tool, attributionOptions);
1423
1606
  }
1424
1607
  merged.attributionSummary = aggregateAttribution(attributionItems);
1425
1608
  merged.commitTypes = computeCommitTypes(merged.commitList);