@paths.design/caws-cli 3.5.0 → 4.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87)
  1. package/dist/budget-derivation.d.ts +41 -2
  2. package/dist/budget-derivation.d.ts.map +1 -1
  3. package/dist/budget-derivation.js +417 -30
  4. package/dist/commands/archive.d.ts +50 -0
  5. package/dist/commands/archive.d.ts.map +1 -0
  6. package/dist/commands/archive.js +353 -0
  7. package/dist/commands/iterate.d.ts.map +1 -1
  8. package/dist/commands/iterate.js +12 -13
  9. package/dist/commands/mode.d.ts +24 -0
  10. package/dist/commands/mode.d.ts.map +1 -0
  11. package/dist/commands/mode.js +259 -0
  12. package/dist/commands/plan.d.ts +49 -0
  13. package/dist/commands/plan.d.ts.map +1 -0
  14. package/dist/commands/plan.js +448 -0
  15. package/dist/commands/quality-gates.d.ts +52 -0
  16. package/dist/commands/quality-gates.d.ts.map +1 -0
  17. package/dist/commands/quality-gates.js +490 -0
  18. package/dist/commands/specs.d.ts +71 -0
  19. package/dist/commands/specs.d.ts.map +1 -0
  20. package/dist/commands/specs.js +735 -0
  21. package/dist/commands/status.d.ts +4 -3
  22. package/dist/commands/status.d.ts.map +1 -1
  23. package/dist/commands/status.js +552 -22
  24. package/dist/commands/tutorial.d.ts +55 -0
  25. package/dist/commands/tutorial.d.ts.map +1 -0
  26. package/dist/commands/tutorial.js +481 -0
  27. package/dist/commands/validate.d.ts +10 -2
  28. package/dist/commands/validate.d.ts.map +1 -1
  29. package/dist/commands/validate.js +199 -39
  30. package/dist/config/modes.d.ts +225 -0
  31. package/dist/config/modes.d.ts.map +1 -0
  32. package/dist/config/modes.js +321 -0
  33. package/dist/constants/spec-types.d.ts +41 -0
  34. package/dist/constants/spec-types.d.ts.map +1 -0
  35. package/dist/constants/spec-types.js +42 -0
  36. package/dist/index-new.d.ts +5 -0
  37. package/dist/index-new.d.ts.map +1 -0
  38. package/dist/index-new.js +317 -0
  39. package/dist/index.js +227 -10
  40. package/dist/index.js.backup +4711 -0
  41. package/dist/policy/PolicyManager.d.ts +104 -0
  42. package/dist/policy/PolicyManager.d.ts.map +1 -0
  43. package/dist/policy/PolicyManager.js +399 -0
  44. package/dist/scaffold/cursor-hooks.d.ts.map +1 -1
  45. package/dist/scaffold/cursor-hooks.js +15 -0
  46. package/dist/scaffold/git-hooks.d.ts.map +1 -1
  47. package/dist/scaffold/git-hooks.js +32 -44
  48. package/dist/scaffold/index.d.ts.map +1 -1
  49. package/dist/scaffold/index.js +19 -0
  50. package/dist/spec/SpecFileManager.d.ts +146 -0
  51. package/dist/spec/SpecFileManager.d.ts.map +1 -0
  52. package/dist/spec/SpecFileManager.js +419 -0
  53. package/dist/utils/quality-gates-errors.js +520 -0
  54. package/dist/utils/quality-gates.d.ts +49 -0
  55. package/dist/utils/quality-gates.d.ts.map +1 -0
  56. package/dist/utils/quality-gates.js +361 -0
  57. package/dist/utils/spec-resolver.d.ts +88 -0
  58. package/dist/utils/spec-resolver.d.ts.map +1 -0
  59. package/dist/utils/spec-resolver.js +602 -0
  60. package/dist/validation/spec-validation.d.ts +14 -0
  61. package/dist/validation/spec-validation.d.ts.map +1 -1
  62. package/dist/validation/spec-validation.js +225 -13
  63. package/package.json +6 -5
  64. package/templates/.cursor/hooks/caws-scope-guard.sh +64 -8
  65. package/templates/.cursor/hooks/validate-spec.sh +22 -12
  66. package/templates/.cursor/rules/00-claims-verification.mdc +144 -0
  67. package/templates/.cursor/rules/01-working-style.mdc +50 -0
  68. package/templates/.cursor/rules/02-quality-gates.mdc +370 -0
  69. package/templates/.cursor/rules/03-naming-and-refactor.mdc +33 -0
  70. package/templates/.cursor/rules/04-logging-language-style.mdc +23 -0
  71. package/templates/.cursor/rules/05-safe-defaults-guards.mdc +23 -0
  72. package/templates/.cursor/rules/06-typescript-conventions.mdc +36 -0
  73. package/templates/.cursor/rules/07-process-ops.mdc +20 -0
  74. package/templates/.cursor/rules/08-solid-and-architecture.mdc +16 -0
  75. package/templates/.cursor/rules/09-docstrings.mdc +89 -0
  76. package/templates/.cursor/rules/10-authorship-and-attribution.mdc +15 -0
  77. package/templates/.cursor/rules/11-documentation-quality-standards.mdc +390 -0
  78. package/templates/.cursor/rules/12-scope-management-waivers.mdc +385 -0
  79. package/templates/.cursor/rules/13-implementation-completeness.mdc +516 -0
  80. package/templates/.cursor/rules/14-language-agnostic-standards.mdc +588 -0
  81. package/templates/.cursor/rules/15-sophisticated-todo-detection.mdc +425 -0
  82. package/templates/.cursor/rules/README.md +150 -0
  83. package/templates/apps/tools/caws/prompt-lint.js.backup +274 -0
  84. package/templates/apps/tools/caws/provenance.js.backup +73 -0
  85. package/templates/scripts/quality-gates/check-god-objects.js +146 -0
  86. package/templates/scripts/quality-gates/run-quality-gates.js +50 -0
  87. package/templates/scripts/v3/analysis/todo_analyzer.py +1950 -0
@@ -0,0 +1,1950 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Hidden TODO Pattern Analyzer
4
+
5
+ @description: Hidden TODO analyzer with better accuracy, context awareness,
6
+ and reduced false positives. Uses semantic analysis and context clues to
7
+ distinguish between hidden TODOs and legitimate documentation.
8
+
9
+ @parameters:
10
+ - root_dir: The root directory to analyze.
11
+ - min_confidence: The minimum confidence score to consider a TODO.
12
+ - output_json: The path to save the JSON report.
13
+ - output_md: The path to save the Markdown report.
14
+ - verbose: Whether to print verbose output.
15
+ - enable_code_stub_scan: Whether to enable code stub detection heuristics.
16
+
17
+ For an example on how to improve the clarity of the TODOs that were found, see the following snippet:
18
+ ```rust
19
+ // TODO: Implement ANE initialization with the following requirements:
20
+ // 1. ANE initialization: Initialize Apple Neural Engine framework and resources
21
+ // - Set up ANE device and computation resources
22
+ // - Initialize ANE neural network computation capabilities
23
+ // - Handle ANE initialization error handling and recovery
24
+ // 2. ANE resource setup: Set up ANE resources and memory
25
+ // - Allocate ANE memory and computation buffers
26
+ // - Set up ANE resource management and optimization
27
+ // - Implement ANE resource validation and verification
28
+ // 3. ANE configuration: Configure ANE settings and parameters
29
+ // - Set up ANE computation parameters and settings
30
+ // - Configure ANE performance and optimization settings
31
+ // - Handle ANE configuration validation and verification
32
+ // 4. ANE monitoring: Set up ANE monitoring and management
33
+ // - Initialize ANE performance monitoring
34
+ // - Set up ANE resource monitoring and management
35
+ // - Implement ANE monitoring and reporting
36
+ ```
37
+
38
+ The analyzer now includes engineering-grade TODO format suggestions. Use --engineering-suggestions to get
39
+ recommendations for upgrading TODOs to the CAWS-compliant format with completion checklists, acceptance
40
+ criteria, dependencies, and governance information.
41
+
42
+ @author: @darianrosebrook
43
+ @date: 2025-10-17
44
+ @version: 2.0.0
45
+ """
46
+
47
+ import os
48
+ import re
49
+ import json
50
+ from pathlib import Path
51
+ from collections import defaultdict, Counter
52
+ from typing import Any, Dict, List, Set, Tuple, Optional
53
+
54
+
55
+ class HiddenTodoAnalyzer:
56
    def __init__(self, root_dir: str, *, enable_code_stub_scan: bool = True):
        """Initialize the analyzer and build all of its pattern tables.

        Args:
            root_dir: Root directory whose files will be analyzed.
            enable_code_stub_scan: When True, also run the language-specific
                code-stub heuristics (``pass``/``...``/``NotImplementedError``
                bodies in Python, ``throw new Error('TODO')`` in JS/TS) in
                addition to the comment-based scans.
        """
        self.root_dir = Path(root_dir)
        self.enable_code_stub_scan = enable_code_stub_scan

        # Language-specific comment patterns.
        # Each entry maps a language key to its file extensions and the regexes
        # used to recognize single-line and multi-line comment delimiters.
        # A value of None means the language has no such comment form.
        self.language_patterns = {
            'rust': {
                'extensions': ['.rs'],
                'single_line': r'^\s*//',
                'multi_line_start': r'^\s*/\*',
                'multi_line_end': r'\*/',
            },
            'javascript': {
                'extensions': ['.js', '.mjs', '.cjs'],
                'single_line': r'^\s*//',
                'multi_line_start': r'^\s*/\*',
                'multi_line_end': r'\*/',
            },
            'typescript': {
                'extensions': ['.ts', '.tsx', '.mts', '.cts'],
                'single_line': r'^\s*//',
                'multi_line_start': r'^\s*/\*',
                'multi_line_end': r'\*/',
            },
            'python': {
                'extensions': ['.py', '.pyi'],
                'single_line': r'^\s*#',
                # NOTE: start and end delimiters are the same token for
                # docstrings; extract_comments_from_file handles the inline case.
                'multi_line_start': r'^\s*"""',
                'multi_line_end': r'"""',
            },
            'go': {
                'extensions': ['.go'],
                'single_line': r'^\s*//',
                'multi_line_start': r'^\s*/\*',
                'multi_line_end': r'\*/',
            },
            'java': {
                'extensions': ['.java'],
                'single_line': r'^\s*//',
                'multi_line_start': r'^\s*/\*',
                'multi_line_end': r'\*/',
            },
            'csharp': {
                'extensions': ['.cs'],
                'single_line': r'^\s*//',
                'multi_line_start': r'^\s*/\*',
                'multi_line_end': r'\*/',
            },
            'cpp': {
                'extensions': ['.cpp', '.cc', '.cxx', '.c++', '.hpp', '.h', '.hxx'],
                'single_line': r'^\s*//',
                'multi_line_start': r'^\s*/\*',
                'multi_line_end': r'\*/',
            },
            'c': {
                'extensions': ['.c'],
                'single_line': r'^\s*//',
                'multi_line_start': r'^\s*/\*',
                'multi_line_end': r'\*/',
            },
            'php': {
                'extensions': ['.php'],
                'single_line': r'^\s*//',
                'multi_line_start': r'^\s*/\*',
                'multi_line_end': r'\*/',
            },
            'ruby': {
                'extensions': ['.rb'],
                'single_line': r'^\s*#',
                'multi_line_start': r'^\s*=begin',
                'multi_line_end': r'=end',
            },
            'shell': {
                'extensions': ['.sh', '.bash', '.zsh', '.fish'],
                'single_line': r'^\s*#',
                'multi_line_start': None,
                'multi_line_end': None,
            },
            'yaml': {
                'extensions': ['.yaml', '.yml'],
                'single_line': r'^\s*#',
                'multi_line_start': None,
                'multi_line_end': None,
            },
            'json': {
                # JSON has no comment syntax at all.
                'extensions': ['.json'],
                'single_line': None,
                'multi_line_start': None,
                'multi_line_end': None,
            },
            'markdown': {
                'extensions': ['.md', '.markdown'],
                'single_line': r'^\s*<!--',
                'multi_line_start': r'^\s*<!--',
                'multi_line_end': r'-->',
            },
        }

        # Comprehensive list of file patterns to ignore.
        # Matched case-insensitively against the full path string in
        # should_ignore_file().
        self.ignored_file_patterns = [
            # Test files
            r'\btest\b',
            r'\btests\b',
            r'_test\.',
            r'_tests\.',
            r'\.test\.',
            r'\.spec\.',
            r'\.specs\.',

            # Build artifacts and generated files
            r'\btarget\b',
            r'\bbuild\b',
            r'\bout\b',
            r'\bdist\b',
            r'\bbin\b',
            r'\.next\b',
            r'generated\.',
            r'bindgen\.',
            r'private\.',
            r'mime_types_generated\.',
            r'named_entities\.',
            r'ascii_case_insensitive_html_attributes\.',

            # Package management and dependencies
            r'\bnode_modules\b',
            r'package-lock\.json$',
            r'package\.json$',
            r'yarn\.lock$',
            r'pnpm-lock\.yaml$',
            r'\bvenv\b',
            r'\bpip\b',
            r'requirements\.txt$',
            r'Pipfile$',
            r'Pipfile\.lock$',
            r'poetry\.lock$',
            r'Cargo\.lock$',
            r'Cargo\.toml$',

            # External libraries and frameworks
            r'libtorch-cpu',
            r'libtorch\b',
            r'\.venv-whisper\b',
            r'whisper_conversion_env',
            r'\bsite-packages\b',
            r'lib/python\d+\.\d+/site-packages',
            r'whisperkit\b',
            r'\.build\b',

            # Model and data directories
            r'models\b',
            r'\bmodels/',
            r'temp\.rs$',
            # Excludes this analyzer's own files from its scan.
            r'todo_analyzer\.',

            # Version control and IDE
            r'\.git\b',
            r'\.github\b',
            r'\.vscode\b',
            r'\.idea\b',
            r'\.DS_Store$',
            r'\.DS_Store\?$',
            r'\._',
            r'\.Spotlight-V100$',

            # Documentation and examples
            r'\bdocs\b',
            r'\bexamples\b',
            r'\bdoc\b',
            r'\bexample\b',

            # Temporary and cache files
            r'\bcache\b',
            r'\btmp\b',
            r'\btemp\b',
            r'\.tmp$',
            r'\.temp$',
            r'\.cache$',

            # Coverage and analysis reports
            r'\bhtmlcov\b',
            r'\bcoverage\b',
            r'\.coverage$',
            r'coverage\.xml$',
            r'lcov\.info$',

            # OS-specific files
            r'Thumbs\.db$',
            r'desktop\.ini$',
            r'\.fseventsd$',
            r'\.Trashes$',

            # Language-specific build artifacts
            r'\.rlib$',
            r'\.rmeta$',
            r'\.d$',
            r'\.pdb$',
            r'\.o$',
            r'\.obj$',
            r'\.exe$',
            r'\.dll$',
            r'\.so$',
            r'\.dylib$',
            r'\.pyc$',
            r'\.pyo$',
            r'__pycache__',
            r'\.class$',
            r'\.jar$',
            r'\.war$',
            r'\.ear$',

            # Web assets
            r'\.min\.js$',
            r'\.min\.css$',
            r'\.bundle\.js$',
            r'\.chunk\.js$',
            r'\.map$',

            # Configuration files (often generated)
            r'\.env\.local$',
            r'\.env\.production$',
            r'\.env\.development$',
            r'config\.local\.',
            r'config\.prod\.',
            r'config\.dev\.',

            # Logs and reports
            r'\blogs\b',
            r'\.log$',
            r'\breports\b',
            r'\bartifacts\b',
            # NOTE(review): r'\btemp\b' duplicates the entry in the
            # "Temporary and cache files" group above — harmless but redundant.
            r'\btemp\b',

            # IDE and editor files
            r'\.swp$',
            r'\.swo$',
            r'~$',
            r'\.bak$',
            r'\.backup$',

            # Additional exclusions from user's search
            # NOTE(review): r'\.json$' and r'\.md$' exclude ALL JSON and
            # Markdown files, making the 'json'/'markdown' language configs
            # above unreachable — confirm this is intended.
            r'\.gitignore$',
            r'\.json$',
            r'\.md$',
        ]

        # Explicit TODO patterns (highest priority) - more restrictive
        self.explicit_todo_patterns = {
            'explicit_todos': [
                r'\bTODO\b.*?:',
                r'\bFIXME\b.*?:',
                r'\bHACK\b.*?:',
                r'\bXXX\b.*?:',
                r'\bTEMP\b.*?:.*?(implement|fix|replace|complete|add)',
                r'\bTEMPORARY\b.*?:.*?(implement|fix|replace|complete|add)',
                # User's VSCode search patterns
                # (negative lookaheads avoid matching e.g. "todo_analyzer",
                # "TODOs", "todo.")
                r'\bTODO\b(?!(_|\.|anal|\sanal|s))',
                r'\bin\s+a\s+real\b(?!(_|\.|anal|\sanal|s))',
                r'\bsimplified\b(?!(_|\.|anal|\sanal|s))',
                r'\bfor\s+now\b(?!(_|\.|anal|\sanal|s))',
            ]
        }

        # High-confidence hidden TODO patterns (more specific and contextual)
        self.high_confidence_patterns = {
            'incomplete_implementation': [
                r'\bnot\s+yet\s+implemented\b',
                r'\bmissing\s+implementation\b',
                r'\bincomplete\s+implementation\b',
                r'\bpartial\s+implementation\b',
                r'\bunimplemented\b',
                r'\bnot\s+done\b',
                r'\bpending\s+implementation\b',
                r'\bto\s+be\s+implemented\b',
                r'\bwill\s+be\s+implemented\b',
            ],

            'placeholder_code': [
                r'\bplaceholder\s+code\b',
                r'\bplaceholder\s+implementation\b',
                r'\bplaceholder\s+function\b',
                r'\bplaceholder\s+value\b',
                r'\bstub\s+implementation\b',
                r'\bstub\s+function\b',
                r'\bdummy\s+implementation\b',
                r'\bfake\s+implementation\b',
                r'\bexample\s+implementation\b',
                r'\bdemo\s+implementation\b',
                r'\bsample\s+implementation\b',
                r'\btemplate\s+implementation\b',
                r'\bstub\s+implementation\s+for\b',
                r'\bsimplified\s+.*?\s+calculation\b',
                r'\bsimplified\s+.*?\s+implementation\b',
                r'\bfor\s+now\b.*?(just|simply|only)\s+(concatenate|return|use)',
                r'\bin\s+practice\b.*?(would|should|will)\s+(intelligently|properly|correctly)',
            ],

            'temporary_solutions': [
                r'\btemporary\s+solution\b',
                r'\btemporary\s+fix\b',
                r'\btemporary\s+workaround\b',
                r'\bquick\s+fix\b',
                r'\bquick\s+hack\b',
                r'\bworkaround\b',
                r'\bhack\b.*?(fix|solution)',
                r'\bpatch\b.*?(fix|solution)',
                r'\bbypass\b.*?(fix|solution)',
            ],

            'hardcoded_values': [
                r'\bhardcoded\s+value\b',
                r'\bhard-coded\s+value\b',
                r'\bmagic\s+number\b',
                r'\bmagic\s+string\b',
                r'\bconstant\s+value\b.*?(replace|change|make\s+configurable)',
                r'\bdefault\s+value\b.*?(replace|change|make\s+configurable)',
                r'\bhardcoded\s+return\s+value\b',
                r'\bhardcoded\s+result\b',
                r'\bhardcoded\s+efficiency\b',
                r'\bhardcoded\s+percentage\b',
            ],

            'future_improvements': [
                r'\bin\s+production\b.*?(implement|add|fix)',
                r'\bin\s+a\s+real\s+implementation\b',
                r'\beventually\b.*?(implement|add|fix)',
                r'\blater\b.*?(implement|add|fix)',
                r'\bshould\s+be\b.*?(implemented|added|fixed)',
                r'\bwould\s+be\b.*?(implemented|added|fixed)',
                r'\bcould\s+be\b.*?(implemented|added|fixed)',
                r'\bwill\s+be\b.*?(implemented|added|fixed)',
                r'\bin\s+practice\b.*?(would|should|will)\s+(analyze|merge|intelligently)',
                r'\bin\s+practice\b.*?(this\s+would|this\s+should|this\s+will)',
                r'\bfor\s+now\b.*?(just|simply|only)',
            ],
        }

        # Medium-confidence patterns (context-dependent)
        self.medium_confidence_patterns = {
            'basic_implementations': [
                r'\bbasic\s+implementation\b.*?(improve|enhance|replace)',
                r'\bsimple\s+implementation\b.*?(improve|enhance|replace)',
                r'\bminimal\s+implementation\b.*?(improve|enhance|replace)',
                r'\bnaive\s+implementation\b.*?(improve|enhance|replace)',
                r'\brough\s+implementation\b.*?(improve|enhance|replace)',
                r'\bcrude\s+implementation\b.*?(improve|enhance|replace)',
            ],
        }

        # Patterns to exclude (legitimate technical terms and documentation)
        self.exclusion_patterns = [
            # Performance and optimization terms
            r'\bperformance\s+monitoring\b',
            r'\bperformance\s+optimization\b',
            r'\bperformance\s+analysis\b',
            r'\bperformance\s+benchmark\b',
            r'\boptimize\s+for\s+performance\b',
            r'\boptimization\s+strategy\b',
            r'\befficient\s+implementation\b',

            # Simulation and testing terms
            r'\bsimulation\s+environment\b',
            r'\bsimulate\s+network\s+conditions\b',
            r'\bsimulate\s+.*?(behavior|response|data)\b',
            r'\bsimulation\s+.*?(mode|environment)\b',

            # Fallback and error handling
            r'\bfallback\s+mechanism\b',
            r'\bfallback\s+strategy\b',
            r'\bfallback\s+to\b.*?(method|function|implementation)',

            # Authentication and security
            r'\bbasic\s+authentication\b',
            r'\bbasic\s+configuration\b',
            r'\bsimple\s+interface\b',
            r'\bsimple\s+api\b',

            # Mock and testing
            r'\bmock\s+object\b',
            r'\bmock\s+service\b',
            r'\bmock\s+data\b',
            r'\bmock\s+response\b',

            # Documentation patterns
            r'\bcurrent\s+implementation\b.*?(uses|provides|supports)',
            r'\bthis\s+implementation\b.*?(uses|provides|supports)',
            r'\bthe\s+implementation\b.*?(uses|provides|supports)',
            r'\bimplementation\s+uses\b',
            r'\bimplementation\s+provides\b',
            r'\bimplementation\s+supports\b',

            # Architecture and design documentation
            r'\barchitecture\s+note\b',
            r'\bdesign\s+note\b',
            r'\bpattern\s+note\b',
            r'\bdependency\s+injection\b',
            r'\bresource\s+management\b',

            # Console and logging
            r'console\.(log|warn|error|info)',
            r'\blogging\s+implementation\b',
        ]

        # Engineering-grade TODO template patterns (for suggestions)
        self.engineering_grade_patterns = {
            'completion_checklist': [
                r'COMPLETION CHECKLIST:',
                r'COMPLETION CRITERIA:',
                r'CHECKLIST:',
                r'\[ \]',
                r'\[x\]',
            ],
            'acceptance_criteria': [
                r'ACCEPTANCE CRITERIA:',
                r'ACCEPTANCE:',
                r'CRITERIA:',
                r'REQUIREMENTS:',
            ],
            'dependencies': [
                r'DEPENDENCIES:',
                r'DEPENDS ON:',
                r'REQUIRES:',
                r'BLOCKED BY:',
            ],
            'governance': [
                r'CAWS TIER:',
                r'TIER:',
                r'PRIORITY:',
                r'BLOCKING:',
                r'ESTIMATED EFFORT:',
                r'EFFORT:',
                r'GOVERNANCE:',
            ],
            'structured_format': [
                # Multi-line TODO blocks whose second line carries structure.
                r'// TODO:.*?\n.*?//\s*COMPLETION',
                r'// TODO:.*?\n.*?//\s*ACCEPTANCE',
                r'// TODO:.*?\n.*?//\s*DEPENDENCIES',
            ]
        }

        # Patterns that suggest a TODO needs engineering-grade format
        self.needs_engineering_format_patterns = {
            'vague_todos': [
                r'\bTODO\b.*?(implement|add|fix|complete|do)\b.*?$',
                r'\bFIXME\b.*?(implement|add|fix|complete|do)\b.*?$',
                r'\bHACK\b.*?(implement|add|fix|complete|do)\b.*?$',
            ],
            'missing_structure': [
                r'\bTODO\b.*?(?!.*COMPLETION CHECKLIST)(?!.*ACCEPTANCE CRITERIA)(?!.*DEPENDENCIES).*$',
                r'\bFIXME\b.*?(?!.*COMPLETION CHECKLIST)(?!.*ACCEPTANCE CRITERIA)(?!.*DEPENDENCIES).*$',
            ],
            'single_line_todos': [
                r'^\s*//\s*TODO\b.*?$',
                r'^\s*#\s*TODO\b.*?$',
            ],
            'business_critical': [
                r'\bTODO\b.*?(auth|security|payment|billing|database|persist|save|store)\b',
                r'\bTODO\b.*?(critical|important|essential|required|must)\b',
                r'\bFIXME\b.*?(auth|security|payment|billing|database|persist|save|store)\b',
            ]
        }

        # Context clues that suggest documentation rather than TODO
        self.documentation_indicators = [
            r'@param',
            r'@return',
            r'@throws',
            r'@author',
            r'@date',
            r'@version',
            r'@description',
            r'@example',
            r'@see',
            r'@since',
            r'@deprecated',
            r'\*\s*\*\s*\*',  # JSDoc comment blocks
            r'^\s*/\*\*',  # Start of JSDoc
            r'^\s*# ',  # Markdown headers
            r'^\s*## ',  # Markdown subheaders
            r'^\s*### ',  # Markdown sub-subheaders
        ]

        # Context clues that suggest actual TODO
        self.todo_indicators = [
            r'\btodo\b',
            r'\bfixme\b',
            r'\bhack\b',
            r'\bneed\s+to\b',
            r'\bshould\s+be\b',
            r'\bmust\s+be\b',
            r'\bhas\s+to\b',
            r'\brequired\s+to\b',
            r'\bmissing\b',
            r'\bincomplete\b',
            r'\bpartial\b',
            r'\bunfinished\b',
            r'\bwork\s+in\s+progress\b',
            r'\bwip\b',
        ]

        # Mutable analysis state: findings per category, per-file counters,
        # and per-pattern hit counters.
        self.results = defaultdict(list)
        self.file_stats = defaultdict(int)
        self.pattern_stats = defaultdict(int)

        # Heuristic code stub patterns keyed by language
        self.code_stub_patterns = {
            'python': {
                'function_stub': re.compile(r'^\s*def\s+\w+\(.*\):'),
                'pass_stmt': re.compile(r'^\s*pass\s*$'),
                'ellipsis_stmt': re.compile(r'^\s*\.\.\.\s*$'),
                'raise_not_impl': re.compile(r'^\s*raise\s+NotImplementedError'),
                'return_not_impl': re.compile(r'^\s*return\s+(None|NotImplemented)\s*$'),
            },
            'javascript': {
                'function_stub': re.compile(r'^\s*(async\s+)?function\s+\w+\(.*\)\s*{'),
                'arrow_stub': re.compile(r'^\s*const\s+\w+\s*=\s*\(.*\)\s*=>\s*{'),
                'throw_not_impl': re.compile(r"^\s*throw\s+new\s+Error\((\"|')(TODO|Not\s+Implemented)"),
                'return_todo': re.compile(r"^\s*return\s+(null|undefined);\s*//\s*TODO"),
            },
            'typescript': {
                'function_stub': re.compile(r'^\s*(async\s+)?function\s+\w+\(.*\)\s*{'),
                'arrow_stub': re.compile(r'^\s*const\s+\w+\s*=\s*\(.*\)\s*=>\s*{'),
                'throw_not_impl': re.compile(r"^\s*throw\s+new\s+Error\((\"|')(TODO|Not\s+Implemented)"),
                'return_todo': re.compile(r"^\s*return\s+(null|undefined);\s*//\s*TODO"),
            },
        }
581
+
582
+ def should_ignore_file(self, file_path: Path) -> bool:
583
+ """Check if a file should be ignored based on patterns."""
584
+ path_str = str(file_path)
585
+
586
+ # Check against ignored patterns
587
+ for pattern in self.ignored_file_patterns:
588
+ if re.search(pattern, path_str, re.IGNORECASE):
589
+ return True
590
+
591
+ return False
592
+
593
+ def detect_language(self, file_path: Path) -> Optional[str]:
594
+ """Detect the programming language of a file based on its extension."""
595
+ suffix = file_path.suffix.lower()
596
+
597
+ for language, config in self.language_patterns.items():
598
+ if suffix in config['extensions']:
599
+ return language
600
+
601
+ return None
602
+
603
+ def is_excluded_pattern(self, comment: str) -> bool:
604
+ """Check if a comment matches exclusion patterns (legitimate technical terms)."""
605
+ for pattern in self.exclusion_patterns:
606
+ if re.search(pattern, comment, re.IGNORECASE):
607
+ return True
608
+ return False
609
+
610
+ def is_documentation_comment(self, comment: str) -> bool:
611
+ """Check if a comment appears to be documentation rather than a TODO."""
612
+ for indicator in self.documentation_indicators:
613
+ if re.search(indicator, comment, re.IGNORECASE):
614
+ return True
615
+ return False
616
+
617
+ def has_todo_indicators(self, comment: str) -> bool:
618
+ """Check if a comment contains indicators that suggest it's an actual TODO."""
619
+ for indicator in self.todo_indicators:
620
+ if re.search(indicator, comment, re.IGNORECASE):
621
+ return True
622
+ return False
623
+
624
+ def calculate_context_score(self, comment: str, line_num: int, file_path: Path) -> float:
625
+ """Calculate a context score to help determine if this is a real TODO."""
626
+ score = 0.0
627
+
628
+ # Check for documentation indicators (reduce score)
629
+ if self.is_documentation_comment(comment):
630
+ score -= 0.5
631
+
632
+ # Check for TODO indicators (increase score)
633
+ if self.has_todo_indicators(comment):
634
+ score += 0.3
635
+
636
+ # Check if it's in a generated file (reduce score)
637
+ if self.is_generated_file(file_path):
638
+ score -= 0.4
639
+
640
+ # Check if comment is very short (likely not a TODO)
641
+ if len(comment.strip()) < 20 and not self.has_todo_indicators(comment):
642
+ score -= 0.2
643
+
644
+ # Check if comment starts with common documentation words
645
+ doc_starters = ['note:', 'current', 'this', 'the', 'implementation', 'method', 'function']
646
+ if any(comment.lower().startswith(starter) for starter in doc_starters):
647
+ score -= 0.2
648
+
649
+ score = round(score, 3)
650
+
651
+ return max(-1.0, min(1.0, score)) # Clamp between -1 and 1
652
+
653
+ def is_generated_file(self, file_path: Path) -> bool:
654
+ """Check if a file appears to be generated code."""
655
+ path_str = str(file_path)
656
+ generated_indicators = [
657
+ r'\.next\b',
658
+ r'generated',
659
+ r'build/',
660
+ r'dist/',
661
+ r'target/',
662
+ r'node_modules',
663
+ r'\.min\.',
664
+ r'\.bundle\.',
665
+ r'\.chunk\.',
666
+ ]
667
+
668
+ for indicator in generated_indicators:
669
+ if re.search(indicator, path_str, re.IGNORECASE):
670
+ return True
671
+ return False
672
+
673
    def extract_comments_from_file(self, file_path: Path) -> List[Tuple[int, str]]:
        """Extract all comments from a file based on its language.

        Returns a list of ``(line_number, comment_text)`` tuples. Multi-line
        comments/docstrings are collapsed into a single space-joined string,
        reported at the line where the block closes (or at its opening line
        when the block opens and closes on the same line). Files whose
        language cannot be detected yield an empty list; read errors are
        printed and yield whatever was collected so far.
        """
        language = self.detect_language(file_path)
        if not language:
            return []

        config = self.language_patterns[language]
        comments = []

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                lines = f.readlines()

            # State for an open multi-line comment: accumulated text pieces.
            in_multiline = False
            multiline_content = []

            for line_num, line in enumerate(lines, 1):
                original_line = line
                line = line.strip()

                # Skip empty lines
                if not line:
                    continue

                # Handle multi-line comments / docstrings
                start_pattern = config['multi_line_start']
                end_pattern = config['multi_line_end']
                if start_pattern and end_pattern:
                    if not in_multiline:
                        start_match = re.search(start_pattern, original_line)
                        if start_match:
                            in_multiline = True
                            multiline_content = []
                            # Text after the opening delimiter on the same line.
                            after_start = original_line[start_match.end():]
                            end_match_inline = re.search(end_pattern, after_start)

                            if end_match_inline:
                                # Opens and closes on one line (e.g. a
                                # one-line Python docstring): emit immediately.
                                body = after_start[:end_match_inline.start()].strip()
                                if body:
                                    multiline_content.append(body)
                                combined = ' '.join(multiline_content).strip()
                                if combined:
                                    comments.append((line_num, combined))
                                in_multiline = False
                                multiline_content = []
                            else:
                                stripped_body = after_start.strip()
                                if stripped_body:
                                    multiline_content.append(stripped_body)
                            continue
                    else:
                        end_match = re.search(end_pattern, original_line)
                        if end_match:
                            # Closing line: flush the accumulated block,
                            # reported at the closing line's number.
                            before_end = original_line[:end_match.start()].strip()
                            if before_end:
                                multiline_content.append(before_end)
                            combined = ' '.join(multiline_content).strip()
                            if combined:
                                comments.append((line_num, combined))
                            in_multiline = False
                            multiline_content = []
                            continue
                        else:
                            # Interior line of an open multi-line comment.
                            inner = original_line.strip()
                            if inner:
                                multiline_content.append(inner)
                            continue

                # Extract single-line comments (only if not in multi-line mode)
                if not in_multiline and config['single_line'] and re.search(config['single_line'], line):
                    # Remove comment prefix
                    # (every language with a 'single_line' pattern falls into
                    # exactly one of these branches, so `comment` is always
                    # bound when used below)
                    if language in ['rust', 'javascript', 'typescript', 'go', 'java', 'csharp', 'cpp', 'c', 'php']:
                        comment = re.sub(r'^\s*//\s*', '', line)
                    elif language in ['python', 'ruby', 'shell', 'yaml']:
                        comment = re.sub(r'^\s*#\s*', '', line)
                    elif language == 'markdown':
                        comment = re.sub(r'^\s*<!--\s*', '', line)
                        comment = re.sub(r'\s*-->$', '', comment)

                    if comment:
                        comments.append((line_num, comment))

        except Exception as e:
            # Best-effort: report the failure and return what we have.
            print(f"Error reading {file_path}: {e}")

        return comments
759
+
760
+ def detect_code_stubs(self, file_path: Path, language: str) -> List[Dict[str, Any]]:
761
+ """Detect code stub patterns beyond explicit comments."""
762
+ if not self.enable_code_stub_scan:
763
+ return []
764
+
765
+ patterns = self.code_stub_patterns.get(language)
766
+ if not patterns:
767
+ return []
768
+
769
+ try:
770
+ with open(file_path, 'r', encoding='utf-8') as f:
771
+ lines = f.readlines()
772
+ except Exception as e:
773
+ print(f"Error reading {file_path}: {e}")
774
+ return []
775
+
776
+ if language == 'python':
777
+ stubs = self._detect_python_code_stubs(lines, patterns)
778
+ elif language in ('javascript', 'typescript'):
779
+ stubs = self._detect_js_code_stubs(lines, patterns)
780
+ else:
781
+ stubs = []
782
+
783
+ for stub in stubs:
784
+ self.pattern_stats[stub['reason']] += 1
785
+
786
+ return stubs
787
+
788
+ def _detect_python_code_stubs(self, lines: List[str], patterns: Dict[str, re.Pattern]) -> List[Dict[str, Any]]:
789
+ stubs: List[Dict[str, Any]] = []
790
+
791
+ for idx, raw_line in enumerate(lines, 1):
792
+ stripped = raw_line.strip()
793
+ if not stripped:
794
+ continue
795
+
796
+ if patterns['function_stub'].match(raw_line):
797
+ stub_entry = self._scan_python_function_body(lines, idx, patterns)
798
+ if stub_entry:
799
+ stubs.append(stub_entry)
800
+ continue
801
+
802
+ if patterns['raise_not_impl'].search(raw_line):
803
+ stubs.append({
804
+ 'line': idx,
805
+ 'reason': 'python_raise_not_implemented',
806
+ 'snippet': stripped,
807
+ 'confidence': 0.95,
808
+ 'context_score': 0.2,
809
+ })
810
+ continue
811
+
812
+ if patterns['ellipsis_stmt'].match(raw_line):
813
+ stubs.append({
814
+ 'line': idx,
815
+ 'reason': 'python_ellipsis_stub',
816
+ 'snippet': stripped,
817
+ 'confidence': 0.85,
818
+ 'context_score': 0.15,
819
+ })
820
+ continue
821
+
822
+ return stubs
823
+
824
+ def _scan_python_function_body(self, lines: List[str], start_index: int, patterns: Dict[str, re.Pattern]) -> Optional[Dict[str, Any]]:
825
+ """Inspect the first meaningful statement in a Python function for stub markers."""
826
+ func_line = lines[start_index - 1]
827
+ func_indent = len(func_line) - len(func_line.lstrip())
828
+
829
+ for idx in range(start_index + 1, len(lines) + 1):
830
+ raw_line = lines[idx - 1]
831
+ stripped = raw_line.strip()
832
+
833
+ if not stripped or stripped.startswith('#'):
834
+ continue
835
+
836
+ current_indent = len(raw_line) - len(raw_line.lstrip())
837
+ if current_indent <= func_indent:
838
+ break
839
+
840
+ if patterns['pass_stmt'].match(raw_line):
841
+ return {
842
+ 'line': idx,
843
+ 'reason': 'python_pass_stub',
844
+ 'snippet': stripped,
845
+ 'confidence': 0.82,
846
+ 'context_score': 0.1,
847
+ }
848
+
849
+ if patterns['ellipsis_stmt'].match(raw_line):
850
+ return {
851
+ 'line': idx,
852
+ 'reason': 'python_ellipsis_stub',
853
+ 'snippet': stripped,
854
+ 'confidence': 0.82,
855
+ 'context_score': 0.1,
856
+ }
857
+
858
+ if patterns['raise_not_impl'].search(raw_line):
859
+ return {
860
+ 'line': idx,
861
+ 'reason': 'python_raise_not_implemented',
862
+ 'snippet': stripped,
863
+ 'confidence': 0.95,
864
+ 'context_score': 0.25,
865
+ }
866
+
867
+ if patterns['return_not_impl'].match(raw_line):
868
+ return {
869
+ 'line': idx,
870
+ 'reason': 'python_return_placeholder',
871
+ 'snippet': stripped,
872
+ 'confidence': 0.8,
873
+ 'context_score': 0.1,
874
+ }
875
+
876
+ # First substantive line is real implementation
877
+ break
878
+
879
+ return None
880
+
881
+ def _detect_js_code_stubs(self, lines: List[str], patterns: Dict[str, re.Pattern]) -> List[Dict[str, Any]]:
882
+ stubs: List[Dict[str, Any]] = []
883
+ total_lines = len(lines)
884
+
885
+ for idx, raw_line in enumerate(lines, 1):
886
+ stripped = raw_line.strip()
887
+ if not stripped:
888
+ continue
889
+
890
+ if patterns['throw_not_impl'].search(stripped):
891
+ stubs.append({
892
+ 'line': idx,
893
+ 'reason': 'js_throw_not_implemented',
894
+ 'snippet': stripped,
895
+ 'confidence': 0.9,
896
+ 'context_score': 0.2,
897
+ })
898
+ continue
899
+
900
+ if patterns['return_todo'].search(stripped):
901
+ stubs.append({
902
+ 'line': idx,
903
+ 'reason': 'js_return_todo',
904
+ 'snippet': stripped,
905
+ 'confidence': 0.82,
906
+ 'context_score': 0.1,
907
+ })
908
+ continue
909
+
910
+ if patterns['function_stub'].match(raw_line) or patterns['arrow_stub'].match(raw_line):
911
+ stub_entry = self._scan_js_function_body(lines, idx, patterns)
912
+ if stub_entry:
913
+ stubs.append(stub_entry)
914
+
915
+ return stubs
916
+
917
+ def _scan_js_function_body(self, lines: List[str], start_index: int, patterns: Dict[str, re.Pattern]) -> Optional[Dict[str, Any]]:
918
+ """Inspect the first executable statement in a JS/TS function body."""
919
+ opening_line = lines[start_index - 1]
920
+ initial_brace_count = opening_line.count('{') - opening_line.count('}')
921
+ brace_depth = max(initial_brace_count, 0)
922
+
923
+ for idx in range(start_index + 1, len(lines) + 1):
924
+ raw_line = lines[idx - 1]
925
+ stripped = raw_line.strip()
926
+
927
+ brace_depth += raw_line.count('{')
928
+ brace_depth -= raw_line.count('}')
929
+
930
+ if not stripped or stripped.startswith('//') or stripped.startswith('/*'):
931
+ continue
932
+
933
+ if brace_depth < 0:
934
+ break
935
+
936
+ if patterns['throw_not_impl'].search(stripped):
937
+ return {
938
+ 'line': idx,
939
+ 'reason': 'js_throw_not_implemented',
940
+ 'snippet': stripped,
941
+ 'confidence': 0.9,
942
+ 'context_score': 0.2,
943
+ }
944
+
945
+ if patterns['return_todo'].search(stripped):
946
+ return {
947
+ 'line': idx,
948
+ 'reason': 'js_return_todo',
949
+ 'snippet': stripped,
950
+ 'confidence': 0.82,
951
+ 'context_score': 0.1,
952
+ }
953
+
954
+ if brace_depth <= 0:
955
+ break
956
+
957
+ # Found non-stub statement
958
+ break
959
+
960
+ return None
961
+
962
+ def analyze_comment(self, comment: str, line_num: int, file_path: Path) -> Dict[str, Any]:
963
+ """Analyze a single comment for hidden TODO patterns with enhanced context awareness."""
964
+ normalized = comment.strip()
965
+ if not normalized:
966
+ return {}
967
+
968
+ comment = normalized
969
+ matches = defaultdict(list)
970
+ confidence_scores = []
971
+
972
+ # Skip if this is an excluded pattern (legitimate technical term)
973
+ if self.is_excluded_pattern(comment):
974
+ return {}
975
+
976
+ # Calculate context score
977
+ context_score = self.calculate_context_score(comment, line_num, file_path)
978
+
979
+ # Check explicit TODO patterns (highest confidence)
980
+ for pattern in self.explicit_todo_patterns['explicit_todos']:
981
+ if re.search(pattern, comment, re.IGNORECASE):
982
+ matches['explicit_todos'].append(pattern)
983
+ # Adjust confidence based on context
984
+ base_confidence = 1.0
985
+ adjusted_confidence = min(1.0, max(0.1, base_confidence + context_score * 0.3))
986
+ confidence_scores.append(('explicit', adjusted_confidence))
987
+ self.pattern_stats[pattern] += 1
988
+
989
+ # Check high-confidence patterns
990
+ for category, patterns in self.high_confidence_patterns.items():
991
+ for pattern in patterns:
992
+ if re.search(pattern, comment, re.IGNORECASE):
993
+ matches[category].append(pattern)
994
+ # Adjust confidence based on context
995
+ base_confidence = 0.9
996
+ adjusted_confidence = min(1.0, max(0.1, base_confidence + context_score * 0.2))
997
+ confidence_scores.append((category, adjusted_confidence))
998
+ self.pattern_stats[pattern] += 1
999
+
1000
+ # Check medium-confidence patterns
1001
+ for category, patterns in self.medium_confidence_patterns.items():
1002
+ for pattern in patterns:
1003
+ if re.search(pattern, comment, re.IGNORECASE):
1004
+ matches[category].append(pattern)
1005
+ # Adjust confidence based on context
1006
+ base_confidence = 0.6
1007
+ adjusted_confidence = min(1.0, max(0.1, base_confidence + context_score * 0.1))
1008
+ confidence_scores.append((category, adjusted_confidence))
1009
+ self.pattern_stats[pattern] += 1
1010
+
1011
+ # Calculate overall confidence score
1012
+ if not confidence_scores:
1013
+ return {}
1014
+
1015
+ overall_confidence = max(score for _, score in confidence_scores)
1016
+
1017
+ return {
1018
+ 'matches': matches,
1019
+ 'confidence_score': overall_confidence,
1020
+ 'confidence_breakdown': confidence_scores,
1021
+ 'context_score': context_score
1022
+ }
1023
+
1024
    def analyze_engineering_grade_suggestions(self, comment: str, line_num: int, file_path: Path) -> Dict[str, Any]:
        """Analyze a TODO comment to suggest engineering-grade format improvements.

        Only comments containing an explicit TODO/FIXME/HACK marker are
        considered. The result describes whether the comment needs the
        structured format, which template sections are missing, a suggested
        CAWS tier and priority, a ready-to-paste template, and a confidence
        value; an empty dict means "nothing to suggest".
        """
        normalized = comment.strip()
        if not normalized:
            return {}

        # Only analyze explicit TODOs
        if not re.search(r'\b(TODO|FIXME|HACK)\b', normalized, re.IGNORECASE):
            return {}

        suggestions = {
            'needs_engineering_format': False,
            'missing_elements': [],
            'suggested_tier': None,
            'priority_level': 'Medium',
            'template_suggestion': None,
            'confidence': 0.0
        }

        # Check if already has engineering-grade structure
        has_structure = False
        for category, patterns in self.engineering_grade_patterns.items():
            for pattern in patterns:
                if re.search(pattern, normalized, re.IGNORECASE):
                    has_structure = True
                    break
            if has_structure:
                break

        if not has_structure:
            suggestions['needs_engineering_format'] = True
            suggestions['confidence'] = 0.8

        # Check what's missing
        missing_elements = []

        # Check for completion checklist
        if not any(re.search(pattern, normalized, re.IGNORECASE) for pattern in self.engineering_grade_patterns['completion_checklist']):
            missing_elements.append('completion_checklist')

        # Check for acceptance criteria
        if not any(re.search(pattern, normalized, re.IGNORECASE) for pattern in self.engineering_grade_patterns['acceptance_criteria']):
            missing_elements.append('acceptance_criteria')

        # Check for dependencies
        if not any(re.search(pattern, normalized, re.IGNORECASE) for pattern in self.engineering_grade_patterns['dependencies']):
            missing_elements.append('dependencies')

        # Check for governance info
        if not any(re.search(pattern, normalized, re.IGNORECASE) for pattern in self.engineering_grade_patterns['governance']):
            missing_elements.append('governance')

        suggestions['missing_elements'] = missing_elements

        # Determine suggested CAWS tier based on content.
        # NOTE(review): every branch below overwrites the 0.8 confidence set
        # when the comment lacked structure — confirm that is intended.
        if any(re.search(pattern, normalized, re.IGNORECASE) for pattern in self.needs_engineering_format_patterns['business_critical']):
            suggestions['suggested_tier'] = 1
            suggestions['priority_level'] = 'Critical'
            suggestions['confidence'] = 0.9
        elif any(re.search(pattern, normalized, re.IGNORECASE) for pattern in self.needs_engineering_format_patterns['vague_todos']):
            suggestions['suggested_tier'] = 2
            suggestions['priority_level'] = 'High'
            suggestions['confidence'] = 0.7
        else:
            suggestions['suggested_tier'] = 3
            suggestions['priority_level'] = 'Medium'
            suggestions['confidence'] = 0.6

        # Generate template suggestion
        suggestions['template_suggestion'] = self._generate_template_suggestion(normalized, suggestions, file_path)

        return suggestions
1096
+
1097
    def _generate_template_suggestion(self, todo_text: str, suggestions: Dict[str, Any], file_path: Path) -> str:
        """Generate a suggested engineering-grade TODO template based on the original TODO.

        Extracts the TODO/FIXME/HACK marker and its trailing description,
        picks a comment prefix from the file's detected language, and fills
        in the tier and priority taken from *suggestions*. Returns "" when no
        marker is found in *todo_text*.
        """
        # Extract the main TODO description
        todo_match = re.search(r'\b(TODO|FIXME|HACK)\b[:\s]*(.*?)$', todo_text, re.IGNORECASE)
        if not todo_match:
            return ""

        todo_type = todo_match.group(1).upper()
        description = todo_match.group(2).strip()

        # Determine language-specific comment prefix
        language = self.detect_language(file_path)
        if language in ['rust', 'javascript', 'typescript', 'go', 'java', 'csharp', 'cpp', 'c']:
            comment_prefix = "//"
        elif language in ['python', 'ruby', 'shell', 'yaml']:
            comment_prefix = "#"
        else:
            # Unknown languages fall back to C-style comments.
            comment_prefix = "//"

        tier = suggestions.get('suggested_tier', 2)
        priority = suggestions.get('priority_level', 'Medium')

        template = f"""{comment_prefix} {todo_type}: {description}
{comment_prefix} <One-sentence context & why this exists>
{comment_prefix}
{comment_prefix} COMPLETION CHECKLIST:
{comment_prefix} [ ] Primary functionality implemented
{comment_prefix} [ ] API/data structures defined & stable
{comment_prefix} [ ] Error handling + validation aligned with error taxonomy
{comment_prefix} [ ] Tests: Unit ≥80% branch coverage (≥50% mutation if enabled)
{comment_prefix} [ ] Integration tests for external systems/contracts
{comment_prefix} [ ] Documentation: public API + system behavior
{comment_prefix} [ ] Performance/profiled against SLA (CPU/mem/latency throughput)
{comment_prefix} [ ] Security posture reviewed (inputs, authz, sandboxing)
{comment_prefix} [ ] Observability: logs (debug), metrics (SLO-aligned), tracing
{comment_prefix} [ ] Configurability and feature flags defined if relevant
{comment_prefix} [ ] Failure-mode cards documented (degradation paths)
{comment_prefix}
{comment_prefix} ACCEPTANCE CRITERIA:
{comment_prefix} - <User-facing measurable behavior>
{comment_prefix} - <Invariant or schema contract requirements>
{comment_prefix} - <Performance/statistical bounds>
{comment_prefix} - <Interoperation requirements or protocol contract>
{comment_prefix}
{comment_prefix} DEPENDENCIES:
{comment_prefix} - <System or feature this relies on> (Required/Optional)
{comment_prefix} - <Interop/contract references>
{comment_prefix} - File path(s)/module links to dependent code
{comment_prefix}
{comment_prefix} ESTIMATED EFFORT: <Number + confidence range>
{comment_prefix} PRIORITY: {priority}
{comment_prefix} BLOCKING: {{Yes/No}} – If Yes: explicitly list what it blocks
{comment_prefix}
{comment_prefix} GOVERNANCE:
{comment_prefix} - CAWS Tier: {tier} (impacts rigor, provenance, review policy)
{comment_prefix} - Change Budget: <LOC or file count> (if relevant)
{comment_prefix} - Reviewer Requirements: <Roles or domain expertise>"""

        return template
1156
+
1157
    def analyze_file(self, file_path: Path) -> Dict:
        """Analyze a single file for hidden TODO patterns.

        Combines comment analysis (pattern matching + context scoring) with
        code-stub detection, merging each stub into a comment finding at most
        3 lines above it so a stub and its explaining comment count as one
        TODO. Returns {} for files with no detected language or ignored files.
        """
        language = self.detect_language(file_path)
        if not language:
            return {}

        # Skip ignored files
        if self.should_ignore_file(file_path):
            return {}

        comments = self.extract_comments_from_file(file_path)
        try:
            relative_path = str(file_path.relative_to(self.root_dir))
        except ValueError:
            # File lives outside root_dir; fall back to the path as given.
            relative_path = str(file_path)

        file_analysis = {
            'file_path': relative_path,
            'language': language,
            'total_comments': len(comments),
            'total_lines': self._count_file_lines(file_path),
            'comment_lines': len(comments),
            'hidden_todos': defaultdict(list),
            'all_comments': []
        }

        for line_num, comment in comments:
            analysis = self.analyze_comment(comment, line_num, file_path)
            engineering_suggestions = self.analyze_engineering_grade_suggestions(comment, line_num, file_path)

            if analysis and analysis['matches']:
                todo_data = {
                    'comment': comment,
                    'matches': analysis['matches'],
                    'confidence_score': analysis['confidence_score'],
                    'confidence_breakdown': analysis['confidence_breakdown'],
                    'context_score': analysis['context_score']
                }

                # Add engineering-grade suggestions if available
                if engineering_suggestions and engineering_suggestions.get('needs_engineering_format'):
                    todo_data['engineering_suggestions'] = engineering_suggestions

                file_analysis['hidden_todos'][line_num] = todo_data

            # Store all comments for analysis
            file_analysis['all_comments'].append({
                'line': line_num,
                'comment': comment
            })

        # Detect stub implementations in code bodies
        for stub in self.detect_code_stubs(file_path, language):
            line_num = stub['line']
            reason = stub['reason']
            snippet = stub['snippet']
            confidence = stub['confidence']
            context = stub.get('context_score', 0.0)

            # Attempt to merge with nearby comment within 3 lines above
            nearby_comment_line = None
            for existing_line in sorted(file_analysis['hidden_todos'].keys()):
                if existing_line == line_num:
                    # Exact hit wins immediately.
                    nearby_comment_line = existing_line
                    break
                if existing_line < line_num and line_num - existing_line <= 3:
                    # Keep scanning: a later (closer) line may still qualify.
                    nearby_comment_line = existing_line

            target_line = nearby_comment_line if nearby_comment_line is not None else line_num

            if target_line in file_analysis['hidden_todos']:
                # Fold the stub into the existing comment finding, keeping the
                # higher of the two confidence/context scores.
                entry = file_analysis['hidden_todos'][target_line]
                entry['matches'].setdefault('code_stubs', []).append(reason)
                entry['confidence_score'] = max(entry['confidence_score'], confidence)
                entry['confidence_breakdown'].append(('code_stub', confidence))
                entry['context_score'] = max(entry['context_score'], context)
                if target_line != line_num:
                    related = entry.setdefault('related_stub_lines', [])
                    if line_num not in related:
                        related.append(line_num)
            else:
                # Standalone stub: record it as its own finding.
                file_analysis['hidden_todos'][target_line] = {
                    'comment': snippet,
                    'matches': defaultdict(list, {'code_stubs': [reason]}),
                    'confidence_score': confidence,
                    'confidence_breakdown': [('code_stub', confidence)],
                    'context_score': context,
                }
                file_analysis['all_comments'].append({
                    'line': line_num,
                    'comment': snippet
                })

        return file_analysis
1251
+
1252
+ def _count_file_lines(self, file_path: Path) -> int:
1253
+ """Count total lines in a file."""
1254
+ try:
1255
+ with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
1256
+ return sum(1 for _ in f)
1257
+ except Exception:
1258
+ return 0
1259
+
1260
    def analyze_directory(self, languages: Optional[List[str]] = None, min_confidence: float = 0.7, v3_only: bool = False) -> Dict:
        """Analyze all files in the directory for hidden TODO patterns with improved accuracy.

        Args:
            languages: optional whitelist of language names to scan.
            min_confidence: findings below this score are dropped from 'files'.
            v3_only: restrict the scan to the iterations/v3/ subtree.

        Returns:
            A dict with 'summary' (aggregate counters), 'files' (per-file
            findings at or above the threshold) and 'patterns' (findings
            grouped by match category).
        """
        print(f"Analyzing files with improved patterns in: {self.root_dir}")
        print(f"Minimum confidence threshold: {min_confidence}")

        # Get all files with supported extensions
        all_files = []
        for language, config in self.language_patterns.items():
            if languages and language not in languages:
                continue
            for ext in config['extensions']:
                if v3_only:
                    # Only analyze files in iterations/v3/ directory
                    v3_dir = self.root_dir / 'iterations' / 'v3'
                    if v3_dir.exists():
                        all_files.extend(v3_dir.rglob(f'*{ext}'))
                else:
                    all_files.extend(self.root_dir.rglob(f'*{ext}'))

        # Filter out ignored files
        non_ignored_files = [
            f for f in all_files if not self.should_ignore_file(f)]

        print(f"Found {len(all_files)} total files")
        print(f"Found {len(non_ignored_files)} non-ignored files")

        # Count by language
        language_counts = defaultdict(int)
        for file_path in non_ignored_files:
            language = self.detect_language(file_path)
            if language:
                language_counts[language] += 1

        print("Files by language:")
        for lang, count in sorted(language_counts.items()):
            print(f"  {lang}: {count} files")

        # Reset pattern statistics for this run
        self.pattern_stats = defaultdict(int)

        all_results = {
            'summary': {
                'total_files': len(all_files),
                'non_ignored_files': len(non_ignored_files),
                'ignored_files': len(all_files) - len(non_ignored_files),
                'language_counts': dict(language_counts),
                'files_with_hidden_todos': 0,
                'total_hidden_todos': 0,
                'high_confidence_todos': 0,
                'medium_confidence_todos': 0,
                'low_confidence_todos': 0,
                'code_stub_todos': 0,
                'pattern_counts': {},
                'min_confidence_threshold': min_confidence,
            },
            'files': {},
            'patterns': defaultdict(list)
        }

        for file_path in non_ignored_files:
            print(f"Analyzing: {file_path.relative_to(self.root_dir)}")
            file_analysis = self.analyze_file(file_path)

            if file_analysis and file_analysis['hidden_todos']:
                # Filter by confidence threshold
                filtered_todos = {}
                for line_num, data in file_analysis['hidden_todos'].items():
                    if data['confidence_score'] >= min_confidence:
                        filtered_todos[line_num] = data

                    # Count by confidence level
                    # NOTE(review): tallied for every finding, including ones
                    # below the reporting threshold — confirm intended.
                    if data['confidence_score'] >= 0.9:
                        all_results['summary']['high_confidence_todos'] += 1
                    elif data['confidence_score'] >= 0.6:
                        all_results['summary']['medium_confidence_todos'] += 1
                    else:
                        all_results['summary']['low_confidence_todos'] += 1

                if filtered_todos:
                    file_analysis['hidden_todos'] = filtered_todos
                    all_results['files'][file_analysis['file_path']] = file_analysis
                    all_results['summary']['files_with_hidden_todos'] += 1
                    all_results['summary']['total_hidden_todos'] += len(filtered_todos)

                    # Group by patterns
                    for line_num, data in filtered_todos.items():
                        for category, patterns in data['matches'].items():
                            all_results['patterns'][category].append({
                                'file': file_analysis['file_path'],
                                'language': file_analysis['language'],
                                'line': line_num,
                                'comment': data['comment'],
                                'patterns': patterns,
                                'confidence_score': data['confidence_score'],
                                'context_score': data['context_score']
                            })
                            if category == 'code_stubs':
                                all_results['summary']['code_stub_todos'] += 1

        all_results['summary']['pattern_counts'] = dict(self.pattern_stats)

        return all_results
1362
+
1363
    def analyze_files(self, file_paths: List[str], min_confidence: float = 0.7) -> Dict:
        """Analyze specific files for hidden TODO patterns.

        Args:
            file_paths: explicit paths to scan; missing or non-file paths are
                warned about and skipped.
            min_confidence: findings below this score are dropped.

        Returns:
            A dict with 'summary', per-file 'files', and 'patterns' keyed by
            the match-category name.
        """
        print(f"Analyzing {len(file_paths)} specific files with improved patterns")
        print(f"Minimum confidence threshold: {min_confidence}")

        # Convert string paths to Path objects and filter valid files
        valid_files = []
        for file_path in file_paths:
            path = Path(file_path)
            if path.exists() and path.is_file():
                valid_files.append(path)
            else:
                print(f"Warning: File not found or not accessible: {file_path}")

        if not valid_files:
            print("No valid files to analyze")
            # Empty result shell so callers can rely on the same shape.
            return {
                'summary': {
                    'total_files': 0,
                    'non_ignored_files': 0,
                    'ignored_files': 0,
                    'language_counts': {},
                    'files_with_hidden_todos': 0,
                    'total_hidden_todos': 0,
                    'high_confidence_todos': 0,
                    'medium_confidence_todos': 0,
                    'low_confidence_todos': 0,
                    'code_stub_todos': 0,
                    'pattern_counts': {},
                    'min_confidence_threshold': min_confidence,
                },
                'files': {},
                'patterns': defaultdict(list)
            }

        # Reset pattern statistics for this run
        self.pattern_stats = defaultdict(int)

        # Count languages
        language_counts = Counter()
        non_ignored_files = []

        for file_path in valid_files:
            language = self.detect_language(file_path)
            if language:
                language_counts[language] += 1
            # Skip ignored files
            if not self.should_ignore_file(file_path):
                non_ignored_files.append(file_path)

        all_results = {
            'summary': {
                'total_files': len(valid_files),
                'non_ignored_files': len(non_ignored_files),
                'ignored_files': len(valid_files) - len(non_ignored_files),
                'language_counts': dict(language_counts),
                'files_with_hidden_todos': 0,
                'total_hidden_todos': 0,
                'high_confidence_todos': 0,
                'medium_confidence_todos': 0,
                'low_confidence_todos': 0,
                'code_stub_todos': 0,
                'pattern_counts': {},
                'min_confidence_threshold': min_confidence,
            },
            'files': {},
            'patterns': defaultdict(list)
        }

        for file_path in non_ignored_files:
            print(f"Analyzing: {file_path}")
            file_analysis = self.analyze_file(file_path)

            if file_analysis and file_analysis['hidden_todos']:
                # Filter by confidence threshold
                filtered_todos = {}
                for line_num, data in file_analysis['hidden_todos'].items():
                    if data['confidence_score'] >= min_confidence:
                        filtered_todos[line_num] = data

                    # Count by confidence level
                    if data['confidence_score'] >= 0.9:
                        all_results['summary']['high_confidence_todos'] += 1
                    elif data['confidence_score'] >= 0.6:
                        all_results['summary']['medium_confidence_todos'] += 1
                    else:
                        all_results['summary']['low_confidence_todos'] += 1

                    # Count patterns
                    # NOTE(review): iterating 'matches' yields category names,
                    # not regex patterns, and analyze_comment already tallied
                    # the individual patterns into pattern_stats — confirm
                    # this second tally is wanted.
                    for pattern in data.get('matches', []):
                        self.pattern_stats[pattern] += 1

                if filtered_todos:
                    all_results['summary']['files_with_hidden_todos'] += 1
                    all_results['summary']['total_hidden_todos'] += len(filtered_todos)
                    all_results['files'][str(file_path)] = {
                        'language': file_analysis['language'],
                        'hidden_todos': filtered_todos,
                        'total_lines': file_analysis['total_lines'],
                        'comment_lines': file_analysis['comment_lines']
                    }

                    # Add to patterns
                    for line_num, data in filtered_todos.items():
                        for pattern in data.get('matches', []):
                            all_results['patterns'][pattern].append({
                                'file': str(file_path),
                                'language': file_analysis['language'],
                                'line': line_num,
                                'comment': data['comment'],
                                'patterns': [pattern],
                                'confidence_score': data['confidence_score'],
                                'context_score': data.get('context_score', 0.0)
                            })

        # Finalize pattern counts
        all_results['summary']['pattern_counts'] = dict(self.pattern_stats)

        return all_results
1482
+
1483
    def analyze_staged_files_with_dependencies(self, min_confidence: float = 0.7,
                                               dependency_resolution: bool = True) -> Dict:
        """
        Analyze staged files for hidden TODOs with dependency resolution.

        This method:
        1. Gets staged files from git
        2. Analyzes them for hidden TODOs
        3. Resolves dependencies to determine if TODOs are blocking
        4. Returns results with dependency information

        Returns an empty-shaped report (with 'error' in the summary) when the
        git command fails, e.g. outside a repository.
        """
        import subprocess

        try:
            # Ask git for the paths currently staged in the index.
            result = subprocess.run(['git', 'diff', '--cached', '--name-only'],
                                    capture_output=True, text=True, check=True)
            staged_files = [f.strip() for f in result.stdout.split('\n') if f.strip()]

            # Filter for supported file types
            supported_extensions = set()
            for lang_config in self.language_patterns.values():
                supported_extensions.update(lang_config['extensions'])

            staged_files = [f for f in staged_files
                            if any(f.endswith(ext) for ext in supported_extensions)]

            if not staged_files:
                # Nothing analyzable is staged: return an empty report shell.
                return {
                    'summary': {
                        'staged_files': 0,
                        'analyzed_files': 0,
                        'total_hidden_todos': 0,
                        'blocking_todos': 0,
                        'non_blocking_todos': 0,
                        'dependency_resolution_enabled': dependency_resolution
                    },
                    'files': {},
                    'dependencies': {},
                    'blocking_analysis': {}
                }

            print(f"📁 Found {len(staged_files)} staged files to analyze")

            # Analyze staged files
            analysis_results = self.analyze_files(staged_files, min_confidence)

            # Add dependency resolution if enabled
            if dependency_resolution:
                dependency_info = self._resolve_todo_dependencies(analysis_results)
                analysis_results['dependencies'] = dependency_info
                analysis_results['blocking_analysis'] = self._analyze_blocking_todos(
                    analysis_results, dependency_info)

            # Update summary with staged file info
            analysis_results['summary']['staged_files'] = len(staged_files)
            analysis_results['summary']['analyzed_files'] = len(staged_files)
            analysis_results['summary']['dependency_resolution_enabled'] = dependency_resolution

            return analysis_results

        except subprocess.CalledProcessError as e:
            # git failed (not a repo, etc.): report the error in-band.
            print(f"Error getting staged files: {e}")
            return {
                'summary': {
                    'staged_files': 0,
                    'analyzed_files': 0,
                    'total_hidden_todos': 0,
                    'blocking_todos': 0,
                    'non_blocking_todos': 0,
                    'dependency_resolution_enabled': False,
                    'error': str(e)
                },
                'files': {},
                'dependencies': {},
                'blocking_analysis': {}
            }
1560
+
1561
+ def _resolve_todo_dependencies(self, analysis_results: Dict) -> Dict:
1562
+ """
1563
+ Resolve dependencies for TODOs found in staged files.
1564
+
1565
+ Returns dependency information for each TODO.
1566
+ """
1567
+ dependencies = {}
1568
+
1569
+ for file_path, file_data in analysis_results['files'].items():
1570
+ file_deps = {}
1571
+
1572
+ for line_num, todo_data in file_data['hidden_todos'].items():
1573
+ todo_text = todo_data['comment']
1574
+ todo_deps = self._extract_dependencies_from_todo(todo_text, file_path)
1575
+ file_deps[line_num] = todo_deps
1576
+
1577
+ if file_deps:
1578
+ dependencies[file_path] = file_deps
1579
+
1580
+ return dependencies
1581
+
1582
+ def _extract_dependencies_from_todo(self, todo_text: str, file_path: str) -> Dict:
1583
+ """
1584
+ Extract dependency information from TODO text using the engineering-grade template.
1585
+
1586
+ Looks for:
1587
+ - DEPENDENCIES section
1588
+ - BLOCKING status
1589
+ - CAWS Tier
1590
+ - Required/Optional dependencies
1591
+ """
1592
+ dependencies = {
1593
+ 'blocking': False,
1594
+ 'caws_tier': None,
1595
+ 'required_deps': [],
1596
+ 'optional_deps': [],
1597
+ 'estimated_effort': None,
1598
+ 'priority': 'Medium'
1599
+ }
1600
+
1601
+ # Look for DEPENDENCIES section
1602
+ deps_match = re.search(r'DEPENDENCIES:\s*\n((?:- .*\n?)*)', todo_text, re.MULTILINE)
1603
+ if deps_match:
1604
+ deps_text = deps_match.group(1)
1605
+ for line in deps_text.split('\n'):
1606
+ line = line.strip()
1607
+ if line.startswith('- '):
1608
+ dep_text = line[2:].strip()
1609
+ if '(Required)' in dep_text:
1610
+ dependencies['required_deps'].append(dep_text.replace('(Required)', '').strip())
1611
+ elif '(Optional)' in dep_text:
1612
+ dependencies['optional_deps'].append(dep_text.replace('(Optional)', '').strip())
1613
+ else:
1614
+ # Default to required if not specified
1615
+ dependencies['required_deps'].append(dep_text)
1616
+
1617
+ # Look for BLOCKING status
1618
+ blocking_match = re.search(r'BLOCKING:\s*{Yes|No}', todo_text)
1619
+ if blocking_match:
1620
+ dependencies['blocking'] = 'Yes' in blocking_match.group(0)
1621
+
1622
+ # Look for CAWS Tier
1623
+ tier_match = re.search(r'CAWS Tier:\s*(\d+)', todo_text)
1624
+ if tier_match:
1625
+ dependencies['caws_tier'] = int(tier_match.group(1))
1626
+
1627
+ # Look for PRIORITY
1628
+ priority_match = re.search(r'PRIORITY:\s*{Critical|High|Medium|Low}', todo_text)
1629
+ if priority_match:
1630
+ dependencies['priority'] = priority_match.group(0).split(':')[1].strip()
1631
+
1632
+ # Look for ESTIMATED EFFORT
1633
+ effort_match = re.search(r'ESTIMATED EFFORT:\s*([^\\n]+)', todo_text)
1634
+ if effort_match:
1635
+ dependencies['estimated_effort'] = effort_match.group(1).strip()
1636
+
1637
+ return dependencies
1638
+
1639
+ def _analyze_blocking_todos(self, analysis_results: Dict, dependency_info: Dict) -> Dict:
1640
+ """
1641
+ Analyze which TODOs are blocking based on dependency resolution.
1642
+ """
1643
+ blocking_analysis = {
1644
+ 'blocking_todos': [],
1645
+ 'non_blocking_todos': [],
1646
+ 'critical_blockers': [],
1647
+ 'dependency_summary': {
1648
+ 'total_required_deps': 0,
1649
+ 'resolved_deps': 0,
1650
+ 'unresolved_deps': 0
1651
+ }
1652
+ }
1653
+
1654
+ for file_path, file_data in analysis_results['files'].items():
1655
+ file_deps = dependency_info.get(file_path, {})
1656
+
1657
+ for line_num, todo_data in file_data['hidden_todos'].items():
1658
+ todo_deps = file_deps.get(line_num, {})
1659
+
1660
+ # Determine if TODO is blocking
1661
+ is_blocking = self._is_todo_blocking(todo_deps, file_path)
1662
+
1663
+ todo_info = {
1664
+ 'file': file_path,
1665
+ 'line': line_num,
1666
+ 'text': todo_data['comment'],
1667
+ 'confidence': todo_data['confidence_score'],
1668
+ 'dependencies': todo_deps,
1669
+ 'blocking': is_blocking
1670
+ }
1671
+
1672
+ if is_blocking:
1673
+ blocking_analysis['blocking_todos'].append(todo_info)
1674
+
1675
+ # Check if it's critical
1676
+ if todo_deps.get('priority') == 'Critical' or todo_deps.get('caws_tier') == 1:
1677
+ blocking_analysis['critical_blockers'].append(todo_info)
1678
+ else:
1679
+ blocking_analysis['non_blocking_todos'].append(todo_info)
1680
+
1681
+ # Update dependency summary
1682
+ required_deps = todo_deps.get('required_deps', [])
1683
+ blocking_analysis['dependency_summary']['total_required_deps'] += len(required_deps)
1684
+ # For now, assume all dependencies are unresolved (would need actual resolution logic)
1685
+ blocking_analysis['dependency_summary']['unresolved_deps'] += len(required_deps)
1686
+
1687
+ return blocking_analysis
1688
+
1689
+ def _is_todo_blocking(self, todo_deps: Dict, file_path: str) -> bool:
1690
+ """
1691
+ Determine if a TODO is blocking based on its dependencies and context.
1692
+ """
1693
+ # Explicit blocking flag
1694
+ if todo_deps.get('blocking', False):
1695
+ return True
1696
+
1697
+ # High priority or critical tier
1698
+ if todo_deps.get('priority') in ['Critical', 'High']:
1699
+ return True
1700
+
1701
+ if todo_deps.get('caws_tier') == 1:
1702
+ return True
1703
+
1704
+ # Has required dependencies (simplified check)
1705
+ if todo_deps.get('required_deps'):
1706
+ return True
1707
+
1708
+ return False
1709
+
1710
def generate_report(self, results: Dict) -> str:
    """Render the analysis results as a Markdown report string.

    Sections are emitted in order: headline summary, files-by-language
    breakdown, pattern statistics, files with high-confidence TODOs,
    engineering-grade TODO format suggestions (capped at 10 detailed
    entries), and per-category confidence tiers.

    Args:
        results: Analysis output dict; must contain 'summary', 'files'
            and 'patterns' keys as produced by the analyze_* methods.

    Returns:
        The complete report as a single newline-joined string.
    """
    report = []
    report.append("# Improved Hidden TODO Analysis Report (v2.0)")
    report.append("=" * 60)
    report.append("")

    # Summary: headline counts copied verbatim from the summary dict.
    summary = results['summary']
    report.append("## Summary")
    report.append(f"- Total files: {summary['total_files']}")
    report.append(f"- Non-ignored files: {summary['non_ignored_files']}")
    report.append(f"- Ignored files: {summary['ignored_files']}")
    report.append(f"- Files with hidden TODOs: {summary['files_with_hidden_todos']}")
    report.append(f"- Total hidden TODOs found: {summary['total_hidden_todos']}")
    # 'code_stub_todos' may be absent (e.g. stub scanning disabled), so default to 0.
    report.append(f"- Code stub detections: {summary.get('code_stub_todos', 0)}")
    report.append(f"- High confidence TODOs (≥0.9): {summary['high_confidence_todos']}")
    report.append(f"- Medium confidence TODOs (≥0.6): {summary['medium_confidence_todos']}")
    report.append(f"- Low confidence TODOs (<0.6): {summary['low_confidence_todos']}")
    report.append(f"- Minimum confidence threshold: {summary['min_confidence_threshold']}")
    report.append("")

    # Language breakdown, sorted alphabetically by language name.
    report.append("## Files by Language")
    for lang, count in sorted(summary['language_counts'].items()):
        report.append(f"- **{lang}**: {count} files")
    report.append("")

    # Pattern statistics, most frequent first; zero-count patterns skipped.
    if summary['pattern_counts']:
        report.append("## Pattern Statistics")
        for pattern, count in sorted(summary['pattern_counts'].items(), key=lambda x: x[1], reverse=True):
            if count > 0:
                report.append(f"- `{pattern}`: {count} occurrences")
        report.append("")

    # Files with most high-confidence hidden TODOs (confidence >= 0.9),
    # sorted by descending count.
    if results['files']:
        report.append("## Files with High-Confidence Hidden TODOs")
        file_todo_counts = []
        for file_path, data in results['files'].items():
            high_conf_count = sum(1 for todo in data['hidden_todos'].values()
                                  if todo['confidence_score'] >= 0.9)
            if high_conf_count > 0:
                file_todo_counts.append((file_path, data['language'], high_conf_count))

        file_todo_counts.sort(key=lambda x: x[2], reverse=True)
        for file_path, language, count in file_todo_counts:
            report.append(f"- `{file_path}` ({language}): {count} high-confidence TODOs")
        report.append("")

    # Collect TODOs flagged as needing the engineering-grade format.
    engineering_suggestions = []
    for file_path, file_data in results['files'].items():
        for line_num, todo_data in file_data['hidden_todos'].items():
            if 'engineering_suggestions' in todo_data:
                suggestions = todo_data['engineering_suggestions']
                if suggestions.get('needs_engineering_format'):
                    engineering_suggestions.append({
                        'file': file_path,
                        'line': line_num,
                        'language': file_data['language'],
                        'original_comment': todo_data['comment'],
                        'suggestions': suggestions
                    })

    if engineering_suggestions:
        report.append("## Engineering-Grade TODO Suggestions")
        report.append("")
        report.append("The following TODOs should be upgraded to the engineering-grade format:")
        report.append("")

        for suggestion in engineering_suggestions[:10]:  # Limit to top 10
            report.append(f"### `{suggestion['file']}:{suggestion['line']}` ({suggestion['language']})")
            # Comment truncated to 100 chars; the "..." suffix is appended unconditionally.
            report.append(f"**Original:** {suggestion['original_comment'][:100]}...")
            report.append(f"**Suggested Tier:** {suggestion['suggestions']['suggested_tier']}")
            report.append(f"**Priority:** {suggestion['suggestions']['priority_level']}")
            report.append(f"**Missing Elements:** {', '.join(suggestion['suggestions']['missing_elements'])}")
            report.append("")
            report.append("**Suggested Template:**")
            report.append("```")
            report.append(suggestion['suggestions']['template_suggestion'])
            report.append("```")
            report.append("")

        if len(engineering_suggestions) > 10:
            report.append(f"... and {len(engineering_suggestions) - 10} more TODOs need engineering-grade format")
            report.append("")

    # Pattern categories split into high/medium/low confidence tiers.
    # Only a few example items per tier are printed; low-confidence items
    # get a count line only (no details).
    if results['patterns']:
        report.append("## Pattern Categories by Confidence")
        for category, items in results['patterns'].items():
            if items:
                high_conf_items = [item for item in items if 'confidence_score' in item and item['confidence_score'] >= 0.9]
                medium_conf_items = [item for item in items if 'confidence_score' in item and 0.6 <= item['confidence_score'] < 0.9]
                low_conf_items = [item for item in items if 'confidence_score' in item and item['confidence_score'] < 0.6]

                # Categories with only low-confidence items are omitted entirely.
                if high_conf_items or medium_conf_items:
                    report.append(f"### {category.replace('_', ' ').title()} ({len(items)} items)")

                    if high_conf_items:
                        report.append(f"#### High Confidence ({len(high_conf_items)} items)")
                        for item in high_conf_items[:3]:
                            # context_score is optional on pattern items.
                            context_info = f" (context: {item['context_score']:.1f})" if 'context_score' in item else ""
                            report.append(f"- `{item['file']}:{item['line']}` ({item['language']}, conf: {item['confidence_score']:.1f}{context_info}): {item['comment'][:80]}...")
                        if len(high_conf_items) > 3:
                            report.append(f"- ... and {len(high_conf_items) - 3} more high-confidence items")

                    if medium_conf_items:
                        report.append(f"#### Medium Confidence ({len(medium_conf_items)} items)")
                        for item in medium_conf_items[:2]:
                            context_info = f" (context: {item['context_score']:.1f})" if 'context_score' in item else ""
                            report.append(f"- `{item['file']}:{item['line']}` ({item['language']}, conf: {item['confidence_score']:.1f}{context_info}): {item['comment'][:80]}...")
                        if len(medium_conf_items) > 2:
                            report.append(f"- ... and {len(medium_conf_items) - 2} more medium-confidence items")

                    if low_conf_items:
                        report.append(f"#### Low Confidence ({len(low_conf_items)} items) - *Consider reviewing for false positives*")

                    report.append("")

    return "\n".join(report)
1833
+
1834
+
1835
# NOTE(review): no-op self-assignment — presumably kept as an explicit
# re-export/compatibility marker for the class name; confirm it is
# intentional before removing.
HiddenTodoAnalyzer = HiddenTodoAnalyzer
1836
+
1837
+
1838
def main():
    """Command-line entry point for the hidden TODO analyzer.

    Parses CLI arguments, runs the requested analysis (staged files,
    an explicit file list, or a directory scan), prints a console
    summary, optionally writes JSON/Markdown reports, and applies the
    CI / warn-only exit policy (exit code 1 in --ci-mode when hidden
    TODOs are found).
    """
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description='Analyze files for hidden TODO patterns with improved accuracy')
    parser.add_argument('--root', default='.',
                        help='Root directory to analyze (default: current directory)')
    parser.add_argument('--files', nargs='+',
                        help='Specific files to analyze (instead of scanning directory)')
    parser.add_argument('--languages', nargs='+',
                        help='Specific languages to analyze (e.g., rust python javascript)')
    parser.add_argument(
        '--output-json', help='Output JSON file for detailed results')
    parser.add_argument('--output-md', help='Output Markdown report file')
    parser.add_argument('--min-confidence', type=float, default=0.7,
                        help='Minimum confidence threshold (0.0-1.0, default: 0.7)')
    parser.add_argument('--verbose', '-v',
                        action='store_true', help='Verbose output')
    parser.add_argument('--disable-code-stub-scan',
                        action='store_true', help='Disable code stub detection heuristics')
    parser.add_argument('--ci-mode',
                        action='store_true', help='CI mode - exit with error code if hidden TODOs found')
    parser.add_argument('--warn-only',
                        action='store_true', help='Warning mode - only warn, never fail')
    parser.add_argument('--v3-only',
                        action='store_true', help='Only analyze v3 folder (matches user search scope)')
    parser.add_argument('--staged-only',
                        action='store_true', help='Only analyze staged files with dependency resolution')
    parser.add_argument('--disable-dependency-resolution',
                        action='store_true', help='Disable dependency resolution for staged files')
    # NOTE(review): --engineering-suggestions is parsed but never consumed
    # in this function — verify whether downstream code reads it.
    parser.add_argument('--engineering-suggestions',
                        action='store_true', help='Include engineering-grade TODO format suggestions')

    args = parser.parse_args()

    analyzer = HiddenTodoAnalyzer(
        args.root,
        enable_code_stub_scan=not args.disable_code_stub_scan,
    )

    # Choose the analysis mode: --staged-only takes precedence, then an
    # explicit --files list, then a full directory scan.
    if args.staged_only:
        results = analyzer.analyze_staged_files_with_dependencies(
            args.min_confidence,
            dependency_resolution=not args.disable_dependency_resolution
        )
    elif args.files:
        results = analyzer.analyze_files(args.files, args.min_confidence)
    else:
        results = analyzer.analyze_directory(args.languages, args.min_confidence, args.v3_only)

    # Console summary.
    summary = results['summary']
    print(f"\n{'='*60}")
    print("IMPROVED HIDDEN TODO ANALYSIS COMPLETE (v2.0)")
    print(f"{'='*60}")
    print(f"Total files: {summary['total_files']}")
    print(f"Non-ignored files: {summary['non_ignored_files']}")
    print(f"Ignored files: {summary['ignored_files']}")
    print(f"Files with hidden TODOs: {summary['files_with_hidden_todos']}")
    print(f"Total hidden TODOs: {summary['total_hidden_todos']}")
    print(f"High confidence (≥0.9): {summary['high_confidence_todos']}")
    print(f"Medium confidence (≥0.6): {summary['medium_confidence_todos']}")
    print(f"Low confidence (<0.6): {summary['low_confidence_todos']}")
    print(f"Confidence threshold: {summary['min_confidence_threshold']}")

    print("\nFiles by language:")
    for lang, count in sorted(summary['language_counts'].items()):
        print(f"  {lang}: {count} files")

    if summary['pattern_counts']:
        print("\nTop patterns found:")
        for pattern, count in sorted(summary['pattern_counts'].items(), key=lambda x: x[1], reverse=True)[:15]:
            if count > 0:
                print(f"  {pattern}: {count}")

    # Persist reports. Files are written as UTF-8 explicitly: the Markdown
    # report contains non-ASCII characters ("≥", emoji) that would raise
    # UnicodeEncodeError under a narrow platform-default encoding.
    if args.output_json:
        with open(args.output_json, 'w', encoding='utf-8') as f:
            json.dump(results, f, indent=2)
        print(f"\nDetailed results saved to: {args.output_json}")

    if args.output_md:
        report = analyzer.generate_report(results)
        with open(args.output_md, 'w', encoding='utf-8') as f:
            f.write(report)
        print(f"Report saved to: {args.output_md}")
    else:
        # No --output-md: print the report to the console instead.
        # Report generation is best-effort here; a failure must not mask
        # the exit-code policy below.
        try:
            print("\n" + analyzer.generate_report(results))
        except Exception as e:
            print(f"⚠️ Could not generate report: {e}")

    # Exit policy: --ci-mode fails the run when hidden TODOs exist;
    # --warn-only and the default only print a warning.
    summary = results['summary']
    total_hidden_todos = summary['total_hidden_todos']

    if total_hidden_todos > 0:
        if args.ci_mode:
            print(f"\n❌ CI MODE: Found {total_hidden_todos} hidden TODOs - blocking commit/push")
            # sys.exit instead of the site-injected exit() builtin, which
            # is not guaranteed to exist in all runtime environments.
            sys.exit(1)
        elif args.warn_only:
            print(f"\n⚠️ WARN MODE: Found {total_hidden_todos} hidden TODOs - proceeding anyway")
        else:
            print(f"\n⚠️ Found {total_hidden_todos} hidden TODOs - consider addressing them")
    else:
        print("\n✅ No hidden TODOs found - good job!")
1947
+
1948
+
1949
# Script entry point: run the CLI only when executed directly,
# not when imported as a module.
if __name__ == '__main__':
    main()