claude-self-reflect 5.0.7 → 6.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. package/.claude/agents/open-source-maintainer.md +1 -1
  2. package/.claude/agents/reflection-specialist.md +2 -2
  3. package/Dockerfile.async-importer +6 -4
  4. package/Dockerfile.importer +6 -6
  5. package/Dockerfile.safe-watcher +8 -8
  6. package/Dockerfile.streaming-importer +8 -1
  7. package/Dockerfile.watcher +8 -16
  8. package/docker-compose.yaml +12 -6
  9. package/installer/.claude/agents/README.md +138 -0
  10. package/package.json +5 -26
  11. package/src/__init__.py +0 -0
  12. package/src/cli/__init__.py +0 -0
  13. package/src/runtime/__init__.py +0 -0
  14. package/src/runtime/import-latest.py +124 -0
  15. package/{scripts → src/runtime}/precompact-hook.sh +1 -1
  16. package/src/runtime/streaming-importer.py +995 -0
  17. package/{scripts → src/runtime}/watcher-loop.sh +1 -1
  18. package/.claude/agents/claude-self-reflect-test.md +0 -1274
  19. package/.claude/agents/reflect-tester.md +0 -300
  20. package/scripts/add-timestamp-indexes.py +0 -134
  21. package/scripts/ast_grep_final_analyzer.py +0 -338
  22. package/scripts/ast_grep_unified_registry.py +0 -710
  23. package/scripts/check-collections.py +0 -29
  24. package/scripts/debug-august-parsing.py +0 -80
  25. package/scripts/debug-import-single.py +0 -91
  26. package/scripts/debug-project-resolver.py +0 -82
  27. package/scripts/debug-temporal-tools.py +0 -135
  28. package/scripts/import-conversations-enhanced.py +0 -672
  29. package/scripts/migrate-to-unified-state.py +0 -426
  30. package/scripts/session_quality_tracker.py +0 -671
  31. package/scripts/update_patterns.py +0 -334
  32. /package/{scripts → src}/importer/__init__.py +0 -0
  33. /package/{scripts → src}/importer/__main__.py +0 -0
  34. /package/{scripts → src}/importer/core/__init__.py +0 -0
  35. /package/{scripts → src}/importer/core/config.py +0 -0
  36. /package/{scripts → src}/importer/core/exceptions.py +0 -0
  37. /package/{scripts → src}/importer/core/models.py +0 -0
  38. /package/{scripts → src}/importer/embeddings/__init__.py +0 -0
  39. /package/{scripts → src}/importer/embeddings/base.py +0 -0
  40. /package/{scripts → src}/importer/embeddings/fastembed_provider.py +0 -0
  41. /package/{scripts → src}/importer/embeddings/validator.py +0 -0
  42. /package/{scripts → src}/importer/embeddings/voyage_provider.py +0 -0
  43. /package/{scripts → src}/importer/main.py +0 -0
  44. /package/{scripts → src}/importer/processors/__init__.py +0 -0
  45. /package/{scripts → src}/importer/processors/ast_extractor.py +0 -0
  46. /package/{scripts → src}/importer/processors/chunker.py +0 -0
  47. /package/{scripts → src}/importer/processors/concept_extractor.py +0 -0
  48. /package/{scripts → src}/importer/processors/conversation_parser.py +0 -0
  49. /package/{scripts → src}/importer/processors/tool_extractor.py +0 -0
  50. /package/{scripts → src}/importer/state/__init__.py +0 -0
  51. /package/{scripts → src}/importer/state/state_manager.py +0 -0
  52. /package/{scripts → src}/importer/storage/__init__.py +0 -0
  53. /package/{scripts → src}/importer/storage/qdrant_storage.py +0 -0
  54. /package/{scripts → src}/importer/utils/__init__.py +0 -0
  55. /package/{scripts → src}/importer/utils/logger.py +0 -0
  56. /package/{scripts → src}/importer/utils/project_normalizer.py +0 -0
  57. /package/{scripts → src/runtime}/delta-metadata-update-safe.py +0 -0
  58. /package/{scripts → src/runtime}/delta-metadata-update.py +0 -0
  59. /package/{scripts → src/runtime}/doctor.py +0 -0
  60. /package/{scripts → src/runtime}/embedding_service.py +0 -0
  61. /package/{scripts → src/runtime}/force-metadata-recovery.py +0 -0
  62. /package/{scripts → src/runtime}/import-conversations-unified.py +0 -0
  63. /package/{scripts → src/runtime}/import_strategies.py +0 -0
  64. /package/{scripts → src/runtime}/message_processors.py +0 -0
  65. /package/{scripts → src/runtime}/metadata_extractor.py +0 -0
  66. /package/{scripts → src/runtime}/streaming-watcher.py +0 -0
  67. /package/{scripts → src/runtime}/unified_state_manager.py +0 -0
  68. /package/{scripts → src/runtime}/utils.py +0 -0
@@ -1,710 +0,0 @@
1
- #!/usr/bin/env python3
2
- """
3
- Unified AST-GREP Pattern Registry
4
- Combines custom patterns with official catalog patterns
5
- MANDATORY: Uses AST patterns only, no regex
6
- """
7
-
8
- from typing import Dict, List, Any
9
- import json
10
- import logging
11
- from pathlib import Path
12
-
13
- # Setup logger
14
- logger = logging.getLogger(__name__)
15
- logging.basicConfig(level=logging.INFO)
16
-
17
class UnifiedASTGrepRegistry:
    """Registry of AST-GREP patterns grouped by category.

    Combines:
    1. Custom AST patterns for Python
    2. TypeScript and JavaScript patterns
    3. Shell script patterns
    4. Catalog patterns merged from ``unified_registry.json`` when that file
       exists next to this module

    All patterns are AST-based, not regex.

    Attributes:
        patterns: Mapping of category name to a list of pattern dicts. Each
            built-in dict has the keys ``id``, ``pattern``, ``description``,
            ``quality`` ("good" / "bad" / "neutral"), ``weight`` and
            ``language``; some also carry ``inside``, ``constraint``,
            ``fix`` or ``note``.
    """

    def __init__(self):
        """Load the built-in patterns, then merge the on-disk catalog if any.

        A missing catalog file is not an error. A catalog that cannot be
        read, parsed or merged is reported as a warning and the registry
        keeps whatever had been loaded up to that point.
        """
        self.patterns = self._load_unified_patterns()

        # Merge auto-updated catalog if present
        json_path = Path(__file__).parent / "unified_registry.json"
        if not json_path.exists():
            return

        try:
            with open(json_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            self._merge_catalog(data.get("patterns", {}))
        except Exception as e:
            # Deliberately best-effort: a broken catalog must not prevent the
            # static patterns from being used, but it should not be silent.
            logging.getLogger(__name__).warning(
                "Could not merge catalog %s: %s", json_path, e
            )

    def _merge_catalog(self, catalog_patterns: Dict[str, List[Dict[str, Any]]]) -> None:
        """Append catalog entries whose id is not already in their category.

        Entries without an ``id`` are always appended. An id that appears
        more than once in the catalog is only added the first time.
        """
        for category, patterns in catalog_patterns.items():
            bucket = self.patterns.setdefault(category, [])
            known_ids = {existing.get('id') for existing in bucket}
            known_ids.discard(None)  # id-less entries never block each other
            for pattern in patterns:
                pattern_id = pattern.get('id')
                if pattern_id is not None:
                    if pattern_id in known_ids:
                        continue
                    known_ids.add(pattern_id)
                bucket.append(pattern)

    @staticmethod
    def _rule(language: str, rule_id: str, pattern: str, description: str,
              quality: str, weight: int, **extra: Any) -> Dict[str, Any]:
        """Build one pattern dict.

        Keys are inserted in the order id, pattern, description, quality,
        weight, language, followed by any ``extra`` keys, so exported JSON
        keeps a stable key order.
        """
        entry = {
            "id": rule_id,
            "pattern": pattern,
            "description": description,
            "quality": quality,
            "weight": weight,
            "language": language,
        }
        entry.update(extra)
        return entry

    def _load_unified_patterns(self) -> Dict[str, List[Dict[str, Any]]]:
        """Load unified patterns from multiple sources."""
        # Order fixes the category order: Python, TypeScript, JavaScript, shell.
        loaders = (
            self._load_python_patterns,
            self._load_typescript_patterns,
            self._load_javascript_patterns,
            self._load_shell_patterns,
        )
        patterns = {}
        for load in loaders:
            patterns.update(load())
        return patterns

    def _load_python_patterns(self) -> Dict[str, List[Dict[str, Any]]]:
        """Python-specific AST patterns."""

        def py(*fields: Any, **extra: Any) -> Dict[str, Any]:
            return self._rule("python", *fields, **extra)

        return {
            "python_async": [
                py("async-function", "async def $FUNC($$$): $$$",
                   "Async function definition", "good", 2),
                py("async-with", "async with $RESOURCE: $$$",
                   "Async context manager", "good", 3),
                py("await-gather", "await asyncio.gather($$$)",
                   "Parallel async execution", "good", 4),
                py("await-call", "await $FUNC($$$)",
                   "Awaited async call", "neutral", 1),
            ],
            "python_error_handling": [
                py("specific-except", "except $ERROR: $$$",
                   "Specific exception handling", "good", 3),
                py("broad-except", "except: $$$",
                   "Bare except clause", "bad", -3),
                py("try-finally", "try: $TRY finally: $FINALLY",
                   "Try-finally block", "good", 2),
            ],
            "python_logging": [
                py("logger-call", "logger.$METHOD($$$)",
                   "Logger usage", "good", 2),
                py("print-call", "print($$$)",
                   "Print statement", "bad", -1),
                py("debug-print-f-sq", "print(f'$A')",
                   "F-string print (single quote)", "bad", -2),
                py("debug-print-f-dq", 'print(f"$A")',
                   "F-string print (double quote)", "bad", -2),
            ],
            "python_typing": [
                py("typed-function", "def $FUNC($$$) -> $RETURN: $$$",
                   "Function with return type", "good", 3),
                py("typed-async", "async def $FUNC($$$) -> $RETURN: $$$",
                   "Async function with return type", "good", 4),
                py("type-annotation", "$VAR: $TYPE = $$$",
                   "Variable type annotation", "good", 2),
            ],
            "python_antipatterns": [
                py("sync-sleep", "time.sleep($$$)",
                   "Blocking sleep in async context", "bad", -5),
                py("sync-open", "open($$$)",
                   "Sync file open (should use aiofiles)", "bad", -3),
                py("requests-call", "requests.$METHOD($$$)",
                   "Sync HTTP request (should use aiohttp)", "bad", -4),
                py("global-var", "global $VAR",
                   "Global variable usage", "bad", -2),
                py("mutable-default", "def $FUNC($$$, $ARG=[]): $$$",
                   "Mutable default argument", "bad", -4),
                py("sync-voyage-embed", "$CLIENT.embed($$$)",
                   "Blocking Voyage embed in async context", "bad", -5,
                   inside="async def $FUNC($$$): $$$"),
                py("thread-join-async", "$THREAD.join($$$)",
                   "Thread join blocking async context", "bad", -5,
                   inside="async def $FUNC($$$): $$$"),
                py("invalid-env-var-hyphen", "os.getenv('$VAR')",
                   "Environment variable with hyphen (invalid in shells)", "bad", -3,
                   constraint="$VAR matches .*-.*"),
                py("dotenv-override-runtime", "load_dotenv($$$, override=True)",
                   "Runtime environment mutation in MCP", "bad", -3),
            ],
            "python_qdrant": [
                py("qdrant-search", "$CLIENT.search($$$)",
                   "Qdrant search operation", "neutral", 1),
                py("qdrant-upsert", "$CLIENT.upsert($$$)",
                   "Qdrant upsert operation", "neutral", 1),
                py("collection-create", "create_collection($$$)",
                   "Collection creation", "neutral", 1),
            ],
            "python_mcp": [
                py("mcp-tool", "@server.tool\nasync def $TOOL($$$): $$$",
                   "MCP tool definition", "good", 5),
                py("mcp-resource", "@server.resource($$$)\nasync def $RESOURCE($$$): $$$",
                   "MCP resource definition", "good", 5),
                py("missing-embedding-guard",
                   "query_embedding = await $MGR.generate_embedding($$$)\n$$$\nawait $CLIENT.search($$$, query_vector=query_embedding, $$$)",
                   "Missing None check after embedding generation", "bad", -4),
                py("attr-vs-api", "$MGR.model_name",
                   "Accessing non-existent attribute instead of API", "bad", -3,
                   note="Use get_model_info() instead"),
                py("duplicate-import",
                   "import $MODULE\n$$$\ndef $FUNC($$$):\n $$$\n import $MODULE",
                   "Duplicate import inside function", "bad", -2),
            ],
            "python_runtime_modification": [
                py("singleton-state-change", "$SINGLETON.$ATTR = $VALUE",
                   "Runtime singleton state modification", "neutral", 0,
                   note="Can be good for mode switching, bad if uncontrolled"),
                py("public-init-exposure", "def try_initialize_$TYPE(self): $$$",
                   "Public initialization method for runtime config", "neutral", 0),
            ],
        }

    def _load_typescript_patterns(self) -> Dict[str, List[Dict[str, Any]]]:
        """TypeScript-specific patterns from catalog."""

        def ts(*fields: Any, **extra: Any) -> Dict[str, Any]:
            return self._rule("typescript", *fields, **extra)

        return {
            "typescript_async": [
                # Written out in full: "inside" sits right after "pattern"
                # here, which the shared builder would not reproduce.
                {
                    "id": "no-await-in-promise-all",
                    "pattern": "await $A",
                    "inside": "Promise.all($_)",
                    "description": "No await in Promise.all array",
                    "quality": "bad",
                    "weight": -4,
                    "language": "typescript",
                    "fix": "$A",
                },
                ts("async-function-ts", "async function $FUNC($$$) { $$$ }",
                   "Async function", "good", 2),
                ts("async-arrow", "async ($$$) => { $$$ }",
                   "Async arrow function", "good", 2),
            ],
            "typescript_console": [
                ts("no-console-log", "console.log($$$)",
                   "Console.log usage", "bad", -2, fix=""),
                ts("no-console-debug", "console.debug($$$)",
                   "Console.debug usage", "bad", -2, fix=""),
                # Same key-order reason as above.
                {
                    "id": "console-error-in-catch",
                    "pattern": "console.error($$$)",
                    "inside": "catch ($_) { $$$ }",
                    "description": "Console.error in catch (OK)",
                    "quality": "neutral",
                    "weight": 0,
                    "language": "typescript",
                },
            ],
            "typescript_react": [
                ts("useState-hook", "const [$STATE, $SETTER] = useState($$$)",
                   "React useState hook", "good", 2),
                ts("useEffect-hook", "useEffect(() => { $$$ }, $DEPS)",
                   "React useEffect hook", "neutral", 1),
                ts("useEffect-no-deps", "useEffect(() => { $$$ })",
                   "useEffect without dependencies", "bad", -3),
            ],
            "typescript_imports": [
                ts("barrel-import", "import { $$$ } from '$MODULE'",
                   "Named import", "neutral", 0),
                ts("default-import", "import $NAME from '$MODULE'",
                   "Default import", "neutral", 0),
                ts("import-all", "import * as $NAME from '$MODULE'",
                   "Import all", "neutral", -1),
            ],
        }

    def _load_shell_patterns(self) -> Dict[str, List[Dict[str, Any]]]:
        """Shell script patterns."""

        def sh(*fields: Any, **extra: Any) -> Dict[str, Any]:
            return self._rule("bash", *fields, **extra)

        return {
            "shell_env_handling": [
                sh("unused-shell-var", '$VAR="$VALUE"',
                   "Assigned but never referenced variable", "bad", -2,
                   note="Check if variable is used later"),
                sh("unsafe-var-check", '[ ! -z "$VAR" ]',
                   "Unsafe variable check (breaks with set -u)", "bad", -3,
                   fix='[ -n "${VAR:-}" ]'),
                sh("redundant-export", 'export $VAR="$VAR"',
                   "Redundant export of same value", "bad", -2),
                sh("missing-safety-flags", "#!/bin/bash",
                   "Missing safety flags", "bad", -3,
                   note="Add 'set -euo pipefail' after shebang"),
            ],
        }

    def _load_javascript_patterns(self) -> Dict[str, List[Dict[str, Any]]]:
        """JavaScript patterns (subset of TypeScript)."""

        def js(*fields: Any, **extra: Any) -> Dict[str, Any]:
            return self._rule("javascript", *fields, **extra)

        return {
            "javascript_async": [
                js("callback-hell", "$FUNC($$$, function($$$) { $$$ })",
                   "Callback pattern (consider promises)", "bad", -2),
                js("promise-then", "$PROMISE.then($$$)",
                   "Promise then chain", "neutral", 0),
                js("async-await", "await $PROMISE",
                   "Async/await usage", "good", 2),
            ],
            "javascript_var": [
                js("var-declaration", "var $VAR = $$$",
                   "Var declaration (use const/let)", "bad", -3),
                js("const-declaration", "const $VAR = $$$",
                   "Const declaration", "good", 2),
                js("let-declaration", "let $VAR = $$$",
                   "Let declaration", "good", 1),
            ],
        }

    def get_all_patterns(self) -> List[Dict[str, Any]]:
        """Get all patterns as a flat list.

        Each returned dict is a shallow copy of the stored pattern with a
        ``category`` key added, so the registry itself is not modified.
        """
        return [
            dict(pattern, category=category)
            for category, patterns in self.patterns.items()
            for pattern in patterns
        ]

    def _select(self, field: str, wanted: Any) -> List[Dict[str, Any]]:
        """Return the flattened patterns whose ``field`` equals ``wanted``."""
        return [p for p in self.get_all_patterns() if p.get(field) == wanted]

    def get_patterns_by_language(self, language: str) -> List[Dict[str, Any]]:
        """Get patterns for a specific language."""
        return self._select('language', language)

    def get_good_patterns(self) -> List[Dict[str, Any]]:
        """Get only good quality patterns."""
        return self._select('quality', 'good')

    def get_bad_patterns(self) -> List[Dict[str, Any]]:
        """Get only bad quality patterns (anti-patterns)."""
        return self._select('quality', 'bad')

    def calculate_quality_score(self, matches: List[Dict], loc: int = 1000) -> float:
        """
        Calculate quality score using penalty-based approach.
        Issues dominate the score; good patterns provide minimal bonus.

        The penalty is ``min(0.7, 0.15 * ln(1 + d))`` where ``d`` is the sum
        of ``abs(weight) * count`` over "bad" matches, divided by KLOC. That
        is about 0.36 at d=10 and about 0.59 at d=50, and the 0.7 cap is
        reached at roughly d=105. The bonus from "good" matches is capped at
        0.05. Matches with any other quality are ignored.

        Args:
            matches: List of pattern matches with quality, weight and count.
                A missing count is treated as 0 and a missing weight as 1.
                Negative counts are treated as 0.
            loc: Lines of code (for normalization); anything below 1000
                is scored as one KLOC.

        Returns:
            Score from 0.0 to 1.0
        """
        import math

        # Normalize to KLOC (thousands of lines), never less than one
        kloc = max(1.0, loc / 1000.0)

        total_issues = 0
        good_score = 0
        for match in matches:
            quality = match.get('quality')
            # A negative count is meaningless and would otherwise drive
            # log1p() out of its domain (ValueError) further down.
            count = max(0, match.get('count', 0))
            if quality == 'bad':
                # Use the magnitude of the weight as severity
                total_issues += abs(match.get('weight', 1)) * count
            elif quality == 'good':
                # Cap contribution per pattern type at 50 hits per KLOC
                normalized_count = min(count / kloc, 50)
                good_score += match.get('weight', 1) * normalized_count / 1000

        issues_per_kloc = total_issues / kloc

        # Logarithmic so that a few issues hurt but many do not dominate linearly
        penalty = min(0.7, 0.15 * math.log1p(issues_per_kloc))
        bonus = min(0.05, good_score)  # Cap at 5% bonus

        # Final score: start at 100%, subtract penalty, add small bonus
        return max(0.0, min(1.0, 1.0 - penalty + bonus))

    def export_to_json(self, path: str):
        """Export registry to JSON file.

        Writes the patterns plus a ``stats`` section (pattern counts,
        languages, categories). ``languages`` is sorted so that repeated
        exports of the same registry produce identical files.

        Args:
            path: Destination file; it is passed straight to ``open()`` and
                overwritten if it exists.
        """
        all_patterns = self.get_all_patterns()  # flatten once, reuse below
        languages = sorted(
            {p.get('language') for p in all_patterns},
            # Entries without a language (None) sort last instead of
            # breaking the comparison with str.
            key=lambda lang: (lang is None, str(lang)),
        )
        data = {
            'source': 'unified-ast-grep',
            'version': '2.0.0',
            'patterns': self.patterns,
            'stats': {
                'total_patterns': len(all_patterns),
                'good_patterns': len(self.get_good_patterns()),
                'bad_patterns': len(self.get_bad_patterns()),
                'languages': languages,
                'categories': list(self.patterns.keys())
            }
        }

        with open(path, 'w', encoding='utf-8') as f:
            json.dump(data, f, indent=2)
661
-
662
-
663
# Module-wide registry instance; stays None until first requested.
_unified_registry = None


def get_unified_registry() -> UnifiedASTGrepRegistry:
    """Return the shared registry, building it on the first call."""
    global _unified_registry
    if _unified_registry is not None:
        return _unified_registry
    _unified_registry = UnifiedASTGrepRegistry()
    return _unified_registry
672
-
673
-
674
if __name__ == "__main__":
    # Manual smoke test: print registry statistics, write the registry to
    # unified_registry.json next to this file, then show a few entries.
    registry = get_unified_registry()

    print("Unified AST-GREP Pattern Registry")
    print("=" * 60)

    all_patterns = registry.get_all_patterns()
    good_total = len(registry.get_good_patterns())
    bad_total = len(registry.get_bad_patterns())
    print(f"\nTotal patterns: {len(all_patterns)}")
    print(f"Good patterns: {good_total}")
    print(f"Bad patterns: {bad_total}")

    # Tally patterns per language, in first-seen order
    per_language = {}
    for entry in all_patterns:
        name = entry.get('language', 'unknown')
        per_language.setdefault(name, 0)
        per_language[name] += 1

    print("\nPatterns by language:")
    for name, total in per_language.items():
        print(f" - {name}: {total} patterns")

    print(f"\nCategories ({len(registry.patterns)}):")
    for category, members in registry.patterns.items():
        print(f" - {category}: {len(members)} patterns")

    # Export to JSON
    export_path = Path(__file__).parent / "unified_registry.json"
    registry.export_to_json(export_path)
    print(f"\n✅ Exported unified registry to {export_path}")

    # Show the first five patterns, truncating long pattern text
    print("\nSample patterns:")
    for entry in all_patterns[:5]:
        preview = entry.get('pattern', 'N/A')[:40]
        print(f" - {entry['id']} ({entry['language']}): {preview}...")
        print(f" Quality: {entry['quality']}, Weight: {entry['weight']}")