@ivannikov-pro/ai-context-surgeon 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (87) hide show
  1. package/LICENSE +22 -0
  2. package/README.md +372 -0
  3. package/bin/catalog.js +153 -0
  4. package/bin/cli.js +380 -0
  5. package/bin/installer.js +135 -0
  6. package/bin/prompts.js +371 -0
  7. package/checklists/phase-1-analysis.md +58 -0
  8. package/checklists/phase-2-planning.md +67 -0
  9. package/checklists/phase-3-restructuring.md +77 -0
  10. package/checklists/phase-4-documentation.md +111 -0
  11. package/checklists/phase-5-validation.md +91 -0
  12. package/examples/before-after/README.md +139 -0
  13. package/examples/ideal-monorepo/README.md +127 -0
  14. package/knowledge/agent-context-system/artifacts/guide.md +183 -0
  15. package/knowledge/agent-context-system/artifacts/knowledge.md +177 -0
  16. package/knowledge/agent-context-system/artifacts/skills.md +101 -0
  17. package/knowledge/agent-context-system/artifacts/workflows.md +143 -0
  18. package/knowledge/agent-context-system/metadata.json +26 -0
  19. package/knowledge/agent-context-system/timestamps.json +5 -0
  20. package/knowledge/agent-vulnerabilities/LICENSE +21 -0
  21. package/knowledge/agent-vulnerabilities/artifacts/stealth_injection.md +110 -0
  22. package/knowledge/agent-vulnerabilities/artifacts/vulnerabilities.md +232 -0
  23. package/knowledge/agent-vulnerabilities/metadata.json +14 -0
  24. package/knowledge/agent-vulnerabilities/timestamps.json +5 -0
  25. package/knowledge/power-words-dictionary/LICENSE +21 -0
  26. package/knowledge/power-words-dictionary/artifacts/dictionary.md +231 -0
  27. package/knowledge/power-words-dictionary/artifacts/prompt_amplifier.py +381 -0
  28. package/knowledge/power-words-dictionary/metadata.json +14 -0
  29. package/knowledge/power-words-dictionary/timestamps.json +5 -0
  30. package/package.json +77 -0
  31. package/roles/README.md +81 -0
  32. package/roles/architect.md +203 -0
  33. package/roles/inspector.md +232 -0
  34. package/roles/librarian.md +176 -0
  35. package/roles/scout.md +169 -0
  36. package/roles/surgeon.md +172 -0
  37. package/roles/tuner.md +204 -0
  38. package/skills/annotate-jsdoc/SKILL.md +262 -0
  39. package/skills/prompt-engineering/LICENSE +21 -0
  40. package/skills/prompt-engineering/SKILL.md +235 -0
  41. package/skills/prompt-engineering/scripts/extract_instructions.py +416 -0
  42. package/skills/prompt-engineering/scripts/prompt_amplifier.py +381 -0
  43. package/skills/prompt-engineering/scripts/prompt_diff_tracker.py +281 -0
  44. package/skills/prompt-engineering/scripts/prompt_dna_analyzer.py +692 -0
  45. package/skills/prompt-engineering/scripts/templates/code_review.md +47 -0
  46. package/skills/prompt-engineering/scripts/templates/dump_extraction.md +59 -0
  47. package/skills/prompt-engineering/scripts/templates/multi_agent_orchestration.md +100 -0
  48. package/skills/prompt-engineering/scripts/templates/prompt_audit.md +106 -0
  49. package/skills/prompt-engineering/scripts/templates/stealth_injection.md +110 -0
  50. package/skills/prompt-engineering/scripts/templates/task_automation.md +87 -0
  51. package/skills/prompt-engineering/workflows/amplify.md +36 -0
  52. package/skills/prompt-engineering/workflows/audit.md +55 -0
  53. package/skills/prompt-engineering/workflows/context-dump.md +90 -0
  54. package/skills/prompt-engineering/workflows/diff.md +44 -0
  55. package/strategy/bash-guide.md +134 -0
  56. package/strategy/context-exclusion.md +220 -0
  57. package/strategy/context-weight-theory.md +49 -0
  58. package/strategy/file-navigation-header.md +562 -0
  59. package/strategy/jsdoc-guide.md +596 -0
  60. package/strategy/monorepo_strategy.md +726 -0
  61. package/strategy/package-json-guide.md +541 -0
  62. package/templates/AGENTS.md.template +148 -0
  63. package/templates/antigravityignore.template +64 -0
  64. package/templates/cursorrules.template +7 -0
  65. package/templates/knowledge-item.template +44 -0
  66. package/templates/package-json-ideal.template +26 -0
  67. package/templates/package-readme.template +45 -0
  68. package/templates/publish-meta.template +34 -0
  69. package/templates/skill.template +50 -0
  70. package/templates/workflow.template +33 -0
  71. package/tools/analyze-package-json.sh +213 -0
  72. package/tools/analyze-structure.sh +101 -0
  73. package/tools/audit-jsdoc.sh +176 -0
  74. package/tools/check-fnh-freshness.sh +74 -0
  75. package/tools/detect-circular-deps.sh +147 -0
  76. package/tools/detect-god-files.sh +71 -0
  77. package/tools/enforce-god-files.sh +112 -0
  78. package/tools/enrich-package-json.js +311 -0
  79. package/tools/generate-jsdoc-headers.sh +109 -0
  80. package/tools/generate-source-map.sh +71 -0
  81. package/tools/lint-imports.sh +123 -0
  82. package/tools/measure-context-cost.sh +206 -0
  83. package/tools/scan-fnh.sh +174 -0
  84. package/tools/shared/config.sh +53 -0
  85. package/tools/validate-context-hygiene.sh +52 -0
  86. package/tools/validate-context-weight.sh +100 -0
  87. package/tools/validate-naming.sh +98 -0
@@ -0,0 +1,692 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ prompt_dna_analyzer.py — Generates a "DNA profile" of any system prompt.
4
+ Analyzes power word density, restriction ratio, blind spots, and priority structure.
5
+
6
+ v2.0 — Added file-type profiles, anti-gaming penalties, batch mode with uniqueness check.
7
+
8
+ Usage:
9
+ python3 scripts/prompt_dna_analyzer.py /tmp/system_prompt_logs.txt
10
+ python3 scripts/prompt_dna_analyzer.py /tmp/system_prompt_logs.txt --dump 19 # analyze specific dump in a log file
11
+ python3 scripts/prompt_dna_analyzer.py some_prompt.txt --output dna_report.md
12
+ python3 scripts/prompt_dna_analyzer.py some_prompt.txt --type role
13
+ python3 scripts/prompt_dna_analyzer.py --batch .agents/ --check-uniqueness
14
+ """
15
+
16
+ import re
17
+ import sys
18
+ import os
19
+ import json
20
+ import argparse
21
+ import hashlib
22
+ from collections import Counter, defaultdict
23
+ from datetime import datetime
24
+ from pathlib import Path
25
+
26
+ # ============================================================================
27
+ # ANALYSIS PATTERNS
28
+ # ============================================================================
29
+
30
# Enforcement ("hard") directive words. Callers apply these patterns with
# re.IGNORECASE. NOTE: every "MUST NOT" occurrence also matches the plain
# 'MUST' pattern, so it is counted under both keys.
ENFORCEMENT_PATTERNS = {
    'MUST': r'\bMUST\b',
    'MUST NOT': r'\bMUST NOT\b',
    'NEVER': r'\bNEVER\b',
    'ALWAYS': r'\bALWAYS\b',
    'DO NOT': r'\bDO NOT\b',
    'SHALL': r'\bSHALL\b',
    'REQUIRED': r'\bREQUIRED\b',
    'FORBIDDEN': r'\bFORBIDDEN\b',
    'PROHIBITED': r'\bPROHIBITED\b',
    'MANDATORY': r'\bMANDATORY\b',
    'CRITICAL': r'\bCRITICAL\b',
    'ESSENTIAL': r'\bESSENTIAL\b',
    'IMPORTANT': r'\bIMPORTANT\b',
    'UNACCEPTABLE': r'\bUNACCEPTABLE\b',
    'FAILED': r'\bFAILED\b',
}

# Enabling ("soft") phrases; also matched case-insensitively by callers.
ENABLING_PATTERNS = {
    'may': r'\byou may\b',
    'can': r'\byou can\b',
    'could': r'\byou could\b',
    'consider': r'\bconsider\b',
    'optional': r'\boptional\b',
    'if needed': r'\bif needed\b',
    'when appropriate': r'\bwhen appropriate\b',
    'exercise judgement': r'\bexercise judgement\b',
}

# XML-like tag names looked for (as "<tag>" or "<tag ") when scoring
# structure completeness.
STRUCTURE_TAGS = [
    'identity', 'user_rules', 'RULE', 'communication_style',
    'planning_mode', 'planning_mode_artifacts', 'ephemeral_message',
    'EPHEMERAL_MESSAGE', 'web_application_development', 'skills',
    'plugins', 'persistent_context', 'artifacts', 'workflows',
    'user_information', 'mcp_servers', 'functions', 'function',
    'bash_command_reminder', 'ADDITIONAL_METADATA', 'USER_REQUEST',
    'USER_SETTINGS_CHANGE', 'WORKFLOW', 'conversation_summaries',
]

# Security topics searched for case-insensitively. Stems are followed by \w*
# (instead of an immediate \b) so inflected forms such as "encrypted",
# "sanitization", "rate limiting" or "validated" are recognised; with a bare
# trailing \b those common forms were reported as missing.
SECURITY_CHECKPOINTS = {
    'auth_mentioned': r'\bauth(?:enticat\w*|ori[zs]\w*)?\b',
    'security_rules': r'\bsecurity\b',
    'input_validation': r'\bvalidat\w*',
    'rate_limiting': r'\brate.?limit\w*',
    'encryption': r'\bencrypt\w*',
    'sanitization': r'\bsaniti[zs]\w*',
    'xss_prevention': r'\bXSS\b',
    'csrf_prevention': r'\bCSRF\b',
    'injection_prevention': r'\binjection\b',
    'secrets_handling': r'\bsecrets?\b',
    'api_key_protection': r'\bapi.?keys?\b',
    'privacy': r'\bprivacy\b',
    'data_protection': r'\bdata.?protect\w*',
}

# Captures the value of every JSON-style "name": "..." pair.
TOOL_PATTERN = re.compile(r'"name":\s*"([^"]+)"')

# Known generic boilerplate blocks that inflate scores without adding value
GENERIC_BOILERPLATE = [
    "You MUST ALWAYS read and follow instructions strictly",
    "You NEVER ignore error handling or validation steps",
    "You MUST NOT bypass or skip workflow phases",
    "You ALWAYS enforce strict standards",
    "You MUST NEVER guess requirements",
    "CRITICAL MANDATORY INSTRUCTION",
]
96
+
97
+ # ============================================================================
98
+ # FILE-TYPE PROFILES
99
+ # ============================================================================
100
+
101
# Per-file-type scoring profiles. Each entry carries a human-readable
# description, the minimum hardness score a file of that type is expected to
# reach, and the label shown in rendered reports.
FILE_TYPE_PROFILES = {
    'role': dict(
        description='Agent role definitions (as-*.md)',
        target_hardness=70,
        label='🎭 Role',
    ),
    'workflow': dict(
        description='Step-by-step guides',
        target_hardness=60,
        label='📋 Workflow',
    ),
    'knowledge': dict(
        description='Context documents',
        target_hardness=40,
        label='📚 Knowledge',
    ),
    'reference': dict(
        description='API/SDK/tool docs (SKILL.md)',
        target_hardness=20,
        label='📖 Reference',
    ),
    'config': dict(
        description='Config files (AGENTS.md, README)',
        target_hardness=50,
        label='⚙️ Config',
    ),
}
128
+
129
+
130
def detect_file_type(filepath):
    """Auto-detect the file-type profile key from path patterns.

    Args:
        filepath: Path (str or path-like) of the file being analyzed.

    Returns:
        One of 'reference', 'role', 'workflow', 'knowledge' or 'config'.
        Paths matching no rule fall back to 'workflow'.
    """
    # Normalise Windows separators so the basename extraction and the
    # 'workflows/' / 'knowledge/' directory checks behave the same on every
    # platform.
    path = str(filepath).replace('\\', '/')
    basename = path.rsplit('/', 1)[-1]

    # Rules are checked in priority order; the first match wins.
    if basename == 'SKILL.md':
        return 'reference'
    if basename.startswith('as-') and basename.endswith('.md'):
        return 'role'
    if 'workflows/' in path:
        return 'workflow'
    if 'knowledge/' in path:
        return 'knowledge'
    if basename in ('AGENTS.md', 'README.md'):
        return 'config'

    return 'workflow'  # default
147
+
148
+
149
+ # ============================================================================
150
+ # ANTI-GAMING DETECTION
151
+ # ============================================================================
152
+
153
def detect_gaming(text, enforcement_total):
    """Look for signs of artificial score inflation in a prompt.

    Args:
        text: Full prompt text.
        enforcement_total: Total number of enforcement-word hits already
            counted for the whole text.

    Returns:
        Dict mapping a penalty name to ``{'penalty': <negative int>,
        'reason': <str>}``. Empty when nothing is detected or when the text
        has fewer than 20 words.
    """
    findings = {}
    if len(text.split()) < 20:
        return findings

    # Check 1: more than 70% of enforcement words sit in the last 20% of the text.
    tail = text[len(text) * 4 // 5:]
    tail_enforcement = sum(
        len(re.findall(regex, tail, re.IGNORECASE))
        for regex in ENFORCEMENT_PATTERNS.values()
    )
    if enforcement_total > 3 and tail_enforcement > enforcement_total * 0.7:
        findings['eof_concentration'] = {
            'penalty': -20,
            'reason': f'Enforcement concentrated at EOF ({tail_enforcement}/{enforcement_total} = {tail_enforcement/enforcement_total:.0%})',
        }

    # Check 2: at least three of the known generic boilerplate sentences are present.
    boilerplate_hits = sum(1 for sentence in GENERIC_BOILERPLATE if sentence in text)
    if boilerplate_hits >= 3:
        findings['generic_boilerplate'] = {
            'penalty': -15,
            'reason': f'Generic boilerplate detected ({boilerplate_hits}/{len(GENERIC_BOILERPLATE)} known patterns)',
        }

    # Check 3: more than two distinct long lines (over 30 chars once stripped)
    # occur more than once.
    stripped_lines = (raw.strip() for raw in text.split('\n'))
    tally = Counter(entry for entry in stripped_lines if len(entry) > 30)
    duplicates = [entry for entry, seen in tally.items() if seen > 1]
    if len(duplicates) > 2:
        findings['duplicate_lines'] = {
            'penalty': -10,
            'reason': f'{len(duplicates)} lines appear multiple times',
        }

    return findings
197
+
198
+
199
+ # ============================================================================
200
+ # CORE ANALYSIS
201
+ # ============================================================================
202
+
203
def _count_pattern_hits(patterns, text):
    """Return {name: count} for every pattern with at least one case-insensitive match."""
    hits = {}
    for name, pattern in patterns.items():
        count = len(re.findall(pattern, text, re.IGNORECASE))
        if count:
            hits[name] = count
    return hits


def analyze_prompt(text, label="Input", file_type=None, filepath=None):
    """Generate a DNA profile for a prompt text.

    Args:
        text: Prompt content to analyze.
        label: Display name stored in the profile.
        file_type: Key of FILE_TYPE_PROFILES. When None it is auto-detected
            from ``filepath``; with no ``filepath`` it defaults to 'workflow'.
        filepath: Optional path, used only for file-type auto-detection.

    Returns:
        Dict with size metrics, enforcement/enabling word counts, restriction
        ratio, hardness score (raw and penalty-adjusted), target assessment,
        structure tags, security coverage, tool names, priority channels and
        a short hash of the last 20% of the text.
    """
    # Auto-detect file type if not specified
    if file_type is None:
        file_type = detect_file_type(filepath) if filepath else 'workflow'

    profile = {
        'label': label,
        'file_type': file_type,
        'file_type_profile': FILE_TYPE_PROFILES.get(file_type, FILE_TYPE_PROFILES['workflow']),
        # Measure the UTF-8 encoded size: len(text) counts code points and
        # under-reports any prompt containing non-ASCII characters.
        'size_bytes': len(text.encode('utf-8', errors='surrogatepass')),
        'line_count': text.count('\n') + 1,
        'word_count': len(text.split()),
    }

    # 1. Enforcement word frequency
    enforcement = _count_pattern_hits(ENFORCEMENT_PATTERNS, text)
    profile['enforcement_words'] = enforcement
    profile['enforcement_total'] = sum(enforcement.values())

    # 2. Enabling word frequency
    enabling = _count_pattern_hits(ENABLING_PATTERNS, text)
    profile['enabling_words'] = enabling
    profile['enabling_total'] = sum(enabling.values())

    # 3. Restriction ratio: share of enforcement hits among all directive hits
    total_directives = profile['enforcement_total'] + profile['enabling_total']
    if total_directives > 0:
        profile['restriction_ratio'] = round(profile['enforcement_total'] / total_directives, 2)
    else:
        profile['restriction_ratio'] = 0

    # 4. Hardness score (0-100) — with anti-gaming
    # Density is enforcement hits per 1000 words; max(1, ...) guards empty text.
    density = profile['enforcement_total'] / max(1, profile['word_count']) * 1000

    # Base score from density (0-60 range)
    base = min(60, int(density * 3))

    # Bonus for high-impact words (0-40 range)
    bonus = min(40, (
        enforcement.get('CRITICAL', 0) * 3 +
        enforcement.get('MANDATORY', 0) * 4 +
        enforcement.get('UNACCEPTABLE', 0) * 8 +
        enforcement.get('FAILED', 0) * 6 +
        enforcement.get('MUST', 0) * 1 +
        enforcement.get('NEVER', 0) * 2
    ))

    raw_hardness = min(100, base + bonus)

    # Anti-gaming penalties (each penalty value is negative)
    gaming_penalties = detect_gaming(text, profile['enforcement_total'])
    total_penalty = sum(p['penalty'] for p in gaming_penalties.values())

    hardness = max(0, min(100, raw_hardness + total_penalty))

    profile['hardness_score'] = hardness
    profile['hardness_raw'] = raw_hardness
    profile['gaming_penalties'] = gaming_penalties
    profile['total_penalty'] = total_penalty
    profile['enforcement_density'] = round(density, 1)

    # 5. Target assessment based on file type
    target = profile['file_type_profile']['target_hardness']
    profile['target_hardness'] = target
    profile['meets_target'] = hardness >= target
    profile['target_delta'] = hardness - target

    # 6. Structure tags found (a tag counts when "<tag" is followed by
    # whitespace or ">")
    tags_found = []
    tags_missing = []
    for tag in STRUCTURE_TAGS:
        if re.search(r'<' + re.escape(tag) + r'[\s>]', text):
            tags_found.append(tag)
        else:
            tags_missing.append(tag)
    profile['tags_found'] = tags_found
    profile['tags_missing'] = tags_missing
    profile['structure_completeness'] = round(len(tags_found) / len(STRUCTURE_TAGS) * 100, 1)

    # 7. Security blind spots
    security_present = []
    security_missing = []
    for name, pattern in SECURITY_CHECKPOINTS.items():
        if re.search(pattern, text, re.IGNORECASE):
            security_present.append(name)
        else:
            security_missing.append(name)
    profile['security_present'] = security_present
    profile['security_missing'] = security_missing
    profile['security_coverage'] = round(len(security_present) / len(SECURITY_CHECKPOINTS) * 100, 1)

    # 8. Tool count: unique "name" values, minus "$"-prefixed names and JSON
    # schema type keywords
    tools = [
        t for t in set(TOOL_PATTERN.findall(text))
        if not t.startswith('$') and t not in ('type', 'string', 'integer')
    ]
    profile['tools'] = sorted(tools)
    profile['tool_count'] = len(tools)

    # 9. Priority channels detected
    channels = []
    if re.search(r'<user_rules>', text):
        channels.append('user_rules (TOP PRIORITY)')
    if re.search(r'<EPHEMERAL_MESSAGE>', text):
        channels.append('EPHEMERAL_MESSAGE (runtime override)')
    if re.search(r'CRITICAL INSTRUCTION', text):
        channels.append('CRITICAL INSTRUCTION (embedded directive)')
    if re.search(r'MANDATORY RULE', text):
        channels.append('MANDATORY RULE (enforcement)')
    if re.search(r'<planning_mode>', text):
        channels.append('planning_mode (behavioral)')
    if re.search(r'<identity>', text):
        channels.append('identity (baseline)')
    profile['priority_channels'] = channels

    # 10. Content hash for uniqueness checking
    # Hash the last 20% of the file (where boilerplate is typically appended)
    tail_start = len(text) * 4 // 5
    profile['tail_hash'] = hashlib.md5(text[tail_start:].encode()).hexdigest()[:12]

    return profile
333
+
334
+
335
+ # ============================================================================
336
+ # BATCH MODE & UNIQUENESS
337
+ # ============================================================================
338
+
339
def batch_analyze(directory, check_uniqueness=False):
    """Analyze all .md files in a directory tree.

    Args:
        directory: Root directory searched recursively for ``*.md`` files.
        check_uniqueness: When True, also run the cross-file duplicate check.

    Returns:
        Tuple ``(profiles, uniqueness_report)``. ``profiles`` is a list of
        profile dicts, each with an added 'filepath' key, in sorted path
        order. ``uniqueness_report`` is None unless ``check_uniqueness`` is
        True.
    """
    dir_path = Path(directory)
    # Directory names whose contents are never agent files. Compared against
    # whole path components: substring matching also dropped unrelated files
    # such as "distribution.md" or anything under ".github/".
    skip_dirs = {'node_modules', 'dist', '.git', 'projects'}
    profiles = []

    for md_file in sorted(dir_path.rglob("*.md")):
        rel_path = md_file.relative_to(dir_path)
        # Only directory components are checked; the file name itself is exempt.
        if skip_dirs.intersection(rel_path.parts[:-1]):
            continue

        try:
            text = md_file.read_text(encoding='utf-8', errors='replace')
        except OSError as exc:
            # Best effort: report the unreadable file and keep going.
            print(f"Warning: skipping unreadable file {md_file}: {exc}", file=sys.stderr)
            continue

        rel = str(rel_path)
        file_type = detect_file_type(md_file)
        profile = analyze_prompt(text, label=rel, file_type=file_type, filepath=str(md_file))
        profile['filepath'] = str(md_file)
        profiles.append(profile)

    # Uniqueness analysis
    uniqueness_report = None
    if check_uniqueness:
        uniqueness_report = check_content_uniqueness(profiles, dir_path)

    return profiles, uniqueness_report
366
+
367
+
368
def check_content_uniqueness(profiles, base_dir):
    """Detect copy-pasted content across the analyzed files.

    Args:
        profiles: Profile dicts; 'tail_hash' and 'label' are read from each,
            and 'filepath' (if present) is re-read from disk.
        base_dir: Accepted for interface compatibility; not used here.

    Returns:
        Dict with 'total_files', 'duplicate_blocks' (tail-hash groups first,
        then repeated lines ordered by how many files contain them) and
        'uniqueness_score' (100 minus 2 per duplicate count, capped at an
        80-point deduction).
    """
    total = len(profiles)
    blocks = []

    # Identical endings: a tail hash shared by more than two files.
    labels_by_tail = defaultdict(list)
    for prof in profiles:
        labels_by_tail[prof['tail_hash']].append(prof['label'])

    for digest, labels in labels_by_tail.items():
        if len(labels) <= 2:
            continue
        blocks.append({
            'hash': digest,
            'count': len(labels),
            'files': labels[:10],
            'more': max(0, len(labels) - 10),
        })

    # Exact line repetition: map each meaningful line (over 40 chars once
    # stripped) to the set of files containing it.
    labels_by_line = defaultdict(set)
    for prof in profiles:
        source = prof.get('filepath', '')
        try:
            content = Path(source).read_text(encoding='utf-8', errors='replace')
            for raw in content.split('\n'):
                candidate = raw.strip()
                if len(candidate) > 40:
                    labels_by_line[candidate].add(prof['label'])
        except Exception:
            # Files that cannot be read are skipped.
            continue

    # A line present in more than 5 files is suspicious, unless it starts
    # with '|' or '#'.
    widespread = [
        (line, labels)
        for line, labels in labels_by_line.items()
        if len(labels) > 5 and not line.startswith('|') and not line.startswith('#')
    ]
    widespread.sort(key=lambda pair: -len(pair[1]))
    for line, labels in widespread:
        blocks.append({
            'type': 'repeated_line',
            'line': line[:100],
            'count': len(labels),
            'files': sorted(labels)[:5],
            'more': max(0, len(labels) - 5),
        })

    # Score: start from 100, deduct 2 points per duplicate count (max 80).
    score = 100
    if total > 0:
        duplicate_total = sum(entry['count'] for entry in blocks)
        score = max(0, 100 - min(80, duplicate_total * 2))

    return {
        'total_files': total,
        'duplicate_blocks': blocks,
        'uniqueness_score': score,
    }
427
+
428
+
429
+ # ============================================================================
430
+ # REPORT RENDERING
431
+ # ============================================================================
432
+
433
def render_report(profile):
    """Render a single profile dict as a Markdown report string.

    Args:
        profile: Profile dict in the shape produced by ``analyze_prompt``.

    Returns:
        The report as one newline-joined string.
    """
    score = profile['hardness_score']
    target = profile['target_hardness']
    penalties = profile['gaming_penalties']

    # Hardness meter: 20 cells, one per 5 points.
    filled = score // 5
    meter = '█' * filled + '░' * (20 - filled)
    if score < 30:
        grade = '🟢 Soft'
    elif score < 60:
        grade = '🟡 Moderate'
    elif score < 85:
        grade = '🔴 Hard'
    else:
        grade = '💀 Nuclear'
    verdict = '✅ PASS' if profile['meets_target'] else '❌ FAIL'

    out = [
        f"# 🧬 Prompt DNA Profile: {profile['label']}",
        f"> Generated: {datetime.now().isoformat()}",
        f"> Size: {profile['size_bytes']:,} bytes | {profile['line_count']} lines | {profile['word_count']} words",
        f"> Type: {profile['file_type_profile']['label']} ({profile['file_type']})",
        "",
        f"## Hardness Score: {score}/100 {grade} (Target: {target}) {verdict}",
        "```",
        f"[{meter}] {score}%",
        "```",
    ]

    # Penalty details, only when any were applied.
    if penalties:
        out.append("")
        out.append("### ⚠️ Gaming Penalties Applied")
        out.extend(
            f"- **{name}**: {info['penalty']} — {info['reason']}"
            for name, info in penalties.items()
        )
        out.append(f"- Raw score: {profile['hardness_raw']} → Adjusted: {profile['hardness_score']}")
    out.append("")

    # Restriction ratio
    ratio = profile['restriction_ratio']
    out += [
        f"## Restriction Ratio: {ratio:.0%}",
        f"- Enforcement words: **{profile['enforcement_total']}**",
        f"- Enabling words: **{profile['enabling_total']}**",
        f"- Ratio: {ratio:.0%} restrictive / {1-ratio:.0%} permissive",
        f"- Density: **{profile.get('enforcement_density', 0)}** enforcement words per 1000 words",
        "",
    ]

    # Enforcement breakdown, most frequent word first.
    out += [
        "## Enforcement Words",
        "| Word | Count | Impact |",
        "|----|----|----|",
    ]
    high_impact = ('CRITICAL', 'MANDATORY', 'UNACCEPTABLE', 'FAILED')
    medium_impact = ('MUST', 'NEVER', 'ALWAYS')
    ranked = sorted(profile['enforcement_words'].items(), key=lambda pair: -pair[1])
    for word, count in ranked:
        if word in high_impact:
            marker = '🔴'
        elif word in medium_impact:
            marker = '🟡'
        else:
            marker = '🟢'
        out.append(f"| {word} | {count} | {marker} |")
    out.append("")

    # Priority channels, numbered from 1.
    out.append("## Priority Channels Detected")
    out.extend(
        f"{position}. {channel}"
        for position, channel in enumerate(profile['priority_channels'], 1)
    )
    out.append("")

    # Structure: at most 15 found and 10 missing tags are listed.
    found = profile['tags_found']
    absent = profile['tags_missing']
    found_md = ', '.join(f'`<{t}>`' for t in found[:15])
    out.append(f"## Structure Completeness: {profile['structure_completeness']}%")
    out.append(f"- Tags found ({len(found)}): {found_md}")
    if absent:
        absent_md = ', '.join(f'`<{t}>`' for t in absent[:10])
        out.append(f"- Tags missing ({len(absent)}): {absent_md}")
    out.append("")

    # Security
    cov = profile['security_coverage']
    if cov > 60:
        sec_label = '🟢 Good'
    elif cov > 30:
        sec_label = '🟡 Partial'
    else:
        sec_label = '🔴 Weak'
    out.append(f"## Security Coverage: {cov}% {sec_label}")
    if profile['security_present']:
        out.append(f"- ✅ Present: {', '.join(profile['security_present'])}")
    if profile['security_missing']:
        out.append(f"- ❌ Missing: {', '.join(profile['security_missing'])}")
    out.append("")

    # Tools section is omitted entirely when no tools were found.
    if profile['tools']:
        out.append(f"## Tools: {profile['tool_count']}")
        out.append(', '.join(f'`{t}`' for t in profile['tools']))
        out.append("")

    # Recommendations
    out.append("## 💡 Recommendations")
    if not profile['meets_target']:
        out.append(f"- ⬆️ Hardness {score} is below target {target} for {profile['file_type']} files")
    if penalties:
        out.append("- 🚫 Remove generic boilerplate — add contextual enforcement instead")
    if score < 40 and profile['file_type'] in ('role', 'workflow'):
        out.append("- ⬆️ Add MUST/NEVER directives to enforce critical behaviors")
        out.append("- ⬆️ Add CRITICAL INSTRUCTION blocks for non-negotiable rules")
    if cov < 50:
        out.append("- 🔒 Add security directives (input validation, sanitization, etc.)")
    if profile['structure_completeness'] < 50:
        out.append("- 📐 Add structure tags for better organization")
    if score > 80:
        out.append("- ✅ Prompt is well-enforced. Consider adding enabling words for flexibility.")

    return '\n'.join(out)
526
+
527
+
528
def render_batch_report(profiles, uniqueness_report=None):
    """Render the Markdown report for a batch run.

    Args:
        profiles: List of profile dicts, one per analyzed file.
        uniqueness_report: Optional dict with 'uniqueness_score' and
            'duplicate_blocks'; its section is omitted when falsy.

    Returns:
        The report as one newline-joined string.
    """
    out = [
        "# 🧬 Batch DNA Analysis Report",
        f"> Generated: {datetime.now().isoformat()}",
        f"> Files analyzed: {len(profiles)}",
        "",
    ]

    # Per-type summary, types in sorted order.
    grouped = defaultdict(list)
    for prof in profiles:
        grouped[prof['file_type']].append(prof)

    out += [
        "## Summary by Type",
        "",
        "| Type | Count | Target | Pass | Fail | Avg Hardness |",
        "|----|----|----|----|----|----|",
    ]
    for ft in sorted(grouped):
        members = grouped[ft]
        # Unknown types fall back to a target of 60.
        target = FILE_TYPE_PROFILES.get(ft, {}).get('target_hardness', 60)
        passing = sum(1 for m in members if m['meets_target'])
        failing = len(members) - passing
        avg = sum(m['hardness_score'] for m in members) / max(1, len(members))
        status = '✅' if failing == 0 else '❌'
        out.append(f"| {ft} | {len(members)} | {target} | {passing} | {failing} {status} | {avg:.0f} |")
    out.append("")

    # One row per file, lowest hardness first.
    out += [
        "## All Files",
        "",
        "| File | Type | Hardness | Target | Status | Penalties |",
        "|----|----|----|----|----|----|",
    ]
    for p in sorted(profiles, key=lambda item: item['hardness_score']):
        status = '✅' if p['meets_target'] else '❌'
        penalties = ', '.join(p['gaming_penalties'].keys()) if p['gaming_penalties'] else '—'
        out.append(f"| {p['label']} | {p['file_type']} | {p['hardness_score']} | {p['target_hardness']} | {status} | {penalties} |")
    out.append("")

    # Files that received at least one gaming penalty.
    gamed = [p for p in profiles if p['gaming_penalties']]
    if gamed:
        out.append(f"## ⚠️ Gaming Detected ({len(gamed)} files)")
        out.append("")
        for p in gamed:
            out.append(f"### {p['label']}")
            out.extend(
                f"- **{name}**: {info['penalty']} — {info['reason']}"
                for name, info in p['gaming_penalties'].items()
            )
            out.append("")

    # Uniqueness section; at most the first 10 duplicate blocks are listed.
    if uniqueness_report:
        out.append(f"## 🔄 Uniqueness Score: {uniqueness_report['uniqueness_score']}%")
        out.append("")
        duplicate_blocks = uniqueness_report['duplicate_blocks']
        if duplicate_blocks:
            out.append("### Duplicate Blocks Found")
            out.append("")
            for block in duplicate_blocks[:10]:
                # Blocks without a 'type' key are tail-hash groups.
                if block.get('type', 'tail_hash') == 'repeated_line':
                    headline = f"- **Line repeated in {block['count']} files**: `{block['line']}`"
                else:
                    headline = f"- **Identical endings in {block['count']} files** (hash: {block['hash']})"
                out.append(headline)
                files_str = ', '.join(block['files'])
                if block['more'] > 0:
                    files_str += f" (+{block['more']} more)"
                out.append(f" Files: {files_str}")
            out.append("")
        else:
            out.append("✅ No duplicate blocks detected. Content is unique across files.")
            out.append("")

    return '\n'.join(out)
603
+
604
+
605
+ # ============================================================================
606
+ # MAIN
607
+ # ============================================================================
608
+
609
def _extract_dump(content, dump_number):
    """Return (timestamp, body) of the 1-based dump in a log, or None if absent."""
    if dump_number < 1:
        return None
    pattern = r'={10,}\nTIMESTAMP:\s*(.+?)(?:\n={10,})'
    # re.split with one capture group yields [preamble, ts1, body1, ts2, body2, ...]
    splits = re.split(pattern, content)
    dump_idx = (dump_number - 1) * 2 + 1
    if dump_idx >= len(splits):
        return None
    ts = splits[dump_idx].strip()
    body = splits[dump_idx + 1] if dump_idx + 1 < len(splits) else ''
    return ts, body


def main():
    """Command-line entry point.

    Batch mode (``--batch DIR``) analyzes every .md file under DIR, prints or
    writes the batch report and exits with status 1 when any file is below
    its target hardness. Otherwise a single ``input`` file is analyzed,
    optionally narrowed to one dump with ``--dump N`` (N starts at 1).
    """
    parser = argparse.ArgumentParser(description='Prompt DNA Analyzer v2.0')
    parser.add_argument('input', nargs='?', help='Input file to analyze')
    parser.add_argument('--output', '-o', help='Output file for report')
    parser.add_argument('--json', '-j', action='store_true', help='Output as JSON')
    parser.add_argument('--dump', '-d', type=int, help='Analyze specific dump number from logs')
    parser.add_argument('--type', '-t', choices=list(FILE_TYPE_PROFILES.keys()),
                        help='File type profile (auto-detected if not specified)')
    parser.add_argument('--batch', '-b', help='Analyze all .md files in directory')
    parser.add_argument('--check-uniqueness', '-u', action='store_true',
                        help='Check for duplicate content across files (batch mode only)')
    args = parser.parse_args()

    # Batch mode
    if args.batch:
        profiles, uniqueness_report = batch_analyze(
            args.batch,
            check_uniqueness=args.check_uniqueness,
        )

        if args.json:
            result = json.dumps({
                'profiles': profiles,
                'uniqueness': uniqueness_report,
            }, indent=2, ensure_ascii=False, default=str)
        else:
            result = render_batch_report(profiles, uniqueness_report)

        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(result)
            print(f"Batch report written to: {args.output}")
        else:
            print(result)

        # Exit code: 1 if any files fail their target
        failures = [p for p in profiles if not p['meets_target']]
        if failures:
            print(f"\n❌ {len(failures)} file(s) below target hardness", file=sys.stderr)
            sys.exit(1)
        else:
            print(f"\n✅ All {len(profiles)} files meet their type-based targets", file=sys.stderr)
        return

    # Single file mode
    if not args.input:
        parser.error("Either 'input' file or '--batch' directory is required")

    # Reject non-positive dump numbers up front: 0 used to be silently
    # ignored (falsy) and negative values indexed the split list from the
    # end, analyzing the wrong dump.
    if args.dump is not None and args.dump < 1:
        parser.error("--dump must be a positive integer (dump numbers start at 1)")

    with open(args.input, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()

    # If --dump specified, extract that dump
    if args.dump is not None:
        extracted = _extract_dump(content, args.dump)
        if extracted is None:
            print(f"Error: dump #{args.dump} not found", file=sys.stderr)
            sys.exit(1)
        ts, content = extracted
        label = f"Dump #{args.dump} ({ts[:50]})"
    else:
        label = os.path.basename(args.input)

    profile = analyze_prompt(content, label, file_type=args.type, filepath=args.input)

    if args.json:
        result = json.dumps(profile, indent=2, ensure_ascii=False)
    else:
        result = render_report(profile)

    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(result)
        print(f"Report written to: {args.output}")
    else:
        print(result)


if __name__ == '__main__':
    main()