@ivannikov-pro/ai-context-surgeon 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +22 -0
- package/README.md +372 -0
- package/bin/catalog.js +153 -0
- package/bin/cli.js +380 -0
- package/bin/installer.js +135 -0
- package/bin/prompts.js +371 -0
- package/checklists/phase-1-analysis.md +58 -0
- package/checklists/phase-2-planning.md +67 -0
- package/checklists/phase-3-restructuring.md +77 -0
- package/checklists/phase-4-documentation.md +111 -0
- package/checklists/phase-5-validation.md +91 -0
- package/examples/before-after/README.md +139 -0
- package/examples/ideal-monorepo/README.md +127 -0
- package/knowledge/agent-context-system/artifacts/guide.md +183 -0
- package/knowledge/agent-context-system/artifacts/knowledge.md +177 -0
- package/knowledge/agent-context-system/artifacts/skills.md +101 -0
- package/knowledge/agent-context-system/artifacts/workflows.md +143 -0
- package/knowledge/agent-context-system/metadata.json +26 -0
- package/knowledge/agent-context-system/timestamps.json +5 -0
- package/knowledge/agent-vulnerabilities/LICENSE +21 -0
- package/knowledge/agent-vulnerabilities/artifacts/stealth_injection.md +110 -0
- package/knowledge/agent-vulnerabilities/artifacts/vulnerabilities.md +232 -0
- package/knowledge/agent-vulnerabilities/metadata.json +14 -0
- package/knowledge/agent-vulnerabilities/timestamps.json +5 -0
- package/knowledge/power-words-dictionary/LICENSE +21 -0
- package/knowledge/power-words-dictionary/artifacts/dictionary.md +231 -0
- package/knowledge/power-words-dictionary/artifacts/prompt_amplifier.py +381 -0
- package/knowledge/power-words-dictionary/metadata.json +14 -0
- package/knowledge/power-words-dictionary/timestamps.json +5 -0
- package/package.json +77 -0
- package/roles/README.md +81 -0
- package/roles/architect.md +203 -0
- package/roles/inspector.md +232 -0
- package/roles/librarian.md +176 -0
- package/roles/scout.md +169 -0
- package/roles/surgeon.md +172 -0
- package/roles/tuner.md +204 -0
- package/skills/annotate-jsdoc/SKILL.md +262 -0
- package/skills/prompt-engineering/LICENSE +21 -0
- package/skills/prompt-engineering/SKILL.md +235 -0
- package/skills/prompt-engineering/scripts/extract_instructions.py +416 -0
- package/skills/prompt-engineering/scripts/prompt_amplifier.py +381 -0
- package/skills/prompt-engineering/scripts/prompt_diff_tracker.py +281 -0
- package/skills/prompt-engineering/scripts/prompt_dna_analyzer.py +692 -0
- package/skills/prompt-engineering/scripts/templates/code_review.md +47 -0
- package/skills/prompt-engineering/scripts/templates/dump_extraction.md +59 -0
- package/skills/prompt-engineering/scripts/templates/multi_agent_orchestration.md +100 -0
- package/skills/prompt-engineering/scripts/templates/prompt_audit.md +106 -0
- package/skills/prompt-engineering/scripts/templates/stealth_injection.md +110 -0
- package/skills/prompt-engineering/scripts/templates/task_automation.md +87 -0
- package/skills/prompt-engineering/workflows/amplify.md +36 -0
- package/skills/prompt-engineering/workflows/audit.md +55 -0
- package/skills/prompt-engineering/workflows/context-dump.md +90 -0
- package/skills/prompt-engineering/workflows/diff.md +44 -0
- package/strategy/bash-guide.md +134 -0
- package/strategy/context-exclusion.md +220 -0
- package/strategy/context-weight-theory.md +49 -0
- package/strategy/file-navigation-header.md +562 -0
- package/strategy/jsdoc-guide.md +596 -0
- package/strategy/monorepo_strategy.md +726 -0
- package/strategy/package-json-guide.md +541 -0
- package/templates/AGENTS.md.template +148 -0
- package/templates/antigravityignore.template +64 -0
- package/templates/cursorrules.template +7 -0
- package/templates/knowledge-item.template +44 -0
- package/templates/package-json-ideal.template +26 -0
- package/templates/package-readme.template +45 -0
- package/templates/publish-meta.template +34 -0
- package/templates/skill.template +50 -0
- package/templates/workflow.template +33 -0
- package/tools/analyze-package-json.sh +213 -0
- package/tools/analyze-structure.sh +101 -0
- package/tools/audit-jsdoc.sh +176 -0
- package/tools/check-fnh-freshness.sh +74 -0
- package/tools/detect-circular-deps.sh +147 -0
- package/tools/detect-god-files.sh +71 -0
- package/tools/enforce-god-files.sh +112 -0
- package/tools/enrich-package-json.js +311 -0
- package/tools/generate-jsdoc-headers.sh +109 -0
- package/tools/generate-source-map.sh +71 -0
- package/tools/lint-imports.sh +123 -0
- package/tools/measure-context-cost.sh +206 -0
- package/tools/scan-fnh.sh +174 -0
- package/tools/shared/config.sh +53 -0
- package/tools/validate-context-hygiene.sh +52 -0
- package/tools/validate-context-weight.sh +100 -0
- package/tools/validate-naming.sh +98 -0
|
@@ -0,0 +1,692 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
prompt_dna_analyzer.py — Generates a "DNA profile" of any system prompt.
|
|
4
|
+
Analyzes power word density, restriction ratio, blind spots, and priority structure.
|
|
5
|
+
|
|
6
|
+
v2.0 — Added file-type profiles, anti-gaming penalties, batch mode with uniqueness check.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
python3 scripts/prompt_dna_analyzer.py /tmp/system_prompt_logs.txt
|
|
10
|
+
python3 scripts/prompt_dna_analyzer.py --dump 19 # analyze specific dump
|
|
11
|
+
python3 scripts/prompt_dna_analyzer.py some_prompt.txt --output dna_report.md
|
|
12
|
+
python3 scripts/prompt_dna_analyzer.py some_prompt.txt --type role
|
|
13
|
+
python3 scripts/prompt_dna_analyzer.py --batch .agents/ --check-uniqueness
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import re
|
|
17
|
+
import sys
|
|
18
|
+
import os
|
|
19
|
+
import json
|
|
20
|
+
import argparse
|
|
21
|
+
import hashlib
|
|
22
|
+
from collections import Counter, defaultdict
|
|
23
|
+
from datetime import datetime
|
|
24
|
+
from pathlib import Path
|
|
25
|
+
|
|
26
|
+
# ============================================================================
|
|
27
|
+
# ANALYSIS PATTERNS
|
|
28
|
+
# ============================================================================
|
|
29
|
+
|
|
30
|
+
# Restrictive vocabulary. Every key is the word or phrase as it is reported,
# and its value is the whole-word regex (``\b<term>\b``) used to count it.
# Note that a text containing "MUST NOT" is counted by both the 'MUST' and the
# 'MUST NOT' entries, because the shorter pattern matches inside the longer one.
ENFORCEMENT_PATTERNS = {
    term: rf'\b{term}\b'
    for term in (
        'MUST',
        'MUST NOT',
        'NEVER',
        'ALWAYS',
        'DO NOT',
        'SHALL',
        'REQUIRED',
        'FORBIDDEN',
        'PROHIBITED',
        'MANDATORY',
        'CRITICAL',
        'ESSENTIAL',
        'IMPORTANT',
        'UNACCEPTABLE',
        'FAILED',
    )
}
|
|
47
|
+
|
|
48
|
+
# Permissive vocabulary. Keys are the names under which matches are reported;
# values are whole-word regexes. The modal verbs are only counted when they
# follow "you" ("you may", "you can", "you could").
ENABLING_PATTERNS = {
    'may': r'\byou may\b',
    'can': r'\byou can\b',
    'could': r'\byou could\b',
    'consider': r'\bconsider\b',
    'optional': r'\boptional\b',
    'if needed': r'\bif needed\b',
    'when appropriate': r'\bwhen appropriate\b',
    # Accept both the British ("judgement") and American ("judgment") spelling;
    # the key is unchanged so existing report consumers see the same name.
    'exercise judgement': r'\bexercise judge?ment\b',
}
|
|
58
|
+
|
|
59
|
+
# Tag names searched for as ``<tag>`` / ``<tag ...>`` when measuring structure
# completeness. Matching is case-sensitive, which is why both
# ``ephemeral_message`` and ``EPHEMERAL_MESSAGE`` are listed.
STRUCTURE_TAGS = (
    'identity user_rules RULE communication_style '
    'planning_mode planning_mode_artifacts ephemeral_message '
    'EPHEMERAL_MESSAGE web_application_development skills '
    'plugins persistent_context artifacts workflows '
    'user_information mcp_servers functions function '
    'bash_command_reminder ADDITIONAL_METADATA USER_REQUEST '
    'USER_SETTINGS_CHANGE WORKFLOW conversation_summaries'
).split()
|
|
68
|
+
|
|
69
|
+
# Security topics a prompt is expected to mention. Keys are the checkpoint
# names shown in reports; values are regexes applied case-insensitively.
#
# Stems are left open-ended (``\w*`` / ``\w+``) so that ordinary inflections
# count as a mention: "encryption", "encrypted", "rate limiting",
# "sanitization", "validated", "data protection", "API keys". A closing ``\b``
# directly after the stem would reject all of those and report the topic as
# missing even though the prompt covers it.
SECURITY_CHECKPOINTS = {
    # "auth", "authenticate(d)", "authentication", "authorize(d)",
    # "authorization"/"authorisation" -- but not "author" or "authority".
    'auth_mentioned': r'\bauth(?:enticat\w+|ori[zs]\w+)?\b',
    'security_rules': r'\bsecurity\b',
    'input_validation': r'\bvalidat\w+',
    'rate_limiting': r'\brate.?limit\w*',
    'encryption': r'\bencrypt\w*',
    'sanitization': r'\bsaniti[zs]\w*',
    'xss_prevention': r'\bXSS\b',
    'csrf_prevention': r'\bCSRF\b',
    'injection_prevention': r'\binjections?\b',
    'secrets_handling': r'\bsecrets?\b',
    'api_key_protection': r'\bapi.?keys?\b',
    'privacy': r'\bprivacy\b',
    'data_protection': r'\bdata.?protect\w*',
}
|
|
84
|
+
|
|
85
|
+
# Captures the value of every ``"name": "<value>"`` pair in the text; the
# captured values are treated as tool names by the analyzer.
TOOL_PATTERN = re.compile(
    r'"name":\s*"([^"]+)"'
)

# Stock enforcement sentences that raise the hardness score without saying
# anything specific to the file. They are matched as exact, case-sensitive
# substrings.
GENERIC_BOILERPLATE = [
    "You MUST ALWAYS read and follow instructions strictly",
    "You NEVER ignore error handling or validation steps",
    "You MUST NOT bypass or skip workflow phases",
    "You ALWAYS enforce strict standards",
    "You MUST NEVER guess requirements",
    "CRITICAL MANDATORY INSTRUCTION",
]
|
|
96
|
+
|
|
97
|
+
# ============================================================================
|
|
98
|
+
# FILE-TYPE PROFILES
|
|
99
|
+
# ============================================================================
|
|
100
|
+
|
|
101
|
+
# One profile per file type. Each row below is
# (type key, description, target hardness, display label); the target is the
# hardness score a file of that type is compared against.
FILE_TYPE_PROFILES = {
    type_key: {
        'description': description,
        'target_hardness': target,
        'label': display_label,
    }
    for type_key, description, target, display_label in (
        ('role', 'Agent role definitions (as-*.md)', 70, '🎭 Role'),
        ('workflow', 'Step-by-step guides', 60, '📋 Workflow'),
        ('knowledge', 'Context documents', 40, '📚 Knowledge'),
        ('reference', 'API/SDK/tool docs (SKILL.md)', 20, '📖 Reference'),
        ('config', 'Config files (AGENTS.md, README)', 50, '⚙️ Config'),
    )
}
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def detect_file_type(filepath):
    """Auto-detect the file-type profile name from path patterns.

    Rules are tried in order and the first match wins:

    1. file is named ``SKILL.md``                    -> 'reference'
    2. file name starts with ``as-`` and path ends in ``.md`` -> 'role'
    3. path contains ``workflows/``                  -> 'workflow'
    4. path contains ``knowledge/``                  -> 'knowledge'
    5. file is named ``AGENTS.md`` or ``README.md``  -> 'config'
    6. anything else                                 -> 'workflow'

    Args:
        filepath: Path of the file as a string or path-like object.

    Returns:
        One of 'reference', 'role', 'workflow', 'knowledge', 'config'.
    """
    # Normalise Windows separators first. Without this, paths rendered with
    # backslashes never match the "workflows/" / "knowledge/" rules, and the
    # file name cannot be split off a backslash path on POSIX hosts.
    path = str(filepath).replace('\\', '/')
    basename = path.rsplit('/', 1)[-1]

    if basename == 'SKILL.md':
        return 'reference'
    if basename.startswith('as-') and path.endswith('.md'):
        return 'role'
    if 'workflows/' in path:
        return 'workflow'
    if 'knowledge/' in path:
        return 'knowledge'
    if basename in ('AGENTS.md', 'README.md'):
        return 'config'

    return 'workflow'  # default
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
# ============================================================================
|
|
150
|
+
# ANTI-GAMING DETECTION
|
|
151
|
+
# ============================================================================
|
|
152
|
+
|
|
153
|
+
def detect_gaming(text, enforcement_total):
    """Look for signs that a prompt's hardness score was artificially inflated.

    Three independent checks are run; each one that fires adds an entry to the
    result:

    * ``eof_concentration`` (-20): more than 70% of the enforcement matches
      sit in the last fifth of the text (only when there are more than 3).
    * ``generic_boilerplate`` (-15): at least 3 known boilerplate sentences
      are present.
    * ``duplicate_lines`` (-10): more than 2 distinct long lines (over 30
      characters once stripped) occur more than once.

    Args:
        text: Full prompt text.
        enforcement_total: Enforcement-word count for the whole text, as
            computed by the caller.

    Returns:
        Dict mapping check name to ``{'penalty': int, 'reason': str}``.
        Empty when nothing fired or when the text has fewer than 20 words.
    """
    findings = {}

    # Tiny texts are not judged at all.
    if len(text.split()) < 20:
        return findings

    # 1. EOF concentration: >70% enforcement words in last 20% of text
    tail_text = text[len(text) * 4 // 5:]
    tail_hits = sum(
        len(re.findall(regex, tail_text, re.IGNORECASE))
        for regex in ENFORCEMENT_PATTERNS.values()
    )
    if enforcement_total > 3 and tail_hits > enforcement_total * 0.7:
        findings['eof_concentration'] = {
            'penalty': -20,
            'reason': f'Enforcement concentrated at EOF ({tail_hits}/{enforcement_total} = {tail_hits/enforcement_total:.0%})',
        }

    # 2. Generic boilerplate detection
    known_present = sum(1 for phrase in GENERIC_BOILERPLATE if phrase in text)
    if known_present >= 3:
        findings['generic_boilerplate'] = {
            'penalty': -15,
            'reason': f'Generic boilerplate detected ({known_present}/{len(GENERIC_BOILERPLATE)} known patterns)',
        }

    # 3. Repeated identical lines (copy-paste indicator)
    long_lines = (
        candidate
        for candidate in map(str.strip, text.split('\n'))
        if len(candidate) > 30
    )
    repeated = [line for line, seen in Counter(long_lines).items() if seen > 1]
    if len(repeated) > 2:
        findings['duplicate_lines'] = {
            'penalty': -10,
            'reason': f'{len(repeated)} lines appear multiple times',
        }

    return findings
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
# ============================================================================
|
|
200
|
+
# CORE ANALYSIS
|
|
201
|
+
# ============================================================================
|
|
202
|
+
|
|
203
|
+
def analyze_prompt(text, label="Input", file_type=None, filepath=None):
    """Generate a DNA profile for a prompt text.

    Args:
        text: Prompt content to analyze.
        label: Display name stored in the profile.
        file_type: Key of ``FILE_TYPE_PROFILES``. When ``None`` it is detected
            from ``filepath``; without a path it defaults to ``'workflow'``.
            An unknown key is kept as given but scored against the
            ``'workflow'`` profile.
        filepath: Optional path, used only for file-type auto-detection.

    Returns:
        Dict with size metrics, enforcement/enabling word counts, restriction
        ratio, hardness score (raw and after anti-gaming penalties), target
        assessment, structure tags, security coverage, tool names, priority
        channels and a hash of the last fifth of the text.
    """
    # Auto-detect file type if not specified
    if file_type is None:
        file_type = detect_file_type(filepath) if filepath else 'workflow'

    profile = {
        'label': label,
        'file_type': file_type,
        'file_type_profile': FILE_TYPE_PROFILES.get(file_type, FILE_TYPE_PROFILES['workflow']),
        # Encoded length, not len(text): the value is reported as bytes, and
        # the two differ as soon as the text contains non-ASCII characters.
        'size_bytes': len(text.encode('utf-8', errors='replace')),
        'line_count': text.count('\n') + 1,
        'word_count': len(text.split()),
    }

    # 1. Enforcement word frequency (only words that occur are recorded)
    enforcement = {}
    for name, pattern in ENFORCEMENT_PATTERNS.items():
        hits = len(re.findall(pattern, text, re.IGNORECASE))
        if hits:
            enforcement[name] = hits
    profile['enforcement_words'] = enforcement
    profile['enforcement_total'] = sum(enforcement.values())

    # 2. Enabling word frequency
    enabling = {}
    for name, pattern in ENABLING_PATTERNS.items():
        hits = len(re.findall(pattern, text, re.IGNORECASE))
        if hits:
            enabling[name] = hits
    profile['enabling_words'] = enabling
    profile['enabling_total'] = sum(enabling.values())

    # 3. Restriction ratio: share of enforcement words among all directives
    total_directives = profile['enforcement_total'] + profile['enabling_total']
    if total_directives > 0:
        profile['restriction_ratio'] = round(profile['enforcement_total'] / total_directives, 2)
    else:
        profile['restriction_ratio'] = 0

    # 4. Hardness score (0-100) -- with anti-gaming
    # Density is enforcement words per 1000 words; max(1, ...) guards empty text.
    density = profile['enforcement_total'] / max(1, profile['word_count']) * 1000

    # Base score from density (0-60 range)
    base = min(60, int(density * 3))

    # Bonus for high-impact words (0-40 range)
    bonus = min(40, (
        enforcement.get('CRITICAL', 0) * 3 +
        enforcement.get('MANDATORY', 0) * 4 +
        enforcement.get('UNACCEPTABLE', 0) * 8 +
        enforcement.get('FAILED', 0) * 6 +
        enforcement.get('MUST', 0) * 1 +
        enforcement.get('NEVER', 0) * 2
    ))

    raw_hardness = min(100, base + bonus)

    # Anti-gaming penalties (each penalty value is negative)
    gaming_penalties = detect_gaming(text, profile['enforcement_total'])
    total_penalty = sum(p['penalty'] for p in gaming_penalties.values())

    hardness = max(0, min(100, raw_hardness + total_penalty))

    profile['hardness_score'] = hardness
    profile['hardness_raw'] = raw_hardness
    profile['gaming_penalties'] = gaming_penalties
    profile['total_penalty'] = total_penalty
    profile['enforcement_density'] = round(density, 1)

    # 5. Target assessment based on file type
    target = profile['file_type_profile']['target_hardness']
    profile['target_hardness'] = target
    profile['meets_target'] = hardness >= target
    profile['target_delta'] = hardness - target

    # 6. Structure tags found (an opening "<tag" followed by whitespace or ">")
    tags_found = []
    tags_missing = []
    for tag in STRUCTURE_TAGS:
        if re.search(r'<' + re.escape(tag) + r'[\s>]', text):
            tags_found.append(tag)
        else:
            tags_missing.append(tag)
    profile['tags_found'] = tags_found
    profile['tags_missing'] = tags_missing
    profile['structure_completeness'] = round(len(tags_found) / len(STRUCTURE_TAGS) * 100, 1)

    # 7. Security blind spots
    security_present = []
    security_missing = []
    for name, pattern in SECURITY_CHECKPOINTS.items():
        if re.search(pattern, text, re.IGNORECASE):
            security_present.append(name)
        else:
            security_missing.append(name)
    profile['security_present'] = security_present
    profile['security_missing'] = security_missing
    profile['security_coverage'] = round(len(security_present) / len(SECURITY_CHECKPOINTS) * 100, 1)

    # 8. Tool count: unique "name" values, minus "$..." entries and a few
    # literal names that are excluded from the tool list.
    tools = sorted(
        found for found in set(TOOL_PATTERN.findall(text))
        if not found.startswith('$') and found not in ('type', 'string', 'integer')
    )
    profile['tools'] = tools
    profile['tool_count'] = len(tools)

    # 9. Priority channels detected (case-sensitive literal markers)
    channel_markers = (
        ('<user_rules>', 'user_rules (TOP PRIORITY)'),
        ('<EPHEMERAL_MESSAGE>', 'EPHEMERAL_MESSAGE (runtime override)'),
        ('CRITICAL INSTRUCTION', 'CRITICAL INSTRUCTION (embedded directive)'),
        ('MANDATORY RULE', 'MANDATORY RULE (enforcement)'),
        ('<planning_mode>', 'planning_mode (behavioral)'),
        ('<identity>', 'identity (baseline)'),
    )
    profile['priority_channels'] = [
        description for marker, description in channel_markers if marker in text
    ]

    # 10. Content hash for uniqueness checking
    # Hash the last 20% of the file (where boilerplate is typically appended).
    # MD5 is used only as a fingerprint here, not for security.
    tail_start = len(text) * 4 // 5
    profile['tail_hash'] = hashlib.md5(text[tail_start:].encode()).hexdigest()[:12]

    return profile
|
|
333
|
+
|
|
334
|
+
|
|
335
|
+
# ============================================================================
|
|
336
|
+
# BATCH MODE & UNIQUENESS
|
|
337
|
+
# ============================================================================
|
|
338
|
+
|
|
339
|
+
def batch_analyze(directory, check_uniqueness=False):
    """Analyze all .md files in a directory tree.

    Files located under a directory named ``node_modules``, ``dist``, ``.git``
    or ``projects`` (at any depth below ``directory``) are skipped. Files that
    cannot be read are skipped with a warning on stderr.

    Args:
        directory: Root directory to scan recursively.
        check_uniqueness: When true, also run the cross-file duplicate check.

    Returns:
        Tuple ``(profiles, uniqueness_report)``. ``profiles`` is a list of
        profile dicts (each with an added ``'filepath'`` key) in sorted path
        order; ``uniqueness_report`` is ``None`` unless ``check_uniqueness``
        is set.
    """
    dir_path = Path(directory)
    skipped_dirs = {'node_modules', 'dist', '.git', 'projects'}
    profiles = []

    for md_file in sorted(dir_path.rglob("*.md")):
        rel_path = md_file.relative_to(dir_path)

        # Skip non-agent files. Whole directory names are compared: a plain
        # substring test would also drop ".github/...", "distributed.md" or
        # "myprojects/...", and would miss backslash-separated paths.
        if skipped_dirs.intersection(rel_path.parts[:-1]):
            continue

        try:
            text = md_file.read_text(encoding='utf-8', errors='replace')
        except OSError as exc:
            # Best effort: one unreadable file must not abort the batch, but
            # it should not vanish from the report silently either.
            print(f"Warning: skipping unreadable file {md_file}: {exc}", file=sys.stderr)
            continue

        profile = analyze_prompt(
            text,
            label=str(rel_path),
            file_type=detect_file_type(md_file),
            filepath=str(md_file),
        )
        profile['filepath'] = str(md_file)
        profiles.append(profile)

    # Uniqueness analysis
    uniqueness_report = None
    if check_uniqueness:
        uniqueness_report = check_content_uniqueness(profiles, dir_path)

    return profiles, uniqueness_report
|
|
366
|
+
|
|
367
|
+
|
|
368
|
+
def check_content_uniqueness(profiles, base_dir):
    """Detect copy-pasted content shared between the analyzed files.

    Two kinds of duplication are reported:

    * identical endings: more than 2 profiles share the same ``tail_hash``;
    * repeated lines: the same stripped line of more than 40 characters occurs
      in more than 5 files (lines starting with ``|`` or ``#`` are ignored).

    Each profile's file is re-read from its ``'filepath'`` entry for the line
    check; files that cannot be read are left out of that check.

    Args:
        profiles: Profile dicts carrying ``'label'``, ``'tail_hash'`` and,
            optionally, ``'filepath'``.
        base_dir: Accepted for interface compatibility; not used.

    Returns:
        Dict with ``'total_files'``, ``'duplicate_blocks'`` and
        ``'uniqueness_score'`` (100 minus 2 per duplicated occurrence, with
        the deduction capped at 80).
    """
    file_total = len(profiles)
    blocks = []

    # Check tail hashes for identical endings
    labels_by_tail = defaultdict(list)
    for prof in profiles:
        labels_by_tail[prof['tail_hash']].append(prof['label'])

    for digest, labels in labels_by_tail.items():
        if len(labels) <= 2:
            continue
        blocks.append({
            'hash': digest,
            'count': len(labels),
            'files': labels[:10],
            'more': max(0, len(labels) - 10),
        })

    # Check for exact line repetitions across files
    labels_by_line = defaultdict(set)
    for prof in profiles:
        source_path = prof.get('filepath', '')
        try:
            content = Path(source_path).read_text(encoding='utf-8', errors='replace')
            for raw_line in content.split('\n'):
                candidate = raw_line.strip()
                if len(candidate) > 40:  # Only meaningful lines
                    labels_by_line[candidate].add(prof['label'])
        except Exception:
            continue

    # Lines appearing in >5 files = suspicious; table rows and headings are
    # exempt. Most widespread lines come first (ties keep discovery order).
    suspects = [
        (text_line, owners)
        for text_line, owners in labels_by_line.items()
        if len(owners) > 5 and not (text_line.startswith('|') or text_line.startswith('#'))
    ]
    suspects.sort(key=lambda pair: len(pair[1]), reverse=True)
    for text_line, owners in suspects:
        blocks.append({
            'type': 'repeated_line',
            'line': text_line[:100],
            'count': len(owners),
            'files': sorted(owners)[:5],
            'more': max(0, len(owners) - 5),
        })

    # Calculate uniqueness score
    score = 100
    if file_total > 0:
        duplicated = sum(entry['count'] for entry in blocks)
        score = max(0, 100 - min(80, duplicated * 2))

    return {
        'total_files': file_total,
        'duplicate_blocks': blocks,
        'uniqueness_score': score,
    }
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
# ============================================================================
|
|
430
|
+
# REPORT RENDERING
|
|
431
|
+
# ============================================================================
|
|
432
|
+
|
|
433
|
+
def render_report(profile):
    """Render a single DNA profile as a Markdown report.

    Args:
        profile: Profile dict with the keys written by ``analyze_prompt``.

    Returns:
        The report as one string, sections joined with newlines. The
        "Generated" line carries the current local time.
    """
    out = []

    # Title and file facts
    out.extend([
        f"# 🧬 Prompt DNA Profile: {profile['label']}",
        f"> Generated: {datetime.now().isoformat()}",
        f"> Size: {profile['size_bytes']:,} bytes | {profile['line_count']} lines | {profile['word_count']} words",
        f"> Type: {profile['file_type_profile']['label']} ({profile['file_type']})",
        "",
    ])

    # Hardness meter: a 20-cell bar, one cell per 5 points
    score = profile['hardness_score']
    goal = profile['target_hardness']
    filled = score // 5
    meter = '█' * filled + '░' * (20 - filled)
    if score < 30:
        tier = '🟢 Soft'
    elif score < 60:
        tier = '🟡 Moderate'
    elif score < 85:
        tier = '🔴 Hard'
    else:
        tier = '💀 Nuclear'
    verdict = '✅ PASS' if profile['meets_target'] else '❌ FAIL'
    out.extend([
        f"## Hardness Score: {score}/100 {tier} (Target: {goal}) {verdict}",
        "```",
        f"[{meter}] {score}%",
        "```",
    ])

    # Show penalties if any
    if profile['gaming_penalties']:
        out.append("")
        out.append("### ⚠️ Gaming Penalties Applied")
        out.extend(
            f"- **{check}**: {detail['penalty']} — {detail['reason']}"
            for check, detail in profile['gaming_penalties'].items()
        )
        out.append(f"- Raw score: {profile['hardness_raw']} → Adjusted: {profile['hardness_score']}")
    out.append("")

    # Restriction ratio
    ratio = profile['restriction_ratio']
    out.extend([
        f"## Restriction Ratio: {ratio:.0%}",
        f"- Enforcement words: **{profile['enforcement_total']}**",
        f"- Enabling words: **{profile['enabling_total']}**",
        f"- Ratio: {ratio:.0%} restrictive / {1-ratio:.0%} permissive",
        f"- Density: **{profile.get('enforcement_density', 0)}** enforcement words per 1000 words",
        "",
    ])

    # Enforcement breakdown, most frequent word first
    impact_marks = dict.fromkeys(('CRITICAL', 'MANDATORY', 'UNACCEPTABLE', 'FAILED'), '🔴')
    impact_marks.update(dict.fromkeys(('MUST', 'NEVER', 'ALWAYS'), '🟡'))
    out.append("## Enforcement Words")
    out.append("| Word | Count | Impact |")
    out.append("|----|----|----|")
    ranked = sorted(profile['enforcement_words'].items(), key=lambda pair: pair[1], reverse=True)
    for term, occurrences in ranked:
        out.append(f"| {term} | {occurrences} | {impact_marks.get(term, '🟢')} |")
    out.append("")

    # Priority channels
    out.append("## Priority Channels Detected")
    out.extend(
        f"{position}. {channel}"
        for position, channel in enumerate(profile['priority_channels'], 1)
    )
    out.append("")

    # Structure (at most 15 found and 10 missing tags are listed)
    found_tags = ', '.join(f'`<{t}>`' for t in profile['tags_found'][:15])
    out.append(f"## Structure Completeness: {profile['structure_completeness']}%")
    out.append(f"- Tags found ({len(profile['tags_found'])}): {found_tags}")
    if profile['tags_missing']:
        missing_tags = ', '.join(f'`<{t}>`' for t in profile['tags_missing'][:10])
        out.append(f"- Tags missing ({len(profile['tags_missing'])}): {missing_tags}")
    out.append("")

    # Security
    coverage = profile['security_coverage']
    if coverage > 60:
        coverage_tier = '🟢 Good'
    elif coverage > 30:
        coverage_tier = '🟡 Partial'
    else:
        coverage_tier = '🔴 Weak'
    out.append(f"## Security Coverage: {coverage}% {coverage_tier}")
    if profile['security_present']:
        out.append(f"- ✅ Present: {', '.join(profile['security_present'])}")
    if profile['security_missing']:
        out.append(f"- ❌ Missing: {', '.join(profile['security_missing'])}")
    out.append("")

    # Tools
    if profile['tools']:
        out.append(f"## Tools: {profile['tool_count']}")
        out.append(', '.join(f'`{t}`' for t in profile['tools']))
        out.append("")

    # Recommendations
    out.append("## 💡 Recommendations")
    if not profile['meets_target']:
        out.append(f"- ⬆️ Hardness {score} is below target {goal} for {profile['file_type']} files")
    if profile['gaming_penalties']:
        out.append("- 🚫 Remove generic boilerplate — add contextual enforcement instead")
    if score < 40 and profile['file_type'] in ('role', 'workflow'):
        out.append("- ⬆️ Add MUST/NEVER directives to enforce critical behaviors")
        out.append("- ⬆️ Add CRITICAL INSTRUCTION blocks for non-negotiable rules")
    if coverage < 50:
        out.append("- 🔒 Add security directives (input validation, sanitization, etc.)")
    if profile['structure_completeness'] < 50:
        out.append("- 📐 Add structure tags for better organization")
    if score > 80:
        out.append("- ✅ Prompt is well-enforced. Consider adding enabling words for flexibility.")

    return '\n'.join(out)
|
|
526
|
+
|
|
527
|
+
|
|
528
|
+
def render_batch_report(profiles, uniqueness_report=None):
|
|
529
|
+
"""Render a batch analysis report."""
|
|
530
|
+
lines = []
|
|
531
|
+
lines.append("# 🧬 Batch DNA Analysis Report")
|
|
532
|
+
lines.append(f"> Generated: {datetime.now().isoformat()}")
|
|
533
|
+
lines.append(f"> Files analyzed: {len(profiles)}")
|
|
534
|
+
lines.append("")
|
|
535
|
+
|
|
536
|
+
# Summary by type
|
|
537
|
+
by_type = defaultdict(list)
|
|
538
|
+
for p in profiles:
|
|
539
|
+
by_type[p['file_type']].append(p)
|
|
540
|
+
|
|
541
|
+
lines.append("## Summary by Type")
|
|
542
|
+
lines.append("")
|
|
543
|
+
lines.append("| Type | Count | Target | Pass | Fail | Avg Hardness |")
|
|
544
|
+
lines.append("|----|----|----|----|----|----|")
|
|
545
|
+
for ft, type_profiles in sorted(by_type.items()):
|
|
546
|
+
target = FILE_TYPE_PROFILES.get(ft, {}).get('target_hardness', 60)
|
|
547
|
+
passing = sum(1 for p in type_profiles if p['meets_target'])
|
|
548
|
+
failing = len(type_profiles) - passing
|
|
549
|
+
avg = sum(p['hardness_score'] for p in type_profiles) / max(1, len(type_profiles))
|
|
550
|
+
status = '✅' if failing == 0 else '❌'
|
|
551
|
+
lines.append(f"| {ft} | {len(type_profiles)} | {target} | {passing} | {failing} {status} | {avg:.0f} |")
|
|
552
|
+
lines.append("")
|
|
553
|
+
|
|
554
|
+
# Full table
|
|
555
|
+
lines.append("## All Files")
|
|
556
|
+
lines.append("")
|
|
557
|
+
lines.append("| File | Type | Hardness | Target | Status | Penalties |")
|
|
558
|
+
lines.append("|----|----|----|----|----|----|")
|
|
559
|
+
for p in sorted(profiles, key=lambda x: x['hardness_score']):
|
|
560
|
+
status = '✅' if p['meets_target'] else '❌'
|
|
561
|
+
penalties = ', '.join(p['gaming_penalties'].keys()) if p['gaming_penalties'] else '—'
|
|
562
|
+
lines.append(f"| {p['label']} | {p['file_type']} | {p['hardness_score']} | {p['target_hardness']} | {status} | {penalties} |")
|
|
563
|
+
lines.append("")
|
|
564
|
+
|
|
565
|
+
# Gaming detected
|
|
566
|
+
gamed = [p for p in profiles if p['gaming_penalties']]
|
|
567
|
+
if gamed:
|
|
568
|
+
lines.append(f"## ⚠️ Gaming Detected ({len(gamed)} files)")
|
|
569
|
+
lines.append("")
|
|
570
|
+
for p in gamed:
|
|
571
|
+
lines.append(f"### {p['label']}")
|
|
572
|
+
for name, info in p['gaming_penalties'].items():
|
|
573
|
+
lines.append(f"- **{name}**: {info['penalty']} — {info['reason']}")
|
|
574
|
+
lines.append("")
|
|
575
|
+
|
|
576
|
+
# Uniqueness report
|
|
577
|
+
if uniqueness_report:
|
|
578
|
+
lines.append(f"## 🔄 Uniqueness Score: {uniqueness_report['uniqueness_score']}%")
|
|
579
|
+
lines.append("")
|
|
580
|
+
if uniqueness_report['duplicate_blocks']:
|
|
581
|
+
lines.append("### Duplicate Blocks Found")
|
|
582
|
+
lines.append("")
|
|
583
|
+
for block in uniqueness_report['duplicate_blocks'][:10]:
|
|
584
|
+
block_type = block.get('type', 'tail_hash')
|
|
585
|
+
if block_type == 'repeated_line':
|
|
586
|
+
lines.append(f"- **Line repeated in {block['count']} files**: `{block['line']}`")
|
|
587
|
+
files_str = ', '.join(block['files'])
|
|
588
|
+
if block['more'] > 0:
|
|
589
|
+
files_str += f" (+{block['more']} more)"
|
|
590
|
+
lines.append(f" Files: {files_str}")
|
|
591
|
+
else:
|
|
592
|
+
lines.append(f"- **Identical endings in {block['count']} files** (hash: {block['hash']})")
|
|
593
|
+
files_str = ', '.join(block['files'])
|
|
594
|
+
if block['more'] > 0:
|
|
595
|
+
files_str += f" (+{block['more']} more)"
|
|
596
|
+
lines.append(f" Files: {files_str}")
|
|
597
|
+
lines.append("")
|
|
598
|
+
else:
|
|
599
|
+
lines.append("✅ No duplicate blocks detected. Content is unique across files.")
|
|
600
|
+
lines.append("")
|
|
601
|
+
|
|
602
|
+
return '\n'.join(lines)
|
|
603
|
+
|
|
604
|
+
|
|
605
|
+
# ============================================================================
|
|
606
|
+
# MAIN
|
|
607
|
+
# ============================================================================
|
|
608
|
+
|
|
609
|
+
def main():
    """Command-line entry point for the Prompt DNA Analyzer.

    Two modes are supported:

    * Batch mode (``--batch DIR``): passes the directory to
      ``batch_analyze``, renders the combined result as JSON or as a batch
      report, prints it or writes it to ``--output``, and exits with
      status 1 when at least one profile has a falsy ``meets_target``.
    * Single-file mode (positional ``input``): reads the file, optionally
      narrows the content to the dump selected with ``--dump N`` (dumps
      are numbered from 1), analyzes it with ``analyze_prompt`` and prints
      the report or writes it to ``--output``.

    Exits through ``parser.error`` (status 2) when neither ``input`` nor
    ``--batch`` is given or when ``--dump`` is smaller than 1, and with
    status 1 when the requested dump does not exist.
    """
    parser = argparse.ArgumentParser(description='Prompt DNA Analyzer v2.0')
    parser.add_argument('input', nargs='?', help='Input file to analyze')
    parser.add_argument('--output', '-o', help='Output file for report')
    parser.add_argument('--json', '-j', action='store_true', help='Output as JSON')
    parser.add_argument('--dump', '-d', type=int, help='Analyze specific dump number from logs')
    parser.add_argument('--type', '-t', choices=list(FILE_TYPE_PROFILES.keys()),
                        help='File type profile (auto-detected if not specified)')
    parser.add_argument('--batch', '-b', help='Analyze all .md files in directory')
    parser.add_argument('--check-uniqueness', '-u', action='store_true',
                        help='Check for duplicate content across files (batch mode only)')
    args = parser.parse_args()

    # Batch mode
    if args.batch:
        profiles, uniqueness_report = batch_analyze(
            args.batch,
            check_uniqueness=args.check_uniqueness,
        )

        if args.json:
            result = json.dumps({
                'profiles': profiles,
                'uniqueness': uniqueness_report,
            }, indent=2, ensure_ascii=False, default=str)
        else:
            result = render_batch_report(profiles, uniqueness_report)

        if args.output:
            with open(args.output, 'w', encoding='utf-8') as f:
                f.write(result)
            print(f"Batch report written to: {args.output}")
        else:
            print(result)

        # Exit code: 1 if any files fail their target. The summary goes to
        # stderr so that it does not mix with a report printed on stdout.
        failures = [p for p in profiles if not p['meets_target']]
        if failures:
            print(f"\n❌ {len(failures)} file(s) below target hardness", file=sys.stderr)
            sys.exit(1)
        else:
            print(f"\n✅ All {len(profiles)} files meet their type-based targets", file=sys.stderr)
        return

    # Single file mode
    if not args.input:
        parser.error("Either 'input' file or '--batch' directory is required")

    # Dumps are numbered from 1. Reject 0 and negative numbers explicitly:
    # 0 is falsy and would silently analyze the whole file, and a negative
    # number would produce a negative index that selects a segment counted
    # from the end of the split list instead of reporting an error.
    if args.dump is not None and args.dump < 1:
        parser.error("--dump must be a positive integer (dumps are numbered from 1)")

    with open(args.input, 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()

    # If --dump specified, extract that dump
    if args.dump is not None:
        pattern = r'={10,}\nTIMESTAMP:\s*(.+?)(?:\n={10,})'
        # The pattern has one capturing group, so re.split returns
        # [preamble, ts1, body1, ts2, body2, ...]; the timestamp of
        # dump N is therefore at index (N - 1) * 2 + 1 and its body follows.
        splits = re.split(pattern, content)
        dump_idx = (args.dump - 1) * 2 + 1
        if dump_idx < len(splits):
            ts = splits[dump_idx].strip()
            body = splits[dump_idx + 1] if dump_idx + 1 < len(splits) else ''
            content = body
            # Only the first 50 characters of the timestamp go into the label.
            label = f"Dump #{args.dump} ({ts[:50]})"
        else:
            print(f"Error: dump #{args.dump} not found", file=sys.stderr)
            sys.exit(1)
    else:
        label = os.path.basename(args.input)

    profile = analyze_prompt(content, label, file_type=args.type, filepath=args.input)

    if args.json:
        result = json.dumps(profile, indent=2, ensure_ascii=False)
    else:
        result = render_report(profile)

    if args.output:
        with open(args.output, 'w', encoding='utf-8') as f:
            f.write(result)
        print(f"Report written to: {args.output}")
    else:
        print(result)
|
|
689
|
+
|
|
690
|
+
|
|
691
|
+
# Run the command-line interface only when this file is executed as a
# script; importing it as a module must not trigger main().
if __name__ == '__main__':
    main()
|