ghostcode 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ghostcode/__init__.py +3 -0
- ghostcode/audit/__init__.py +0 -0
- ghostcode/audit/logger.py +149 -0
- ghostcode/cli.py +986 -0
- ghostcode/config.py +187 -0
- ghostcode/mapping/__init__.py +0 -0
- ghostcode/mapping/encryption.py +143 -0
- ghostcode/mapping/ghost_map.py +222 -0
- ghostcode/mapping/token_generator.py +78 -0
- ghostcode/parsers/__init__.py +0 -0
- ghostcode/parsers/base.py +66 -0
- ghostcode/parsers/cpp_parser.py +341 -0
- ghostcode/parsers/python_parser.py +397 -0
- ghostcode/reveal/__init__.py +0 -0
- ghostcode/reveal/code_revealer.py +374 -0
- ghostcode/reveal/diff_analyzer.py +426 -0
- ghostcode/reveal/explanation_translator.py +214 -0
- ghostcode/risk_report.py +467 -0
- ghostcode/transformers/__init__.py +0 -0
- ghostcode/transformers/comment_anonymizer.py +95 -0
- ghostcode/transformers/comment_stripper.py +60 -0
- ghostcode/transformers/isolator.py +312 -0
- ghostcode/transformers/literal_scrubber.py +452 -0
- ghostcode/transformers/multi_file.py +99 -0
- ghostcode/transformers/symbol_renamer.py +64 -0
- ghostcode/utils/__init__.py +0 -0
- ghostcode/utils/clipboard.py +52 -0
- ghostcode/utils/stdlib_registry.py +221 -0
- ghostcode-0.5.0.dist-info/METADATA +92 -0
- ghostcode-0.5.0.dist-info/RECORD +33 -0
- ghostcode-0.5.0.dist-info/WHEEL +5 -0
- ghostcode-0.5.0.dist-info/entry_points.txt +2 -0
- ghostcode-0.5.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,426 @@
|
|
|
1
|
+
"""AST diff analyzer and confidence scoring.
|
|
2
|
+
|
|
3
|
+
Compares the ghost code that was sent to the AI against the code the AI
|
|
4
|
+
returned. Classifies structural changes and assigns a confidence score
|
|
5
|
+
for how safe the auto-reveal is.
|
|
6
|
+
|
|
7
|
+
Change categories:
|
|
8
|
+
MODIFIED — existing function body changed (the fix)
|
|
9
|
+
NEW_FUNCTION — AI created a new function
|
|
10
|
+
DELETED — AI removed a function
|
|
11
|
+
SIGNATURE_CHANGE — AI changed a function's parameters
|
|
12
|
+
NEW_VARIABLE — AI introduced a new variable
|
|
13
|
+
"""
|
|
14
|
+
|
|
15
|
+
import difflib
|
|
16
|
+
import re
|
|
17
|
+
from dataclasses import dataclass, field
|
|
18
|
+
from enum import Enum
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class ChangeType(Enum):
    """Category of a structural change detected between sent and received code.

    Values are the lowercase strings used when the change type is serialized
    or displayed.
    """
    MODIFIED = "modified"                  # existing function body changed (the fix)
    NEW_FUNCTION = "new_function"          # AI created a new function
    DELETED_FUNCTION = "deleted_function"  # AI removed a function
    SIGNATURE_CHANGE = "signature_change"  # AI changed a function's parameters
    NEW_VARIABLE = "new_variable"          # AI introduced a new ghost-pattern symbol
    NEW_DEPENDENCY = "new_dependency"      # AI added an import/#include line
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class Confidence(Enum):
    """Categorical confidence that an auto-reveal of the AI's changes is safe.

    Derived from the numeric confidence score (see DiffAnalyzer._score_confidence):
    HIGH for scores >= 80, MEDIUM for >= 50, LOW otherwise.
    """
    HIGH = "HIGH"
    MEDIUM = "MEDIUM"
    LOW = "LOW"
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass
class StructuralChange:
    """A single structural change between sent and received code."""
    type: ChangeType  # category of the change
    name: str         # function name, ghost token, or dependency line involved
    detail: str = ""  # human-readable description of the change
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class ChangeBlock:
    """A contiguous block of changed lines between original and modified code."""
    start_line: int  # 0-indexed line in the new/modified code
    end_line: int    # exclusive
    original_lines: list[str]  # the lines this block replaced in the original
    new_lines: list[str]       # the replacement lines in the modified code
    block_type: str = "modified"  # "modified", "added", or "deleted"
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
@dataclass
class DiffResult:
    """Result of comparing sent vs received code."""
    changes: list[StructuralChange] = field(default_factory=list)  # all detected changes
    confidence: Confidence = Confidence.HIGH  # categorical safety of auto-reveal
    confidence_score: int = 100               # numeric score, clamped to [0, 100]
    confidence_reason: str = ""               # human-readable justification for the score
    new_symbols: list[str] = field(default_factory=list)       # ghost tokens the AI introduced
    new_dependencies: list[str] = field(default_factory=list)  # import/#include lines the AI added
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
class DiffAnalyzer:
|
|
66
|
+
"""Compares sent ghost code against AI's modified version."""
|
|
67
|
+
|
|
68
|
+
def analyze(self, sent_code: str, received_code: str) -> DiffResult:
|
|
69
|
+
"""Compare structural differences between sent and received code.
|
|
70
|
+
|
|
71
|
+
Args:
|
|
72
|
+
sent_code: The ghost code that was sent to the AI.
|
|
73
|
+
received_code: The code the AI returned (still in ghost form).
|
|
74
|
+
|
|
75
|
+
Returns:
|
|
76
|
+
DiffResult with changes, confidence, and new symbols.
|
|
77
|
+
"""
|
|
78
|
+
result = DiffResult()
|
|
79
|
+
|
|
80
|
+
# Extract functions from both
|
|
81
|
+
sent_functions = self._extract_functions(sent_code)
|
|
82
|
+
received_functions = self._extract_functions(received_code)
|
|
83
|
+
|
|
84
|
+
sent_names = set(sent_functions.keys())
|
|
85
|
+
received_names = set(received_functions.keys())
|
|
86
|
+
|
|
87
|
+
# New functions
|
|
88
|
+
for name in received_names - sent_names:
|
|
89
|
+
result.changes.append(StructuralChange(
|
|
90
|
+
type=ChangeType.NEW_FUNCTION,
|
|
91
|
+
name=name,
|
|
92
|
+
detail=f"AI created new function '{name}'",
|
|
93
|
+
))
|
|
94
|
+
|
|
95
|
+
# Deleted functions
|
|
96
|
+
for name in sent_names - received_names:
|
|
97
|
+
result.changes.append(StructuralChange(
|
|
98
|
+
type=ChangeType.DELETED_FUNCTION,
|
|
99
|
+
name=name,
|
|
100
|
+
detail=f"AI removed function '{name}'",
|
|
101
|
+
))
|
|
102
|
+
|
|
103
|
+
# Modified functions
|
|
104
|
+
for name in sent_names & received_names:
|
|
105
|
+
sent_body = sent_functions[name]
|
|
106
|
+
received_body = received_functions[name]
|
|
107
|
+
if sent_body != received_body:
|
|
108
|
+
# Check if signature changed
|
|
109
|
+
sent_sig = self._extract_signature(sent_body)
|
|
110
|
+
received_sig = self._extract_signature(received_body)
|
|
111
|
+
if sent_sig != received_sig:
|
|
112
|
+
result.changes.append(StructuralChange(
|
|
113
|
+
type=ChangeType.SIGNATURE_CHANGE,
|
|
114
|
+
name=name,
|
|
115
|
+
detail=f"Signature changed: {sent_sig} → {received_sig}",
|
|
116
|
+
))
|
|
117
|
+
else:
|
|
118
|
+
result.changes.append(StructuralChange(
|
|
119
|
+
type=ChangeType.MODIFIED,
|
|
120
|
+
name=name,
|
|
121
|
+
detail="Body modified (the fix)",
|
|
122
|
+
))
|
|
123
|
+
|
|
124
|
+
# New variables (ghost-pattern tokens in received but not sent)
|
|
125
|
+
sent_tokens = set(re.findall(r"\bg[vftcsnmx]_\d{3}\b", sent_code))
|
|
126
|
+
received_tokens = set(re.findall(r"\bg[vftcsnmx]_\d{3}\b", received_code))
|
|
127
|
+
new_tokens = received_tokens - sent_tokens
|
|
128
|
+
result.new_symbols = sorted(new_tokens)
|
|
129
|
+
for token in new_tokens:
|
|
130
|
+
result.changes.append(StructuralChange(
|
|
131
|
+
type=ChangeType.NEW_VARIABLE,
|
|
132
|
+
name=token,
|
|
133
|
+
detail=f"AI introduced new symbol '{token}'",
|
|
134
|
+
))
|
|
135
|
+
|
|
136
|
+
# New dependencies
|
|
137
|
+
sent_deps = set(self._extract_dependencies(sent_code))
|
|
138
|
+
received_deps = set(self._extract_dependencies(received_code))
|
|
139
|
+
result.new_dependencies = sorted(received_deps - sent_deps)
|
|
140
|
+
for dep in result.new_dependencies:
|
|
141
|
+
result.changes.append(StructuralChange(
|
|
142
|
+
type=ChangeType.NEW_DEPENDENCY,
|
|
143
|
+
name=dep,
|
|
144
|
+
detail=f"AI added dependency: {dep}",
|
|
145
|
+
))
|
|
146
|
+
|
|
147
|
+
# Calculate confidence score
|
|
148
|
+
self._score_confidence(result)
|
|
149
|
+
|
|
150
|
+
return result
|
|
151
|
+
|
|
152
|
+
def _extract_functions(self, code: str) -> dict[str, str]:
|
|
153
|
+
"""Extract function names and bodies from code."""
|
|
154
|
+
functions = {}
|
|
155
|
+
|
|
156
|
+
# C++ style: type name(params) { ... }
|
|
157
|
+
cpp_pattern = re.compile(
|
|
158
|
+
r"(?:[\w:*&<>, ]+\s+)?(\w+)\s*\([^)]*\)(?:\s*(?:const|override|noexcept|final))*\s*\{",
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
for match in cpp_pattern.finditer(code):
|
|
162
|
+
name = match.group(1)
|
|
163
|
+
# Skip keywords
|
|
164
|
+
if name in ("if", "for", "while", "switch", "catch", "return"):
|
|
165
|
+
continue
|
|
166
|
+
# Find matching brace
|
|
167
|
+
start = match.start()
|
|
168
|
+
brace_pos = code.index("{", match.start())
|
|
169
|
+
depth = 0
|
|
170
|
+
i = brace_pos
|
|
171
|
+
while i < len(code):
|
|
172
|
+
if code[i] == "{":
|
|
173
|
+
depth += 1
|
|
174
|
+
elif code[i] == "}":
|
|
175
|
+
depth -= 1
|
|
176
|
+
if depth == 0:
|
|
177
|
+
functions[name] = code[start:i + 1]
|
|
178
|
+
break
|
|
179
|
+
i += 1
|
|
180
|
+
|
|
181
|
+
# Python style: def name(params):
|
|
182
|
+
py_pattern = re.compile(r"def\s+(\w+)\s*\([^)]*\)\s*(?:->.*?)?:")
|
|
183
|
+
for match in py_pattern.finditer(code):
|
|
184
|
+
name = match.group(1)
|
|
185
|
+
start = match.start()
|
|
186
|
+
# Find the end by indentation
|
|
187
|
+
body_start = match.end()
|
|
188
|
+
lines = code[body_start:].split("\n")
|
|
189
|
+
end_offset = body_start
|
|
190
|
+
if lines and lines[0].strip() == "":
|
|
191
|
+
end_offset += len(lines[0]) + 1
|
|
192
|
+
lines = lines[1:]
|
|
193
|
+
|
|
194
|
+
# Get indentation of first body line
|
|
195
|
+
body_indent = None
|
|
196
|
+
for line in lines:
|
|
197
|
+
if line.strip():
|
|
198
|
+
body_indent = len(line) - len(line.lstrip())
|
|
199
|
+
break
|
|
200
|
+
|
|
201
|
+
if body_indent is not None:
|
|
202
|
+
for line in lines:
|
|
203
|
+
if line.strip() and (len(line) - len(line.lstrip())) < body_indent:
|
|
204
|
+
break
|
|
205
|
+
end_offset += len(line) + 1
|
|
206
|
+
|
|
207
|
+
functions[name] = code[start:end_offset]
|
|
208
|
+
|
|
209
|
+
return functions
|
|
210
|
+
|
|
211
|
+
def _extract_signature(self, func_code: str) -> str:
|
|
212
|
+
"""Extract just the signature line from a function."""
|
|
213
|
+
# C++
|
|
214
|
+
match = re.match(
|
|
215
|
+
r"((?:[\w:*&<>, ]+\s+)?\w+\s*\([^)]*\)(?:\s*(?:const|override|noexcept|final))*)",
|
|
216
|
+
func_code.strip(),
|
|
217
|
+
)
|
|
218
|
+
if match:
|
|
219
|
+
return match.group(1).strip()
|
|
220
|
+
|
|
221
|
+
# Python
|
|
222
|
+
match = re.match(r"(def\s+\w+\s*\([^)]*\))", func_code.strip())
|
|
223
|
+
if match:
|
|
224
|
+
return match.group(1).strip()
|
|
225
|
+
|
|
226
|
+
return func_code.split("\n")[0].strip()
|
|
227
|
+
|
|
228
|
+
def _extract_dependencies(self, code: str) -> list[str]:
|
|
229
|
+
"""Extract #include and import statements."""
|
|
230
|
+
deps = []
|
|
231
|
+
for line in code.split("\n"):
|
|
232
|
+
stripped = line.strip()
|
|
233
|
+
if (stripped.startswith("#include")
|
|
234
|
+
or stripped.startswith("import ")
|
|
235
|
+
or stripped.startswith("from ")):
|
|
236
|
+
deps.append(stripped)
|
|
237
|
+
return deps
|
|
238
|
+
|
|
239
|
+
def _score_confidence(self, result: DiffResult):
|
|
240
|
+
"""Calculate confidence score based on changes detected."""
|
|
241
|
+
score = 100
|
|
242
|
+
|
|
243
|
+
for change in result.changes:
|
|
244
|
+
if change.type == ChangeType.MODIFIED:
|
|
245
|
+
score -= 5 # Expected — this is the fix
|
|
246
|
+
elif change.type == ChangeType.NEW_VARIABLE:
|
|
247
|
+
score -= 5
|
|
248
|
+
elif change.type == ChangeType.NEW_FUNCTION:
|
|
249
|
+
score -= 15
|
|
250
|
+
elif change.type == ChangeType.DELETED_FUNCTION:
|
|
251
|
+
score -= 20
|
|
252
|
+
elif change.type == ChangeType.SIGNATURE_CHANGE:
|
|
253
|
+
score -= 10
|
|
254
|
+
elif change.type == ChangeType.NEW_DEPENDENCY:
|
|
255
|
+
score -= 3
|
|
256
|
+
|
|
257
|
+
score = max(0, score)
|
|
258
|
+
|
|
259
|
+
if score >= 80:
|
|
260
|
+
result.confidence = Confidence.HIGH
|
|
261
|
+
result.confidence_reason = "Surgical fix — safe to auto-apply"
|
|
262
|
+
elif score >= 50:
|
|
263
|
+
result.confidence = Confidence.MEDIUM
|
|
264
|
+
result.confidence_reason = "Moderate changes — review recommended"
|
|
265
|
+
else:
|
|
266
|
+
result.confidence = Confidence.LOW
|
|
267
|
+
result.confidence_reason = (
|
|
268
|
+
"Significant structural changes — manual review required"
|
|
269
|
+
)
|
|
270
|
+
|
|
271
|
+
result.confidence_score = score
|
|
272
|
+
|
|
273
|
+
def detect_change_blocks(self, original: str, modified: str) -> list[ChangeBlock]:
|
|
274
|
+
"""Detect contiguous blocks of changes between original and modified code.
|
|
275
|
+
|
|
276
|
+
Uses difflib.SequenceMatcher to find change opcodes, then merges
|
|
277
|
+
consecutive blocks within a 1-line gap into single blocks.
|
|
278
|
+
"""
|
|
279
|
+
orig_lines = original.splitlines()
|
|
280
|
+
mod_lines = modified.splitlines()
|
|
281
|
+
|
|
282
|
+
matcher = difflib.SequenceMatcher(None, orig_lines, mod_lines)
|
|
283
|
+
raw_blocks: list[ChangeBlock] = []
|
|
284
|
+
|
|
285
|
+
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
|
|
286
|
+
if tag == "equal":
|
|
287
|
+
continue
|
|
288
|
+
|
|
289
|
+
if tag == "replace":
|
|
290
|
+
block_type = "modified"
|
|
291
|
+
elif tag == "insert":
|
|
292
|
+
block_type = "added"
|
|
293
|
+
elif tag == "delete":
|
|
294
|
+
block_type = "deleted"
|
|
295
|
+
else:
|
|
296
|
+
continue
|
|
297
|
+
|
|
298
|
+
raw_blocks.append(ChangeBlock(
|
|
299
|
+
start_line=j1,
|
|
300
|
+
end_line=j2,
|
|
301
|
+
original_lines=orig_lines[i1:i2],
|
|
302
|
+
new_lines=mod_lines[j1:j2],
|
|
303
|
+
block_type=block_type,
|
|
304
|
+
))
|
|
305
|
+
|
|
306
|
+
# Merge consecutive blocks within 1-line gap
|
|
307
|
+
if not raw_blocks:
|
|
308
|
+
return []
|
|
309
|
+
|
|
310
|
+
merged: list[ChangeBlock] = [raw_blocks[0]]
|
|
311
|
+
for block in raw_blocks[1:]:
|
|
312
|
+
prev = merged[-1]
|
|
313
|
+
if block.start_line - prev.end_line <= 1:
|
|
314
|
+
# Merge: extend the previous block
|
|
315
|
+
gap_lines = []
|
|
316
|
+
if block.start_line > prev.end_line:
|
|
317
|
+
gap_lines = mod_lines[prev.end_line:block.start_line]
|
|
318
|
+
merged[-1] = ChangeBlock(
|
|
319
|
+
start_line=prev.start_line,
|
|
320
|
+
end_line=block.end_line,
|
|
321
|
+
original_lines=prev.original_lines + gap_lines + block.original_lines,
|
|
322
|
+
new_lines=prev.new_lines + gap_lines + block.new_lines,
|
|
323
|
+
block_type="modified" if prev.original_lines or block.original_lines else "added",
|
|
324
|
+
)
|
|
325
|
+
else:
|
|
326
|
+
merged.append(block)
|
|
327
|
+
|
|
328
|
+
return merged
|
|
329
|
+
|
|
330
|
+
def describe_change(self, block: ChangeBlock) -> str:
|
|
331
|
+
"""Generate a concise human-readable description of a change block."""
|
|
332
|
+
new_text = "\n".join(block.new_lines)
|
|
333
|
+
orig_text = "\n".join(block.original_lines)
|
|
334
|
+
|
|
335
|
+
# Pure addition (no original lines)
|
|
336
|
+
if not block.original_lines:
|
|
337
|
+
return self._describe_addition(block.new_lines)
|
|
338
|
+
|
|
339
|
+
# Pure deletion
|
|
340
|
+
if not block.new_lines:
|
|
341
|
+
return f"removed {len(block.original_lines)} lines"
|
|
342
|
+
|
|
343
|
+
# Modification — try specific heuristics
|
|
344
|
+
desc = self._describe_modification(block.original_lines, block.new_lines)
|
|
345
|
+
if desc:
|
|
346
|
+
return desc
|
|
347
|
+
|
|
348
|
+
# Fallback
|
|
349
|
+
return f"modified {len(block.new_lines)} lines"
|
|
350
|
+
|
|
351
|
+
def _describe_addition(self, lines: list[str]) -> str:
|
|
352
|
+
"""Describe newly added lines."""
|
|
353
|
+
joined = "\n".join(lines)
|
|
354
|
+
stripped = [l.strip() for l in lines if l.strip()]
|
|
355
|
+
|
|
356
|
+
# New function definition
|
|
357
|
+
for line in stripped:
|
|
358
|
+
match = re.match(r"def\s+(\w+)\s*\(", line)
|
|
359
|
+
if match:
|
|
360
|
+
return f"new helper function '{match.group(1)}'"
|
|
361
|
+
# C++ function
|
|
362
|
+
match = re.match(r"(?:[\w:*&<>, ]+\s+)?(\w+)\s*\([^)]*\)\s*\{", line)
|
|
363
|
+
if match and match.group(1) not in ("if", "for", "while", "switch", "catch"):
|
|
364
|
+
return f"new helper function '{match.group(1)}'"
|
|
365
|
+
|
|
366
|
+
# Import / include
|
|
367
|
+
if all(l.startswith(("import ", "from ", "#include")) for l in stripped if l):
|
|
368
|
+
deps = ", ".join(stripped)
|
|
369
|
+
return f"added dependency: {deps}"
|
|
370
|
+
|
|
371
|
+
# try/except block
|
|
372
|
+
if any("try:" in l or "try {" in l for l in stripped):
|
|
373
|
+
return "error handling"
|
|
374
|
+
|
|
375
|
+
# Null-safety patterns
|
|
376
|
+
if any(".get(" in l or "is None" in l or "is not None" in l for l in stripped):
|
|
377
|
+
return "null-safety check"
|
|
378
|
+
|
|
379
|
+
# Conditional wrapping
|
|
380
|
+
if any(l.startswith("if ") or l.startswith("if(") for l in stripped):
|
|
381
|
+
return "added conditional check"
|
|
382
|
+
|
|
383
|
+
return f"added {len(lines)} lines"
|
|
384
|
+
|
|
385
|
+
def _describe_modification(self, orig_lines: list[str], new_lines: list[str]) -> str | None:
|
|
386
|
+
"""Try to describe a modification with a specific heuristic. Returns None for fallback."""
|
|
387
|
+
orig_stripped = [l.strip() for l in orig_lines if l.strip()]
|
|
388
|
+
new_stripped = [l.strip() for l in new_lines if l.strip()]
|
|
389
|
+
|
|
390
|
+
# Null-safety: .get() pattern introduced
|
|
391
|
+
if any(".get(" in l for l in new_stripped) and not any(".get(" in l for l in orig_stripped):
|
|
392
|
+
return "null-safety check with .get()"
|
|
393
|
+
|
|
394
|
+
# try/except added around existing code
|
|
395
|
+
if any("try:" in l or "try {" in l for l in new_stripped) and not any("try:" in l or "try {" in l for l in orig_stripped):
|
|
396
|
+
return "error handling"
|
|
397
|
+
|
|
398
|
+
# Wrapped in conditional
|
|
399
|
+
if any(l.startswith("if ") or l.startswith("if(") for l in new_stripped) and not any(l.startswith("if ") or l.startswith("if(") for l in orig_stripped):
|
|
400
|
+
return "wrapped in conditional check"
|
|
401
|
+
|
|
402
|
+
# Single operator change (e.g., '+' to '-')
|
|
403
|
+
if len(orig_stripped) == 1 and len(new_stripped) == 1:
|
|
404
|
+
o, n = orig_stripped[0], new_stripped[0]
|
|
405
|
+
# Find single-character operator differences
|
|
406
|
+
if len(o) == len(n):
|
|
407
|
+
diffs = [(oc, nc) for oc, nc in zip(o, n) if oc != nc]
|
|
408
|
+
if len(diffs) == 1:
|
|
409
|
+
oc, nc = diffs[0]
|
|
410
|
+
if oc in "+-*/%<>=!&|^" or nc in "+-*/%<>=!&|^":
|
|
411
|
+
return f"changed '{oc}' to '{nc}'"
|
|
412
|
+
|
|
413
|
+
# New function in modified block
|
|
414
|
+
for line in new_stripped:
|
|
415
|
+
match = re.match(r"def\s+(\w+)\s*\(", line)
|
|
416
|
+
if match and not any(match.group(1) in l for l in orig_stripped):
|
|
417
|
+
return f"new helper function '{match.group(1)}'"
|
|
418
|
+
|
|
419
|
+
# Import added
|
|
420
|
+
new_imports = [l for l in new_stripped if l.startswith(("import ", "from ", "#include"))]
|
|
421
|
+
old_imports = [l for l in orig_stripped if l.startswith(("import ", "from ", "#include"))]
|
|
422
|
+
added_imports = set(new_imports) - set(old_imports)
|
|
423
|
+
if added_imports:
|
|
424
|
+
return f"added dependency: {', '.join(added_imports)}"
|
|
425
|
+
|
|
426
|
+
return None
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
"""Explanation translator.
|
|
2
|
+
|
|
3
|
+
Analyzes the AI's translated response and adds contextual annotations:
|
|
4
|
+
|
|
5
|
+
1. Naming advice detection — AI says "rename gv_01 to something descriptive"
|
|
6
|
+
which becomes "rename userCount to something descriptive" (nonsensical).
|
|
7
|
+
Detects and annotates these.
|
|
8
|
+
|
|
9
|
+
2. Stub speculation detection — AI references a function that was sent as
|
|
10
|
+
a stub (no implementation). Flags that the AI is guessing about behavior
|
|
11
|
+
it never saw.
|
|
12
|
+
|
|
13
|
+
3. New symbol flagging — marks AI-introduced variables in explanations.
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import re
|
|
17
|
+
from dataclasses import dataclass
|
|
18
|
+
|
|
19
|
+
from ..mapping.ghost_map import GhostMap
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@dataclass
class Annotation:
    """An annotation to add to the translated explanation."""
    type: str           # "naming_advice", "stub_speculation", or "new_symbol"
    location: str       # description of where in the text (e.g. "near 'name'")
    original_text: str  # the relevant sentence/phrase, possibly truncated
    note: str           # the annotation message shown to the user
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
# Patterns that indicate naming advice.
# Each template may contain a "{name}" placeholder which is filled with the
# regex-escaped symbol name before matching (case-insensitive).
NAMING_ADVICE_PATTERNS = [
    r"(?:rename|renaming)\s+(?:`)?{name}(?:`)?",
    r"(?:`)?{name}(?:`)?\s+(?:is a |has a )?(?:misleading|confusing|unclear|bad|poor)\s+(?:name|naming)",
    r"(?:consider|try|suggest)\s+(?:calling|naming|renaming)\s+(?:`)?{name}(?:`)?",
    r"(?:better|clearer|more descriptive)\s+name\s+(?:for|than)\s+(?:`)?{name}(?:`)?",
    r"(?:`)?{name}(?:`)?\s+(?:should|could|might)\s+be\s+(?:renamed|called|named)",
    r"(?:variable|function|class|method)\s+name\s+(?:`)?{name}(?:`)?",
]

# Patterns that indicate the AI is speculating about behavior.
# NOTE(review): the first three templates contain no "{name}" placeholder, so
# they match anywhere in the text, not only near the stub's name.
SPECULATION_PATTERNS = [
    r"(?:might|may|could|probably|likely|possibly)\s+(?:be |cause |have |return |throw )",
    r"(?:I'm not sure|it's unclear|hard to tell|without seeing)",
    r"(?:assuming|if I had to guess|based on the name)",
    r"(?:the implementation of|inside|within)\s+(?:`)?{name}(?:`)?",
    r"(?:make sure|verify|check|ensure)\s+(?:that\s+)?(?:`)?{name}(?:`)?",
]
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class ExplanationTranslator:
|
|
52
|
+
"""Analyzes translated AI responses and adds contextual annotations."""
|
|
53
|
+
|
|
54
|
+
def __init__(self, ghost_map: GhostMap, stubs: list[str] | None = None):
|
|
55
|
+
"""
|
|
56
|
+
Args:
|
|
57
|
+
ghost_map: The bidirectional ghost map.
|
|
58
|
+
stubs: List of function names that were sent as stubs (no body).
|
|
59
|
+
"""
|
|
60
|
+
self._map = ghost_map
|
|
61
|
+
self._forward = ghost_map.forward_map()
|
|
62
|
+
self._stubs = set(stubs or [])
|
|
63
|
+
# Build reverse: original_name → ghost_token
|
|
64
|
+
self._reverse = {v: k for k, v in self._forward.items()}
|
|
65
|
+
|
|
66
|
+
def annotate(self, translated_text: str) -> tuple[str, list[Annotation]]:
|
|
67
|
+
"""Analyze translated text and add annotations.
|
|
68
|
+
|
|
69
|
+
Args:
|
|
70
|
+
translated_text: The AI response after token replacement.
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
Tuple of (annotated_text, list_of_annotations).
|
|
74
|
+
"""
|
|
75
|
+
annotations = []
|
|
76
|
+
|
|
77
|
+
# Detect naming advice
|
|
78
|
+
annotations.extend(self._detect_naming_advice(translated_text))
|
|
79
|
+
|
|
80
|
+
# Detect stub speculation
|
|
81
|
+
annotations.extend(self._detect_stub_speculation(translated_text))
|
|
82
|
+
|
|
83
|
+
# Apply annotations to text
|
|
84
|
+
annotated = self._apply_annotations(translated_text, annotations)
|
|
85
|
+
|
|
86
|
+
return annotated, annotations
|
|
87
|
+
|
|
88
|
+
def _detect_naming_advice(self, text: str) -> list[Annotation]:
|
|
89
|
+
"""Detect sentences where the AI gives naming advice about ghost tokens.
|
|
90
|
+
|
|
91
|
+
After translation, "rename gv_001 to count" becomes
|
|
92
|
+
"rename userCount to count" — which is nonsensical.
|
|
93
|
+
"""
|
|
94
|
+
annotations = []
|
|
95
|
+
# Check each mapped symbol
|
|
96
|
+
for original_name, ghost_token in self._reverse.items():
|
|
97
|
+
for pattern_template in NAMING_ADVICE_PATTERNS:
|
|
98
|
+
pattern = pattern_template.format(name=re.escape(original_name))
|
|
99
|
+
for match in re.finditer(pattern, text, re.IGNORECASE):
|
|
100
|
+
# Extract the surrounding sentence
|
|
101
|
+
sentence = self._extract_sentence(text, match.start())
|
|
102
|
+
annotations.append(Annotation(
|
|
103
|
+
type="naming_advice",
|
|
104
|
+
location=f"near '{original_name}'",
|
|
105
|
+
original_text=sentence,
|
|
106
|
+
note=(
|
|
107
|
+
f"AI was commenting on the ghost token name "
|
|
108
|
+
f"'{ghost_token}', not your original name "
|
|
109
|
+
f"'{original_name}'. This suggestion may not apply."
|
|
110
|
+
),
|
|
111
|
+
))
|
|
112
|
+
break # One match per symbol is enough
|
|
113
|
+
|
|
114
|
+
return annotations
|
|
115
|
+
|
|
116
|
+
def _detect_stub_speculation(self, text: str) -> list[Annotation]:
|
|
117
|
+
"""Detect when the AI speculates about functions sent as stubs."""
|
|
118
|
+
annotations = []
|
|
119
|
+
|
|
120
|
+
for stub_ghost_token in self._stubs:
|
|
121
|
+
original_name = self._forward.get(stub_ghost_token)
|
|
122
|
+
if not original_name:
|
|
123
|
+
continue
|
|
124
|
+
|
|
125
|
+
# Check if the AI references this function
|
|
126
|
+
if original_name not in text:
|
|
127
|
+
continue
|
|
128
|
+
|
|
129
|
+
# Check for speculation patterns near the reference
|
|
130
|
+
for pattern_template in SPECULATION_PATTERNS:
|
|
131
|
+
pattern = pattern_template.format(name=re.escape(original_name))
|
|
132
|
+
for match in re.finditer(pattern, text, re.IGNORECASE):
|
|
133
|
+
sentence = self._extract_sentence(text, match.start())
|
|
134
|
+
annotations.append(Annotation(
|
|
135
|
+
type="stub_speculation",
|
|
136
|
+
location=f"about '{original_name}'",
|
|
137
|
+
original_text=sentence,
|
|
138
|
+
note=(
|
|
139
|
+
f"'{original_name}' was sent as a stub (no "
|
|
140
|
+
f"implementation). AI is inferring behavior "
|
|
141
|
+
f"without seeing the full code. Verify manually."
|
|
142
|
+
),
|
|
143
|
+
))
|
|
144
|
+
break
|
|
145
|
+
|
|
146
|
+
# Even without speculation patterns, flag any substantive
|
|
147
|
+
# discussion of a stubbed function
|
|
148
|
+
sentences = text.split(".")
|
|
149
|
+
for sentence in sentences:
|
|
150
|
+
if original_name in sentence and len(sentence.strip()) > 30:
|
|
151
|
+
# Check it's not just a brief mention
|
|
152
|
+
word_count = len(sentence.split())
|
|
153
|
+
if word_count > 10:
|
|
154
|
+
already_flagged = any(
|
|
155
|
+
a.type == "stub_speculation"
|
|
156
|
+
and original_name in a.location
|
|
157
|
+
for a in annotations
|
|
158
|
+
)
|
|
159
|
+
if not already_flagged:
|
|
160
|
+
annotations.append(Annotation(
|
|
161
|
+
type="stub_speculation",
|
|
162
|
+
location=f"about '{original_name}'",
|
|
163
|
+
original_text=sentence.strip(),
|
|
164
|
+
note=(
|
|
165
|
+
f"'{original_name}' was sent as a stub. "
|
|
166
|
+
f"AI reasoning about its behavior may be "
|
|
167
|
+
f"inaccurate."
|
|
168
|
+
),
|
|
169
|
+
))
|
|
170
|
+
|
|
171
|
+
return annotations
|
|
172
|
+
|
|
173
|
+
def _extract_sentence(self, text: str, position: int) -> str:
|
|
174
|
+
"""Extract the sentence containing a given position."""
|
|
175
|
+
# Find sentence boundaries
|
|
176
|
+
# Look backwards for sentence start
|
|
177
|
+
start = position
|
|
178
|
+
while start > 0 and text[start - 1] not in ".!?\n":
|
|
179
|
+
start -= 1
|
|
180
|
+
|
|
181
|
+
# Look forwards for sentence end
|
|
182
|
+
end = position
|
|
183
|
+
while end < len(text) and text[end] not in ".!?\n":
|
|
184
|
+
end += 1
|
|
185
|
+
|
|
186
|
+
sentence = text[start:end].strip()
|
|
187
|
+
# Truncate if too long
|
|
188
|
+
if len(sentence) > 150:
|
|
189
|
+
sentence = sentence[:150] + "..."
|
|
190
|
+
return sentence
|
|
191
|
+
|
|
192
|
+
def _apply_annotations(self, text: str, annotations: list[Annotation]) -> str:
|
|
193
|
+
"""Insert annotation markers into the text."""
|
|
194
|
+
if not annotations:
|
|
195
|
+
return text
|
|
196
|
+
|
|
197
|
+
annotated = text
|
|
198
|
+
|
|
199
|
+
# Add annotations as footnotes at the end
|
|
200
|
+
if annotations:
|
|
201
|
+
annotated += "\n\n---\n"
|
|
202
|
+
annotated += "**GhostCode Annotations:**\n\n"
|
|
203
|
+
|
|
204
|
+
for i, ann in enumerate(annotations, 1):
|
|
205
|
+
icon = {
|
|
206
|
+
"naming_advice": "~~",
|
|
207
|
+
"stub_speculation": "!!",
|
|
208
|
+
"new_symbol": "++",
|
|
209
|
+
}.get(ann.type, "**")
|
|
210
|
+
|
|
211
|
+
annotated += f"[{icon} {i}] {ann.note}\n"
|
|
212
|
+
annotated += f" Context: \"{ann.original_text[:80]}...\"\n\n"
|
|
213
|
+
|
|
214
|
+
return annotated
|