ghostcode 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,374 @@
1
+ """Code revealer — restores ghost tokens to original names.
2
+
3
+ Handles both pure code files and AI responses containing mixed content
4
+ (prose + code blocks). When processing AI responses, separates zones
5
+ and applies different restoration strategies to each.
6
+
7
+ Zone types:
8
+ CODE_BLOCK — fenced code blocks (```lang ... ```)
9
+ INLINE_CODE — backtick spans in prose (`gv_001`)
10
+ PROSE — natural language text
11
+ """
12
+
13
+ import re
14
+ from dataclasses import dataclass, field
15
+ from enum import Enum
16
+
17
+ from ..mapping.ghost_map import GhostMap
18
+
19
+
20
class ZoneType(Enum):
    """Kind of segment an AI response is split into (see Zone)."""
    PROSE = "prose"              # natural-language text
    CODE_BLOCK = "code_block"    # fenced ``` ... ``` block
    INLINE_CODE = "inline_code"  # single-backtick span inside prose
24
+
25
+
26
@dataclass
class Zone:
    """A segment of an AI response with a specific type.

    `start`/`end` are character offsets of the segment within the full
    response text, as produced by re.Match.start()/end() (end exclusive).
    """
    type: ZoneType   # which kind of segment this is
    content: str     # segment text, without fences/backticks for code zones
    start: int       # offset of the segment in the original response
    end: int         # offset one past the segment's last character
    language: str = ""  # for code blocks: the fence's language tag (may be "")
34
+
35
+
36
@dataclass
class RevealResult:
    """Result of revealing an AI response."""
    # De-ghosted code blocks only, joined by blank lines ("" if none).
    restored_code: str
    # The full response (prose + code) with ghost tokens restored.
    restored_explanation: str
    # Count of distinct tokens/spans replaced (not individual occurrences).
    symbols_restored: int
    # Ghost-style tokens found in the code that are not in the map.
    new_symbols: list[str] = field(default_factory=list)
    # import/#include-style lines found in the code blocks.
    new_dependencies: list[str] = field(default_factory=list)
    # Reserved for change annotations; not populated by CodeRevealer here.
    annotations: list[dict] = field(default_factory=list)
45
+
46
+
47
# Ghost token pattern — matches any ghost token format:
# 'g' + one kind letter + '_' + exactly three digits (e.g. gv_001, gf_002).
# From the examples in the module docstring, 'v' is a variable and 'f' a
# function; the other letters (t/c/s/n/m/x) presumably map to other symbol
# kinds — confirm against the mapping module.
GHOST_TOKEN_PATTERN = re.compile(r"\bg[vftcsnmx]_\d{3}\b")

# Pattern for token with common suffixes in prose
# (allows forms like "gv_001's" and "gv_001-related", "gf_001/gf_002").
# NOTE(review): not referenced in this module — kept, presumably as public
# API for other modules; verify before removing.
GHOST_TOKEN_PROSE_PATTERN = re.compile(
    r"\bg[vftcsnmx]_\d{3}(?='s|[-/]|(?:\b))"
)
54
+
55
+
56
class CodeRevealer:
    """Restores ghost tokens to original names in code and AI responses."""

    # Comment-wrapper formats, tried most-specific first, used when restoring
    # anonymized comments recorded in the ghost map's metadata.
    _COMMENT_WRAPPERS = ('"""{}"""', "'''{}'''", "/* {} */", "// {}", "# {}")

    def __init__(self, ghost_map: GhostMap):
        self._map = ghost_map
        self._forward = ghost_map.forward_map()
        # Sort by token length descending so plain str.replace cannot
        # clobber a longer token via a shorter substring token.
        self._sorted_tokens = sorted(
            self._forward.keys(), key=len, reverse=True
        )

    def reveal_code(self, ghost_source: str,
                    original_ghost: str | None = None,
                    diff_result=None) -> tuple[str, int, list[str]]:
        """Reveal a pure code file (no prose).

        Args:
            ghost_source: The AI-modified ghost code to reveal.
            original_ghost: The original ghost file sent to AI (optional).
                If provided, changed blocks are annotated with descriptive
                comments like '# --- AI MODIFIED: desc ---'.
            diff_result: Optional DiffResult from DiffAnalyzer (unused, reserved).

        Returns:
            Tuple of (restored_source, count_restored, new_symbols).
            count_restored counts distinct tokens/comments replaced, not
            individual occurrences. new_symbols lists ghost-style tokens
            that are absent from the map; they are renamed to 'NEW_<token>'
            in the restored source so they stand out.
        """
        restored = ghost_source
        count = 0

        for token in self._sorted_tokens:
            if token in restored:
                restored = restored.replace(token, self._forward[token])
                count += 1

        # Restore anonymized comments from map metadata.
        restored, comment_count = self._restore_comments(restored)
        count += comment_count

        # Flag tokens the AI invented (not in our map) with a NEW_ prefix.
        new_symbols = self._detect_new_symbols(restored)
        for sym in new_symbols:
            restored = restored.replace(sym, f"NEW_{sym}")

        # Annotate new/changed lines if original ghost file is provided
        if original_ghost is not None:
            restored = self._annotate_new_lines(restored, original_ghost)

        return restored, count, new_symbols

    def _restore_comments(self, text: str) -> tuple[str, int]:
        """Restore anonymized comments recorded in the map metadata.

        Comment tokens appear wrapped in comment syntax (triple-quoted,
        /* */, //, or #). Each wrapper is tried most-specific-first, with
        a final fall-back to the bare token.

        Returns:
            Tuple of (restored_text, number_of_replacements).
        """
        # NOTE(review): reaches into GhostMap's private _metadata — a public
        # accessor on GhostMap would be cleaner; confirm none exists.
        original_comments = self._map._metadata.get("original_comments", {})
        restored = text
        count = 0
        for comment_token, original_text in original_comments.items():
            for wrapper in self._COMMENT_WRAPPERS:
                wrapped = wrapper.format(comment_token)
                if wrapped in restored:
                    restored = restored.replace(wrapped, original_text)
                    count += 1
                    break
            else:
                # No wrapped form present — fall back to the bare token.
                if comment_token in restored:
                    restored = restored.replace(comment_token, original_text)
                    count += 1
        return restored, count

    def _reveal_original(self, original_ghost: str) -> str:
        """Reveal the original ghost file so diffs compare like-for-like."""
        revealed = original_ghost
        for token in self._sorted_tokens:
            if token in revealed:
                revealed = revealed.replace(token, self._forward[token])
        # Restore comments in the original too; the count is irrelevant here.
        revealed, _ = self._restore_comments(revealed)
        return revealed

    def _detect_comment_style(self, code: str) -> str:
        """Auto-detect the line-comment prefix from surrounding code."""
        for line in code.splitlines():
            stripped = line.strip()
            if stripped.startswith("//"):
                return "//"
            # '#include' is a C/C++ preprocessor directive, not a comment.
            if stripped.startswith("#") and not stripped.startswith("#include"):
                return "#"
        # No comments seen — fall back to heuristics on common keywords.
        if "def " in code or "import " in code:
            return "#"
        if "#include" in code or "int main" in code or "::" in code:
            return "//"
        return "#"

    def _annotate_new_lines(self, revealed: str, original_ghost: str) -> str:
        """Annotate changed blocks with descriptive AI-change comments.

        Compares the revealed code against the original ghost file
        (after revealing it too) and inserts block-level annotations like:
            # --- AI MODIFIED: changed '+' to '-' ---
            # --- AI ADDED: null-safety check ---
        """
        # Local import: presumably avoids an import cycle — TODO confirm.
        from .diff_analyzer import DiffAnalyzer

        original_revealed = self._reveal_original(original_ghost)

        analyzer = DiffAnalyzer()
        blocks = analyzer.detect_change_blocks(original_revealed, revealed)

        if not blocks:
            return revealed

        comment_prefix = self._detect_comment_style(revealed)
        lines = revealed.splitlines()

        # Insert annotations backwards to preserve line numbers
        for block in reversed(blocks):
            description = analyzer.describe_change(block)

            if block.block_type == "added":
                label = "AI ADDED"
            elif block.block_type == "deleted":
                label = "AI REMOVED"
            else:
                label = "AI MODIFIED"

            annotation = f"{comment_prefix} --- {label}: {description} ---"

            # Insert annotation above the block's start line.
            # NOTE(review): treats block.start_line as a 0-based index into
            # `lines` — confirm against DiffAnalyzer's convention.
            insert_at = block.start_line
            if insert_at <= len(lines):
                # Match indentation of the first line of the block
                if insert_at < len(lines) and lines[insert_at].strip():
                    indent = len(lines[insert_at]) - len(lines[insert_at].lstrip())
                    annotation = " " * indent + annotation
                lines.insert(insert_at, annotation)

        return "\n".join(lines)

    def reveal_ai_response(self, response: str) -> RevealResult:
        """Reveal a full AI response (prose + code blocks).

        Parses the response into zones, applies zone-specific restoration,
        and produces both restored code and translated explanation.
        """
        zones = self._parse_zones(response)
        restored_parts: list[str] = []
        code_blocks: list[str] = []
        symbols_restored = 0
        all_new: set[str] = set()

        for zone in zones:
            if zone.type == ZoneType.CODE_BLOCK:
                revealed, count, new_syms = self.reveal_code(zone.content)
                code_blocks.append(revealed)
                symbols_restored += count
                # BUG FIX: collect new symbols from reveal_code's return.
                # The old code re-ran _detect_new_symbols on the revealed
                # blocks afterwards, but reveal_code has already renamed
                # those tokens to NEW_g?_nnn, which no longer match
                # GHOST_TOKEN_PATTERN — so new_symbols was always empty.
                all_new.update(new_syms)
                # Reconstruct the fenced block. The regex capture keeps the
                # body's trailing newline, so only append one when missing
                # (the old f"...\n{revealed}\n```" doubled it).
                body = revealed if revealed.endswith("\n") else revealed + "\n"
                restored_parts.append(f"```{zone.language}\n{body}```")

            elif zone.type == ZoneType.INLINE_CODE:
                revealed = self._reveal_inline(zone.content)
                if revealed != zone.content:
                    symbols_restored += 1
                restored_parts.append(f"`{revealed}`")

            else:
                # Prose — apply token replacement with word boundaries
                restored_parts.append(self._reveal_prose(zone.content))

        return RevealResult(
            # Extract just the code blocks for the code output.
            restored_code="\n\n".join(code_blocks) if code_blocks else "",
            restored_explanation="".join(restored_parts),
            symbols_restored=symbols_restored,
            new_symbols=sorted(all_new),
            new_dependencies=self._detect_new_dependencies(code_blocks),
        )

    def _parse_zones(self, text: str) -> list[Zone]:
        """Parse AI response into typed zones."""
        zones: list[Zone] = []
        pos = 0

        # Pattern for fenced code blocks
        code_block_pattern = re.compile(
            r"```(\w*)\n(.*?)```", re.DOTALL
        )

        for match in code_block_pattern.finditer(text):
            # Add prose before this code block
            if match.start() > pos:
                prose = text[pos:match.start()]
                # Split prose further into inline code and text
                zones.extend(self._parse_inline_zones(prose, pos))

            # Add the code block
            zones.append(Zone(
                type=ZoneType.CODE_BLOCK,
                content=match.group(2),
                start=match.start(),
                end=match.end(),
                language=match.group(1),
            ))
            pos = match.end()

        # Add remaining prose after last code block
        if pos < len(text):
            zones.extend(self._parse_inline_zones(text[pos:], pos))

        return zones

    def _parse_inline_zones(self, text: str, base_offset: int) -> list[Zone]:
        """Split prose text into PROSE and INLINE_CODE zones."""
        zones: list[Zone] = []
        pos = 0

        for match in re.finditer(r"`([^`]+)`", text):
            # Prose before inline code
            if match.start() > pos:
                zones.append(Zone(
                    type=ZoneType.PROSE,
                    content=text[pos:match.start()],
                    start=base_offset + pos,
                    end=base_offset + match.start(),
                ))

            # Inline code
            zones.append(Zone(
                type=ZoneType.INLINE_CODE,
                content=match.group(1),
                start=base_offset + match.start(),
                end=base_offset + match.end(),
            ))
            pos = match.end()

        # Remaining prose
        if pos < len(text):
            zones.append(Zone(
                type=ZoneType.PROSE,
                content=text[pos:],
                start=base_offset + pos,
                end=base_offset + len(text),
            ))

        return zones

    def _reveal_inline(self, code_span: str) -> str:
        """Reveal tokens in an inline code span (plain substring replace)."""
        result = code_span
        for token in self._sorted_tokens:
            if token in result:
                result = result.replace(token, self._forward[token])
        return result

    def _reveal_prose(self, text: str) -> str:
        """Reveal tokens in prose with word-boundary matching.

        Handles common prose patterns:
            gv_001-related → connectionPool-related
            gv_001's value → connectionPool's value
            gf_001/gf_002 → update_matrix/validate_input
        """
        result = text
        for token in self._sorted_tokens:
            original = self._forward[token]
            # Word boundary match that allows common suffixes
            pattern = re.compile(
                r"\b" + re.escape(token) + r"(?='s|[-/.,;:!?\s\)]|$)"
            )
            result = pattern.sub(original, result)
        return result

    def _detect_new_symbols(self, code: str) -> list[str]:
        """Find ghost-pattern tokens not in our map, sorted."""
        found = set(GHOST_TOKEN_PATTERN.findall(code))
        known = self._map.all_tokens()
        return sorted(found - known)

    def _detect_new_dependencies(self, code_blocks: list[str]) -> list[str]:
        """Collect #include / import / from lines present in the code blocks.

        Note: returns every such line found (duplicates possible), not only
        lines the AI newly introduced — callers diff against the original.
        """
        deps = []
        for block in code_blocks:
            for line in block.split("\n"):
                stripped = line.strip()
                if stripped.startswith(("#include", "import ", "from ")):
                    deps.append(stripped)
        return deps