gptdiff 0.1.21__py3-none-any.whl → 0.1.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gptdiff/applydiff.py ADDED
@@ -0,0 +1,264 @@
1
+ """
2
+ Module: applydiff
3
+
4
+ Contains the function to apply unified git diffs to files on disk.
5
+ """
6
+
7
+ from pathlib import Path
8
+ import re
9
+ import hashlib
10
+
11
# Hunk header, e.g. "@@ -3,7 +3,6 @@"; both ranges are optional so that
# degenerate headers such as "@@ @@" are still accepted.
_HUNK_HEADER_RE = re.compile(r"^@@(?: -(\d+)(?:,(\d+))?)?(?: \+(\d+)(?:,(\d+))?)? @@")


def _file_sha256(filepath):
    """Return the hex SHA-256 digest of the bytes stored at ``filepath``."""
    with open(filepath, "rb") as f:
        return hashlib.sha256(f.read()).hexdigest()


def _is_within_directory(root, candidate):
    """Return True if ``candidate`` lies lexically inside ``root`` (or equals it).

    The comparison is purely lexical (no symlink resolution); it rejects
    absolute paths and ``..`` components that would leave ``root``.
    """
    import os  # local import: this module's top-level imports do not include os

    root = os.path.normcase(os.path.abspath(str(root)))
    candidate = os.path.normcase(os.path.abspath(str(candidate)))
    try:
        return os.path.commonpath([root, candidate]) == root
    except ValueError:
        # Raised e.g. for paths on different drives on Windows.
        return False


def _old_side_lines(hunk_body):
    """Return the text of the context/removal lines of a hunk (the "old" side)."""
    # A completely empty patch line is treated as an empty context line whose
    # leading space was stripped in transit.
    return [pline[1:] for pline in hunk_body if pline[:1] in ("", " ", "-")]


def _locate_unnumbered_hunk(original_lines, old_side, search_from):
    """Find where a hunk without line numbers applies.

    Returns the first index >= ``search_from`` at which every line of
    ``old_side`` matches ``original_lines``. Falls back to ``search_from`` when
    there is nothing to match or no match exists (the caller then reports the
    mismatch while processing the hunk).
    """
    if not old_side:
        return search_from
    last_candidate = len(original_lines) - len(old_side)
    for pos in range(search_from, last_candidate + 1):
        if all(
            original_lines[pos + offset].rstrip("\n") == text
            for offset, text in enumerate(old_side)
        ):
            return pos
    return search_from


def _patch_text(original_text, patch):
    """Apply the hunks of a single-file unified diff to ``original_text``.

    Args:
        original_text: Current content of the file ("" for a new file).
        patch: Diff fragment for one file; lines before the first hunk header
            are ignored.

    Returns:
        The patched text, or None if a hunk cannot be applied (a diagnostic is
        printed in that case).
    """
    original_lines = original_text.splitlines(keepends=True)
    patch_lines = patch.splitlines()
    new_lines = []
    current_index = 0  # next line of original_lines not yet consumed
    i = 0
    while i < len(patch_lines):
        line = patch_lines[i]
        if not line.lstrip().startswith("@@"):
            # Skip non-hunk header lines.
            i += 1
            continue

        header = line.strip()
        m = None
        if header != "@@":
            m = _HUNK_HEADER_RE.match(header)
            if not m:
                print("Invalid hunk header:", line)
                return None

        # Collect the hunk body: everything up to the next hunk header.
        i += 1
        body_start = i
        while i < len(patch_lines) and not patch_lines[i].startswith("@@"):
            i += 1
        body = patch_lines[body_start:i]
        # Trailing blank lines are padding (or stripped trailing context) and
        # never change the result, so drop them instead of failing on them.
        while body and body[-1] == "":
            body.pop()

        old_side = _old_side_lines(body)
        if m is None or m.group(1) is None:
            # Header without line numbers: locate the hunk by its content,
            # never moving backwards over lines that were already emitted.
            hunk_start_index = _locate_unnumbered_hunk(original_lines, old_side, current_index)
        else:
            orig_start = int(m.group(1))
            zero_length = m.group(2) is not None and int(m.group(2)) == 0
            if zero_length and not old_side and orig_start <= len(original_lines):
                # Pure insertion ("-N,0"): the new lines go *after* line N, so
                # the 0-based insertion index is N itself ("-0,0" -> start of file).
                hunk_start_index = orig_start
            else:
                # diff headers are 1-indexed; clamp so "-0,..." cannot go negative.
                hunk_start_index = max(orig_start - 1, 0)

        if hunk_start_index > len(original_lines):
            print("Hunk start index beyond file length")
            return None
        if hunk_start_index < current_index:
            # Going backwards would duplicate lines that were already copied.
            print("Hunk overlaps or precedes the previous hunk")
            return None
        new_lines.extend(original_lines[current_index:hunk_start_index])
        current_index = hunk_start_index

        previous_op = None
        for pline in body:
            op = pline[:1]
            if op in ("", " "):
                # Context line must match exactly.
                expected = pline[1:]
                if current_index >= len(original_lines):
                    print("Context line expected but file ended")
                    return None
                orig_line = original_lines[current_index].rstrip("\n")
                if orig_line != expected:
                    print("Context line mismatch. Expected:", expected, "Got:", orig_line)
                    return None
                new_lines.append(original_lines[current_index])
                current_index += 1
            elif op == "-":
                # Removal line: verify and skip from original.
                expected = pline[1:]
                if current_index >= len(original_lines):
                    print("Removal line expected but file ended")
                    return None
                orig_line = original_lines[current_index].rstrip("\n")
                if orig_line != expected:
                    print("Removal line mismatch. Expected:", expected, "Got:", orig_line)
                    return None
                current_index += 1
            elif op == "+":
                # Addition line: add to new_lines.
                new_lines.append(pline[1:] + "\n")
            elif op == "\\":
                # "\ No newline at end of file" refers to the preceding patch
                # line; only an added line needs its terminator removed.
                if previous_op == "+" and new_lines and new_lines[-1].endswith("\n"):
                    new_lines[-1] = new_lines[-1][:-1]
            else:
                print("Unexpected line in hunk:", pline)
                return None
            previous_op = op

    # Append any remaining lines from the original file.
    new_lines.extend(original_lines[current_index:])
    return "".join(new_lines)


def apply_diff(project_dir, diff_text):
    """
    Applies a unified diff (as generated by git diff) to the files in project_dir
    using pure Python (without calling the external 'patch' command).

    Handles file modifications, new file creation, and file deletions.

    The patch is applied in two phases: first every file's new content is
    computed in memory, and only if all hunks apply are files written or
    deleted. A failing hunk therefore no longer leaves earlier files of the
    same diff half-patched on disk. Paths that would fall outside project_dir
    (absolute paths, '..' components) are rejected.

    Args:
        project_dir: Directory that the paths in the diff are relative to.
        diff_text: Unified diff text, possibly covering several files.

    Returns:
        True if at least one file was modified (or deleted/created) as a result of the patch,
        False otherwise.
    """
    # Parse the diff into per-file patches.
    file_patches = parse_diff_per_file(diff_text)
    if not file_patches:
        print("No file patches found in diff.")
        return False

    # Phase 1: compute every result in memory; nothing is written yet.
    planned = []  # (file_path, target_file, new text or None for a deletion)
    staged = {}   # target_file -> latest planned text (None once deleted)
    for file_path, patch in file_patches:
        target_file = Path(project_dir) / file_path
        if not _is_within_directory(project_dir, target_file):
            print(f"Refusing to patch path outside project directory: {target_file}")
            return False
        if "+++ /dev/null" in patch:
            # Deletion patch.
            staged[target_file] = None
            planned.append((file_path, target_file, None))
            continue
        if target_file in staged:
            # Same file patched earlier in this diff: build on that result.
            base_text = staged[target_file] or ""
        elif target_file.exists():
            base_text = target_file.read_text(encoding="utf8")
        else:
            # If the file doesn't exist, treat it as empty.
            base_text = ""
        new_text = _patch_text(base_text, patch)
        if new_text is None:
            print(f"Failed to apply patch to file: {target_file}")
            return False
        staged[target_file] = new_text
        planned.append((file_path, target_file, new_text))

    # Record original file hashes.
    original_hashes = {}
    for file_path, target_file, _ in planned:
        if target_file.exists():
            original_hashes[file_path] = _file_sha256(target_file)
        else:
            original_hashes[file_path] = None

    # Phase 2: commit the planned changes to disk.
    for file_path, target_file, new_text in planned:
        if new_text is None:
            if target_file.exists():
                target_file.unlink()
            if target_file.exists():
                print(f"Failed to delete file: {target_file}")
                return False
        else:
            # Ensure parent directories exist before writing the file.
            target_file.parent.mkdir(parents=True, exist_ok=True)
            target_file.write_text(new_text, encoding="utf8")

    # Verify that at least one file was changed by comparing hashes.
    any_change = False
    for file_path, target_file, new_text in planned:
        if new_text is None:
            if target_file.exists():
                print(f"Expected deletion but file still exists: {target_file}")
                return False
            any_change = True
        elif not target_file.exists():
            print(f"Expected modification or creation but file is missing: {target_file}")
            return False
        elif original_hashes.get(file_path) != _file_sha256(target_file):
            any_change = True
        else:
            print(f"No change detected in file: {target_file}")

    if not any_change:
        print("Patch applied but no file modifications detected.")
        return False
    return True
173
+
174
def parse_diff_per_file(diff_text):
    """Parse unified diff text into individual file patches.

    Splits a multi-file diff into per-file entries for processing. Handles:
    - File creations (--- /dev/null)
    - File deletions (+++ /dev/null)
    - Standard modifications

    Two splitting strategies are used:
    - If any line looks like a git header ("diff --git a/x b/x", or the bare
      "a/x b/x" form), files are split on those header lines.
    - Otherwise files are split on "--- a/<path>" (or "-- a/<path>") lines.

    In both strategies a later "+++ <path>" line overrides the file path
    (unless it is /dev/null). When a "deleted file mode" header line is seen
    and the fragment has no "+++ /dev/null" line, one is appended so that
    callers can recognise the fragment as a deletion.

    Args:
        diff_text: Unified diff string as generated by `git diff`

    Returns:
        List of tuples (file_path, patch) where:
        - file_path: Relative path to modified file
        - patch: Full diff fragment for this file

    Note:
        Uses 'b/' prefix detection from git diffs to determine target paths
    """
    header_re = re.compile(r'^(?:diff --git\s+)?(a/[^ ]+)\s+(b/[^ ]+)\s*$', re.MULTILINE)
    header_line_re = re.compile(r'^-{2,3}\s+a/(.+)$')
    lines = diff_text.splitlines()

    if any(header_re.match(line) for line in lines):
        # Header-based strategy.
        def path_from_header(line):
            """Return the target path if ``line`` starts a new file, else None."""
            m = header_re.match(line)
            if not m:
                return None
            file_to = m.group(2)  # e.g. "b/index.html"
            return file_to[2:] if file_to.startswith("b/") else file_to
    else:
        # Fallback strategy: detect file headers starting with '--- a/' or '-- a/'
        def path_from_header(line):
            """Return the source path if ``line`` starts a new file, else None."""
            m = header_line_re.match(line)
            return m.group(1).strip() if m else None

    diffs = []

    def flush(file_path, patch_lines, is_deletion):
        """Store the fragment collected so far, marking deletions explicitly."""
        if file_path is None or not patch_lines:
            return
        # The same check is used at every flush point so that the last file in
        # a diff is treated exactly like the others.
        if is_deletion and not any(l.startswith("+++ /dev/null") for l in patch_lines):
            patch_lines.append("+++ /dev/null")
        diffs.append((file_path, "\n".join(patch_lines)))

    current_lines = []
    current_file = None
    deletion_mode = False
    for line in lines:
        header_path = path_from_header(line)
        if header_path is not None:
            flush(current_file, current_lines, deletion_mode)
            current_lines = [line]
            deletion_mode = False
            current_file = header_path
            continue

        current_lines.append(line)
        # Only a real extended-header line counts. Hunk content always starts
        # with ' ', '+', '-' or '\', so a patch whose *text* merely mentions
        # "deleted file mode" is no longer mistaken for a deletion.
        if line.startswith("deleted file mode"):
            deletion_mode = True
        if line.startswith("+++ "):
            parts = line.split()
            if len(parts) >= 2:
                file_to = parts[1].strip()
                if file_to != "/dev/null":
                    current_file = file_to[2:] if file_to.startswith(("a/", "b/")) else file_to

    flush(current_file, current_lines, deletion_mode)
    return diffs
263
+
264
+
gptdiff/gptdiff.py CHANGED
@@ -3,29 +3,38 @@ from pathlib import Path
3
3
  import subprocess
4
4
  import hashlib
5
5
  import re
6
-
6
+ import time
7
+ import os
8
+ import json
9
+ import subprocess
10
+ import sys
11
+ import fnmatch
12
+ import argparse
13
+ import pkgutil
14
+ import contextvars
15
+ from pkgutil import get_data
16
+ import threading
7
17
 
8
18
  import openai
9
19
  from openai import OpenAI
10
-
11
20
  import tiktoken
12
21
  import time
13
-
14
22
  import os
15
23
  import json
16
24
  import subprocess
17
- from pathlib import Path
18
25
  import sys
19
26
  import fnmatch
20
27
  import argparse
21
28
  import pkgutil
22
- import re
23
29
  import contextvars
24
- from ai_agent_toolbox import MarkdownParser, MarkdownPromptFormatter, Toolbox, FlatXMLParser, FlatXMLPromptFormatter
25
- import threading
26
30
  from pkgutil import get_data
31
+ import threading
32
+ from ai_agent_toolbox import MarkdownParser, MarkdownPromptFormatter, Toolbox, FlatXMLParser, FlatXMLPromptFormatter
33
+ from .applydiff import apply_diff, parse_diff_per_file
27
34
 
35
+ VERBOSE = False
28
36
  diff_context = contextvars.ContextVar('diffcontent', default="")
37
+
29
38
  def create_diff_toolbox():
30
39
  toolbox = Toolbox()
31
40
 
@@ -97,7 +106,9 @@ def color_code_diff(diff_text: str) -> str:
97
106
 
98
107
  def load_gitignore_patterns(gitignore_path):
99
108
  with open(gitignore_path, 'r') as f:
100
- patterns = [line.strip() for line in f if line.strip() and not line.startswith('#')]
109
+ patterns = [
110
+ line.strip() for line in f if line.strip() and not line.startswith('#')
111
+ ]
101
112
  return patterns
102
113
 
103
114
  def is_ignored(filepath, gitignore_patterns):
@@ -165,7 +176,7 @@ def load_project_files(project_dir, cwd):
165
176
  Prints skipped files to stdout for visibility
166
177
  """
167
178
  ignore_paths = [Path(cwd) / ".gitignore", Path(cwd) / ".gptignore"]
168
- gitignore_patterns = [".gitignore", "diff.patch", "prompt.txt", ".gptignore", "*.pdf", "*.docx", ".git", "*.orig", "*.rej", "*.diff"]
179
+ gitignore_patterns = [".gitignore", "diff.patch", "prompt.txt", ".*", ".gptignore", "*.pdf", "*.docx", ".git", "*.orig", "*.rej", "*.diff"]
169
180
 
170
181
  for p in ignore_paths:
171
182
  if p.exists():
@@ -175,14 +186,15 @@ def load_project_files(project_dir, cwd):
175
186
  project_files = []
176
187
  for file in list_files_and_dirs(project_dir, gitignore_patterns):
177
188
  if os.path.isfile(file):
178
- try:
179
- with open(file, 'r') as f:
180
- content = f.read()
189
+ try:
190
+ with open(file, 'r') as f:
191
+ content = f.read()
192
+ if VERBOSE:
181
193
  print(file)
182
- project_files.append((file, content))
183
- except UnicodeDecodeError:
184
- print(f"Skipping file {file} due to UnicodeDecodeError")
185
- continue
194
+ project_files.append((file, content))
195
+ except UnicodeDecodeError:
196
+ print(f"Skipping file {file} due to UnicodeDecodeError")
197
+ continue
186
198
 
187
199
  print("")
188
200
  return project_files
@@ -194,37 +206,54 @@ def load_prepend_file(file):
194
206
  # Function to call GPT-4 API and calculate the cost
195
207
  def call_llm_for_diff(system_prompt, user_prompt, files_content, model, temperature=0.7, max_tokens=30000, api_key=None, base_url=None):
196
208
  enc = tiktoken.get_encoding("o200k_base")
209
+
210
+ # Use colors in print statements
211
+ red = "\033[91m"
212
+ green = "\033[92m"
213
+ reset = "\033[0m"
197
214
  start_time = time.time()
198
215
 
199
216
  parser = MarkdownParser()
200
217
  formatter = MarkdownPromptFormatter()
201
218
  toolbox = create_diff_toolbox()
202
219
  tool_prompt = formatter.usage_prompt(toolbox)
203
- system_prompt += "\n"+tool_prompt
220
+ system_prompt += "\n" + tool_prompt
204
221
 
205
222
  if 'gemini' in model:
206
- user_prompt = system_prompt+"\n"+user_prompt
223
+ user_prompt = system_prompt + "\n" + user_prompt
207
224
 
208
225
  messages = [
209
- {"role": "system", "content": system_prompt},
210
- {"role": "user", "content": user_prompt + "\n"+files_content},
226
+ {"role": "system", "content": f"{green}{system_prompt}{reset}"},
227
+ {"role": "user", "content": user_prompt + "\n" + files_content},
211
228
  ]
212
- print("Using", model)
213
- print("SYSTEM PROMPT")
214
- print(system_prompt)
215
- print("USER PROMPT")
216
- print(user_prompt, "+", len(enc.encode(files_content)), "tokens of file content")
229
+ if VERBOSE:
230
+ print(f"{green}Using {model}{reset}")
231
+ print(f"{green}SYSTEM PROMPT{reset}")
232
+ print(system_prompt)
233
+ print(f"{green}USER PROMPT{reset}")
234
+ print(user_prompt, "+", len(enc.encode(files_content)), "tokens of file content")
235
+ else:
236
+ print("Generating diff...")
217
237
 
218
- if api_key is None:
238
+ if not api_key:
219
239
  api_key = os.getenv('GPTDIFF_LLM_API_KEY')
220
- if base_url is None:
240
+ if not base_url:
221
241
  base_url = os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/")
242
+ base_url = base_url or "https://nano-gpt.com/api/v1/"
243
+
222
244
  client = OpenAI(api_key=api_key, base_url=base_url)
223
245
  response = client.chat.completions.create(model=model,
224
246
  messages=messages,
225
247
  max_tokens=max_tokens,
226
248
  temperature=temperature)
227
249
 
250
+ if VERBOSE:
251
+ print("RESPONSE RAW-------------")
252
+ print(response.choices[0].message.content.strip())
253
+ print("/RESPONSE RAW-------------")
254
+ else:
255
+ print("Diff generated.")
256
+
228
257
  prompt_tokens = response.usage.prompt_tokens
229
258
  completion_tokens = response.usage.completion_tokens
230
259
  total_tokens = response.usage.total_tokens
@@ -345,7 +374,8 @@ def smartapply(diff_text, files, model=None, api_key=None, base_url=None):
345
374
  del files[path]
346
375
  else:
347
376
  updated = call_llm_for_apply_with_think_tool_available(path, original, patch, model, api_key=api_key, base_url=base_url)
348
- files[path] = updated.strip()
377
+ cleaned = strip_bad_output(updated, original)
378
+ files[path] = cleaned
349
379
 
350
380
  threads = []
351
381
 
@@ -360,172 +390,6 @@ def smartapply(diff_text, files, model=None, api_key=None, base_url=None):
360
390
 
361
391
  return files
362
392
 
363
- def apply_diff(project_dir, diff_text):
364
- """
365
- Applies a unified diff (as generated by git diff) to the files in project_dir
366
- using pure Python (without calling the external 'patch' command).
367
-
368
- Handles file modifications, new file creation, and file deletions.
369
-
370
- Returns:
371
- True if at least one file was modified (or deleted/created) as a result of the patch,
372
- False otherwise.
373
- """
374
- from pathlib import Path
375
- import re, hashlib
376
-
377
- def file_hash(filepath):
378
- h = hashlib.sha256()
379
- with open(filepath, "rb") as f:
380
- h.update(f.read())
381
- return h.hexdigest()
382
-
383
- def apply_patch_to_file(file_path, patch):
384
- """
385
- Applies a unified diff patch (for a single file) to file_path.
386
-
387
- Returns True if the patch was applied successfully, False otherwise.
388
- """
389
- # Read the original file lines; if the file doesn't exist, treat it as empty.
390
- if file_path.exists():
391
- original_lines = file_path.read_text(encoding="utf8").splitlines(keepends=True)
392
- else:
393
- original_lines = []
394
- new_lines = []
395
- current_index = 0
396
-
397
- patch_lines = patch.splitlines()
398
- # Regex for a hunk header, e.g., @@ -3,7 +3,6 @@
399
- hunk_header_re = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")
400
- i = 0
401
- while i < len(patch_lines):
402
- line = patch_lines[i]
403
- if line.startswith("@@"):
404
- m = hunk_header_re.match(line)
405
- if not m:
406
- print("Invalid hunk header:", line)
407
- return False
408
- orig_start = int(m.group(1))
409
- # orig_len = int(m.group(2)) if m.group(2) else 1 # not used explicitly
410
- # new_start = int(m.group(3))
411
- # new_len = int(m.group(4)) if m.group(4) else 1
412
-
413
- # Copy unchanged lines before the hunk.
414
- hunk_start_index = orig_start - 1 # diff headers are 1-indexed
415
- if hunk_start_index > len(original_lines):
416
- print("Hunk start index beyond file length")
417
- return False
418
- new_lines.extend(original_lines[current_index:hunk_start_index])
419
- current_index = hunk_start_index
420
-
421
- i += 1
422
- # Process the hunk lines until the next hunk header.
423
- while i < len(patch_lines) and not patch_lines[i].startswith("@@"):
424
- pline = patch_lines[i]
425
- if pline.startswith(" "):
426
- # Context line must match exactly.
427
- expected = pline[1:]
428
- if current_index >= len(original_lines):
429
- print("Context line expected but file ended")
430
- return False
431
- orig_line = original_lines[current_index].rstrip("\n")
432
- if orig_line != expected:
433
- print("Context line mismatch. Expected:", expected, "Got:", orig_line)
434
- return False
435
- new_lines.append(original_lines[current_index])
436
- current_index += 1
437
- elif pline.startswith("-"):
438
- # Removal line: verify and skip from original.
439
- expected = pline[1:]
440
- if current_index >= len(original_lines):
441
- print("Removal line expected but file ended")
442
- return False
443
- orig_line = original_lines[current_index].rstrip("\n")
444
- if orig_line != expected:
445
- print("Removal line mismatch. Expected:", expected, "Got:", orig_line)
446
- return False
447
- current_index += 1
448
- elif pline.startswith("+"):
449
- # Addition line: add to new_lines.
450
- new_lines.append(pline[1:] + "\n")
451
- else:
452
- print("Unexpected line in hunk:", pline)
453
- return False
454
- i += 1
455
- else:
456
- # Skip non-hunk header lines.
457
- i += 1
458
-
459
- # Append any remaining lines from the original file.
460
- new_lines.extend(original_lines[current_index:])
461
- # Ensure parent directories exist before writing the file.
462
- file_path.parent.mkdir(parents=True, exist_ok=True)
463
- # Write the new content back to the file.
464
- file_path.write_text("".join(new_lines), encoding="utf8")
465
- return True
466
-
467
- # Parse the diff into per-file patches.
468
- file_patches = parse_diff_per_file(diff_text)
469
- if not file_patches:
470
- print("No file patches found in diff.")
471
- return False
472
-
473
- # Record original file hashes.
474
- original_hashes = {}
475
- for file_path, _ in file_patches:
476
- target_file = Path(project_dir) / file_path
477
- if target_file.exists():
478
- original_hashes[file_path] = file_hash(target_file)
479
- else:
480
- original_hashes[file_path] = None
481
-
482
- any_change = False
483
- # Process each file patch.
484
- for file_path, patch in file_patches:
485
- target_file = Path(project_dir) / file_path
486
- if "+++ /dev/null" in patch:
487
- # Deletion patch: delete the file if it exists.
488
- if target_file.exists():
489
- target_file.unlink()
490
- if not target_file.exists():
491
- any_change = True
492
- else:
493
- print(f"Failed to delete file: {target_file}")
494
- return False
495
- else:
496
- # Modification or new file creation.
497
- success = apply_patch_to_file(target_file, patch)
498
- if not success:
499
- print(f"Failed to apply patch to file: {target_file}")
500
- return False
501
-
502
- # Verify that at least one file was changed by comparing hashes.
503
- for file_path, patch in file_patches:
504
- target_file = Path(project_dir) / file_path
505
- if "+++ /dev/null" in patch:
506
- if not target_file.exists():
507
- any_change = True
508
- else:
509
- print(f"Expected deletion but file still exists: {target_file}")
510
- return False
511
- else:
512
- old_hash = original_hashes.get(file_path)
513
- if target_file.exists():
514
- new_hash = file_hash(target_file)
515
- if old_hash != new_hash:
516
- any_change = True
517
- else:
518
- print(f"No change detected in file: {target_file}")
519
- else:
520
- print(f"Expected modification or creation but file is missing: {target_file}")
521
- return False
522
-
523
- if not any_change:
524
- print("Patch applied but no file modifications detected.")
525
- return False
526
- return True
527
-
528
-
529
393
  def parse_arguments():
530
394
  parser = argparse.ArgumentParser(description='Generate and optionally apply git diffs using GPT-4.')
531
395
  parser.add_argument('prompt', type=str, help='Prompt that runs on the codebase.')
@@ -541,9 +405,8 @@ def parse_arguments():
541
405
  parser.add_argument('--max_tokens', type=int, default=30000, help='Temperature parameter for model creativity (0.0 to 2.0)')
542
406
  parser.add_argument('--model', type=str, default=None, help='Model to use for the API call.')
543
407
  parser.add_argument('--applymodel', type=str, default=None, help='Model to use for applying the diff. Defaults to the value of --model if not specified.')
544
-
545
408
  parser.add_argument('--nowarn', action='store_true', help='Disable large token warning')
546
-
409
+ parser.add_argument('--verbose', action='store_true', help='Enable verbose output with detailed information')
547
410
  return parser.parse_args()
548
411
 
549
412
  def absolute_to_relative(absolute_path):
@@ -551,95 +414,8 @@ def absolute_to_relative(absolute_path):
551
414
  relative_path = os.path.relpath(absolute_path, cwd)
552
415
  return relative_path
553
416
 
554
- def parse_diff_per_file(diff_text):
555
- """Parse unified diff text into individual file patches.
556
-
557
- Splits a multi-file diff into per-file entries for processing. Handles:
558
- - File creations (+++ /dev/null)
559
- - File deletions (--- /dev/null)
560
- - Standard modifications
561
-
562
- Args:
563
- diff_text: Unified diff string as generated by `git diff`
564
-
565
- Returns:
566
- List of tuples (file_path, patch) where:
567
- - file_path: Relative path to modified file
568
- - patch: Full diff fragment for this file
569
-
570
- Note:
571
- Uses 'b/' prefix detection from git diffs to determine target paths
572
- """
573
- header_re = re.compile(r'^(?:diff --git\s+)?(a/[^ ]+)\s+(b/[^ ]+)\s*$', re.MULTILINE)
574
- lines = diff_text.splitlines()
575
-
576
- # Check if any header line exists.
577
- if not any(header_re.match(line) for line in lines):
578
- # Fallback strategy: detect file headers starting with '--- a/' or '-- a/'
579
- diffs = []
580
- current_lines = []
581
- current_file = None
582
- deletion_mode = False
583
- header_line_re = re.compile(r'^-{2,3}\s+a/(.+)$')
584
-
585
- for line in lines:
586
- if header_line_re.match(line):
587
- if current_file is not None and current_lines:
588
- if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
589
- current_lines.append("+++ /dev/null")
590
- diffs.append((current_file, "\n".join(current_lines)))
591
- current_lines = [line]
592
- deletion_mode = False
593
- file_from = header_line_re.match(line).group(1).strip()
594
- current_file = file_from
595
- else:
596
- current_lines.append(line)
597
- if "deleted file mode" in line:
598
- deletion_mode = True
599
- if line.startswith("+++ "):
600
- parts = line.split()
601
- if len(parts) >= 2:
602
- file_to = parts[1].strip()
603
- if file_to != "/dev/null":
604
- current_file = file_to[2:] if (file_to.startswith("a/") or file_to.startswith("b/")) else file_to
605
- if current_file is not None and current_lines:
606
- if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
607
- current_lines.append("+++ /dev/null")
608
- diffs.append((current_file, "\n".join(current_lines)))
609
- return diffs
610
- else:
611
- # Use header-based strategy.
612
- diffs = []
613
- current_lines = []
614
- current_file = None
615
- deletion_mode = False
616
- for line in lines:
617
- m = header_re.match(line)
618
- if m:
619
- if current_file is not None and current_lines:
620
- if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
621
- current_lines.append("+++ /dev/null")
622
- diffs.append((current_file, "\n".join(current_lines)))
623
- current_lines = [line]
624
- deletion_mode = False
625
- file_from = m.group(1) # e.g. "a/index.html"
626
- file_to = m.group(2) # e.g. "b/index.html"
627
- current_file = file_to[2:] if file_to.startswith("b/") else file_to
628
- else:
629
- current_lines.append(line)
630
- if "deleted file mode" in line:
631
- deletion_mode = True
632
- if line.startswith("+++ "):
633
- parts = line.split()
634
- if len(parts) >= 2:
635
- file_to = parts[1].strip()
636
- if file_to != "/dev/null":
637
- current_file = file_to[2:] if (file_to.startswith("a/") or file_to.startswith("b/")) else file_to
638
- if current_file is not None and current_lines:
639
- if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
640
- current_lines.append("+++ /dev/null")
641
- diffs.append((current_file, "\n".join(current_lines)))
642
- return diffs
417
def colorize_warning_warning(message):
    """Return ``message`` wrapped in ANSI escapes for bold, bright-red text.

    The result ends with the ANSI reset sequence so that text printed after it
    is not affected.
    """
    bright_red = "\033[91m"
    bold = "\033[1m"
    reset = "\033[0m"
    return bright_red + bold + format(message) + reset
643
419
 
644
420
  def call_llm_for_apply_with_think_tool_available(file_path, original_content, file_diff, model, api_key=None, base_url=None, extra_prompt=None, max_tokens=30000):
645
421
  parser = FlatXMLParser("think")
@@ -719,9 +495,9 @@ Diff to apply:
719
495
  {"role": "system", "content": system_prompt},
720
496
  {"role": "user", "content": user_prompt},
721
497
  ]
722
- if api_key is None:
498
+ if not api_key:
723
499
  api_key = os.getenv('GPTDIFF_LLM_API_KEY')
724
- if base_url is None:
500
+ if not base_url:
725
501
  base_url = os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/")
726
502
  client = OpenAI(api_key=api_key, base_url=base_url)
727
503
  start_time = time.time()
@@ -733,8 +509,11 @@ Diff to apply:
733
509
  elapsed = time.time() - start_time
734
510
  minutes, seconds = divmod(int(elapsed), 60)
735
511
  time_str = f"{minutes}m {seconds}s" if minutes else f"{seconds}s"
736
- print(f"Smartapply time: {time_str}")
737
- print("-" * 40)
512
+ if VERBOSE:
513
+ print(f"Smartapply time: {time_str}")
514
+ print("-" * 40)
515
+ else:
516
+ print(f"Smartapply completed in {time_str}")
738
517
  return full_response
739
518
 
740
519
  def build_environment_from_filelist(file_list, cwd):
@@ -762,7 +541,7 @@ def smart_apply_patch(project_dir, diff_text, user_prompt, args):
762
541
  parsed_diffs = parse_diff_per_file(diff_text)
763
542
  print("Found", len(parsed_diffs), "files in diff, processing smart apply concurrently:")
764
543
  if len(parsed_diffs) == 0:
765
- print("\033[1;33mThere were no entries in this diff. The LLM may have returned something invalid.\033[0m")
544
+ print(colorize_warning_warning("There were no entries in this diff. The LLM may have returned something invalid."))
766
545
  if args.beep:
767
546
  print("\a")
768
547
  return
@@ -770,32 +549,50 @@ def smart_apply_patch(project_dir, diff_text, user_prompt, args):
770
549
 
771
550
  def process_file(file_path, file_diff):
772
551
  full_path = Path(project_dir) / file_path
773
- print(f"Processing file: {file_path}")
552
+ if VERBOSE:
553
+ print(f"Processing file: {file_path}")
774
554
  if '+++ /dev/null' in file_diff:
775
555
  if full_path.exists():
776
556
  full_path.unlink()
777
557
  print(f"\033[1;32mDeleted file {file_path}.\033[0m")
778
558
  else:
779
- print(f"\033[1;33mFile {file_path} not found - skipping deletion\033[0m")
559
+ print(colorize_warning_warning(f"File {file_path} not found - skipping deletion"))
780
560
  return
781
- original_content = ''
782
- if full_path.exists():
783
- try:
784
- original_content = full_path.read_text()
785
- except UnicodeDecodeError:
786
- print(f"Skipping binary file {file_path}")
787
- return
788
- if not hasattr(args, "applymodel") or args.applymodel is None:
789
- args.applymodel = args.model
790
- if args.applymodel is None:
791
- args.applymodel = os.getenv("GPTDIFF_MODEL")
561
+
562
+ try:
563
+ original_content = full_path.read_text()
564
+ except (UnicodeDecodeError, IOError):
565
+ print(f"Skipping file {file_path} due to read error")
566
+ return
567
+
568
+ # Use SMARTAPPLY-specific environment variables if set, otherwise fallback.
569
+ smart_apply_model = os.getenv("GPTDIFF_SMARTAPPLY_MODEL")
570
+ if smart_apply_model and smart_apply_model.strip():
571
+ model = smart_apply_model
572
+ elif hasattr(args, "applymodel") and args.applymodel:
573
+ model = args.applymodel
574
+ else:
575
+ model = os.getenv("GPTDIFF_MODEL", "deepseek-reasoner")
576
+
577
+ smart_api_key = os.getenv("GPTDIFF_SMARTAPPLY_API_KEY")
578
+ if smart_api_key and smart_api_key.strip():
579
+ api_key = smart_api_key
580
+ else:
581
+ api_key = os.getenv("GPTDIFF_LLM_API_KEY")
582
+
583
+ smart_base_url = os.getenv("GPTDIFF_SMARTAPPLY_BASE_URL")
584
+ if smart_base_url and smart_base_url.strip():
585
+ base_url = smart_base_url
586
+ else:
587
+ base_url = os.getenv("GPTDIFF_LLM_BASE_URL", "https://nano-gpt.com/api/v1/")
792
588
 
793
589
  print("-" * 40)
794
- print("Running smartapply with", args.applymodel,"on",file_path)
590
+ print("Running smartapply with", model, "on", file_path)
795
591
  print("-" * 40)
796
592
  try:
797
593
  updated_content = call_llm_for_apply_with_think_tool_available(
798
- file_path, original_content, file_diff, args.applymodel,
594
+ file_path, original_content, file_diff, model,
595
+ api_key=api_key, base_url=base_url,
799
596
  extra_prompt=f"This changeset is from the following instructions:\n{user_prompt}",
800
597
  max_tokens=args.max_tokens)
801
598
  if updated_content.strip() == "":
@@ -844,11 +641,13 @@ def save_files(files_dict, target_directory):
844
641
  print(f"Saved: {full_path}")
845
642
 
846
643
  def main():
644
+ global VERBOSE
847
645
  # Adding color support for Windows CMD
848
646
  if os.name == 'nt':
849
647
  os.system('color')
850
648
 
851
649
  args = parse_arguments()
650
+ VERBOSE = args.verbose
852
651
 
853
652
  # openai.api_base = "https://nano-gpt.com/api/v1/"
854
653
  if len(sys.argv) < 2:
@@ -898,9 +697,8 @@ def main():
898
697
 
899
698
  files_content = ""
900
699
  for file, content in project_files:
901
- print(f"Including {len(enc.encode(content)):5d} tokens", absolute_to_relative(file))
902
-
903
- # Prepare the prompt for GPT-4
700
+ if VERBOSE:
701
+ print(f"Including {len(enc.encode(content)):5d} tokens", absolute_to_relative(file))
904
702
  files_content += f"File: {absolute_to_relative(file)}\nContent:\n{content}\n"
905
703
 
906
704
  full_prompt = f"{system_prompt}\n\n{user_prompt}\n\n{files_content}"
@@ -1003,5 +801,29 @@ def swallow_reasoning(full_response: str) -> (str, str):
1003
801
  final_content = full_response.strip()
1004
802
  return final_content, reasoning
1005
803
 
804
def strip_bad_output(updated: str, original: str) -> str:
    """
    Remove a Markdown code-fence wrapper that the LLM put around a file.

    If the original file content does not itself start with a code fence,
    the first triple-backtick block found anywhere in the updated output
    (it may be preceded by an introductory message) is extracted and
    returned without the surrounding fences.

    Args:
        updated: Raw text returned by the LLM.
        original: Content of the file before the change; used only to
            decide whether a leading code fence is legitimate content.

    Returns:
        The whitespace-stripped content of the first fenced block, or the
        whitespace-stripped ``updated`` text when the original starts with
        a fence or no complete fenced block is present.
    """
    updated_stripped = updated.strip()

    # A file that legitimately starts with a fence (e.g. Markdown) is left alone.
    if original.lstrip().startswith("```"):
        return updated_stripped

    # Search for the first complete fenced block in the updated output.
    m = re.search(r"```(.*?)```", updated_stripped, re.DOTALL)
    if not m:
        return updated_stripped

    fenced = m.group(1)
    lines = fenced.splitlines(keepends=True)
    if len(lines) > 1:
        # Only the text on the opening-fence line can be a language tag.
        # Looking at the first line *after* stripping (as was done before)
        # discarded real content such as "{" or "main()" whenever the fence
        # carried no tag at all.
        info = lines[0].strip()
        body = "".join(lines[1:]).strip()
        # Drop a single-word tag unless it is "diff". A tag followed by an
        # empty body is kept, because it cannot be told apart from one-word
        # content.
        if info and " " not in info and info.lower() != "diff" and body:
            return body
    return fenced.strip()
827
+
1006
828
  if __name__ == "__main__":
1007
- main()
829
+ main()
gptdiff/gptpatch.py CHANGED
@@ -13,8 +13,7 @@ This tool uses the same patch-application logic as gptdiff.
13
13
  import sys
14
14
  import argparse
15
15
  from pathlib import Path
16
- from gptdiff.gptdiff import apply_diff
17
-
16
+ from gptdiff.gptdiff import apply_diff, smart_apply_patch
18
17
 
19
18
  def parse_arguments():
20
19
  parser = argparse.ArgumentParser(
@@ -50,6 +49,7 @@ def parse_arguments():
50
49
  default=30000,
51
50
  help="Maximum tokens to use for LLM responses"
52
51
  )
52
+ parser.add_argument('--dumb', action='store_true', default=False, help='Attempt dumb apply before trying smart apply')
53
53
  return parser.parse_args()
54
54
 
55
55
  def main():
@@ -64,12 +64,14 @@ def main():
64
64
  diff_text = diff_path.read_text(encoding="utf8")
65
65
 
66
66
  project_dir = args.project_dir
67
- success = apply_diff(project_dir, diff_text)
68
- if success:
69
- print("✅ Diff applied successfully.")
67
+ if args.dumb:
68
+ success = apply_diff(project_dir, diff_text)
69
+ if success:
70
+ print("✅ Diff applied successfully.")
71
+ else:
72
+ print("❌ Failed to apply diff using git apply. Attempting smart apply.")
73
+ smart_apply_patch(project_dir, diff_text, "", args)
70
74
  else:
71
- print("❌ Failed to apply diff using git apply. Attempting smart apply.")
72
- from gptdiff.gptdiff import smart_apply_patch
73
75
  smart_apply_patch(project_dir, diff_text, "", args)
74
76
 
75
77
  if __name__ == "__main__":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: gptdiff
3
- Version: 0.1.21
3
+ Version: 0.1.24
4
4
  Summary: A tool to generate and apply git diffs using LLMs
5
5
  Author: 255labs
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -0,0 +1,10 @@
1
+ gptdiff/__init__.py,sha256=o1hrK4GFvbfKcHPlLVArz4OunE3euIicEBYaLrdDo0k,198
2
+ gptdiff/applydiff.py,sha256=nvTerBtFuXWf1j6nisGY7CQ6qJCIM8J9UHxgoiWReoY,11116
3
+ gptdiff/gptdiff.py,sha256=XN05Zbr1H69_iG8Bx8RQ34vTXXg3WHDANRcGo3ihrhA,31518
4
+ gptdiff/gptpatch.py,sha256=opakY6j_I05ZNx2ACYgxB8SxoZ3POf9iFxDkV5Yn1oU,2393
5
+ gptdiff-0.1.24.dist-info/LICENSE.txt,sha256=zCJk7yUYpMjFvlipi1dKtaljF8WdZ2NASndBYYbU8BY,1228
6
+ gptdiff-0.1.24.dist-info/METADATA,sha256=TE_nbtNX0IMjDek5MHxvDDUaAeVhCXw7p5kCmh0TpZg,8785
7
+ gptdiff-0.1.24.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
8
+ gptdiff-0.1.24.dist-info/entry_points.txt,sha256=0VlVNr-gc04a3SZD5_qKIBbtg_L5P2x3xlKE5ftcdkc,82
9
+ gptdiff-0.1.24.dist-info/top_level.txt,sha256=XNkQkQGINaDndEwRxg8qToOrJ9coyfAb-EHrSUXzdCE,8
10
+ gptdiff-0.1.24.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- gptdiff/__init__.py,sha256=o1hrK4GFvbfKcHPlLVArz4OunE3euIicEBYaLrdDo0k,198
2
- gptdiff/gptdiff.py,sha256=kDp7gDgBydfKxNm73QIT54AKnv117cZdXhRYQnfJm6A,39426
3
- gptdiff/gptpatch.py,sha256=Z8CWWIfIL2o7xPLVdhzN5GSyJq0vsK4XQRzu4hMWNQk,2194
4
- gptdiff-0.1.21.dist-info/LICENSE.txt,sha256=zCJk7yUYpMjFvlipi1dKtaljF8WdZ2NASndBYYbU8BY,1228
5
- gptdiff-0.1.21.dist-info/METADATA,sha256=Y5O4deytuqvxRV4WaK2vAw9jFuz0OdR3Rxm3lIBNxHk,8785
6
- gptdiff-0.1.21.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
7
- gptdiff-0.1.21.dist-info/entry_points.txt,sha256=0VlVNr-gc04a3SZD5_qKIBbtg_L5P2x3xlKE5ftcdkc,82
8
- gptdiff-0.1.21.dist-info/top_level.txt,sha256=XNkQkQGINaDndEwRxg8qToOrJ9coyfAb-EHrSUXzdCE,8
9
- gptdiff-0.1.21.dist-info/RECORD,,