gptdiff 0.1.21__py3-none-any.whl → 0.1.24__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
gptdiff/applydiff.py ADDED
@@ -0,0 +1,264 @@
1
+ """
2
+ Module: applydiff
3
+
4
+ Contains the function to apply unified git diffs to files on disk.
5
+ """
6
+
7
+ from pathlib import Path
8
+ import re
9
+ import hashlib
10
+
11
def apply_diff(project_dir, diff_text):
    """
    Applies a unified diff (as generated by git diff) to the files in project_dir
    using pure Python (without calling the external 'patch' command).

    Handles file modifications, new file creation, and file deletions.

    Args:
        project_dir: Directory that the diff's relative paths are resolved against.
        diff_text: Unified diff text, possibly covering several files.

    Returns:
        True if at least one file was modified (or deleted/created) as a result of the patch,
        False otherwise.
    """
    def file_hash(filepath):
        # SHA-256 of the file bytes; used afterwards to detect whether the
        # patch actually changed anything on disk.
        h = hashlib.sha256()
        with open(filepath, "rb") as f:
            h.update(f.read())
        return h.hexdigest()

    def apply_patch_to_file(file_path, patch):
        """
        Applies a unified diff patch (for a single file) to file_path.

        Returns True if the patch was applied successfully, False otherwise.
        """
        # Read the original file lines; if the file doesn't exist, treat it as empty.
        if file_path.exists():
            original_lines = file_path.read_text(encoding="utf8").splitlines(keepends=True)
        else:
            original_lines = []
        new_lines = []
        current_index = 0

        patch_lines = patch.splitlines()
        # Regex for a hunk header, e.g., @@ -3,7 +3,6 @@
        # Both ranges are optional so degenerate headers still parse.
        hunk_header_re = re.compile(r"^@@(?: -(\d+)(?:,(\d+))?)?(?: \+(\d+)(?:,(\d+))?)? @@")
        i = 0
        while i < len(patch_lines):
            line = patch_lines[i]
            if line.lstrip().startswith("@@"):
                if line.strip() == "@@":
                    # Handle minimal hunk header without line numbers.
                    orig_start = 1
                else:
                    m = hunk_header_re.match(line.strip())
                    if not m:
                        print("Invalid hunk header:", line)
                        return False
                    orig_start = int(m.group(1)) if m.group(1) is not None else 1
                hunk_start_index = orig_start - 1  # diff headers are 1-indexed
                if hunk_start_index > len(original_lines):
                    print("Hunk start index beyond file length")
                    return False
                new_lines.extend(original_lines[current_index:hunk_start_index])
                current_index = hunk_start_index
                i += 1
                # Process the hunk lines until the next hunk header.
                while i < len(patch_lines) and not patch_lines[i].startswith("@@"):
                    pline = patch_lines[i]
                    if pline.startswith("\\"):
                        # "\ No newline at end of file" marker: informational
                        # only, never part of the file content -- skip it
                        # instead of failing the whole patch.
                        i += 1
                        continue
                    if pline.startswith(" ") or pline == "":
                        # Context line must match exactly.  A completely empty
                        # patch line stands for an empty context line (some
                        # tools strip the single leading space).
                        expected = pline[1:]
                        if current_index >= len(original_lines):
                            print("Context line expected but file ended")
                            return False
                        orig_line = original_lines[current_index].rstrip("\n")
                        if orig_line != expected:
                            print("Context line mismatch. Expected:", expected, "Got:", orig_line)
                            return False
                        new_lines.append(original_lines[current_index])
                        current_index += 1
                    elif pline.startswith("-"):
                        # Removal line: verify and skip from original.
                        expected = pline[1:]
                        if current_index >= len(original_lines):
                            print("Removal line expected but file ended")
                            return False
                        orig_line = original_lines[current_index].rstrip("\n")
                        if orig_line != expected:
                            print("Removal line mismatch. Expected:", expected, "Got:", orig_line)
                            return False
                        current_index += 1
                    elif pline.startswith("+"):
                        # Addition line: add to new_lines.
                        new_lines.append(pline[1:] + "\n")
                    else:
                        print("Unexpected line in hunk:", pline)
                        return False
                    i += 1
            else:
                # Skip non-hunk header lines (e.g. ---/+++ file headers).
                i += 1

        # Append any remaining lines from the original file.
        new_lines.extend(original_lines[current_index:])
        # Ensure parent directories exist before writing the file.
        file_path.parent.mkdir(parents=True, exist_ok=True)
        # Write the new content back to the file.
        file_path.write_text("".join(new_lines), encoding="utf8")
        return True

    # Parse the diff into per-file patches.
    file_patches = parse_diff_per_file(diff_text)
    if not file_patches:
        print("No file patches found in diff.")
        return False

    # Record original file hashes.
    original_hashes = {}
    for file_path, _ in file_patches:
        target_file = Path(project_dir) / file_path
        if target_file.exists():
            original_hashes[file_path] = file_hash(target_file)
        else:
            original_hashes[file_path] = None

    any_change = False
    # Process each file patch.
    for file_path, patch in file_patches:
        target_file = Path(project_dir) / file_path
        if "+++ /dev/null" in patch:
            # Deletion patch: delete the file if it exists.
            if target_file.exists():
                target_file.unlink()
            if not target_file.exists():
                any_change = True
            else:
                print(f"Failed to delete file: {target_file}")
                return False
        else:
            # Modification or new file creation.
            success = apply_patch_to_file(target_file, patch)
            if not success:
                print(f"Failed to apply patch to file: {target_file}")
                return False

    # Verify that at least one file was changed by comparing hashes.
    for file_path, patch in file_patches:
        target_file = Path(project_dir) / file_path
        if "+++ /dev/null" in patch:
            if not target_file.exists():
                any_change = True
            else:
                print(f"Expected deletion but file still exists: {target_file}")
                return False
        else:
            old_hash = original_hashes.get(file_path)
            if target_file.exists():
                new_hash = file_hash(target_file)
                if old_hash != new_hash:
                    any_change = True
                else:
                    print(f"No change detected in file: {target_file}")
            else:
                print(f"Expected modification or creation but file is missing: {target_file}")
                return False

    if not any_change:
        print("Patch applied but no file modifications detected.")
        return False
    return True
173
+
174
def parse_diff_per_file(diff_text):
    """Parse unified diff text into individual file patches.

    Splits a multi-file diff into per-file entries for processing. Handles:
    - File creations (+++ /dev/null)
    - File deletions (--- /dev/null)
    - Standard modifications

    Args:
        diff_text: Unified diff string as generated by `git diff`

    Returns:
        List of tuples (file_path, patch) where:
        - file_path: Relative path to modified file
        - patch: Full diff fragment for this file

    Note:
        Uses 'b/' prefix detection from git diffs to determine target paths
    """
    header_re = re.compile(r'^(?:diff --git\s+)?(a/[^ ]+)\s+(b/[^ ]+)\s*$', re.MULTILINE)
    lines = diff_text.splitlines()

    diffs = []

    def _flush(current_file, current_lines, deletion_mode):
        # Emit the accumulated lines for current_file. For deletions that
        # lack an explicit "+++" header, append a "+++ /dev/null" marker so
        # downstream consumers can recognize the patch as a deletion.
        # (Single consistent check -- the original code used "+++ /dev/null"
        # in one of its four copies and "+++ " in the other three.)
        if current_file is not None and current_lines:
            if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
                current_lines.append("+++ /dev/null")
            diffs.append((current_file, "\n".join(current_lines)))

    def _update_from_plus_header(line, current_file):
        # A "+++ b/path" line names the post-image file; "/dev/null" keeps
        # whatever target was already identified.
        parts = line.split()
        if len(parts) >= 2:
            file_to = parts[1].strip()
            if file_to != "/dev/null":
                return file_to[2:] if file_to.startswith(("a/", "b/")) else file_to
        return current_file

    current_lines = []
    current_file = None
    deletion_mode = False

    if not any(header_re.match(line) for line in lines):
        # Fallback strategy: detect file headers starting with '--- a/' or '-- a/'
        header_line_re = re.compile(r'^-{2,3}\s+a/(.+)$')
        for line in lines:
            m = header_line_re.match(line)
            if m:
                _flush(current_file, current_lines, deletion_mode)
                current_lines = [line]
                deletion_mode = False
                current_file = m.group(1).strip()
            else:
                current_lines.append(line)
                if "deleted file mode" in line:
                    deletion_mode = True
                if line.startswith("+++ "):
                    current_file = _update_from_plus_header(line, current_file)
        _flush(current_file, current_lines, deletion_mode)
        return diffs
    else:
        # Use header-based strategy.
        for line in lines:
            m = header_re.match(line)
            if m:
                _flush(current_file, current_lines, deletion_mode)
                current_lines = [line]
                deletion_mode = False
                file_to = m.group(2)  # e.g. "b/index.html"
                current_file = file_to[2:] if file_to.startswith("b/") else file_to
            else:
                current_lines.append(line)
                if "deleted file mode" in line:
                    deletion_mode = True
                if line.startswith("+++ "):
                    current_file = _update_from_plus_header(line, current_file)
        _flush(current_file, current_lines, deletion_mode)
        return diffs
263
+
264
+
gptdiff/gptdiff.py CHANGED
@@ -3,29 +3,38 @@ from pathlib import Path
3
3
  import subprocess
4
4
  import hashlib
5
5
  import re
6
-
6
+ import time
7
+ import os
8
+ import json
9
+ import subprocess
10
+ import sys
11
+ import fnmatch
12
+ import argparse
13
+ import pkgutil
14
+ import contextvars
15
+ from pkgutil import get_data
16
+ import threading
7
17
 
8
18
  import openai
9
19
  from openai import OpenAI
10
-
11
20
  import tiktoken
12
21
  import time
13
-
14
22
  import os
15
23
  import json
16
24
  import subprocess
17
- from pathlib import Path
18
25
  import sys
19
26
  import fnmatch
20
27
  import argparse
21
28
  import pkgutil
22
- import re
23
29
  import contextvars
24
- from ai_agent_toolbox import MarkdownParser, MarkdownPromptFormatter, Toolbox, FlatXMLParser, FlatXMLPromptFormatter
25
- import threading
26
30
  from pkgutil import get_data
31
+ import threading
32
+ from ai_agent_toolbox import MarkdownParser, MarkdownPromptFormatter, Toolbox, FlatXMLParser, FlatXMLPromptFormatter
33
+ from .applydiff import apply_diff, parse_diff_per_file
27
34
 
35
+ VERBOSE = False
28
36
  diff_context = contextvars.ContextVar('diffcontent', default="")
37
+
29
38
  def create_diff_toolbox():
30
39
  toolbox = Toolbox()
31
40
 
@@ -97,7 +106,9 @@ def color_code_diff(diff_text: str) -> str:
97
106
 
98
107
  def load_gitignore_patterns(gitignore_path):
99
108
  with open(gitignore_path, 'r') as f:
100
- patterns = [line.strip() for line in f if line.strip() and not line.startswith('#')]
109
+ patterns = [
110
+ line.strip() for line in f if line.strip() and not line.startswith('#')
111
+ ]
101
112
  return patterns
102
113
 
103
114
  def is_ignored(filepath, gitignore_patterns):
@@ -165,7 +176,7 @@ def load_project_files(project_dir, cwd):
165
176
  Prints skipped files to stdout for visibility
166
177
  """
167
178
  ignore_paths = [Path(cwd) / ".gitignore", Path(cwd) / ".gptignore"]
168
- gitignore_patterns = [".gitignore", "diff.patch", "prompt.txt", ".gptignore", "*.pdf", "*.docx", ".git", "*.orig", "*.rej", "*.diff"]
179
+ gitignore_patterns = [".gitignore", "diff.patch", "prompt.txt", ".*", ".gptignore", "*.pdf", "*.docx", ".git", "*.orig", "*.rej", "*.diff"]
169
180
 
170
181
  for p in ignore_paths:
171
182
  if p.exists():
@@ -175,14 +186,15 @@ def load_project_files(project_dir, cwd):
175
186
  project_files = []
176
187
  for file in list_files_and_dirs(project_dir, gitignore_patterns):
177
188
  if os.path.isfile(file):
178
- try:
179
- with open(file, 'r') as f:
180
- content = f.read()
189
+ try:
190
+ with open(file, 'r') as f:
191
+ content = f.read()
192
+ if VERBOSE:
181
193
  print(file)
182
- project_files.append((file, content))
183
- except UnicodeDecodeError:
184
- print(f"Skipping file {file} due to UnicodeDecodeError")
185
- continue
194
+ project_files.append((file, content))
195
+ except UnicodeDecodeError:
196
+ print(f"Skipping file {file} due to UnicodeDecodeError")
197
+ continue
186
198
 
187
199
  print("")
188
200
  return project_files
@@ -194,37 +206,54 @@ def load_prepend_file(file):
194
206
  # Function to call GPT-4 API and calculate the cost
195
207
  def call_llm_for_diff(system_prompt, user_prompt, files_content, model, temperature=0.7, max_tokens=30000, api_key=None, base_url=None):
196
208
  enc = tiktoken.get_encoding("o200k_base")
209
+
210
+ # Use colors in print statements
211
+ red = "\033[91m"
212
+ green = "\033[92m"
213
+ reset = "\033[0m"
197
214
  start_time = time.time()
198
215
 
199
216
  parser = MarkdownParser()
200
217
  formatter = MarkdownPromptFormatter()
201
218
  toolbox = create_diff_toolbox()
202
219
  tool_prompt = formatter.usage_prompt(toolbox)
203
- system_prompt += "\n"+tool_prompt
220
+ system_prompt += "\n" + tool_prompt
204
221
 
205
222
  if 'gemini' in model:
206
- user_prompt = system_prompt+"\n"+user_prompt
223
+ user_prompt = system_prompt + "\n" + user_prompt
207
224
 
208
225
  messages = [
209
- {"role": "system", "content": system_prompt},
210
- {"role": "user", "content": user_prompt + "\n"+files_content},
226
+ {"role": "system", "content": f"{green}{system_prompt}{reset}"},
227
+ {"role": "user", "content": user_prompt + "\n" + files_content},
211
228
  ]
212
- print("Using", model)
213
- print("SYSTEM PROMPT")
214
- print(system_prompt)
215
- print("USER PROMPT")
216
- print(user_prompt, "+", len(enc.encode(files_content)), "tokens of file content")
229
+ if VERBOSE:
230
+ print(f"{green}Using {model}{reset}")
231
+ print(f"{green}SYSTEM PROMPT{reset}")
232
+ print(system_prompt)
233
+ print(f"{green}USER PROMPT{reset}")
234
+ print(user_prompt, "+", len(enc.encode(files_content)), "tokens of file content")
235
+ else:
236
+ print("Generating diff...")
217
237
 
218
- if api_key is None:
238
+ if not api_key:
219
239
  api_key = os.getenv('GPTDIFF_LLM_API_KEY')
220
- if base_url is None:
240
+ if not base_url:
221
241
  base_url = os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/")
242
+ base_url = base_url or "https://nano-gpt.com/api/v1/"
243
+
222
244
  client = OpenAI(api_key=api_key, base_url=base_url)
223
245
  response = client.chat.completions.create(model=model,
224
246
  messages=messages,
225
247
  max_tokens=max_tokens,
226
248
  temperature=temperature)
227
249
 
250
+ if VERBOSE:
251
+ print("RESPONSE RAW-------------")
252
+ print(response.choices[0].message.content.strip())
253
+ print("/RESPONSE RAW-------------")
254
+ else:
255
+ print("Diff generated.")
256
+
228
257
  prompt_tokens = response.usage.prompt_tokens
229
258
  completion_tokens = response.usage.completion_tokens
230
259
  total_tokens = response.usage.total_tokens
@@ -345,7 +374,8 @@ def smartapply(diff_text, files, model=None, api_key=None, base_url=None):
345
374
  del files[path]
346
375
  else:
347
376
  updated = call_llm_for_apply_with_think_tool_available(path, original, patch, model, api_key=api_key, base_url=base_url)
348
- files[path] = updated.strip()
377
+ cleaned = strip_bad_output(updated, original)
378
+ files[path] = cleaned
349
379
 
350
380
  threads = []
351
381
 
@@ -360,172 +390,6 @@ def smartapply(diff_text, files, model=None, api_key=None, base_url=None):
360
390
 
361
391
  return files
362
392
 
363
- def apply_diff(project_dir, diff_text):
364
- """
365
- Applies a unified diff (as generated by git diff) to the files in project_dir
366
- using pure Python (without calling the external 'patch' command).
367
-
368
- Handles file modifications, new file creation, and file deletions.
369
-
370
- Returns:
371
- True if at least one file was modified (or deleted/created) as a result of the patch,
372
- False otherwise.
373
- """
374
- from pathlib import Path
375
- import re, hashlib
376
-
377
- def file_hash(filepath):
378
- h = hashlib.sha256()
379
- with open(filepath, "rb") as f:
380
- h.update(f.read())
381
- return h.hexdigest()
382
-
383
- def apply_patch_to_file(file_path, patch):
384
- """
385
- Applies a unified diff patch (for a single file) to file_path.
386
-
387
- Returns True if the patch was applied successfully, False otherwise.
388
- """
389
- # Read the original file lines; if the file doesn't exist, treat it as empty.
390
- if file_path.exists():
391
- original_lines = file_path.read_text(encoding="utf8").splitlines(keepends=True)
392
- else:
393
- original_lines = []
394
- new_lines = []
395
- current_index = 0
396
-
397
- patch_lines = patch.splitlines()
398
- # Regex for a hunk header, e.g., @@ -3,7 +3,6 @@
399
- hunk_header_re = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")
400
- i = 0
401
- while i < len(patch_lines):
402
- line = patch_lines[i]
403
- if line.startswith("@@"):
404
- m = hunk_header_re.match(line)
405
- if not m:
406
- print("Invalid hunk header:", line)
407
- return False
408
- orig_start = int(m.group(1))
409
- # orig_len = int(m.group(2)) if m.group(2) else 1 # not used explicitly
410
- # new_start = int(m.group(3))
411
- # new_len = int(m.group(4)) if m.group(4) else 1
412
-
413
- # Copy unchanged lines before the hunk.
414
- hunk_start_index = orig_start - 1 # diff headers are 1-indexed
415
- if hunk_start_index > len(original_lines):
416
- print("Hunk start index beyond file length")
417
- return False
418
- new_lines.extend(original_lines[current_index:hunk_start_index])
419
- current_index = hunk_start_index
420
-
421
- i += 1
422
- # Process the hunk lines until the next hunk header.
423
- while i < len(patch_lines) and not patch_lines[i].startswith("@@"):
424
- pline = patch_lines[i]
425
- if pline.startswith(" "):
426
- # Context line must match exactly.
427
- expected = pline[1:]
428
- if current_index >= len(original_lines):
429
- print("Context line expected but file ended")
430
- return False
431
- orig_line = original_lines[current_index].rstrip("\n")
432
- if orig_line != expected:
433
- print("Context line mismatch. Expected:", expected, "Got:", orig_line)
434
- return False
435
- new_lines.append(original_lines[current_index])
436
- current_index += 1
437
- elif pline.startswith("-"):
438
- # Removal line: verify and skip from original.
439
- expected = pline[1:]
440
- if current_index >= len(original_lines):
441
- print("Removal line expected but file ended")
442
- return False
443
- orig_line = original_lines[current_index].rstrip("\n")
444
- if orig_line != expected:
445
- print("Removal line mismatch. Expected:", expected, "Got:", orig_line)
446
- return False
447
- current_index += 1
448
- elif pline.startswith("+"):
449
- # Addition line: add to new_lines.
450
- new_lines.append(pline[1:] + "\n")
451
- else:
452
- print("Unexpected line in hunk:", pline)
453
- return False
454
- i += 1
455
- else:
456
- # Skip non-hunk header lines.
457
- i += 1
458
-
459
- # Append any remaining lines from the original file.
460
- new_lines.extend(original_lines[current_index:])
461
- # Ensure parent directories exist before writing the file.
462
- file_path.parent.mkdir(parents=True, exist_ok=True)
463
- # Write the new content back to the file.
464
- file_path.write_text("".join(new_lines), encoding="utf8")
465
- return True
466
-
467
- # Parse the diff into per-file patches.
468
- file_patches = parse_diff_per_file(diff_text)
469
- if not file_patches:
470
- print("No file patches found in diff.")
471
- return False
472
-
473
- # Record original file hashes.
474
- original_hashes = {}
475
- for file_path, _ in file_patches:
476
- target_file = Path(project_dir) / file_path
477
- if target_file.exists():
478
- original_hashes[file_path] = file_hash(target_file)
479
- else:
480
- original_hashes[file_path] = None
481
-
482
- any_change = False
483
- # Process each file patch.
484
- for file_path, patch in file_patches:
485
- target_file = Path(project_dir) / file_path
486
- if "+++ /dev/null" in patch:
487
- # Deletion patch: delete the file if it exists.
488
- if target_file.exists():
489
- target_file.unlink()
490
- if not target_file.exists():
491
- any_change = True
492
- else:
493
- print(f"Failed to delete file: {target_file}")
494
- return False
495
- else:
496
- # Modification or new file creation.
497
- success = apply_patch_to_file(target_file, patch)
498
- if not success:
499
- print(f"Failed to apply patch to file: {target_file}")
500
- return False
501
-
502
- # Verify that at least one file was changed by comparing hashes.
503
- for file_path, patch in file_patches:
504
- target_file = Path(project_dir) / file_path
505
- if "+++ /dev/null" in patch:
506
- if not target_file.exists():
507
- any_change = True
508
- else:
509
- print(f"Expected deletion but file still exists: {target_file}")
510
- return False
511
- else:
512
- old_hash = original_hashes.get(file_path)
513
- if target_file.exists():
514
- new_hash = file_hash(target_file)
515
- if old_hash != new_hash:
516
- any_change = True
517
- else:
518
- print(f"No change detected in file: {target_file}")
519
- else:
520
- print(f"Expected modification or creation but file is missing: {target_file}")
521
- return False
522
-
523
- if not any_change:
524
- print("Patch applied but no file modifications detected.")
525
- return False
526
- return True
527
-
528
-
529
393
  def parse_arguments():
530
394
  parser = argparse.ArgumentParser(description='Generate and optionally apply git diffs using GPT-4.')
531
395
  parser.add_argument('prompt', type=str, help='Prompt that runs on the codebase.')
@@ -541,9 +405,8 @@ def parse_arguments():
541
405
  parser.add_argument('--max_tokens', type=int, default=30000, help='Temperature parameter for model creativity (0.0 to 2.0)')
542
406
  parser.add_argument('--model', type=str, default=None, help='Model to use for the API call.')
543
407
  parser.add_argument('--applymodel', type=str, default=None, help='Model to use for applying the diff. Defaults to the value of --model if not specified.')
544
-
545
408
  parser.add_argument('--nowarn', action='store_true', help='Disable large token warning')
546
-
409
+ parser.add_argument('--verbose', action='store_true', help='Enable verbose output with detailed information')
547
410
  return parser.parse_args()
548
411
 
549
412
  def absolute_to_relative(absolute_path):
@@ -551,95 +414,8 @@ def absolute_to_relative(absolute_path):
551
414
  relative_path = os.path.relpath(absolute_path, cwd)
552
415
  return relative_path
553
416
 
554
- def parse_diff_per_file(diff_text):
555
- """Parse unified diff text into individual file patches.
556
-
557
- Splits a multi-file diff into per-file entries for processing. Handles:
558
- - File creations (+++ /dev/null)
559
- - File deletions (--- /dev/null)
560
- - Standard modifications
561
-
562
- Args:
563
- diff_text: Unified diff string as generated by `git diff`
564
-
565
- Returns:
566
- List of tuples (file_path, patch) where:
567
- - file_path: Relative path to modified file
568
- - patch: Full diff fragment for this file
569
-
570
- Note:
571
- Uses 'b/' prefix detection from git diffs to determine target paths
572
- """
573
- header_re = re.compile(r'^(?:diff --git\s+)?(a/[^ ]+)\s+(b/[^ ]+)\s*$', re.MULTILINE)
574
- lines = diff_text.splitlines()
575
-
576
- # Check if any header line exists.
577
- if not any(header_re.match(line) for line in lines):
578
- # Fallback strategy: detect file headers starting with '--- a/' or '-- a/'
579
- diffs = []
580
- current_lines = []
581
- current_file = None
582
- deletion_mode = False
583
- header_line_re = re.compile(r'^-{2,3}\s+a/(.+)$')
584
-
585
- for line in lines:
586
- if header_line_re.match(line):
587
- if current_file is not None and current_lines:
588
- if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
589
- current_lines.append("+++ /dev/null")
590
- diffs.append((current_file, "\n".join(current_lines)))
591
- current_lines = [line]
592
- deletion_mode = False
593
- file_from = header_line_re.match(line).group(1).strip()
594
- current_file = file_from
595
- else:
596
- current_lines.append(line)
597
- if "deleted file mode" in line:
598
- deletion_mode = True
599
- if line.startswith("+++ "):
600
- parts = line.split()
601
- if len(parts) >= 2:
602
- file_to = parts[1].strip()
603
- if file_to != "/dev/null":
604
- current_file = file_to[2:] if (file_to.startswith("a/") or file_to.startswith("b/")) else file_to
605
- if current_file is not None and current_lines:
606
- if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
607
- current_lines.append("+++ /dev/null")
608
- diffs.append((current_file, "\n".join(current_lines)))
609
- return diffs
610
- else:
611
- # Use header-based strategy.
612
- diffs = []
613
- current_lines = []
614
- current_file = None
615
- deletion_mode = False
616
- for line in lines:
617
- m = header_re.match(line)
618
- if m:
619
- if current_file is not None and current_lines:
620
- if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
621
- current_lines.append("+++ /dev/null")
622
- diffs.append((current_file, "\n".join(current_lines)))
623
- current_lines = [line]
624
- deletion_mode = False
625
- file_from = m.group(1) # e.g. "a/index.html"
626
- file_to = m.group(2) # e.g. "b/index.html"
627
- current_file = file_to[2:] if file_to.startswith("b/") else file_to
628
- else:
629
- current_lines.append(line)
630
- if "deleted file mode" in line:
631
- deletion_mode = True
632
- if line.startswith("+++ "):
633
- parts = line.split()
634
- if len(parts) >= 2:
635
- file_to = parts[1].strip()
636
- if file_to != "/dev/null":
637
- current_file = file_to[2:] if (file_to.startswith("a/") or file_to.startswith("b/")) else file_to
638
- if current_file is not None and current_lines:
639
- if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
640
- current_lines.append("+++ /dev/null")
641
- diffs.append((current_file, "\n".join(current_lines)))
642
- return diffs
417
def colorize_warning_warning(message):
    """Return *message* wrapped in bold red ANSI escape codes for terminal warnings."""
    bold_red_prefix = "\033[91m" + "\033[1m"
    ansi_reset = "\033[0m"
    return bold_red_prefix + message + ansi_reset
643
419
 
644
420
  def call_llm_for_apply_with_think_tool_available(file_path, original_content, file_diff, model, api_key=None, base_url=None, extra_prompt=None, max_tokens=30000):
645
421
  parser = FlatXMLParser("think")
@@ -719,9 +495,9 @@ Diff to apply:
719
495
  {"role": "system", "content": system_prompt},
720
496
  {"role": "user", "content": user_prompt},
721
497
  ]
722
- if api_key is None:
498
+ if not api_key:
723
499
  api_key = os.getenv('GPTDIFF_LLM_API_KEY')
724
- if base_url is None:
500
+ if not base_url:
725
501
  base_url = os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/")
726
502
  client = OpenAI(api_key=api_key, base_url=base_url)
727
503
  start_time = time.time()
@@ -733,8 +509,11 @@ Diff to apply:
733
509
  elapsed = time.time() - start_time
734
510
  minutes, seconds = divmod(int(elapsed), 60)
735
511
  time_str = f"{minutes}m {seconds}s" if minutes else f"{seconds}s"
736
- print(f"Smartapply time: {time_str}")
737
- print("-" * 40)
512
+ if VERBOSE:
513
+ print(f"Smartapply time: {time_str}")
514
+ print("-" * 40)
515
+ else:
516
+ print(f"Smartapply completed in {time_str}")
738
517
  return full_response
739
518
 
740
519
  def build_environment_from_filelist(file_list, cwd):
@@ -762,7 +541,7 @@ def smart_apply_patch(project_dir, diff_text, user_prompt, args):
762
541
  parsed_diffs = parse_diff_per_file(diff_text)
763
542
  print("Found", len(parsed_diffs), "files in diff, processing smart apply concurrently:")
764
543
  if len(parsed_diffs) == 0:
765
- print("\033[1;33mThere were no entries in this diff. The LLM may have returned something invalid.\033[0m")
544
+ print(colorize_warning_warning("There were no entries in this diff. The LLM may have returned something invalid."))
766
545
  if args.beep:
767
546
  print("\a")
768
547
  return
@@ -770,32 +549,50 @@ def smart_apply_patch(project_dir, diff_text, user_prompt, args):
770
549
 
771
550
  def process_file(file_path, file_diff):
772
551
  full_path = Path(project_dir) / file_path
773
- print(f"Processing file: {file_path}")
552
+ if VERBOSE:
553
+ print(f"Processing file: {file_path}")
774
554
  if '+++ /dev/null' in file_diff:
775
555
  if full_path.exists():
776
556
  full_path.unlink()
777
557
  print(f"\033[1;32mDeleted file {file_path}.\033[0m")
778
558
  else:
779
- print(f"\033[1;33mFile {file_path} not found - skipping deletion\033[0m")
559
+ print(colorize_warning_warning(f"File {file_path} not found - skipping deletion"))
780
560
  return
781
- original_content = ''
782
- if full_path.exists():
783
- try:
784
- original_content = full_path.read_text()
785
- except UnicodeDecodeError:
786
- print(f"Skipping binary file {file_path}")
787
- return
788
- if not hasattr(args, "applymodel") or args.applymodel is None:
789
- args.applymodel = args.model
790
- if args.applymodel is None:
791
- args.applymodel = os.getenv("GPTDIFF_MODEL")
561
+
562
+ try:
563
+ original_content = full_path.read_text()
564
+ except (UnicodeDecodeError, IOError):
565
+ print(f"Skipping file {file_path} due to read error")
566
+ return
567
+
568
+ # Use SMARTAPPLY-specific environment variables if set, otherwise fallback.
569
+ smart_apply_model = os.getenv("GPTDIFF_SMARTAPPLY_MODEL")
570
+ if smart_apply_model and smart_apply_model.strip():
571
+ model = smart_apply_model
572
+ elif hasattr(args, "applymodel") and args.applymodel:
573
+ model = args.applymodel
574
+ else:
575
+ model = os.getenv("GPTDIFF_MODEL", "deepseek-reasoner")
576
+
577
+ smart_api_key = os.getenv("GPTDIFF_SMARTAPPLY_API_KEY")
578
+ if smart_api_key and smart_api_key.strip():
579
+ api_key = smart_api_key
580
+ else:
581
+ api_key = os.getenv("GPTDIFF_LLM_API_KEY")
582
+
583
+ smart_base_url = os.getenv("GPTDIFF_SMARTAPPLY_BASE_URL")
584
+ if smart_base_url and smart_base_url.strip():
585
+ base_url = smart_base_url
586
+ else:
587
+ base_url = os.getenv("GPTDIFF_LLM_BASE_URL", "https://nano-gpt.com/api/v1/")
792
588
 
793
589
  print("-" * 40)
794
- print("Running smartapply with", args.applymodel,"on",file_path)
590
+ print("Running smartapply with", model, "on", file_path)
795
591
  print("-" * 40)
796
592
  try:
797
593
  updated_content = call_llm_for_apply_with_think_tool_available(
798
- file_path, original_content, file_diff, args.applymodel,
594
+ file_path, original_content, file_diff, model,
595
+ api_key=api_key, base_url=base_url,
799
596
  extra_prompt=f"This changeset is from the following instructions:\n{user_prompt}",
800
597
  max_tokens=args.max_tokens)
801
598
  if updated_content.strip() == "":
@@ -844,11 +641,13 @@ def save_files(files_dict, target_directory):
844
641
  print(f"Saved: {full_path}")
845
642
 
846
643
  def main():
644
+ global VERBOSE
847
645
  # Adding color support for Windows CMD
848
646
  if os.name == 'nt':
849
647
  os.system('color')
850
648
 
851
649
  args = parse_arguments()
650
+ VERBOSE = args.verbose
852
651
 
853
652
  # openai.api_base = "https://nano-gpt.com/api/v1/"
854
653
  if len(sys.argv) < 2:
@@ -898,9 +697,8 @@ def main():
898
697
 
899
698
  files_content = ""
900
699
  for file, content in project_files:
901
- print(f"Including {len(enc.encode(content)):5d} tokens", absolute_to_relative(file))
902
-
903
- # Prepare the prompt for GPT-4
700
+ if VERBOSE:
701
+ print(f"Including {len(enc.encode(content)):5d} tokens", absolute_to_relative(file))
904
702
  files_content += f"File: {absolute_to_relative(file)}\nContent:\n{content}\n"
905
703
 
906
704
  full_prompt = f"{system_prompt}\n\n{user_prompt}\n\n{files_content}"
@@ -1003,5 +801,29 @@ def swallow_reasoning(full_response: str) -> (str, str):
1003
801
  final_content = full_response.strip()
1004
802
  return final_content, reasoning
1005
803
 
804
+ def strip_bad_output(updated: str, original: str) -> str:
805
+ """
806
+ If the original file content does not start with a code fence but the LLM’s updated output
807
+ starts with triple backticks (possibly with an introductory message), extract and return only
808
+ the content within the first code block.
809
+ """
810
+ updated_stripped = updated.strip()
811
+ # If the original file does not start with a code fence, but the updated output contains a code block,
812
+ # extract and return only the content inside the first code block.
813
+ if not original.lstrip().startswith("```"):
814
+ # Search for the first code block in the updated output.
815
+ m = re.search(r"```(.*?)```", updated_stripped, re.DOTALL)
816
+ if m:
817
+ content = m.group(1).strip()
818
+ lines = content.splitlines()
819
+ if len(lines) > 1:
820
+ first_line = lines[0].strip()
821
+ # If the first line appears to be a language specifier (i.e., a single word)
822
+ # and is not "diff", then drop it.
823
+ if " " not in first_line and first_line.lower() != "diff":
824
+ content = "\n".join(lines[1:]).strip()
825
+ return content
826
+ return updated_stripped
827
+
1006
828
  if __name__ == "__main__":
1007
- main()
829
+ main()
gptdiff/gptpatch.py CHANGED
@@ -13,8 +13,7 @@ This tool uses the same patch-application logic as gptdiff.
13
13
  import sys
14
14
  import argparse
15
15
  from pathlib import Path
16
- from gptdiff.gptdiff import apply_diff
17
-
16
+ from gptdiff.gptdiff import apply_diff, smart_apply_patch
18
17
 
19
18
  def parse_arguments():
20
19
  parser = argparse.ArgumentParser(
@@ -50,6 +49,7 @@ def parse_arguments():
50
49
  default=30000,
51
50
  help="Maximum tokens to use for LLM responses"
52
51
  )
52
+ parser.add_argument('--dumb', action='store_true', default=False, help='Attempt dumb apply before trying smart apply')
53
53
  return parser.parse_args()
54
54
 
55
55
  def main():
@@ -64,12 +64,14 @@ def main():
64
64
  diff_text = diff_path.read_text(encoding="utf8")
65
65
 
66
66
  project_dir = args.project_dir
67
- success = apply_diff(project_dir, diff_text)
68
- if success:
69
- print("✅ Diff applied successfully.")
67
+ if args.dumb:
68
+ success = apply_diff(project_dir, diff_text)
69
+ if success:
70
+ print("✅ Diff applied successfully.")
71
+ else:
72
+ print("❌ Failed to apply diff using git apply. Attempting smart apply.")
73
+ smart_apply_patch(project_dir, diff_text, "", args)
70
74
  else:
71
- print("❌ Failed to apply diff using git apply. Attempting smart apply.")
72
- from gptdiff.gptdiff import smart_apply_patch
73
75
  smart_apply_patch(project_dir, diff_text, "", args)
74
76
 
75
77
  if __name__ == "__main__":
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: gptdiff
3
- Version: 0.1.21
3
+ Version: 0.1.24
4
4
  Summary: A tool to generate and apply git diffs using LLMs
5
5
  Author: 255labs
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -0,0 +1,10 @@
1
+ gptdiff/__init__.py,sha256=o1hrK4GFvbfKcHPlLVArz4OunE3euIicEBYaLrdDo0k,198
2
+ gptdiff/applydiff.py,sha256=nvTerBtFuXWf1j6nisGY7CQ6qJCIM8J9UHxgoiWReoY,11116
3
+ gptdiff/gptdiff.py,sha256=XN05Zbr1H69_iG8Bx8RQ34vTXXg3WHDANRcGo3ihrhA,31518
4
+ gptdiff/gptpatch.py,sha256=opakY6j_I05ZNx2ACYgxB8SxoZ3POf9iFxDkV5Yn1oU,2393
5
+ gptdiff-0.1.24.dist-info/LICENSE.txt,sha256=zCJk7yUYpMjFvlipi1dKtaljF8WdZ2NASndBYYbU8BY,1228
6
+ gptdiff-0.1.24.dist-info/METADATA,sha256=TE_nbtNX0IMjDek5MHxvDDUaAeVhCXw7p5kCmh0TpZg,8785
7
+ gptdiff-0.1.24.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
8
+ gptdiff-0.1.24.dist-info/entry_points.txt,sha256=0VlVNr-gc04a3SZD5_qKIBbtg_L5P2x3xlKE5ftcdkc,82
9
+ gptdiff-0.1.24.dist-info/top_level.txt,sha256=XNkQkQGINaDndEwRxg8qToOrJ9coyfAb-EHrSUXzdCE,8
10
+ gptdiff-0.1.24.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- gptdiff/__init__.py,sha256=o1hrK4GFvbfKcHPlLVArz4OunE3euIicEBYaLrdDo0k,198
2
- gptdiff/gptdiff.py,sha256=kDp7gDgBydfKxNm73QIT54AKnv117cZdXhRYQnfJm6A,39426
3
- gptdiff/gptpatch.py,sha256=Z8CWWIfIL2o7xPLVdhzN5GSyJq0vsK4XQRzu4hMWNQk,2194
4
- gptdiff-0.1.21.dist-info/LICENSE.txt,sha256=zCJk7yUYpMjFvlipi1dKtaljF8WdZ2NASndBYYbU8BY,1228
5
- gptdiff-0.1.21.dist-info/METADATA,sha256=Y5O4deytuqvxRV4WaK2vAw9jFuz0OdR3Rxm3lIBNxHk,8785
6
- gptdiff-0.1.21.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
7
- gptdiff-0.1.21.dist-info/entry_points.txt,sha256=0VlVNr-gc04a3SZD5_qKIBbtg_L5P2x3xlKE5ftcdkc,82
8
- gptdiff-0.1.21.dist-info/top_level.txt,sha256=XNkQkQGINaDndEwRxg8qToOrJ9coyfAb-EHrSUXzdCE,8
9
- gptdiff-0.1.21.dist-info/RECORD,,