gptdiff 0.1.22__py3-none-any.whl → 0.1.27__py3-none-any.whl

gptdiff/applydiff.py ADDED
@@ -0,0 +1,265 @@
+ """
+ Module: applydiff
+
+ Contains the function to apply unified git diffs to files on disk.
+ """
+
+ from pathlib import Path
+ import re
+ import hashlib
+
+ def apply_diff(project_dir, diff_text):
+ """
+ Applies a unified diff (as generated by git diff) to the files in project_dir
+ using pure Python (without calling the external 'patch' command).
+
+ Handles file modifications, new file creation, and file deletions.
+
+ Returns:
+ True if at least one file was modified (or deleted/created) as a result of the patch,
+ False otherwise.
+ """
+ from pathlib import Path
+ import re, hashlib
+
+ def file_hash(filepath):
+ h = hashlib.sha256()
+ with open(filepath, "rb") as f:
+ h.update(f.read())
+ return h.hexdigest()
+
+ def apply_patch_to_file(file_path, patch):
+ """
+ Applies a unified diff patch (for a single file) to file_path.
+
+ Returns True if the patch was applied successfully, False otherwise.
+ """
+ # Read the original file lines; if the file doesn't exist, treat it as empty.
+ if file_path.exists():
+ original_lines = file_path.read_text(encoding="utf8").splitlines(keepends=True)
+ else:
+ original_lines = []
+ new_lines = []
+ current_index = 0
+
+ patch_lines = patch.splitlines()
+ # Regex for a hunk header, e.g., @@ -3,7 +3,6 @@
+ hunk_header_re = re.compile(r"^@@(?: -(\d+)(?:,(\d+))?)?(?: \+(\d+)(?:,(\d+))?)? @@")
+ i = 0
+ while i < len(patch_lines):
+ line = patch_lines[i]
+ if line.lstrip().startswith("@@"):
+ if line.strip() == "@@":
+ # Handle minimal hunk header without line numbers.
+ orig_start = 1
+ else:
+ m = hunk_header_re.match(line.strip())
+ if not m:
+ print("Invalid hunk header:", line)
+ return False
+ orig_start = int(m.group(1)) if m.group(1) is not None else 1
+ hunk_start_index = orig_start - 1 # diff headers are 1-indexed
+ if hunk_start_index > len(original_lines):
+ print("Hunk start index beyond file length")
+ return False
+ new_lines.extend(original_lines[current_index:hunk_start_index])
+ current_index = hunk_start_index
+ i += 1
+ # Process the hunk lines until the next hunk header.
+ while i < len(patch_lines) and not patch_lines[i].startswith("@@"):
+ pline = patch_lines[i]
+ if pline.startswith(" "):
+ # Context line must match exactly.
+ expected = pline[1:]
+ if current_index >= len(original_lines):
+ print("Context line expected but file ended")
+ return False
+ orig_line = original_lines[current_index].rstrip("\n")
+ if orig_line != expected:
+ print("Context line mismatch. Expected:", expected, "Got:", orig_line)
+ return False
+ new_lines.append(original_lines[current_index])
+ current_index += 1
+ elif pline.startswith("-"):
+ # Removal line: verify and skip from original.
+ expected = pline[1:]
+ if current_index >= len(original_lines):
+ print("Removal line expected but file ended")
+ return False
+ orig_line = original_lines[current_index].rstrip("\n")
+ if orig_line != expected:
+ print("Removal line mismatch. Expected:", expected, "Got:", orig_line)
+ return False
+ current_index += 1
+ elif pline.startswith("+"):
+ # Addition line: add to new_lines.
+ new_lines.append(pline[1:] + "\n")
+ else:
+ print("Unexpected line in hunk:", pline)
+ return False
+ i += 1
+ else:
+ # Skip non-hunk header lines.
+ i += 1
+
+ # Append any remaining lines from the original file.
+ new_lines.extend(original_lines[current_index:])
+ # Ensure parent directories exist before writing the file.
+ file_path.parent.mkdir(parents=True, exist_ok=True)
+ # Write the new content back to the file.
+ file_path.write_text("".join(new_lines), encoding="utf8")
+ return True
+
+ # Parse the diff into per-file patches.
+ file_patches = parse_diff_per_file(diff_text)
+ if not file_patches:
+ print("No file patches found in diff.")
+ return False
+
+ # Record original file hashes.
+ original_hashes = {}
+ for file_path, _ in file_patches:
+ target_file = Path(project_dir) / file_path
+ if target_file.exists():
+ original_hashes[file_path] = file_hash(target_file)
+ else:
+ original_hashes[file_path] = None
+
+ any_change = False
+ # Process each file patch.
+ for file_path, patch in file_patches:
+ target_file = Path(project_dir) / file_path
+ if "+++ /dev/null" in patch:
+ # Deletion patch: delete the file if it exists.
+ if target_file.exists():
+ target_file.unlink()
+ if not target_file.exists():
+ any_change = True
+ else:
+ print(f"Failed to delete file: {target_file}")
+ return False
+ else:
+ # Modification or new file creation.
+ success = apply_patch_to_file(target_file, patch)
+ if not success:
+ print(f"Failed to apply patch to file: {target_file}")
+ return False
+
+ # Verify that at least one file was changed by comparing hashes.
+ for file_path, patch in file_patches:
+ target_file = Path(project_dir) / file_path
+ if "+++ /dev/null" in patch:
+ if not target_file.exists():
+ any_change = True
+ else:
+ print(f"Expected deletion but file still exists: {target_file}")
+ return False
+ else:
+ old_hash = original_hashes.get(file_path)
+ if target_file.exists():
+ new_hash = file_hash(target_file)
+ if old_hash != new_hash:
+ any_change = True
+ else:
+ print(f"No change detected in file: {target_file}")
+ else:
+ print(f"Expected modification or creation but file is missing: {target_file}")
+ return False
+
+ if not any_change:
+ print("Patch applied but no file modifications detected.")
+ return False
+ return True
+
+ def parse_diff_per_file(diff_text):
+ """Parse unified diff text into individual file patches.
+
+ Splits a multi-file diff into per-file entries for processing. Handles:
+ - File creations (+++ /dev/null)
+ - File deletions (--- /dev/null)
+ - Standard modifications
+
+ Args:
+ diff_text: Unified diff string as generated by `git diff`
+
+ Returns:
+ List of tuples (file_path, patch) where:
+ - file_path: Relative path to modified file
+ - patch: Full diff fragment for this file
+
+ Note:
+ Uses 'b/' prefix detection from git diffs to determine target paths
+ This doesn't work all the time and needs to be revised with stronger models
+ """
+ header_re = re.compile(r'^(?:diff --git\s+)?(a/[^ ]+)\s+(b/[^ ]+)\s*$', re.MULTILINE)
+ lines = diff_text.splitlines()
+
+ # Check if any header line exists.
+ if not any(header_re.match(line) for line in lines):
+ # Fallback strategy: detect file headers starting with '--- a/' or '-- a/'
+ diffs = []
+ current_lines = []
+ current_file = None
+ deletion_mode = False
+ header_line_re = re.compile(r'^-{2,3}\s+a/(.+)$')
+
+ for line in lines:
+ if header_line_re.match(line):
+ if current_file is not None and current_lines:
+ if deletion_mode and not any(l.startswith("+++ /dev/null") for l in current_lines):
+ current_lines.append("+++ /dev/null")
+ diffs.append((current_file, "\n".join(current_lines)))
+ current_lines = [line]
+ deletion_mode = False
+ file_from = header_line_re.match(line).group(1).strip()
+ current_file = file_from
+ else:
+ current_lines.append(line)
+ if "deleted file mode" in line:
+ deletion_mode = True
+ if line.startswith("+++ "):
+ parts = line.split()
+ if len(parts) >= 2:
+ file_to = parts[1].strip()
+ if file_to != "/dev/null":
+ current_file = file_to[2:] if (file_to.startswith("a/") or file_to.startswith("b/")) else file_to
+ if current_file is not None and current_lines:
+ if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
+ current_lines.append("+++ /dev/null")
+ diffs.append((current_file, "\n".join(current_lines)))
+ return diffs
+ else:
+ # Use header-based strategy.
+ diffs = []
+ current_lines = []
+ current_file = None
+ deletion_mode = False
+ for line in lines:
+ m = header_re.match(line)
+ if m:
+ if current_file is not None and current_lines:
+ if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
+ current_lines.append("+++ /dev/null")
+ diffs.append((current_file, "\n".join(current_lines)))
+ current_lines = [line]
+ deletion_mode = False
+ file_from = m.group(1) # e.g. "a/index.html"
+ file_to = m.group(2) # e.g. "b/index.html"
+ current_file = file_to[2:] if file_to.startswith("b/") else file_to
+ else:
+ current_lines.append(line)
+ if "deleted file mode" in line:
+ deletion_mode = True
+ if line.startswith("+++ "):
+ parts = line.split()
+ if len(parts) >= 2:
+ file_to = parts[1].strip()
+ if file_to != "/dev/null":
+ current_file = file_to[2:] if (file_to.startswith("a/") or file_to.startswith("b/")) else file_to
+ if current_file is not None and current_lines:
+ if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
+ current_lines.append("+++ /dev/null")
+ diffs.append((current_file, "\n".join(current_lines)))
+ return diffs
+
+
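
The new `gptdiff/applydiff.py` module above is self-contained, so its two public helpers can be exercised directly. Below is a minimal usage sketch (not part of the package): the project directory and the sample diff are illustrative, and `apply_diff` only reports success when the target file already matches the hunk's context and removal lines.

```python
# Illustrative sketch of the new module's public helpers; the path and the
# sample diff are made up for demonstration.
from gptdiff.applydiff import apply_diff, parse_diff_per_file

diff_text = """diff --git a/hello.txt b/hello.txt
--- a/hello.txt
+++ b/hello.txt
@@ -1 +1 @@
-hello
+hello world
"""

# Split a (possibly multi-file) diff into (file_path, patch) tuples.
for path, patch in parse_diff_per_file(diff_text):
    print(path)  # -> hello.txt

# Apply the whole diff under a project directory. Returns True only when at
# least one file was actually created, modified, or deleted on disk.
if apply_diff("demo_project", diff_text):
    print("patch applied")
else:
    print("patch rejected or produced no changes")
```
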
gptdiff/gptdiff.py CHANGED
@@ -1,31 +1,42 @@
  #!/usr/bin/env python3
  from pathlib import Path
+ from urllib.parse import urlparse
  import subprocess
  import hashlib
  import re
-
+ import time
+ import os
+ import json
+ import subprocess
+ import sys
+ import fnmatch
+ import argparse
+ import pkgutil
+ import contextvars
+ from pkgutil import get_data
+ import threading
+ from threading import Lock

  import openai
  from openai import OpenAI
-
  import tiktoken
  import time
-
  import os
  import json
  import subprocess
- from pathlib import Path
  import sys
  import fnmatch
  import argparse
  import pkgutil
- import re
  import contextvars
- from ai_agent_toolbox import MarkdownParser, MarkdownPromptFormatter, Toolbox, FlatXMLParser, FlatXMLPromptFormatter
- import threading
  from pkgutil import get_data
+ import threading
+ from ai_agent_toolbox import MarkdownParser, MarkdownPromptFormatter, Toolbox, FlatXMLParser, FlatXMLPromptFormatter
+ from .applydiff import apply_diff, parse_diff_per_file

+ VERBOSE = False
  diff_context = contextvars.ContextVar('diffcontent', default="")
+
  def create_diff_toolbox():
  toolbox = Toolbox()

@@ -97,7 +108,9 @@ def color_code_diff(diff_text: str) -> str:

  def load_gitignore_patterns(gitignore_path):
  with open(gitignore_path, 'r') as f:
- patterns = [line.strip() for line in f if line.strip() and not line.startswith('#')]
+ patterns = [
+ line.strip() for line in f if line.strip() and not line.startswith('#')
+ ]
  return patterns

  def is_ignored(filepath, gitignore_patterns):
@@ -165,7 +178,7 @@ def load_project_files(project_dir, cwd):
  Prints skipped files to stdout for visibility
  """
  ignore_paths = [Path(cwd) / ".gitignore", Path(cwd) / ".gptignore"]
- gitignore_patterns = [".gitignore", "diff.patch", "prompt.txt", ".gptignore", "*.pdf", "*.docx", ".git", "*.orig", "*.rej", "*.diff"]
+ gitignore_patterns = [".gitignore", "diff.patch", "prompt.txt", ".*", ".gptignore", "*.pdf", "*.docx", ".git", "*.orig", "*.rej", "*.diff"]

  for p in ignore_paths:
  if p.exists():
@@ -175,14 +188,15 @@ def load_project_files(project_dir, cwd):
  project_files = []
  for file in list_files_and_dirs(project_dir, gitignore_patterns):
  if os.path.isfile(file):
- try:
- with open(file, 'r') as f:
- content = f.read()
+ try:
+ with open(file, 'r') as f:
+ content = f.read()
+ if VERBOSE:
  print(file)
- project_files.append((file, content))
- except UnicodeDecodeError:
- print(f"Skipping file {file} due to UnicodeDecodeError")
- continue
+ project_files.append((file, content))
+ except UnicodeDecodeError:
+ print(f"Skipping file {file} due to UnicodeDecodeError")
+ continue

  print("")
  return project_files
@@ -191,40 +205,73 @@ def load_prepend_file(file):
  with open(file, 'r') as f:
  return f.read()

- # Function to call GPT-4 API and calculate the cost
+ def domain_for_url(base_url):
+ parsed = urlparse(base_url)
+ if parsed.netloc:
+ if parsed.username:
+ domain = parsed.hostname
+ if parsed.port:
+ domain += f":{parsed.port}"
+ else:
+ domain = parsed.netloc
+ else:
+ domain = base_url
+ return domain
+
  def call_llm_for_diff(system_prompt, user_prompt, files_content, model, temperature=0.7, max_tokens=30000, api_key=None, base_url=None):
  enc = tiktoken.get_encoding("o200k_base")
+
+ # Use colors in print statements
+ red = "\033[91m"
+ green = "\033[92m"
+ blue = "\033[94m"
+ reset = "\033[0m"
  start_time = time.time()

  parser = MarkdownParser()
  formatter = MarkdownPromptFormatter()
  toolbox = create_diff_toolbox()
  tool_prompt = formatter.usage_prompt(toolbox)
- system_prompt += "\n"+tool_prompt
+ system_prompt += "\n" + tool_prompt

  if 'gemini' in model:
- user_prompt = system_prompt+"\n"+user_prompt
+ user_prompt = system_prompt + "\n" + user_prompt

+ input_content = system_prompt + "\n" + user_prompt + "\n" + files_content
+ token_count = len(enc.encode(input_content))
  messages = [
  {"role": "system", "content": system_prompt},
- {"role": "user", "content": user_prompt + "\n"+files_content},
+ {"role": "user", "content": user_prompt + "\n" + files_content},
  ]
- print("Using", model)
- print("SYSTEM PROMPT")
- print(system_prompt)
- print("USER PROMPT")
- print(user_prompt, "+", len(enc.encode(files_content)), "tokens of file content")

- if api_key is None:
+ if VERBOSE:
+ print(f"{green}Using {model}{reset}")
+ print(f"{green}SYSTEM PROMPT{reset}")
+ print(system_prompt)
+ print(f"{green}USER PROMPT{reset}")
+ print(user_prompt, "+", len(enc.encode(files_content)), "tokens of file content")
+ else:
+ print(f"Generating diff using model '{green}{model}{reset}' from '{blue}{domain_for_url(base_url)}{reset}' with {token_count} input tokens...")
+
+ if not api_key:
  api_key = os.getenv('GPTDIFF_LLM_API_KEY')
- if base_url is None:
+ if not base_url:
  base_url = os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/")
+ base_url = base_url or "https://nano-gpt.com/api/v1/"
+
  client = OpenAI(api_key=api_key, base_url=base_url)
  response = client.chat.completions.create(model=model,
  messages=messages,
  max_tokens=max_tokens,
  temperature=temperature)

+ if VERBOSE:
+ print("Debug: Raw LLM Response\n---")
+ print(response.choices[0].message.content.strip())
+ print("---")
+ else:
+ print("Diff generated.")
+
  prompt_tokens = response.usage.prompt_tokens
  completion_tokens = response.usage.completion_tokens
  total_tokens = response.usage.total_tokens
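
The hunk above introduces `domain_for_url`, which the new status messages use to name the API host without echoing credentials embedded in the base URL. A standalone sketch of its behaviour follows (the example URLs are made up):

```python
# Standalone copy of the helper added above, with illustrative URLs.
from urllib.parse import urlparse

def domain_for_url(base_url):
    parsed = urlparse(base_url)
    if parsed.netloc:
        if parsed.username:
            # Drop "user:password@" from the display but keep an explicit port.
            domain = parsed.hostname
            if parsed.port:
                domain += f":{parsed.port}"
        else:
            domain = parsed.netloc
    else:
        domain = base_url
    return domain

print(domain_for_url("https://nano-gpt.com/api/v1/"))          # nano-gpt.com
print(domain_for_url("https://user:secret@example.com:8443"))  # example.com:8443
```
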
@@ -236,9 +283,6 @@ def call_llm_for_diff(system_prompt, user_prompt, files_content, model, temperat
  print("-" * 40)

  # Now, these rates are updated to per million tokens
- cost_per_million_prompt_tokens = 30
- cost_per_million_completion_tokens = 60
- cost = (prompt_tokens / 1_000_000 * cost_per_million_prompt_tokens) + (completion_tokens / 1_000_000 * cost_per_million_completion_tokens)

  full_response = response.choices[0].message.content.strip()
  full_response, reasoning = swallow_reasoning(full_response)
@@ -250,7 +294,7 @@ def call_llm_for_diff(system_prompt, user_prompt, files_content, model, temperat
  toolbox.use(event)
  diff_response = diff_context.get()

- return full_response, diff_response, prompt_tokens, completion_tokens, total_tokens, cost
+ return full_response, diff_response, prompt_tokens, completion_tokens, total_tokens

  # New API functions
  def build_environment(files_dict):
@@ -361,172 +405,6 @@ def smartapply(diff_text, files, model=None, api_key=None, base_url=None):

  return files

- def apply_diff(project_dir, diff_text):
- """
- Applies a unified diff (as generated by git diff) to the files in project_dir
- using pure Python (without calling the external 'patch' command).
-
- Handles file modifications, new file creation, and file deletions.
-
- Returns:
- True if at least one file was modified (or deleted/created) as a result of the patch,
- False otherwise.
- """
- from pathlib import Path
- import re, hashlib
-
- def file_hash(filepath):
- h = hashlib.sha256()
- with open(filepath, "rb") as f:
- h.update(f.read())
- return h.hexdigest()
-
- def apply_patch_to_file(file_path, patch):
- """
- Applies a unified diff patch (for a single file) to file_path.
-
- Returns True if the patch was applied successfully, False otherwise.
- """
- # Read the original file lines; if the file doesn't exist, treat it as empty.
- if file_path.exists():
- original_lines = file_path.read_text(encoding="utf8").splitlines(keepends=True)
- else:
- original_lines = []
- new_lines = []
- current_index = 0
-
- patch_lines = patch.splitlines()
- # Regex for a hunk header, e.g., @@ -3,7 +3,6 @@
- hunk_header_re = re.compile(r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))? @@")
- i = 0
- while i < len(patch_lines):
- line = patch_lines[i]
- if line.startswith("@@"):
- m = hunk_header_re.match(line)
- if not m:
- print("Invalid hunk header:", line)
- return False
- orig_start = int(m.group(1))
- # orig_len = int(m.group(2)) if m.group(2) else 1 # not used explicitly
- # new_start = int(m.group(3))
- # new_len = int(m.group(4)) if m.group(4) else 1
-
- # Copy unchanged lines before the hunk.
- hunk_start_index = orig_start - 1 # diff headers are 1-indexed
- if hunk_start_index > len(original_lines):
- print("Hunk start index beyond file length")
- return False
- new_lines.extend(original_lines[current_index:hunk_start_index])
- current_index = hunk_start_index
-
- i += 1
- # Process the hunk lines until the next hunk header.
- while i < len(patch_lines) and not patch_lines[i].startswith("@@"):
- pline = patch_lines[i]
- if pline.startswith(" "):
- # Context line must match exactly.
- expected = pline[1:]
- if current_index >= len(original_lines):
- print("Context line expected but file ended")
- return False
- orig_line = original_lines[current_index].rstrip("\n")
- if orig_line != expected:
- print("Context line mismatch. Expected:", expected, "Got:", orig_line)
- return False
- new_lines.append(original_lines[current_index])
- current_index += 1
- elif pline.startswith("-"):
- # Removal line: verify and skip from original.
- expected = pline[1:]
- if current_index >= len(original_lines):
- print("Removal line expected but file ended")
- return False
- orig_line = original_lines[current_index].rstrip("\n")
- if orig_line != expected:
- print("Removal line mismatch. Expected:", expected, "Got:", orig_line)
- return False
- current_index += 1
- elif pline.startswith("+"):
- # Addition line: add to new_lines.
- new_lines.append(pline[1:] + "\n")
- else:
- print("Unexpected line in hunk:", pline)
- return False
- i += 1
- else:
- # Skip non-hunk header lines.
- i += 1
-
- # Append any remaining lines from the original file.
- new_lines.extend(original_lines[current_index:])
- # Ensure parent directories exist before writing the file.
- file_path.parent.mkdir(parents=True, exist_ok=True)
- # Write the new content back to the file.
- file_path.write_text("".join(new_lines), encoding="utf8")
- return True
-
- # Parse the diff into per-file patches.
- file_patches = parse_diff_per_file(diff_text)
- if not file_patches:
- print("No file patches found in diff.")
- return False
-
- # Record original file hashes.
- original_hashes = {}
- for file_path, _ in file_patches:
- target_file = Path(project_dir) / file_path
- if target_file.exists():
- original_hashes[file_path] = file_hash(target_file)
- else:
- original_hashes[file_path] = None
-
- any_change = False
- # Process each file patch.
- for file_path, patch in file_patches:
- target_file = Path(project_dir) / file_path
- if "+++ /dev/null" in patch:
- # Deletion patch: delete the file if it exists.
- if target_file.exists():
- target_file.unlink()
- if not target_file.exists():
- any_change = True
- else:
- print(f"Failed to delete file: {target_file}")
- return False
- else:
- # Modification or new file creation.
- success = apply_patch_to_file(target_file, patch)
- if not success:
- print(f"Failed to apply patch to file: {target_file}")
- return False
-
- # Verify that at least one file was changed by comparing hashes.
- for file_path, patch in file_patches:
- target_file = Path(project_dir) / file_path
- if "+++ /dev/null" in patch:
- if not target_file.exists():
- any_change = True
- else:
- print(f"Expected deletion but file still exists: {target_file}")
- return False
- else:
- old_hash = original_hashes.get(file_path)
- if target_file.exists():
- new_hash = file_hash(target_file)
- if old_hash != new_hash:
- any_change = True
- else:
- print(f"No change detected in file: {target_file}")
- else:
- print(f"Expected modification or creation but file is missing: {target_file}")
- return False
-
- if not any_change:
- print("Patch applied but no file modifications detected.")
- return False
- return True
-
-
  def parse_arguments():
  parser = argparse.ArgumentParser(description='Generate and optionally apply git diffs using GPT-4.')
  parser.add_argument('prompt', type=str, help='Prompt that runs on the codebase.')
@@ -542,9 +420,8 @@ def parse_arguments():
  parser.add_argument('--max_tokens', type=int, default=30000, help='Temperature parameter for model creativity (0.0 to 2.0)')
  parser.add_argument('--model', type=str, default=None, help='Model to use for the API call.')
  parser.add_argument('--applymodel', type=str, default=None, help='Model to use for applying the diff. Defaults to the value of --model if not specified.')
-
  parser.add_argument('--nowarn', action='store_true', help='Disable large token warning')
-
+ parser.add_argument('--verbose', action='store_true', help='Enable verbose output with detailed information')
  return parser.parse_args()

  def absolute_to_relative(absolute_path):
@@ -552,95 +429,8 @@ def absolute_to_relative(absolute_path):
  relative_path = os.path.relpath(absolute_path, cwd)
  return relative_path

- def parse_diff_per_file(diff_text):
- """Parse unified diff text into individual file patches.
-
- Splits a multi-file diff into per-file entries for processing. Handles:
- - File creations (+++ /dev/null)
- - File deletions (--- /dev/null)
- - Standard modifications
-
- Args:
- diff_text: Unified diff string as generated by `git diff`
-
- Returns:
- List of tuples (file_path, patch) where:
- - file_path: Relative path to modified file
- - patch: Full diff fragment for this file
-
- Note:
- Uses 'b/' prefix detection from git diffs to determine target paths
- """
- header_re = re.compile(r'^(?:diff --git\s+)?(a/[^ ]+)\s+(b/[^ ]+)\s*$', re.MULTILINE)
- lines = diff_text.splitlines()
-
- # Check if any header line exists.
- if not any(header_re.match(line) for line in lines):
- # Fallback strategy: detect file headers starting with '--- a/' or '-- a/'
- diffs = []
- current_lines = []
- current_file = None
- deletion_mode = False
- header_line_re = re.compile(r'^-{2,3}\s+a/(.+)$')
-
- for line in lines:
- if header_line_re.match(line):
- if current_file is not None and current_lines:
- if deletion_mode and not any(l.startswith("+++ /dev/null") for l in current_lines):
- current_lines.append("+++ /dev/null")
- diffs.append((current_file, "\n".join(current_lines)))
- current_lines = [line]
- deletion_mode = False
- file_from = header_line_re.match(line).group(1).strip()
- current_file = file_from
- else:
- current_lines.append(line)
- if "deleted file mode" in line:
- deletion_mode = True
- if line.startswith("+++ "):
- parts = line.split()
- if len(parts) >= 2:
- file_to = parts[1].strip()
- if file_to != "/dev/null":
- current_file = file_to[2:] if (file_to.startswith("a/") or file_to.startswith("b/")) else file_to
- if current_file is not None and current_lines:
- if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
- current_lines.append("+++ /dev/null")
- diffs.append((current_file, "\n".join(current_lines)))
- return diffs
- else:
- # Use header-based strategy.
- diffs = []
- current_lines = []
- current_file = None
- deletion_mode = False
- for line in lines:
- m = header_re.match(line)
- if m:
- if current_file is not None and current_lines:
- if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
- current_lines.append("+++ /dev/null")
- diffs.append((current_file, "\n".join(current_lines)))
- current_lines = [line]
- deletion_mode = False
- file_from = m.group(1) # e.g. "a/index.html"
- file_to = m.group(2) # e.g. "b/index.html"
- current_file = file_to[2:] if file_to.startswith("b/") else file_to
- else:
- current_lines.append(line)
- if "deleted file mode" in line:
- deletion_mode = True
- if line.startswith("+++ "):
- parts = line.split()
- if len(parts) >= 2:
- file_to = parts[1].strip()
- if file_to != "/dev/null":
- current_file = file_to[2:] if (file_to.startswith("a/") or file_to.startswith("b/")) else file_to
- if current_file is not None and current_lines:
- if deletion_mode and not any(l.startswith("+++ ") for l in current_lines):
- current_lines.append("+++ /dev/null")
- diffs.append((current_file, "\n".join(current_lines)))
- return diffs
+ def colorize_warning_warning(message):
+ return f"\033[91m\033[1m{message}\033[0m"

  def call_llm_for_apply_with_think_tool_available(file_path, original_content, file_diff, model, api_key=None, base_url=None, extra_prompt=None, max_tokens=30000):
  parser = FlatXMLParser("think")
@@ -720,9 +510,9 @@ Diff to apply:
  {"role": "system", "content": system_prompt},
  {"role": "user", "content": user_prompt},
  ]
- if api_key is None:
+ if not api_key:
  api_key = os.getenv('GPTDIFF_LLM_API_KEY')
- if base_url is None:
+ if not base_url:
  base_url = os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/")
  client = OpenAI(api_key=api_key, base_url=base_url)
  start_time = time.time()
@@ -734,8 +524,11 @@ Diff to apply:
  elapsed = time.time() - start_time
  minutes, seconds = divmod(int(elapsed), 60)
  time_str = f"{minutes}m {seconds}s" if minutes else f"{seconds}s"
- print(f"Smartapply time: {time_str}")
- print("-" * 40)
+ if VERBOSE:
+ print(f"Smartapply time: {time_str}")
+ print("-" * 40)
+ else:
+ print(f"Smartapply completed in {time_str}")
  return full_response

  def build_environment_from_filelist(file_list, cwd):
@@ -760,31 +553,44 @@ def smart_apply_patch(project_dir, diff_text, user_prompt, args):
  Attempt to apply a diff via smartapply: process each file concurrently using the LLM.
  """
  from pathlib import Path
+ start_time = time.time()
  parsed_diffs = parse_diff_per_file(diff_text)
  print("Found", len(parsed_diffs), "files in diff, processing smart apply concurrently:")
+ green = "\033[92m"
+ red = "\033[91m"
+ blue = "\033[94m"
+ reset = "\033[0m"
+
  if len(parsed_diffs) == 0:
- print("\033[1;33mThere were no entries in this diff. The LLM may have returned something invalid.\033[0m")
+ print(colorize_warning_warning("There were no entries in this diff. The LLM may have returned something invalid."))
  if args.beep:
  print("\a")
  return
  threads = []
+ success_files = []
+ failed_files = []
+ success_lock = Lock()

  def process_file(file_path, file_diff):
  full_path = Path(project_dir) / file_path
- print(f"Processing file: {file_path}")
+ if VERBOSE:
+ print(f"Processing file: {file_path}")
  if '+++ /dev/null' in file_diff:
  if full_path.exists():
  full_path.unlink()
  print(f"\033[1;32mDeleted file {file_path}.\033[0m")
  else:
- print(f"\033[1;33mFile {file_path} not found - skipping deletion\033[0m")
+ print(colorize_warning_warning(f"File {file_path} not found - skipping deletion"))
  return

- try:
- original_content = full_path.read_text()
- except (UnicodeDecodeError, IOError):
- print(f"Skipping file {file_path} due to read error")
- return
+ original_content = ""
+ if full_path.exists():
+ try:
+ original_content = full_path.read_text()
+ except (UnicodeDecodeError, IOError) as e:
+ print(f"Cannot read {file_path} due to {str(e)}, treating as new file")
+ else:
+ print(f"File {file_path} does not exist, treating as new file")

  # Use SMARTAPPLY-specific environment variables if set, otherwise fallback.
  smart_apply_model = os.getenv("GPTDIFF_SMARTAPPLY_MODEL")
@@ -807,9 +613,7 @@ def smart_apply_patch(project_dir, diff_text, user_prompt, args):
  else:
  base_url = os.getenv("GPTDIFF_LLM_BASE_URL", "https://nano-gpt.com/api/v1/")

- print("-" * 40)
- print("Running smartapply with", model, "on", file_path)
- print("-" * 40)
+ print(f"Running smartapply in parallel using model '{green}{model}{reset}' from '{blue}{domain_for_url(base_url)}{reset}'...")
  try:
  updated_content = call_llm_for_apply_with_think_tool_available(
  file_path, original_content, file_diff, model,
@@ -818,12 +622,18 @@ def smart_apply_patch(project_dir, diff_text, user_prompt, args):
  max_tokens=args.max_tokens)
  if updated_content.strip() == "":
  print("Cowardly refusing to write empty file to", file_path, "merge failed")
+ with success_lock:
+ failed_files.append(file_path)
  return
  full_path.parent.mkdir(parents=True, exist_ok=True)
  full_path.write_text(updated_content)
  print(f"\033[1;32mSuccessful 'smartapply' update {file_path}.\033[0m")
+ with success_lock:
+ success_files.append(file_path)
  except Exception as e:
  print(f"\033[1;31mFailed to process {file_path}: {str(e)}\033[0m")
+ with success_lock:
+ failed_files.append(file_path)

  for file_path, file_diff in parsed_diffs:
  thread = threading.Thread(target=process_file, args=(file_path, file_diff))
@@ -831,7 +641,17 @@ def smart_apply_patch(project_dir, diff_text, user_prompt, args):
  threads.append(thread)
  for thread in threads:
  thread.join()
-
+ elapsed = time.time() - start_time
+ minutes, seconds = divmod(int(elapsed), 60)
+ time_str = f"{minutes}m {seconds}s" if minutes else f"{seconds}s"
+ print(f"Smartapply successfully applied changes in {time_str}. Check the updated files to confirm.")
+ if failed_files:
+ print(f"\033[1;31mSmart apply completed in {time_str} with failures for {len(failed_files)} files:\033[0m")
+ for file in failed_files:
+ print(f" - {file}")
+ print("Please check the errors above for details.")
+ else:
+ print(f"\033[1;32mSmart apply completed successfully in {time_str} for all {len(success_files)} files.\033[0m")
  if args.beep:
  print("\a")

@@ -862,11 +682,13 @@ def save_files(files_dict, target_directory):
  print(f"Saved: {full_path}")

  def main():
+ global VERBOSE
  # Adding color support for Windows CMD
  if os.name == 'nt':
  os.system('color')

  args = parse_arguments()
+ VERBOSE = args.verbose

  # openai.api_base = "https://nano-gpt.com/api/v1/"
  if len(sys.argv) < 2:
@@ -916,9 +738,8 @@ def main():

  files_content = ""
  for file, content in project_files:
- print(f"Including {len(enc.encode(content)):5d} tokens", absolute_to_relative(file))
-
- # Prepare the prompt for GPT-4
+ if VERBOSE:
+ print(f"Including {len(enc.encode(content)):5d} tokens", absolute_to_relative(file))
  files_content += f"File: {absolute_to_relative(file)}\nContent:\n{content}\n"

  full_prompt = f"{system_prompt}\n\n{user_prompt}\n\n{files_content}"
@@ -930,9 +751,8 @@ def main():
  with open('prompt.txt', 'w') as f:
  f.write(full_prompt)
  print(f"Total tokens: {token_count:5d}")
- print(f"\033[1;32mNot calling GPT-4.\033[0m") # Green color for success message
+ print(f"\033[1;32mWrote full prompt to prompt.txt.\033[0m")
  print('Instead, wrote full prompt to prompt.txt. Use `xclip -selection clipboard < prompt.txt` then paste into chatgpt')
- print(f"Total cost: ${0.0:.4f}")
  exit(0)
  else:
  # Validate API key presence before any API operations
@@ -949,7 +769,7 @@ def main():
  print("Request canceled")
  sys.exit(0)
  try:
- full_text, diff_text, prompt_tokens, completion_tokens, total_tokens, cost = call_llm_for_diff(system_prompt, user_prompt, files_content, args.model,
+ full_text, diff_text, prompt_tokens, completion_tokens, total_tokens = call_llm_for_diff(system_prompt, user_prompt, files_content, args.model,
  temperature=args.temperature,
  api_key=os.getenv('GPTDIFF_LLM_API_KEY'),
  base_url=os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/"),
@@ -961,35 +781,40 @@ def main():
  prompt_tokens = 0
  completion_tokens = 0
  total_tokens = 0
- cost = 0
  print(f"Error in LLM response {e}")

  if(diff_text.strip() == ""):
- print(f"\033[1;33mThere was no data in this diff. The LLM may have returned something invalid.\033[0m")
- print("Unable to parse diff text. Full response:", full_text)
+ print(f"\033[1;33mWarning: No valid diff data was generated. This could be due to an unclear prompt or an invalid LLM response.\033[0m")
+ print("Suggested action: Refine your prompt or check the full response below for clues.")
+ print("Full LLM response:\n---\n" + full_text + "\n---")
  if args.beep:
- print("\a") # Terminal bell for completion notification
+ print("\a")
  return

  elif args.apply:
  print("\nAttempting apply with the following diff:")
- print("\n<diff>")
  print(color_code_diff(diff_text))
- print("\n</diff>")
- print("Saved to patch.diff")
- if apply_diff(project_dir, diff_text):
- print(f"\033[1;32mPatch applied successfully with 'git apply'.\033[0m")
+ print("\033[94m**Attempting to apply patch using basic method...**\033[0m")
+ apply_result = apply_diff(project_dir, diff_text)
+ if apply_result:
+ print(f"\033[1;32mPatch applied successfully with basic apply.\033[0m")
  else:
- print("Apply failed, attempting smart apply.")
+ print("\033[94m**Attempting smart apply with LLM...**\033[0m")
  smart_apply_patch(project_dir, diff_text, user_prompt, args)

  if args.beep:
- print("\a") # Terminal bell for completion notification
+ print("\a")

- print(f"Prompt tokens: {prompt_tokens}")
- print(f"Completion tokens: {completion_tokens}")
- print(f"Total tokens: {total_tokens}")
- #print(f"Total cost: ${cost:.4f}")
+ green = "\033[92m"
+ reset = "\033[0m"
+ if VERBOSE:
+ print("API Usage Details:")
+ print(f"- Prompt tokens: {prompt_tokens}")
+ print(f"- Completion tokens: {completion_tokens}")
+ print(f"- Total tokens: {total_tokens}")
+ print(f"- Model used: {green}{args.model}{reset}")
+ else:
+ print(f"API Usage: {total_tokens} tokens, Model used: {green}{args.model}{reset}")

  def swallow_reasoning(full_response: str) -> (str, str):
  """
gptdiff/gptpatch.py CHANGED
@@ -13,8 +13,7 @@ This tool uses the same patch-application logic as gptdiff.
  import sys
  import argparse
  from pathlib import Path
- from gptdiff.gptdiff import apply_diff
-
+ from gptdiff.gptdiff import apply_diff, smart_apply_patch, color_code_diff

  def parse_arguments():
  parser = argparse.ArgumentParser(
@@ -50,10 +49,14 @@ def parse_arguments():
  default=30000,
  help="Maximum tokens to use for LLM responses"
  )
+ parser.add_argument('--verbose', action='store_true', help='Enable verbose output with detailed information')
+ parser.add_argument('--dumb', action='store_true', default=False, help='Attempt dumb apply before trying smart apply')
  return parser.parse_args()

  def main():
  args = parse_arguments()
+ import gptdiff.gptdiff as gd
+ gd.VERBOSE = args.verbose
  if args.diff:
  diff_text = args.diff
  else:
@@ -64,13 +67,21 @@ def main():
  diff_text = diff_path.read_text(encoding="utf8")

  project_dir = args.project_dir
- success = apply_diff(project_dir, diff_text)
- if success:
- print(" Diff applied successfully.")
+
+ if args.verbose:
+ print("\n\033[1;34mDiff to be applied:\033[0m")
+ print(color_code_diff(diff_text))
+ print("")
+
+ if args.dumb:
+ success = apply_diff(project_dir, diff_text)
+ if success:
+ print("\033[1;32m✅ Diff applied successfully.\033[0m")
+ else:
+ print("\033[1;31m❌ Failed to apply diff using git apply. Attempting smart apply.\033[0m")
+ smart_apply_patch(project_dir, diff_text, "", args)
  else:
- print("❌ Failed to apply diff using git apply. Attempting smart apply.")
- from gptdiff.gptdiff import smart_apply_patch
  smart_apply_patch(project_dir, diff_text, "", args)

  if __name__ == "__main__":
- main()
+ main()
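
Everything `gptpatch` now wires together is importable from `gptdiff.gptdiff`, so the same dumb-apply-then-smartapply flow can be driven from Python. A rough sketch follows (the diff filename is illustrative; `smart_apply_patch` also needs the parsed argparse namespace with fields such as `max_tokens` and `beep`, so it is only referenced in a comment):

```python
# Rough sketch of gptpatch's flow outside the CLI; "changes.diff" is illustrative.
from pathlib import Path

import gptdiff.gptdiff as gd
from gptdiff.gptdiff import apply_diff, color_code_diff

gd.VERBOSE = True  # what --verbose now toggles at module level

diff_text = Path("changes.diff").read_text(encoding="utf8")
print(color_code_diff(diff_text))  # same colorized preview gptpatch prints when verbose

# --dumb: try the pure-Python patcher first; on failure gptpatch falls back to
# smart_apply_patch(project_dir, diff_text, "", args), which needs the argparse
# namespace and is therefore not called in this sketch.
if apply_diff(".", diff_text):
    print("applied with the basic patcher")
else:
    print("basic apply failed; gptpatch would hand off to smartapply here")
```
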
@@ -1,6 +1,6 @@
  Metadata-Version: 2.2
  Name: gptdiff
- Version: 0.1.22
+ Version: 0.1.27
  Summary: A tool to generate and apply git diffs using LLMs
  Author: 255labs
  Classifier: License :: OSI Approved :: MIT License
@@ -82,13 +82,15 @@ done

  *Requires reasoning model*

- ### Why GPTDiff?
+ ## Why Choose GPTDiff?

- - **Understands Your Code** - Describe changes in plain English
- - **Safe Modifications** - Keeps existing code working
- - **Auto-Fix** - `--apply` fixes mistakes in generated changes
- - **Works Instantly** - No complex setup needed
- - **Whole Project View** - Handles multiple files together
+ - **Describe changes in plain English**
+ - **AI gets your whole project**
+ - **Auto-fixes conflicts**
+ - **Keeps code functional**
+ - **Fast setup, no fuss**
+ - **You approve every change**
+ - **Costs are upfront**

  ## Core Capabilities

@@ -0,0 +1,10 @@
+ gptdiff/__init__.py,sha256=o1hrK4GFvbfKcHPlLVArz4OunE3euIicEBYaLrdDo0k,198
+ gptdiff/applydiff.py,sha256=_11ITFMcigwvVptaIpEtyfLUTIy_mYPWExcXUqCBfOs,11200
+ gptdiff/gptdiff.py,sha256=sG0tPku3d5agx9F8EqKTl914S5ZkRi_8oMIBhFiz-nI,33355
+ gptdiff/gptpatch.py,sha256=Vqk2vliYs_BxtuTpwdS88n3A8XToh6RvrCA4N8VqOu0,2759
+ gptdiff-0.1.27.dist-info/LICENSE.txt,sha256=zCJk7yUYpMjFvlipi1dKtaljF8WdZ2NASndBYYbU8BY,1228
+ gptdiff-0.1.27.dist-info/METADATA,sha256=l5oPGbwCn731KeLRT3xEMElWuE1fr15c_pGLGrEzzA8,8723
+ gptdiff-0.1.27.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+ gptdiff-0.1.27.dist-info/entry_points.txt,sha256=0VlVNr-gc04a3SZD5_qKIBbtg_L5P2x3xlKE5ftcdkc,82
+ gptdiff-0.1.27.dist-info/top_level.txt,sha256=XNkQkQGINaDndEwRxg8qToOrJ9coyfAb-EHrSUXzdCE,8
+ gptdiff-0.1.27.dist-info/RECORD,,
@@ -1,9 +0,0 @@
- gptdiff/__init__.py,sha256=o1hrK4GFvbfKcHPlLVArz4OunE3euIicEBYaLrdDo0k,198
- gptdiff/gptdiff.py,sha256=AuZwZ1pg52RPheAzdhtZXSTjBGH4t4KRm7r9ziGHJVQ,41388
- gptdiff/gptpatch.py,sha256=Z8CWWIfIL2o7xPLVdhzN5GSyJq0vsK4XQRzu4hMWNQk,2194
- gptdiff-0.1.22.dist-info/LICENSE.txt,sha256=zCJk7yUYpMjFvlipi1dKtaljF8WdZ2NASndBYYbU8BY,1228
- gptdiff-0.1.22.dist-info/METADATA,sha256=_RspqYV4VPaRrpYTQXNVecFirrxzZq7MelPpZLV3O9Q,8785
- gptdiff-0.1.22.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
- gptdiff-0.1.22.dist-info/entry_points.txt,sha256=0VlVNr-gc04a3SZD5_qKIBbtg_L5P2x3xlKE5ftcdkc,82
- gptdiff-0.1.22.dist-info/top_level.txt,sha256=XNkQkQGINaDndEwRxg8qToOrJ9coyfAb-EHrSUXzdCE,8
- gptdiff-0.1.22.dist-info/RECORD,,