gptdiff 0.1.5__tar.gz → 0.1.7__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: gptdiff
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: A tool to generate and apply git diffs using LLMs
5
5
  Author: 255labs
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -4,6 +4,7 @@ import openai
4
4
  from openai import OpenAI
5
5
 
6
6
  import tiktoken
7
+ import time
7
8
 
8
9
  import os
9
10
  import json
@@ -20,7 +21,7 @@ import threading
20
21
  from pkgutil import get_data
21
22
 
22
23
  diff_context = contextvars.ContextVar('diffcontent', default="")
23
- def create_toolbox():
24
+ def create_diff_toolbox():
24
25
  toolbox = Toolbox()
25
26
 
26
27
  def diff(content: str):
@@ -47,6 +48,25 @@ a/file.py b/file.py
47
48
  )
48
49
  return toolbox
49
50
 
51
+ def create_think_toolbox():
52
+ toolbox = Toolbox()
53
+
54
+ def think(content: str):
55
+ print("Swallowed thoughts", content)
56
+
57
+ toolbox.add_tool(
58
+ name="think",
59
+ fn=think,
60
+ args={
61
+ "content": {
62
+ "type": "string",
63
+ "description": "Thoughts"
64
+ }
65
+ },
66
+ description=""
67
+ )
68
+ return toolbox
69
+
50
70
 
51
71
  def load_gitignore_patterns(gitignore_path):
52
72
  with open(gitignore_path, 'r') as f:
@@ -145,11 +165,13 @@ def load_prepend_file(file):
145
165
  return f.read()
146
166
 
147
167
  # Function to call GPT-4 API and calculate the cost
148
- def call_gpt4_api(system_prompt, user_prompt, files_content, model, temperature=0.7, max_tokens=2500, api_key=None, base_url=None):
168
+ def call_llm_for_diff(system_prompt, user_prompt, files_content, model, temperature=0.7, max_tokens=30000, api_key=None, base_url=None):
169
+ enc = tiktoken.get_encoding("o200k_base")
170
+ start_time = time.time()
149
171
 
150
172
  parser = FlatXMLParser("diff")
151
173
  formatter = FlatXMLPromptFormatter(tag="diff")
152
- toolbox = create_toolbox()
174
+ toolbox = create_diff_toolbox()
153
175
  tool_prompt = formatter.usage_prompt(toolbox)
154
176
  system_prompt += "\n"+tool_prompt
155
177
 
@@ -164,7 +186,7 @@ def call_gpt4_api(system_prompt, user_prompt, files_content, model, temperature=
164
186
  print("SYSTEM PROMPT")
165
187
  print(system_prompt)
166
188
  print("USER PROMPT")
167
- print(user_prompt, "+", len(files_content), "characters of file content")
189
+ print(user_prompt, "+", len(enc.encode(files_content)), "tokens of file content")
168
190
 
169
191
  if api_key is None:
170
192
  api_key = os.getenv('GPTDIFF_LLM_API_KEY')
@@ -180,6 +202,12 @@ def call_gpt4_api(system_prompt, user_prompt, files_content, model, temperature=
180
202
  completion_tokens = response.usage.completion_tokens
181
203
  total_tokens = response.usage.total_tokens
182
204
 
205
+ elapsed = time.time() - start_time
206
+ minutes, seconds = divmod(int(elapsed), 60)
207
+ time_str = f"{minutes}m {seconds}s" if minutes else f"{seconds}s"
208
+ print(f"Diff creation time: {time_str}")
209
+ print("-" * 40)
210
+
183
211
  # Now, these rates are updated to per million tokens
184
212
  cost_per_million_prompt_tokens = 30
185
213
  cost_per_million_completion_tokens = 60
@@ -187,7 +215,6 @@ def call_gpt4_api(system_prompt, user_prompt, files_content, model, temperature=
187
215
 
188
216
  full_response = response.choices[0].message.content.strip()
189
217
 
190
-
191
218
  events = parser.parse(full_response)
192
219
  for event in events:
193
220
  toolbox.use(event)
@@ -216,7 +243,7 @@ def generate_diff(environment, goal, model=None, temperature=0.7, max_tokens=320
216
243
  prepend = ""
217
244
 
218
245
  system_prompt = prepend+f"Output a git diff into a <diff> block."
219
- _, diff_text, _, _, _, _ = call_gpt4_api(
246
+ _, diff_text, _, _, _, _ = call_llm_for_diff(
220
247
  system_prompt,
221
248
  goal,
222
249
  environment,
@@ -266,7 +293,10 @@ def smartapply(diff_text, files, model=None, api_key=None, base_url=None):
266
293
  if model is None:
267
294
  model = os.getenv('GPTDIFF_MODEL', 'deepseek-reasoner')
268
295
  parsed_diffs = parse_diff_per_file(diff_text)
269
- print("SMARTAPPLY", diff_text)
296
+ print("-" * 40)
297
+ print("SMARTAPPLY")
298
+ print(diff_text)
299
+ print("-" * 40)
270
300
 
271
301
  def process_file(path, patch):
272
302
  original = files.get(path, '')
@@ -275,12 +305,12 @@ def smartapply(diff_text, files, model=None, api_key=None, base_url=None):
275
305
  if path in files:
276
306
  del files[path]
277
307
  else:
278
- updated = call_llm_for_apply(path, original, patch, model, api_key=api_key, base_url=base_url)
308
+ updated = call_llm_for_apply_with_think_tool_available(path, original, patch, model, api_key=api_key, base_url=base_url)
279
309
  files[path] = updated.strip()
280
-
310
+
281
311
  for path, patch in parsed_diffs:
282
312
  process_file(path, patch)
283
-
313
+
284
314
  return files
285
315
 
286
316
  # Function to apply diff to project files
@@ -307,6 +337,7 @@ def parse_arguments():
307
337
  help='Call the GPT-4 API. Writes the full prompt to prompt.txt if not specified.')
308
338
  parser.add_argument('files', nargs='*', default=[], help='Specify additional files or directories to include.')
309
339
  parser.add_argument('--temperature', type=float, default=0.7, help='Temperature parameter for model creativity (0.0 to 2.0)')
340
+ parser.add_argument('--max_tokens', type=int, default=30000, help='Temperature parameter for model creativity (0.0 to 2.0)')
310
341
  parser.add_argument('--model', type=str, default=None, help='Model to use for the API call.')
311
342
 
312
343
  parser.add_argument('--nowarn', action='store_true', help='Disable large token warning')
@@ -320,20 +351,20 @@ def absolute_to_relative(absolute_path):
320
351
 
321
352
  def parse_diff_per_file(diff_text):
322
353
  """Parse unified diff text into individual file patches.
323
-
354
+
324
355
  Splits a multi-file diff into per-file entries for processing. Handles:
325
356
  - File creations (+++ /dev/null)
326
357
  - File deletions (--- /dev/null)
327
358
  - Standard modifications
328
-
359
+
329
360
  Args:
330
361
  diff_text: Unified diff string as generated by `git diff`
331
-
362
+
332
363
  Returns:
333
364
  List of tuples (file_path, patch) where:
334
365
  - file_path: Relative path to modified file
335
366
  - patch: Full diff fragment for this file
336
-
367
+
337
368
  Note:
338
369
  Uses 'b/' prefix detection from git diffs to determine target paths
339
370
  """
@@ -373,6 +404,26 @@ def parse_diff_per_file(diff_text):
373
404
 
374
405
  return diffs
375
406
 
407
+ def call_llm_for_apply_with_think_tool_available(file_path, original_content, file_diff, model, api_key=None, base_url=None):
408
+ parser = FlatXMLParser("think")
409
+ formatter = FlatXMLPromptFormatter(tag="think")
410
+ toolbox = create_think_toolbox()
411
+ full_response = call_llm_for_apply(file_path, original_content, file_diff, model, api_key=None, base_url=None)
412
+ notool_response = ""
413
+ events = parser.parse(full_response)
414
+ is_in_tool = False
415
+ appended_content = ""
416
+ for event in events:
417
+ if event.mode == 'append':
418
+ appended_content += event.content
419
+ if event.mode == 'close' and appended_content and event.tool is None:
420
+ notool_response += appended_content
421
+ if event.mode == 'close':
422
+ appended_content = ""
423
+ toolbox.use(event)
424
+
425
+ return notool_response
426
+
376
427
  def call_llm_for_apply(file_path, original_content, file_diff, model, api_key=None, base_url=None):
377
428
  """AI-powered diff application with conflict resolution.
378
429
 
@@ -409,7 +460,8 @@ def call_llm_for_apply(file_path, original_content, file_diff, model, api_key=No
409
460
 
410
461
  1. Carefully apply all changes from the diff
411
462
  2. Preserve surrounding context that isn't changed
412
- 3. Only return the final file content, do not add any additional markup and do not add a code block"""
463
+ 3. Only return the final file content, do not add any additional markup and do not add a code block
464
+ 4. You must return the entire file. It overwrites the existing file."""
413
465
 
414
466
  user_prompt = f"""File: {file_path}
415
467
  File contents:
@@ -434,12 +486,19 @@ Diff to apply:
434
486
  if base_url is None:
435
487
  base_url = os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/")
436
488
  client = OpenAI(api_key=api_key, base_url=base_url)
489
+ start_time = time.time()
437
490
  response = client.chat.completions.create(model=model,
438
491
  messages=messages,
439
492
  temperature=0.0,
440
493
  max_tokens=30000)
494
+ full_response = response.choices[0].message.content
441
495
 
442
- return response.choices[0].message.content
496
+ elapsed = time.time() - start_time
497
+ minutes, seconds = divmod(int(elapsed), 60)
498
+ time_str = f"{minutes}m {seconds}s" if minutes else f"{seconds}s"
499
+ print(f"Smartapply time: {time_str}")
500
+ print("-" * 40)
501
+ return full_response
443
502
 
444
503
  def build_environment_from_filelist(file_list, cwd):
445
504
  """Build environment string from list of file paths"""
@@ -531,10 +590,11 @@ def main():
531
590
  if confirmation != 'y':
532
591
  print("Request canceled")
533
592
  sys.exit(0)
534
- full_text, diff_text, prompt_tokens, completion_tokens, total_tokens, cost = call_gpt4_api(system_prompt, user_prompt, files_content, args.model,
593
+ full_text, diff_text, prompt_tokens, completion_tokens, total_tokens, cost = call_llm_for_diff(system_prompt, user_prompt, files_content, args.model,
535
594
  temperature=args.temperature,
536
595
  api_key=os.getenv('GPTDIFF_LLM_API_KEY'),
537
- base_url=os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/")
596
+ base_url=os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/"),
597
+ max_tokens=args.max_tokens
538
598
  )
539
599
 
540
600
  if(diff_text.strip() == ""):
@@ -587,8 +647,17 @@ def main():
587
647
  print(f"Skipping binary file {file_path}")
588
648
  return
589
649
 
650
+ print("-" * 40)
651
+ print("SMARTAPPLY")
652
+ print(file_diff)
653
+ print("-" * 40)
590
654
  try:
591
- updated_content = call_llm_for_apply(file_path, original_content, file_diff, args.model)
655
+ updated_content = call_llm_for_apply_with_think_tool_available(file_path, original_content, file_diff, args.model)
656
+
657
+ if updated_content.strip() == "":
658
+ print("Cowardly refusing to write empty file to", file_path, "merge failed")
659
+ return
660
+
592
661
  full_path.parent.mkdir(parents=True, exist_ok=True)
593
662
  full_path.write_text(updated_content)
594
663
  print(f"\033[1;32mSuccessful 'smartapply' update {file_path}.\033[0m")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: gptdiff
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: A tool to generate and apply git diffs using LLMs
5
5
  Author: 255labs
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name='gptdiff',
5
- version='0.1.5',
5
+ version='0.1.7',
6
6
  description='A tool to generate and apply git diffs using LLMs',
7
7
  author='255labs',
8
8
  packages=find_packages(), # Use find_packages() to automatically discover packages
@@ -35,6 +35,32 @@ def test_smartapply_file_modification():
35
35
  print('Hello')
36
36
  +
37
37
  +def goodbye():
38
+ + print('Goodbye')'''
39
+
40
+ original_hello = "def hello():\n print('Hello')"
41
+ original_files = {
42
+ "hello.py": original_hello
43
+ }
44
+
45
+ # Mock LLM to return modified content
46
+ with patch('gptdiff.gptdiff.call_llm_for_apply',
47
+ return_value="\ndef goodbye():\n print('Goodbye')"):
48
+
49
+ updated_files = smartapply(diff_text, original_files)
50
+
51
+ assert "hello.py" in updated_files
52
+ assert original_hello != updated_files["hello.py"]
53
+
54
+ def test_smartapply_think_then_modify():
55
+ """Test that smartapply correctly handles file modification diffs"""
56
+ diff_text = '''diff --git a/hello.py b/hello.py
57
+ --- a/hello.py
58
+ +++ b/hello.py
59
+ @@ -1,2 +1,5 @@
60
+ def hello():
61
+ print('Hello')
62
+ +
63
+ +def goodbye():
38
64
  + print('Goodbye')'''
39
65
 
40
66
  original_files = {
@@ -43,12 +69,13 @@ def test_smartapply_file_modification():
43
69
 
44
70
  # Mock LLM to return modified content
45
71
  with patch('gptdiff.gptdiff.call_llm_for_apply',
46
- return_value="def hello():\n print('Hello')\n\ndef goodbye():\n print('Goodbye')"):
72
+ return_value="<think>Hello from thoughts</think>\ndef goodbye():\n print('Goodbye')"):
47
73
 
48
74
  updated_files = smartapply(diff_text, original_files)
49
75
 
50
76
  assert "hello.py" in updated_files
51
- assert original_files["hello.py"] != updated_files["hello.py"]
77
+ assert updated_files["hello.py"] == "def goodbye():\n print('Goodbye')"
78
+
52
79
 
53
80
  def test_smartapply_new_file_creation():
54
81
  """Test that smartapply handles new file creation through diffs"""
File without changes
File without changes
File without changes
File without changes