gptdiff 0.1.5__tar.gz → 0.1.7__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: gptdiff
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: A tool to generate and apply git diffs using LLMs
5
5
  Author: 255labs
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -4,6 +4,7 @@ import openai
4
4
  from openai import OpenAI
5
5
 
6
6
  import tiktoken
7
+ import time
7
8
 
8
9
  import os
9
10
  import json
@@ -20,7 +21,7 @@ import threading
20
21
  from pkgutil import get_data
21
22
 
22
23
  diff_context = contextvars.ContextVar('diffcontent', default="")
23
- def create_toolbox():
24
+ def create_diff_toolbox():
24
25
  toolbox = Toolbox()
25
26
 
26
27
  def diff(content: str):
@@ -47,6 +48,25 @@ a/file.py b/file.py
47
48
  )
48
49
  return toolbox
49
50
 
51
+ def create_think_toolbox():
52
+ toolbox = Toolbox()
53
+
54
+ def think(content: str):
55
+ print("Swallowed thoughts", content)
56
+
57
+ toolbox.add_tool(
58
+ name="think",
59
+ fn=think,
60
+ args={
61
+ "content": {
62
+ "type": "string",
63
+ "description": "Thoughts"
64
+ }
65
+ },
66
+ description=""
67
+ )
68
+ return toolbox
69
+
50
70
 
51
71
  def load_gitignore_patterns(gitignore_path):
52
72
  with open(gitignore_path, 'r') as f:
@@ -145,11 +165,13 @@ def load_prepend_file(file):
145
165
  return f.read()
146
166
 
147
167
  # Function to call GPT-4 API and calculate the cost
148
- def call_gpt4_api(system_prompt, user_prompt, files_content, model, temperature=0.7, max_tokens=2500, api_key=None, base_url=None):
168
+ def call_llm_for_diff(system_prompt, user_prompt, files_content, model, temperature=0.7, max_tokens=30000, api_key=None, base_url=None):
169
+ enc = tiktoken.get_encoding("o200k_base")
170
+ start_time = time.time()
149
171
 
150
172
  parser = FlatXMLParser("diff")
151
173
  formatter = FlatXMLPromptFormatter(tag="diff")
152
- toolbox = create_toolbox()
174
+ toolbox = create_diff_toolbox()
153
175
  tool_prompt = formatter.usage_prompt(toolbox)
154
176
  system_prompt += "\n"+tool_prompt
155
177
 
@@ -164,7 +186,7 @@ def call_gpt4_api(system_prompt, user_prompt, files_content, model, temperature=
164
186
  print("SYSTEM PROMPT")
165
187
  print(system_prompt)
166
188
  print("USER PROMPT")
167
- print(user_prompt, "+", len(files_content), "characters of file content")
189
+ print(user_prompt, "+", len(enc.encode(files_content)), "tokens of file content")
168
190
 
169
191
  if api_key is None:
170
192
  api_key = os.getenv('GPTDIFF_LLM_API_KEY')
@@ -180,6 +202,12 @@ def call_gpt4_api(system_prompt, user_prompt, files_content, model, temperature=
180
202
  completion_tokens = response.usage.completion_tokens
181
203
  total_tokens = response.usage.total_tokens
182
204
 
205
+ elapsed = time.time() - start_time
206
+ minutes, seconds = divmod(int(elapsed), 60)
207
+ time_str = f"{minutes}m {seconds}s" if minutes else f"{seconds}s"
208
+ print(f"Diff creation time: {time_str}")
209
+ print("-" * 40)
210
+
183
211
  # Now, these rates are updated to per million tokens
184
212
  cost_per_million_prompt_tokens = 30
185
213
  cost_per_million_completion_tokens = 60
@@ -187,7 +215,6 @@ def call_gpt4_api(system_prompt, user_prompt, files_content, model, temperature=
187
215
 
188
216
  full_response = response.choices[0].message.content.strip()
189
217
 
190
-
191
218
  events = parser.parse(full_response)
192
219
  for event in events:
193
220
  toolbox.use(event)
@@ -216,7 +243,7 @@ def generate_diff(environment, goal, model=None, temperature=0.7, max_tokens=320
216
243
  prepend = ""
217
244
 
218
245
  system_prompt = prepend+f"Output a git diff into a <diff> block."
219
- _, diff_text, _, _, _, _ = call_gpt4_api(
246
+ _, diff_text, _, _, _, _ = call_llm_for_diff(
220
247
  system_prompt,
221
248
  goal,
222
249
  environment,
@@ -266,7 +293,10 @@ def smartapply(diff_text, files, model=None, api_key=None, base_url=None):
266
293
  if model is None:
267
294
  model = os.getenv('GPTDIFF_MODEL', 'deepseek-reasoner')
268
295
  parsed_diffs = parse_diff_per_file(diff_text)
269
- print("SMARTAPPLY", diff_text)
296
+ print("-" * 40)
297
+ print("SMARTAPPLY")
298
+ print(diff_text)
299
+ print("-" * 40)
270
300
 
271
301
  def process_file(path, patch):
272
302
  original = files.get(path, '')
@@ -275,12 +305,12 @@ def smartapply(diff_text, files, model=None, api_key=None, base_url=None):
275
305
  if path in files:
276
306
  del files[path]
277
307
  else:
278
- updated = call_llm_for_apply(path, original, patch, model, api_key=api_key, base_url=base_url)
308
+ updated = call_llm_for_apply_with_think_tool_available(path, original, patch, model, api_key=api_key, base_url=base_url)
279
309
  files[path] = updated.strip()
280
-
310
+
281
311
  for path, patch in parsed_diffs:
282
312
  process_file(path, patch)
283
-
313
+
284
314
  return files
285
315
 
286
316
  # Function to apply diff to project files
@@ -307,6 +337,7 @@ def parse_arguments():
307
337
  help='Call the GPT-4 API. Writes the full prompt to prompt.txt if not specified.')
308
338
  parser.add_argument('files', nargs='*', default=[], help='Specify additional files or directories to include.')
309
339
  parser.add_argument('--temperature', type=float, default=0.7, help='Temperature parameter for model creativity (0.0 to 2.0)')
340
+ parser.add_argument('--max_tokens', type=int, default=30000, help='Temperature parameter for model creativity (0.0 to 2.0)')
310
341
  parser.add_argument('--model', type=str, default=None, help='Model to use for the API call.')
311
342
 
312
343
  parser.add_argument('--nowarn', action='store_true', help='Disable large token warning')
@@ -320,20 +351,20 @@ def absolute_to_relative(absolute_path):
320
351
 
321
352
  def parse_diff_per_file(diff_text):
322
353
  """Parse unified diff text into individual file patches.
323
-
354
+
324
355
  Splits a multi-file diff into per-file entries for processing. Handles:
325
356
  - File creations (+++ /dev/null)
326
357
  - File deletions (--- /dev/null)
327
358
  - Standard modifications
328
-
359
+
329
360
  Args:
330
361
  diff_text: Unified diff string as generated by `git diff`
331
-
362
+
332
363
  Returns:
333
364
  List of tuples (file_path, patch) where:
334
365
  - file_path: Relative path to modified file
335
366
  - patch: Full diff fragment for this file
336
-
367
+
337
368
  Note:
338
369
  Uses 'b/' prefix detection from git diffs to determine target paths
339
370
  """
@@ -373,6 +404,26 @@ def parse_diff_per_file(diff_text):
373
404
 
374
405
  return diffs
375
406
 
407
+ def call_llm_for_apply_with_think_tool_available(file_path, original_content, file_diff, model, api_key=None, base_url=None):
408
+ parser = FlatXMLParser("think")
409
+ formatter = FlatXMLPromptFormatter(tag="think")
410
+ toolbox = create_think_toolbox()
411
+ full_response = call_llm_for_apply(file_path, original_content, file_diff, model, api_key=None, base_url=None)
412
+ notool_response = ""
413
+ events = parser.parse(full_response)
414
+ is_in_tool = False
415
+ appended_content = ""
416
+ for event in events:
417
+ if event.mode == 'append':
418
+ appended_content += event.content
419
+ if event.mode == 'close' and appended_content and event.tool is None:
420
+ notool_response += appended_content
421
+ if event.mode == 'close':
422
+ appended_content = ""
423
+ toolbox.use(event)
424
+
425
+ return notool_response
426
+
376
427
  def call_llm_for_apply(file_path, original_content, file_diff, model, api_key=None, base_url=None):
377
428
  """AI-powered diff application with conflict resolution.
378
429
 
@@ -409,7 +460,8 @@ def call_llm_for_apply(file_path, original_content, file_diff, model, api_key=No
409
460
 
410
461
  1. Carefully apply all changes from the diff
411
462
  2. Preserve surrounding context that isn't changed
412
- 3. Only return the final file content, do not add any additional markup and do not add a code block"""
463
+ 3. Only return the final file content, do not add any additional markup and do not add a code block
464
+ 4. You must return the entire file. It overwrites the existing file."""
413
465
 
414
466
  user_prompt = f"""File: {file_path}
415
467
  File contents:
@@ -434,12 +486,19 @@ Diff to apply:
434
486
  if base_url is None:
435
487
  base_url = os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/")
436
488
  client = OpenAI(api_key=api_key, base_url=base_url)
489
+ start_time = time.time()
437
490
  response = client.chat.completions.create(model=model,
438
491
  messages=messages,
439
492
  temperature=0.0,
440
493
  max_tokens=30000)
494
+ full_response = response.choices[0].message.content
441
495
 
442
- return response.choices[0].message.content
496
+ elapsed = time.time() - start_time
497
+ minutes, seconds = divmod(int(elapsed), 60)
498
+ time_str = f"{minutes}m {seconds}s" if minutes else f"{seconds}s"
499
+ print(f"Smartapply time: {time_str}")
500
+ print("-" * 40)
501
+ return full_response
443
502
 
444
503
  def build_environment_from_filelist(file_list, cwd):
445
504
  """Build environment string from list of file paths"""
@@ -531,10 +590,11 @@ def main():
531
590
  if confirmation != 'y':
532
591
  print("Request canceled")
533
592
  sys.exit(0)
534
- full_text, diff_text, prompt_tokens, completion_tokens, total_tokens, cost = call_gpt4_api(system_prompt, user_prompt, files_content, args.model,
593
+ full_text, diff_text, prompt_tokens, completion_tokens, total_tokens, cost = call_llm_for_diff(system_prompt, user_prompt, files_content, args.model,
535
594
  temperature=args.temperature,
536
595
  api_key=os.getenv('GPTDIFF_LLM_API_KEY'),
537
- base_url=os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/")
596
+ base_url=os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/"),
597
+ max_tokens=args.max_tokens
538
598
  )
539
599
 
540
600
  if(diff_text.strip() == ""):
@@ -587,8 +647,17 @@ def main():
587
647
  print(f"Skipping binary file {file_path}")
588
648
  return
589
649
 
650
+ print("-" * 40)
651
+ print("SMARTAPPLY")
652
+ print(file_diff)
653
+ print("-" * 40)
590
654
  try:
591
- updated_content = call_llm_for_apply(file_path, original_content, file_diff, args.model)
655
+ updated_content = call_llm_for_apply_with_think_tool_available(file_path, original_content, file_diff, args.model)
656
+
657
+ if updated_content.strip() == "":
658
+ print("Cowardly refusing to write empty file to", file_path, "merge failed")
659
+ return
660
+
592
661
  full_path.parent.mkdir(parents=True, exist_ok=True)
593
662
  full_path.write_text(updated_content)
594
663
  print(f"\033[1;32mSuccessful 'smartapply' update {file_path}.\033[0m")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: gptdiff
3
- Version: 0.1.5
3
+ Version: 0.1.7
4
4
  Summary: A tool to generate and apply git diffs using LLMs
5
5
  Author: 255labs
6
6
  Classifier: License :: OSI Approved :: MIT License
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name='gptdiff',
5
- version='0.1.5',
5
+ version='0.1.7',
6
6
  description='A tool to generate and apply git diffs using LLMs',
7
7
  author='255labs',
8
8
  packages=find_packages(), # Use find_packages() to automatically discover packages
@@ -35,6 +35,32 @@ def test_smartapply_file_modification():
35
35
  print('Hello')
36
36
  +
37
37
  +def goodbye():
38
+ + print('Goodbye')'''
39
+
40
+ original_hello = "def hello():\n print('Hello')"
41
+ original_files = {
42
+ "hello.py": original_hello
43
+ }
44
+
45
+ # Mock LLM to return modified content
46
+ with patch('gptdiff.gptdiff.call_llm_for_apply',
47
+ return_value="\ndef goodbye():\n print('Goodbye')"):
48
+
49
+ updated_files = smartapply(diff_text, original_files)
50
+
51
+ assert "hello.py" in updated_files
52
+ assert original_hello != updated_files["hello.py"]
53
+
54
+ def test_smartapply_think_then_modify():
55
+ """Test that smartapply correctly handles file modification diffs"""
56
+ diff_text = '''diff --git a/hello.py b/hello.py
57
+ --- a/hello.py
58
+ +++ b/hello.py
59
+ @@ -1,2 +1,5 @@
60
+ def hello():
61
+ print('Hello')
62
+ +
63
+ +def goodbye():
38
64
  + print('Goodbye')'''
39
65
 
40
66
  original_files = {
@@ -43,12 +69,13 @@ def test_smartapply_file_modification():
43
69
 
44
70
  # Mock LLM to return modified content
45
71
  with patch('gptdiff.gptdiff.call_llm_for_apply',
46
- return_value="def hello():\n print('Hello')\n\ndef goodbye():\n print('Goodbye')"):
72
+ return_value="<think>Hello from thoughts</think>\ndef goodbye():\n print('Goodbye')"):
47
73
 
48
74
  updated_files = smartapply(diff_text, original_files)
49
75
 
50
76
  assert "hello.py" in updated_files
51
- assert original_files["hello.py"] != updated_files["hello.py"]
77
+ assert updated_files["hello.py"] == "def goodbye():\n print('Goodbye')"
78
+
52
79
 
53
80
  def test_smartapply_new_file_creation():
54
81
  """Test that smartapply handles new file creation through diffs"""
File without changes
File without changes
File without changes
File without changes