gptdiff 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
gptdiff-0.1.2/LICENSE.txt ADDED
@@ -0,0 +1,10 @@
1
+ # We help companies adapt to the AI age through product development and consulting. 255labs.xyz
2
+ This is free and unencumbered software released into the public domain.
3
+
4
+ Copyright 2025 255labs.xyz
5
+
6
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
7
+
8
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
9
+
10
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
gptdiff-0.1.2/PKG-INFO ADDED
@@ -0,0 +1,23 @@
1
+ Metadata-Version: 2.2
2
+ Name: gptdiff
3
+ Version: 0.1.2
4
+ Summary: A tool to generate and apply git diffs using LLMs
5
+ Author: 255labs
6
+ Classifier: License :: OSI Approved :: MIT License
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Operating System :: OS Independent
9
+ License-File: LICENSE.txt
10
+ Requires-Dist: openai>=1.0.0
11
+ Requires-Dist: tiktoken>=0.5.0
12
+ Requires-Dist: ai_agent_toolbox>=0.1.0
13
+ Provides-Extra: test
14
+ Requires-Dist: pytest; extra == "test"
15
+ Requires-Dist: pytest-mock; extra == "test"
16
+ Provides-Extra: docs
17
+ Requires-Dist: mkdocs; extra == "docs"
18
+ Requires-Dist: mkdocs-material; extra == "docs"
19
+ Dynamic: author
20
+ Dynamic: classifier
21
+ Dynamic: provides-extra
22
+ Dynamic: requires-dist
23
+ Dynamic: summary
gptdiff-0.1.2/README.md ADDED
@@ -0,0 +1,246 @@
1
+ # GPTDiff
2
+
3
+ 🚀 **AI-Powered Code Evolution** - Transform your codebase with natural language instructions
4
+
5
+ ```bash
6
+ cd myproject
7
+ gptdiff 'add hover effects to the buttons'
8
+ ```
9
+
10
+ Generates a `prompt.txt` file that you can copy and paste into a large-context GPT to discuss the suggested changes. You can also call the API directly and attempt to apply the patch, falling back to `smartapply` if `git apply` fails.
11
+
12
+ ## Value Proposition
13
+
14
+ ```bash
15
+ gptdiff "Update the readme with an api section" --apply
16
+ ```
17
+ <span style="color: #00ff00;">Patch applied successfully.</span>
18
+
19
+ ### Why GPTDiff?
20
+
21
+ - **Understands Your Code** - Describe changes in plain English
22
+ - **Safe Modifications** - Keeps existing code working
23
+ - **Auto-Fix** - `--apply` fixes mistakes in generated changes
24
+ - **Works Instantly** - No complex setup needed
25
+ - **Whole Project View** - Handles multiple files together
26
+
27
+ ## Core Capabilities
28
+
29
+ ### ⚡ CLI Excellence
30
+ - **Target Specific Files** - Change just what you need
31
+ - **Live Updates** - See changes as they're made
32
+ - **Try Before Applying** - Test changes safely first
33
+ - **Clear Pricing** - Know costs upfront
34
+ - **Preview Changes** - See what will change with `--call`
35
+ - **Fix Mistakes** - Automatic error correction
36
+
37
+ ### ✨ Magic Diff Generation
38
+ ```bash
39
+ gptdiff "Convert class components to React hooks" --model deepseek-reasoner
40
+ ```
41
+ - Full project context awareness
42
+ - Cross-file refactoring support
43
+ - Automatic conflict prevention
44
+
45
+ ### 🧠 Smart Apply System
46
+
47
+ **Git-native Workflow:**
48
+ ```bash
49
+ # 1. Apply AI-generated changes
50
+ gptdiff "Improve error handling" --apply
51
+
52
+ # 2. Review each change interactively
53
+ git add -p
54
+
55
+ # 3. Commit or discard
56
+ git commit -m "Enhanced error handling"
57
+ git reset --hard # To undo all changes
58
+ ```
59
+
60
+ ```bash
61
+ gptdiff "Refactor authentication to use OAuth 2.0" --apply
62
+ ```
63
+ <span style="color: #00ff00;">✅ Successfully applied complex changes across 5 files</span>
64
+
65
+ ## Get Started
66
+
67
+ ### Installation
68
+
69
+ Requires Python 3.8+. Install from PyPI:
70
+
71
+ ```bash
72
+ pip install gptdiff
73
+ pip install tiktoken # For token counting
74
+ ```
75
+
76
+ Development install (from a source checkout):
77
+ ```bash
78
+ python setup.py install
79
+ ```
80
+
81
+ ### Configuration
82
+
83
+ First sign up for an API key at https://nano-gpt.com/api and generate your key. Then configure your environment:
84
+
85
+ #### Linux/MacOS
86
+ ```bash
87
+ export GPTDIFF_LLM_API_KEY='your-api-key'
88
+ # Optional: For switching API providers
89
+ export GPTDIFF_MODEL='deepseek-reasoner' # Set default model for all commands
90
+ export GPTDIFF_LLM_BASE_URL='https://nano-gpt.com/api/v1/'
91
+ ```
92
+
93
+ #### Windows
94
+ ```cmd
95
+ set GPTDIFF_LLM_API_KEY=your-api-key
96
+ rem Optional: For switching API providers
97
+ set GPTDIFF_MODEL=deepseek-reasoner
98
+ set GPTDIFF_LLM_BASE_URL=https://nano-gpt.com/api/v1/
99
+ ```
100
+
101
+ The default base URL points to nano-gpt.com's API. Supported models can be specified with:
102
+
103
+ ```bash
104
+ gptdiff 'your prompt' --model deepseek-reasoner
105
+ # Default model can be set via GPTDIFF_MODEL environment variable
106
+ ```
107
+
108
+ The LLM API will not be called unless you specify `--call` or `--apply`.
109
+
110
+ Prevent files from being appended to the prompt by adding them to `.gitignore` or `.gptignore`, for example:
111
+
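+ A `.gptignore` file is read with the same glob-style patterns as `.gitignore`. An illustrative example (these entries are placeholders, not defaults):
+
+ ```
+ node_modules/
+ dist/
+ *.log
+ ```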
112
+ ### Command Line Usage
113
+
114
+ After installing the package, you can use the `gptdiff` command in your terminal. `cd` into your codebase and run:
115
+
116
+ ```bash
117
+ gptdiff '<user_prompt>'
118
+ ```
119
+
120
+ Any files matched by `.gitignore` are ignored when generating `prompt.txt`.
121
+
122
+ #### Specifying Additional Files
123
+
124
+ You can specify additional files or directories to include in the prompt by adding them as arguments to the `gptdiff` command. If no additional files or directories are specified, the tool will default to using the current working directory.
125
+
126
+ Example usage:
127
+
128
+ ```bash
129
+ gptdiff 'make this change' src test
130
+ ```
131
+
132
+ #### Autopatch Changes
133
+
134
+ You can also call the LLM and automatically apply the generated git diff with the `--apply` flag:
135
+
136
+ ```bash
137
+ gptdiff '<user_prompt>' --apply
138
+ ```
139
+
140
+ ### Dry-Run Validation
141
+ Preview changes without applying them by omitting the `--apply` flag when using `--call`:
142
+ ```bash
143
+ gptdiff "Modernize database queries" --call
144
+ ```
145
+ <span style="color: #0066cc;">ℹ️ Diff preview generated - review changes before applying</span>
146
+
147
+ The generated diff may be incorrect and require manual merging before it applies cleanly.
148
+
149
+ #### Smart Apply
150
+
151
+ For more reliable patching of complex changes, use `smartapply`, which processes each file's diff individually with the LLM (it runs automatically when the plain patch fails under `--apply`):
152
+
153
+ ```bash
154
+ gptdiff 'refactor authentication system' --apply
155
+ ```
156
+
157
+ ### Completion Notification
158
+
159
+ Use the `--nobeep` option to disable the default completion beep:
160
+
161
+ ```bash
162
+ gptdiff '<user_prompt>' --nobeep
163
+ ```
164
+
165
+ ## Local API Documentation
166
+
167
+ Preview API docs locally using MkDocs:
168
+
169
+ ```bash
170
+ pip install .[docs]
171
+ mkdocs serve
172
+ ```
173
+ Open http://localhost:8000 to view the documentation
174
+
175
+ ## Python API
176
+
177
+ Integrate GPTDiff directly into your Python workflows:
178
+
179
+ ```python
180
+ from gptdiff import generate_diff, smartapply
+ from gptdiff.gptdiff import build_environment  # helper for building the environment string
181
+ import os
182
+
183
+ os.environ['GPTDIFF_LLM_API_KEY'] = 'your-api-key'
184
+
185
+ # Describe the codebase as a {path: content} dict
+ files = {
+     "main.py": 'def old_name():\n    print("Need renaming")\n'
+ }
192
+
193
+ # Generate transformation diff
194
+ diff = generate_diff(
195
+ environment=build_environment(files),
196
+ goal='Rename function to new_name()',
197
+ model='deepseek-reasoner'
198
+ )
199
+
200
+ # Apply changes safely
201
+ updated_files = smartapply(
+ diff_text=diff,
+ files=files
204
+ )
205
+
206
+ print("Transformed codebase:")
207
+ print(updated_files["main.py"])
208
+ ```
209
+
210
+ **Batch Processing Example:**
211
+ ```python
212
+ from gptdiff import generate_diff, smartapply
+ from gptdiff.gptdiff import build_environment
213
+
214
+ files = load_your_codebase() # Dict of {path: content}
215
+
216
+ transformations = [
217
+ "Add python type annotations",
218
+ "Convert string formatting to f-strings",
219
+ "Update deprecated API calls"
220
+ ]
221
+
222
+ for task in transformations:
223
+ files = smartapply(generate_diff(build_environment(files), task), files)
224
+ ```
225
+
226
+ **Integration Note:** GPTDiff leverages the [AI Agent Toolbox](https://github.com/255BITS/ai-agent-toolbox) for seamless tool usage across AI models and frameworks, making it ideal for both single responses and complex agent workflows.
227
+
228
+ ### Core Functions
229
+
230
+ - `generate_diff(environment: str, goal: str, model: str = None) -> str`
231
+ Generates a git diff implementing the requested changes
232
+
233
+ *`model` parameter defaults to `GPTDIFF_MODEL` environment variable*
234
+ - `smartapply(diff_text: str, files: dict, model: str = None) -> dict`
+ Applies each file's diff individually and returns an updated `{path: content}` dict (see the sketch below)
236
+
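+ As a rough sketch of how these pieces fit together, the snippet below mirrors the CLI's fallback behaviour: try a plain patch first, then fall back to `smartapply`. The `run_git_apply` helper is hypothetical (something you would write yourself), and `build_environment` lives in the internal `gptdiff.gptdiff` module:
+
+ ```python
+ import subprocess
+ from gptdiff import generate_diff, smartapply
+ from gptdiff.gptdiff import build_environment
+
+ def run_git_apply(diff_text: str) -> bool:
+     """Hypothetical helper: check whether 'git apply' would accept the diff."""
+     result = subprocess.run(["git", "apply", "--check", "-"], input=diff_text, capture_output=True, text=True)
+     return result.returncode == 0
+
+ files = {"app.py": "def greet():\n    print('hi')\n"}
+ diff = generate_diff(build_environment(files), "Add a docstring to greet()")
+
+ # The CLI patches the working tree; here we only test whether the diff applies
+ # cleanly, then fall back to per-file, LLM-assisted patching in memory.
+ if not run_git_apply(diff):
+     files = smartapply(diff, files)
+ ```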
237
+ ## Testing
238
+
239
+ To run the test suite:
240
+
241
+ ```bash
242
+ pip install -e .[test]
243
+ pytest tests/
244
+ ```
245
+
246
+ This will execute all unit tests verifying core diff generation and application logic.
gptdiff-0.1.2/gptdiff/__init__.py ADDED
@@ -0,0 +1,3 @@
1
+ from .gptdiff import generate_diff, smartapply
2
+
3
+ __all__ = ['generate_diff', 'smartapply']
gptdiff-0.1.2/gptdiff/gptdiff.py ADDED
@@ -0,0 +1,609 @@
1
+ #!/usr/bin/env python3
2
+
3
+ import openai
4
+ from openai import OpenAI
5
+
6
+ import tiktoken
7
+
8
+ import os
9
+ import json
10
+ import subprocess
11
+ from pathlib import Path
12
+ import sys
13
+ import fnmatch
14
+ import argparse
15
+ import pkgutil
16
+ import re
17
+ import contextvars
18
+ from ai_agent_toolbox import FlatXMLParser, FlatXMLPromptFormatter, Toolbox
19
+ import threading
20
+ from pkgutil import get_data
21
+
22
+ diff_context = contextvars.ContextVar('diffcontent', default="")
23
+ def create_toolbox():
24
+ toolbox = Toolbox()
25
+
26
+ def diff(content: str):
27
+ diff_context.set(content)
28
+ return content
29
+
30
+ toolbox.add_tool(
31
+ name="diff",
32
+ fn=diff,
33
+ args={
34
+ "content": {
35
+ "type": "string",
36
+ "description": "Complete diff."
37
+ }
38
+ },
39
+ description="Save the calculated diff as used in 'git apply'"
40
+ )
41
+ return toolbox
42
+
43
+
44
+ def load_gitignore_patterns(gitignore_path):
45
+ with open(gitignore_path, 'r') as f:
46
+ patterns = [line.strip() for line in f if line.strip() and not line.startswith('#')]
47
+ return patterns
48
+
49
+ def is_ignored(filepath, gitignore_patterns):
50
+ filepath = Path(filepath).resolve()
51
+ ignored = False
52
+
53
+ for pattern in gitignore_patterns:
54
+ if pattern.startswith('!'):
55
+ negated_pattern = pattern[1:]
56
+ if fnmatch.fnmatch(str(filepath), negated_pattern) or fnmatch.fnmatch(str(filepath.relative_to(Path.cwd())), negated_pattern):
57
+ ignored = False
58
+ else:
59
+ relative_path = str(filepath.relative_to(Path.cwd()))
60
+ if fnmatch.fnmatch(str(filepath), pattern) or fnmatch.fnmatch(relative_path, pattern):
61
+ ignored = True
62
+ break
63
+ if pattern in relative_path:
64
+ ignored = True
65
+ break
66
+
67
+ # Ensure .gitignore itself is not ignored unless explicitly mentioned
68
+ if filepath.name == ".gitignore" and not any(pattern == ".gitignore" for pattern in gitignore_patterns):
69
+ ignored = False
70
+
71
+ return ignored
72
+
73
+ def list_files_and_dirs(path, ignore_list=None):
74
+ if ignore_list is None:
75
+ ignore_list = []
76
+
77
+ result = []
78
+
79
+ # List all items in the current directory
80
+ for item in os.listdir(path):
81
+ item_path = os.path.join(path, item)
82
+
83
+ if is_ignored(item_path, ignore_list):
84
+ continue
85
+
86
+ # Add the item to the result list
87
+ result.append(item_path)
88
+
89
+ # If it's a directory, recurse into it
90
+ if os.path.isdir(item_path):
91
+ result.extend(list_files_and_dirs(item_path, ignore_list))
92
+
93
+ return result
94
+
95
+ # Function to load project files considering .gitignore
96
+ def load_project_files(project_dir, cwd):
97
+ """Load project files while respecting .gitignore and .gptignore rules.
98
+
99
+ Recursively scans directories, skipping:
100
+ - Files/directories matching patterns in .gitignore/.gptignore
101
+ - Binary files that can't be decoded as UTF-8 text
102
+
103
+ Args:
104
+ project_dir: Root directory to scan for files
105
+ cwd: Base directory for resolving ignore files
106
+
107
+ Returns:
108
+ List of (absolute_path, file_content) tuples
109
+
110
+ Note:
111
+ Prints skipped files to stdout for visibility
112
+ """
113
+ ignore_paths = [Path(cwd) / ".gitignore", Path(cwd) / ".gptignore"]
114
+ gitignore_patterns = [".gitignore", "diff.patch", "prompt.txt", ".gptignore", "*.pdf", "*.docx", ".git", "*.orig", "*.rej"]
115
+
116
+ for p in ignore_paths:
117
+ if p.exists():
118
+ with open(p, 'r') as f:
119
+ gitignore_patterns.extend([line.strip() for line in f if line.strip() and not line.startswith('#')])
120
+
121
+ project_files = []
122
+ for file in list_files_and_dirs(project_dir, gitignore_patterns):
123
+ if os.path.isfile(file):
124
+ try:
125
+ with open(file, 'r') as f:
126
+ content = f.read()
127
+ print(file)
128
+ project_files.append((file, content))
129
+ except UnicodeDecodeError:
130
+ print(f"Skipping file {file} due to UnicodeDecodeError")
131
+ continue
132
+
133
+ print("")
134
+ return project_files
135
+
136
+ def load_prepend_file(file):
137
+ with open(file, 'r') as f:
138
+ return f.read()
139
+
140
+ # Function to call GPT-4 API and calculate the cost
141
+ def call_gpt4_api(system_prompt, user_prompt, files_content, model, temperature=0.7, max_tokens=2500, api_key=None, base_url=None):
142
+
143
+ parser = FlatXMLParser("diff")
144
+ formatter = FlatXMLPromptFormatter(tag="diff")
145
+ toolbox = create_toolbox()
146
+ tool_prompt = formatter.usage_prompt(toolbox)
147
+ system_prompt += "\n"+tool_prompt
148
+
149
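+ # This model also gets the system prompt folded into the user message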
+ if model == "gemini-2.0-flash-thinking-exp-01-21":
150
+ user_prompt = system_prompt+"\n"+user_prompt
151
+
152
+ messages = [
153
+ {"role": "system", "content": system_prompt},
154
+ {"role": "user", "content": user_prompt + "\n"+files_content},
155
+ ]
156
+ print("Using", model)
157
+ print("SYSTEM PROMPT")
158
+ print(system_prompt)
159
+ print("USER PROMPT")
160
+ print(user_prompt, "+", len(files_content), "characters of file content")
161
+
162
+ if api_key is None:
163
+ api_key = os.getenv('GPTDIFF_LLM_API_KEY')
164
+ if base_url is None:
165
+ base_url = os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/")
166
+ client = OpenAI(api_key=api_key, base_url=base_url)
167
+ response = client.chat.completions.create(model=model,
168
+ messages=messages,
169
+ max_tokens=max_tokens,
170
+ temperature=temperature)
171
+
172
+ prompt_tokens = response.usage.prompt_tokens
173
+ completion_tokens = response.usage.completion_tokens
174
+ total_tokens = response.usage.total_tokens
175
+
176
+ # Now, these rates are updated to per million tokens
177
+ cost_per_million_prompt_tokens = 30
178
+ cost_per_million_completion_tokens = 60
179
+ cost = (prompt_tokens / 1_000_000 * cost_per_million_prompt_tokens) + (completion_tokens / 1_000_000 * cost_per_million_completion_tokens)
180
+
181
+ full_response = response.choices[0].message.content.strip()
182
+
183
+
184
+ events = parser.parse(full_response)
185
+ for event in events:
186
+ toolbox.use(event)
187
+ diff_response = diff_context.get()
188
+
189
+ return full_response, diff_response, prompt_tokens, completion_tokens, total_tokens, cost
190
+
191
+ # New API functions
192
+ def build_environment(files_dict):
193
+ """Rebuild environment string from file dictionary"""
194
+ env = []
195
+ for path, content in files_dict.items():
196
+ env.append(f"File: {path}")
197
+ env.append("Content:")
198
+ env.append(content)
199
+ return '\n'.join(env)
200
+
201
+ def generate_diff(environment, goal, model=None, temperature=0.7, max_tokens=32000, api_key=None, base_url=None, prepend=None):
202
+ """API: Generate diff from environment and goal"""
203
+ if model is None:
204
+ model = os.getenv('GPTDIFF_MODEL', 'deepseek-reasoner')
205
+ if prepend:
206
+ prepend = load_prepend_file(prepend)
+ enc = tiktoken.get_encoding("o200k_base")
+ print("Including prepend", len(enc.encode(json.dumps(prepend))), "tokens")
208
+ else:
209
+ prepend = ""
210
+
211
+ system_prompt = prepend+f"Output a git diff into a <diff> block."
212
+ _, diff_text, _, _, _, _ = call_gpt4_api(
213
+ system_prompt,
214
+ goal,
215
+ environment,
216
+ model=model,
217
+ api_key=api_key,
218
+ base_url=base_url,
219
+ max_tokens=max_tokens,
220
+ temperature=temperature
221
+ )
222
+ return diff_text
223
+
224
+ def smartapply(diff_text, files, model=None, api_key=None, base_url=None):
225
+ """Applies unified diffs to file contents with AI-powered conflict resolution.
226
+
227
+ Key features:
228
+ - Handles file creations, modifications, and deletions
229
+ - Maintains idempotency - reapplying same diff produces same result
230
+ - Uses LLM to resolve ambiguous changes while preserving context
231
+ - Returns new files dictionary without modifying input
232
+
233
+ Args:
234
+ diff_text: Unified diff string compatible with git apply
235
+ files: Dictionary of {file_path: content} to modify
236
+ model: LLM to use for conflict resolution (default: deepseek-reasoner)
237
+ api_key: Optional API key override
238
+ base_url: Optional API base URL override
239
+
240
+ Returns:
241
+ New dictionary with updated file contents. Deleted files are omitted.
242
+
243
+ Raises:
244
+ APIError: If LLM API calls fail
245
+
246
+ Example:
247
+ >>> original = {"file.py": "def old():\n pass"}
248
+ >>> diff = '''diff --git a/file.py b/file.py
249
+ ... --- a/file.py
250
+ ... +++ b/file.py
251
+ ... @@ -1,2 +1,2 @@
252
+ ... -def old():
253
+ ... +def new():'''
254
+ >>> updated = smartapply(diff, original)
255
+ >>> print(updated["file.py"])
256
+ def new():
257
+ pass
258
+ """
259
+ if model is None:
260
+ model = os.getenv('GPTDIFF_MODEL', 'deepseek-reasoner')
261
+ files = dict(files)  # work on a copy so the caller's dict is not mutated
+ parsed_diffs = parse_diff_per_file(diff_text)
262
+ print("SMARTAPPLY", diff_text)
263
+
264
+ def process_file(path, patch):
265
+ original = files.get(path, '')
266
+ # Handle file deletions
267
+ if '+++ /dev/null' in patch:
268
+ if path in files:
269
+ del files[path]
270
+ else:
271
+ updated = call_llm_for_apply(path, original, patch, model, api_key=api_key, base_url=base_url)
272
+ files[path] = updated.strip()
273
+
274
+ for path, patch in parsed_diffs:
275
+ process_file(path, patch)
276
+
277
+ return files
278
+
279
+ # Function to apply diff to project files
280
+ def apply_diff(project_dir, diff_text):
281
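+ # Write the diff to diff.patch and apply it with the POSIX 'patch' tool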
+ diff_file = Path(project_dir) / "diff.patch"
282
+ with open(diff_file, 'w') as f:
283
+ f.write(diff_text)
284
+
285
+ result = subprocess.run(["patch", "-p1", "--remove-empty-files", "--input", str(diff_file)], cwd=project_dir, capture_output=True, text=True)
286
+ if result.returncode != 0:
287
+ return False
288
+ else:
289
+ return True
290
+
291
+ def parse_arguments():
292
+ parser = argparse.ArgumentParser(description='Generate and optionally apply git diffs using GPT-4.')
293
+ parser.add_argument('prompt', type=str, help='Prompt that runs on the codebase.')
294
+ parser.add_argument('--apply', action='store_true', help='Attempt to apply the generated git diff. Uses smartapply if applying the patch fails.')
295
+ parser.add_argument('--prepend', type=str, default=None, help='Path to content prepended to system prompt')
296
+
297
+ parser.add_argument('--nobeep', action='store_false', dest='beep', default=True, help='Disable completion notification beep')
298
+ # New flag --prompt that does not call the API but instead writes the full prompt to prompt.txt
299
+ parser.add_argument('--call', action='store_true',
300
+ help='Call the GPT-4 API. Writes the full prompt to prompt.txt if not specified.')
301
+ parser.add_argument('files', nargs='*', default=[], help='Specify additional files or directories to include.')
302
+ parser.add_argument('--temperature', type=float, default=0.7, help='Temperature parameter for model creativity (0.0 to 2.0)')
303
+ parser.add_argument('--model', type=str, default=None, help='Model to use for the API call.')
304
+
305
+ parser.add_argument('--nowarn', action='store_true', help='Disable large token warning')
306
+
307
+ return parser.parse_args()
308
+
309
+ def absolute_to_relative(absolute_path):
310
+ cwd = os.getcwd()
311
+ relative_path = os.path.relpath(absolute_path, cwd)
312
+ return relative_path
313
+
314
+ def parse_diff_per_file(diff_text):
315
+ """Parse unified diff text into individual file patches.
316
+
317
+ Splits a multi-file diff into per-file entries for processing. Handles:
318
+ - File creations (+++ /dev/null)
319
+ - File deletions (--- /dev/null)
320
+ - Standard modifications
321
+
322
+ Args:
323
+ diff_text: Unified diff string as generated by `git diff`
324
+
325
+ Returns:
326
+ List of tuples (file_path, patch) where:
327
+ - file_path: Relative path to modified file
328
+ - patch: Full diff fragment for this file
329
+
330
+ Note:
331
+ Uses 'b/' prefix detection from git diffs to determine target paths
332
+ """
333
+ diffs = []
334
+ file_path = None
335
+ current_diff = []
336
+ from_path = None
337
+
338
+ for line in diff_text.split('\n'):
339
+ if line.startswith('diff --git'):
340
+ if current_diff and file_path is not None:
341
+ diffs.append((file_path, '\n'.join(current_diff)))
342
+ current_diff = [line]
343
+ file_path = None
344
+ from_path = None
345
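+ # 'diff --git a/<old> b/<new>': the 4th token is the b/-prefixed target path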
+ parts = line.split()
346
+ if len(parts) >= 4:
347
+ b_path = parts[3]
348
+ file_path = b_path[2:] if b_path.startswith('b/') else b_path
349
+ else:
350
+ current_diff.append(line)
351
+ if line.startswith('--- '):
352
+ from_path = line[4:].strip()
353
+ elif line.startswith('+++ '):
354
+ to_path = line[4:].strip()
355
+ if to_path == '/dev/null':
356
+ if from_path:
357
+ # For deletions, use from_path after stripping 'a/' prefix
358
+ file_path = from_path[2:] if from_path.startswith('a/') else from_path
359
+ else:
360
+ # For normal cases, use to_path after stripping 'b/' prefix
361
+ file_path = to_path[2:] if to_path.startswith('b/') else to_path
362
+
363
+ # Handle remaining diff content after loop
364
+ if current_diff and file_path is not None:
365
+ diffs.append((file_path, '\n'.join(current_diff)))
366
+
367
+ return diffs
368
+
369
+ def call_llm_for_apply(file_path, original_content, file_diff, model, api_key=None, base_url=None):
370
+ """AI-powered diff application with conflict resolution.
371
+
372
+ Internal workhorse for smartapply that handles individual file patches.
373
+ Uses LLM to reconcile diffs while preserving code structure and context.
374
+
375
+ Args:
376
+ file_path: Target file path (used for context/error messages)
377
+ original_content: Current file content as string
378
+ file_diff: Unified diff snippet to apply
379
+ model: LLM identifier for processing
380
+ api_key: Optional override for LLM API credentials
381
+ base_url: Optional override for LLM API endpoint
382
+
383
+ Returns:
384
+ Updated file content as string with diff applied
385
+
386
+ Raises:
387
+ APIError: If LLM processing fails
388
+
389
+ Example:
390
+ >>> updated = call_llm_for_apply(
391
+ ... file_path='utils.py',
392
+ ... original_content='def old(): pass',
393
+ ... file_diff='''@@ -1 +1 @@
394
+ ... -def old()
395
+ ... +def new()''',
396
+ ... model='deepseek-reasoner'
397
+ ... )
398
+ >>> print(updated)
399
+ def new(): pass"""
400
+
401
+ system_prompt = """Please apply the diff to this file. Return the result in a block. Write the entire file.
402
+
403
+ 1. Carefully apply all changes from the diff
404
+ 2. Preserve surrounding context that isn't changed
405
+ 3. Only return the final file content, do not add any additional markup and do not add a code block"""
406
+
407
+ user_prompt = f"""File: {file_path}
408
+ File contents:
409
+ <filecontents>
410
+ {original_content}
411
+ </filecontents>
412
+
413
+ Diff to apply:
414
+ <diff>
415
+ {file_diff}
416
+ </diff>"""
417
+
418
+ if model == "gemini-2.0-flash-thinking-exp-01-21":
419
+ user_prompt = system_prompt+"\n"+user_prompt
420
+ messages = [
421
+ {"role": "system", "content": system_prompt},
422
+ {"role": "user", "content": user_prompt},
423
+ ]
424
+
425
+ if api_key is None:
426
+ api_key = os.getenv('GPTDIFF_LLM_API_KEY')
427
+ if base_url is None:
428
+ base_url = os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/")
429
+ client = OpenAI(api_key=api_key, base_url=base_url)
430
+ response = client.chat.completions.create(model=model,
431
+ messages=messages,
432
+ temperature=0.0,
433
+ max_tokens=30000)
434
+
435
+ return response.choices[0].message.content
436
+
437
+ def build_environment_from_filelist(file_list, cwd):
438
+ """Build environment string from list of file paths"""
439
+ files_dict = {}
440
+ for file_path in file_list:
441
+ relative_path = os.path.relpath(file_path, cwd)
442
+ try:
443
+ with open(file_path, 'r') as f:
444
+ content = f.read()
445
+ files_dict[relative_path] = content
446
+ except UnicodeDecodeError:
447
+ print(f"Skipping file {file_path} due to UnicodeDecodeError")
448
+ continue
449
+ except IOError as e:
450
+ print(f"Error reading {file_path}: {e}")
451
+ continue
452
+ return build_environment(files_dict)
453
+
454
+ def main():
455
+ # Adding color support for Windows CMD
456
+ if os.name == 'nt':
457
+ os.system('color')
458
+
459
+ args = parse_arguments()
460
+
461
+ # TODO: The 'openai.api_base' option isn't read in the client API. You will need to pass it when you instantiate the client, e.g. 'OpenAI(base_url="https://nano-gpt.com/api/v1/")'
462
+ # openai.api_base = "https://nano-gpt.com/api/v1/"
463
+ if len(sys.argv) < 2:
464
+ print("Usage: python script.py '<user_prompt>' [--apply]")
465
+ sys.exit(1)
466
+
467
+ user_prompt = args.prompt
468
+ project_dir = os.getcwd()
469
+ enc = tiktoken.get_encoding("o200k_base")
470
+
471
+
472
+ # Load project files, defaulting to current working directory if no additional paths are specified
473
+ if not args.files:
474
+ project_files = load_project_files(project_dir, project_dir)
475
+ else:
476
+ project_files = []
477
+ for additional_path in args.files:
478
+ if os.path.isfile(additional_path):
479
+ with open(additional_path, 'r') as f:
480
+ project_files.append((additional_path, f.read()))
481
+ elif os.path.isdir(additional_path):
482
+ project_files.extend(load_project_files(additional_path, project_dir))
483
+
484
+ if args.prepend:
485
+ prepend = load_prepend_file(args.prepend)
486
+ print("Including prepend",len(enc.encode(json.dumps(prepend))), "tokens")
487
+ else:
488
+ prepend = ""
489
+
490
+ # Prepare system prompt
491
+ system_prompt = prepend + f"Output a git diff into a <diff> block."
492
+
493
+ files_content = ""
494
+ for file, content in project_files:
495
+ print(f"Including {len(enc.encode(content)):5d} tokens", absolute_to_relative(file))
496
+
497
+ # Prepare the prompt for GPT-4
498
+ files_content += f"File: {absolute_to_relative(file)}\nContent:\n{content}\n"
499
+
500
+ full_prompt = f"{system_prompt}\n\n{user_prompt}\n\n{files_content}"
501
+ token_count = len(enc.encode(full_prompt))
502
+ if args.model is None:
503
+ args.model = os.getenv('GPTDIFF_MODEL', 'deepseek-reasoner')
504
+
505
+ if not args.call and not args.apply:
506
+ with open('prompt.txt', 'w') as f:
507
+ f.write(full_prompt)
508
+ print(f"Total tokens: {token_count:5d}")
509
+ print(f"\033[1;32mNot calling GPT-4.\033[0m") # Green color for success message
510
+ print('Instead, wrote full prompt to prompt.txt. Use `xclip -selection clipboard < prompt.txt` then paste into chatgpt')
511
+ print(f"Total cost: ${0.0:.4f}")
512
+ exit(0)
513
+ else:
514
+ # Validate API key presence before any API operations
515
+ if not os.getenv('GPTDIFF_LLM_API_KEY'):
516
+ print("\033[1;31mError: GPTDIFF_LLM_API_KEY environment variable required\033[0m")
517
+ print("Set it with: export GPTDIFF_LLM_API_KEY='your-key'")
518
+ sys.exit(1)
519
+
520
+ # Confirm large requests without specified files
521
+ if (not args.nowarn) and (not args.files) and token_count > 10000 and (args.call or args.apply):
522
+ print(f"\033[1;33mThis is a larger request ({token_count} tokens). Disable this warning with --nowarn. Are you sure you want to send it? [y/N]\033[0m")
523
+ confirmation = input().strip().lower()
524
+ if confirmation != 'y':
525
+ print("Request canceled")
526
+ sys.exit(0)
527
+ full_text, diff_text, prompt_tokens, completion_tokens, total_tokens, cost = call_gpt4_api(system_prompt, user_prompt, files_content, args.model,
528
+ temperature=args.temperature,
529
+ api_key=os.getenv('GPTDIFF_LLM_API_KEY'),
530
+ base_url=os.getenv('GPTDIFF_LLM_BASE_URL', "https://nano-gpt.com/api/v1/")
531
+ )
532
+
533
+ if(diff_text.strip() == ""):
534
+ print(f"\033[1;33mThere was no data in this diff. The LLM may have returned something invalid.\033[0m")
535
+ print("Unable to parse diff text. Full response:", full_text)
536
+ if args.beep:
537
+ print("\a") # Terminal bell for completion notification
538
+ return
539
+
540
+ # Output result
541
+ elif args.apply:
542
+ print("\nAttempting apply with the following diff:")
543
+ print("\n<diff>")
544
+ print(diff_text)
545
+ print("\n</diff>")
546
+ print("Saved to patch.diff")
547
+ if apply_diff(project_dir, diff_text):
548
+ print(f"\033[1;32mPatch applied successfully with 'git apply'.\033[0m") # Green color for success message
549
+ else:
550
+ print("Apply failed, attempting smart apply.")
551
+ parsed_diffs = parse_diff_per_file(diff_text)
552
+ print("Found", len(parsed_diffs), " files in diff, calling smartdiff for each file concurrently:")
553
+
554
+ if(len(parsed_diffs) == 0):
555
+ print(f"\033[1;33mThere were no entries in this diff. The LLM may have returned something invalid.\033[0m")
556
+ if args.beep:
557
+ print("\a") # Terminal bell for completion notification
558
+ return
559
+
560
+ threads = []
561
+
562
+ def process_file(file_path, file_diff):
563
+ full_path = Path(project_dir) / file_path
564
+ print(f"Processing file: {file_path}")
565
+
566
+ # Handle file deletions from diff
567
+ if '+++ /dev/null' in file_diff:
568
+ if full_path.exists():
569
+ full_path.unlink()
570
+ print(f"\033[1;32mDeleted file {file_path}.\033[0m")
571
+ else:
572
+ print(f"\033[1;33mFile {file_path} not found - skipping deletion\033[0m")
573
+ return
574
+
575
+ original_content = ''
576
+ if full_path.exists():
577
+ try:
578
+ original_content = full_path.read_text()
579
+ except UnicodeDecodeError:
580
+ print(f"Skipping binary file {file_path}")
581
+ return
582
+
583
+ try:
584
+ updated_content = call_llm_for_apply(file_path, original_content, file_diff, args.model)
585
+ full_path.parent.mkdir(parents=True, exist_ok=True)
586
+ full_path.write_text(updated_content)
587
+ print(f"\033[1;32mSuccessful 'smartapply' update {file_path}.\033[0m")
588
+ except Exception as e:
589
+ print(f"\033[1;31mFailed to process {file_path}: {str(e)}\033[0m")
590
+
591
+ threads = []
592
+ for file_path, file_diff in parsed_diffs:
593
+ thread = threading.Thread(
594
+ target=process_file,
595
+ args=(file_path, file_diff)
596
+ )
597
+ thread.start()
598
+ threads.append(thread)
599
+ for thread in threads:
600
+ thread.join()
601
+
602
+
603
+ if args.beep:
604
+ print("\a") # Terminal bell for completion notification
605
+
606
+ print(f"Prompt tokens: {prompt_tokens}")
607
+ print(f"Completion tokens: {completion_tokens}")
608
+ print(f"Total tokens: {total_tokens}")
609
+ #print(f"Total cost: ${cost:.4f}")
gptdiff-0.1.2/gptdiff.egg-info/PKG-INFO ADDED
@@ -0,0 +1,23 @@
1
+ Metadata-Version: 2.2
2
+ Name: gptdiff
3
+ Version: 0.1.2
4
+ Summary: A tool to generate and apply git diffs using LLMs
5
+ Author: 255labs
6
+ Classifier: License :: OSI Approved :: MIT License
7
+ Classifier: Programming Language :: Python :: 3
8
+ Classifier: Operating System :: OS Independent
9
+ License-File: LICENSE.txt
10
+ Requires-Dist: openai>=1.0.0
11
+ Requires-Dist: tiktoken>=0.5.0
12
+ Requires-Dist: ai_agent_toolbox>=0.1.0
13
+ Provides-Extra: test
14
+ Requires-Dist: pytest; extra == "test"
15
+ Requires-Dist: pytest-mock; extra == "test"
16
+ Provides-Extra: docs
17
+ Requires-Dist: mkdocs; extra == "docs"
18
+ Requires-Dist: mkdocs-material; extra == "docs"
19
+ Dynamic: author
20
+ Dynamic: classifier
21
+ Dynamic: provides-extra
22
+ Dynamic: requires-dist
23
+ Dynamic: summary
gptdiff-0.1.2/gptdiff.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,12 @@
1
+ LICENSE.txt
2
+ README.md
3
+ setup.py
4
+ gptdiff/__init__.py
5
+ gptdiff/gptdiff.py
6
+ gptdiff.egg-info/PKG-INFO
7
+ gptdiff.egg-info/SOURCES.txt
8
+ gptdiff.egg-info/dependency_links.txt
9
+ gptdiff.egg-info/entry_points.txt
10
+ gptdiff.egg-info/requires.txt
11
+ gptdiff.egg-info/top_level.txt
12
+ tests/test_smartapply.py
gptdiff-0.1.2/gptdiff.egg-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ gptdiff = gptdiff.gptdiff:main
gptdiff-0.1.2/gptdiff.egg-info/requires.txt ADDED
@@ -0,0 +1,11 @@
1
+ openai>=1.0.0
2
+ tiktoken>=0.5.0
3
+ ai_agent_toolbox>=0.1.0
4
+
5
+ [docs]
6
+ mkdocs
7
+ mkdocs-material
8
+
9
+ [test]
10
+ pytest
11
+ pytest-mock
gptdiff-0.1.2/gptdiff.egg-info/top_level.txt ADDED
@@ -0,0 +1 @@
1
+ gptdiff
gptdiff-0.1.2/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
gptdiff-0.1.2/setup.py ADDED
@@ -0,0 +1,29 @@
1
+ from setuptools import setup, find_packages
2
+
3
+ setup(
4
+ name='gptdiff',
5
+ version='0.1.2',
6
+ description='A tool to generate and apply git diffs using LLMs',
7
+ author='255labs',
8
+ packages=find_packages(), # Use find_packages() to automatically discover packages
9
+ package_data={'gptdiff': []}, # Add any package data if needed
10
+ install_requires=[
11
+ 'openai>=1.0.0',
12
+ 'tiktoken>=0.5.0',
13
+ 'ai_agent_toolbox>=0.1.0'
14
+ ],
15
+ extras_require={
16
+ 'test': ['pytest', 'pytest-mock'],
17
+ 'docs': ['mkdocs', 'mkdocs-material']
18
+ },
19
+ entry_points={
20
+ 'console_scripts': ['gptdiff=gptdiff.gptdiff:main'],
21
+ },
22
+ license=None, # Remove license argument
23
+ # license_file='LICENSE.txt', # Remove license_file argument
24
+ classifiers=[ # Add license classifiers
25
+ 'License :: OSI Approved :: MIT License', # Standard MIT license classifier
26
+ 'Programming Language :: Python :: 3',
27
+ 'Operating System :: OS Independent',
28
+ ],
29
+ )
gptdiff-0.1.2/tests/test_smartapply.py ADDED
@@ -0,0 +1,220 @@
1
+ from gptdiff import smartapply
2
+ from unittest.mock import patch
3
+
4
+ def test_smartapply_file_deletion():
5
+ """Test that smartapply correctly handles file deletion diffs"""
6
+ diff_text = '''diff --git a/old.py b/old.py
7
+ deleted file mode 100644
8
+ --- a/old.py
9
+ +++ /dev/null
10
+ @@ -1,3 +0,0 @@
11
+ -def deprecated():
12
+ - print("Remove me")'''
13
+
14
+ original_files = {
15
+ "old.py": "def deprecated():\n print('Remove me')"
16
+ }
17
+
18
+ updated_files = smartapply(diff_text, original_files)
19
+
20
+ # Verify deleted file is removed from dictionary
21
+ assert "old.py" not in updated_files
22
+ assert len(updated_files) == 0
23
+
24
+ # Test applying deletion to non-existent file
25
+ result = smartapply(diff_text, {})
26
+ assert len(result) == 0
27
+
28
+ def test_smartapply_file_modification():
29
+ """Test that smartapply correctly handles file modification diffs"""
30
+ diff_text = '''diff --git a/hello.py b/hello.py
31
+ --- a/hello.py
32
+ +++ b/hello.py
33
+ @@ -1,2 +1,5 @@
34
+ def hello():
35
+ print('Hello')
36
+ +
37
+ +def goodbye():
38
+ + print('Goodbye')'''
39
+
40
+ original_files = {
41
+ "hello.py": "def hello():\n print('Hello')"
42
+ }
43
+
44
+ # Mock LLM to return modified content
45
+ with patch('gptdiff.gptdiff.call_llm_for_apply',
46
+ return_value="def hello():\n print('Hello')\n\ndef goodbye():\n print('Goodbye')"):
47
+
48
+ updated_files = smartapply(diff_text, original_files)
49
+
50
+ assert "hello.py" in updated_files
51
+ assert original_files["hello.py"] != updated_files["hello.py"]
52
+
53
+ def test_smartapply_new_file_creation():
54
+ """Test that smartapply handles new file creation through diffs"""
55
+ diff_text = '''diff --git a/new.py b/new.py
56
+ new file mode 100644
57
+ --- /dev/null
58
+ +++ b/new.py
59
+ @@ -0,0 +1,2 @@
60
+ +def new_func():
61
+ + print('New function')'''
62
+
63
+ original_files = {}
64
+
65
+ # Mock LLM for new file creation
66
+ with patch('gptdiff.gptdiff.call_llm_for_apply', return_value="def new_func():\n print('New function')"):
67
+
68
+ updated_files = smartapply(diff_text, original_files)
69
+
70
+ assert "new.py" in updated_files
71
+ assert updated_files["new.py"] == "def new_func():\n print('New function')"
72
+
73
+
74
+ def test_smartapply_modify_nonexistent_file():
75
+ """Test that smartapply handles modification diffs for non-existent files by creating them"""
76
+ diff_text = '''diff --git a/newfile.py b/newfile.py
77
+ --- a/newfile.py
78
+ +++ b/newfile.py
79
+ @@ -0,0 +1,2 @@
80
+ ++def new_func():
81
+ ++ print('Created via diff')'''
82
+
83
+ original_files = {}
84
+
85
+ # Mock LLM to return content for new file
86
+ with patch('gptdiff.gptdiff.call_llm_for_apply',
87
+ return_value="def new_func():\n print('Created via diff')"):
88
+
89
+ updated_files = smartapply(diff_text, original_files)
90
+
91
+ # Verify new file created with expected content
92
+ assert "newfile.py" in updated_files
93
+ assert updated_files["newfile.py"] == "def new_func():\n print('Created via diff')"
94
+
95
+ result = smartapply(diff_text, original_files)
96
+ assert "newfile.py" in result
97
+
98
+ def test_smartapply_multi_file_modification(mocker):
99
+ """Test smartapply handles multi-file modifications through LLM integration.
100
+
101
+ Verifies:
102
+ - Correct processing of diffs affecting multiple files in single patch
103
+ - Mocked LLM responses properly update each target file's content
104
+ - Non-targeted files remain unmodified
105
+
106
+ Test Setup:
107
+ - Original files include two target files and unrelated file
108
+ - Mock LLM to return modified content based on file path
109
+ - Apply diff through LLM-powered smartapply
110
+
111
+ Assertions:
112
+ - Both target files show expected modifications
113
+ - Unrelated file content remains unchanged"""
114
+ diff_text = '''diff --git a/file1.py b/file1.py
115
+ --- a/file1.py
116
+ +++ b/file1.py
117
+ @@ -1,2 +1,2 @@
118
+ def func1():
119
+ - print("Old func1")
120
+ + print("New func1")
121
+ diff --git a/file2.py b/file2.py
122
+ --- a/file2.py
123
+ +++ b/file2.py
124
+ @@ -1,2 +1,2 @@
125
+ def func2():
126
+ - print("Old func2")
127
+ + print("New func2")'''
128
+
129
+ original_files = {
130
+ "file1.py": "def func1():\n print('Old func1')",
131
+ "file2.py": "def func2():\n print('Old func2')",
132
+ "unrelated.py": "def unrelated():\n pass"
133
+ }
134
+
135
+ # Mock LLM to return modified content based on file path
136
+ def mock_call_llm(file_path, original_content, file_diff, model, api_key, base_url):
137
+ if file_path == "file1.py":
138
+ return "def func1():\n print('New func1')"
139
+ elif file_path == "file2.py":
140
+ return "def func2():\n print('New func2')"
141
+ return original_content
142
+
143
+ mocker.patch('gptdiff.gptdiff.call_llm_for_apply', side_effect=mock_call_llm)
144
+
145
+ updated_files = smartapply(diff_text, original_files)
146
+
147
+ # Verify both target files modified
148
+ assert "file1.py" in updated_files
149
+ assert "file2.py" in updated_files
150
+ assert "unrelated.py" in updated_files
151
+
152
+ # Check exact modifications
153
+ assert "print('New func1')" in updated_files["file1.py"]
154
+ assert "print('New func2')" in updated_files["file2.py"]
155
+
156
+ # Verify unrelated file remains untouched
157
+ assert updated_files["unrelated.py"] == "def unrelated():\n pass"
158
+
159
+
160
+ def test_smartapply_complex_single_hunk(mocker):
161
+ """Test complex single hunk with multiple change types
162
+
163
+ Validates proper handling of:
164
+ - Line deletions (# Old processing logic, temp = data * 2)
165
+ - Additions with new control flow (# Optimized pipeline, if not data check)
166
+ - Context preservation (results list initialization)
167
+
168
+ Setup:
169
+ - Original function contains legacy processing logic
170
+ - Diff removes temporary variable and adds guard clause
171
+ - Mock LLM to return optimized version matching diff
172
+ """
173
+ diff_text = '''diff --git a/complex.py b/complex.py
174
+ --- a/complex.py
175
+ +++ b/complex.py
176
+ @@ -1,7 +1,8 @@
177
+ def process(data):
178
+ - # Old processing logic
179
+ - temp = data * 2
180
+ + # Optimized pipeline
181
+ + if not data:
182
+ + return []
183
+ results = []
184
+ - for x in temp:
185
+ + for x in data:
186
+ results.append(x ** 2)
187
+ return results'''
188
+
189
+ original_files = {
190
+ "complex.py": (
191
+ "def process(data):\n"
192
+ " # Old processing logic\n"
193
+ " temp = data * 2\n"
194
+ " results = []\n"
195
+ " for x in temp:\n"
196
+ " results.append(x ** 2)\n"
197
+ " return results"
198
+ )
199
+ }
200
+
201
+ expected_content = (
202
+ "def process(data):\n"
203
+ " # Optimized pipeline\n"
204
+ " if not data:\n"
205
+ " return []\n"
206
+ " results = []\n"
207
+ " for x in data:\n"
208
+ " results.append(x ** 2)\n"
209
+ " return results"
210
+ )
211
+ mocker.patch('gptdiff.gptdiff.call_llm_for_apply', return_value=expected_content)
212
+
213
+ updated_files = smartapply(diff_text, original_files)
214
+
215
+ assert "complex.py" in updated_files
216
+ updated = updated_files["complex.py"]
217
+ assert "Optimized pipeline" in updated
218
+ assert "if not data:" in updated
219
+ assert "temp = data * 2" not in updated
220
+ assert "for x in data:" in updated