auto-coder 0.1.399__py3-none-any.whl → 0.1.400__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

Files changed (38) hide show
  1. {auto_coder-0.1.399.dist-info → auto_coder-0.1.400.dist-info}/METADATA +1 -1
  2. {auto_coder-0.1.399.dist-info → auto_coder-0.1.400.dist-info}/RECORD +38 -19
  3. autocoder/auto_coder_runner.py +2 -1
  4. autocoder/common/ac_style_command_parser/parser.py +27 -12
  5. autocoder/common/auto_coder_lang.py +78 -0
  6. autocoder/common/command_completer_v2.py +1 -1
  7. autocoder/common/pull_requests/__init__.py +256 -0
  8. autocoder/common/pull_requests/base_provider.py +191 -0
  9. autocoder/common/pull_requests/config.py +66 -0
  10. autocoder/common/pull_requests/example.py +1 -0
  11. autocoder/common/pull_requests/exceptions.py +46 -0
  12. autocoder/common/pull_requests/manager.py +201 -0
  13. autocoder/common/pull_requests/models.py +164 -0
  14. autocoder/common/pull_requests/providers/__init__.py +23 -0
  15. autocoder/common/pull_requests/providers/gitcode_provider.py +19 -0
  16. autocoder/common/pull_requests/providers/gitee_provider.py +20 -0
  17. autocoder/common/pull_requests/providers/github_provider.py +214 -0
  18. autocoder/common/pull_requests/providers/gitlab_provider.py +29 -0
  19. autocoder/common/pull_requests/test_module.py +1 -0
  20. autocoder/common/pull_requests/utils.py +344 -0
  21. autocoder/common/tokens/__init__.py +62 -0
  22. autocoder/common/tokens/counter.py +211 -0
  23. autocoder/common/tokens/file_detector.py +105 -0
  24. autocoder/common/tokens/filters.py +111 -0
  25. autocoder/common/tokens/models.py +28 -0
  26. autocoder/common/v2/agent/agentic_edit.py +182 -68
  27. autocoder/common/v2/agent/agentic_edit_types.py +1 -0
  28. autocoder/sdk/cli/handlers.py +2 -1
  29. autocoder/sdk/cli/main.py +4 -2
  30. autocoder/sdk/cli/options.py +4 -3
  31. autocoder/sdk/core/auto_coder_core.py +14 -1
  32. autocoder/sdk/core/bridge.py +3 -0
  33. autocoder/sdk/models/options.py +8 -6
  34. autocoder/version.py +1 -1
  35. {auto_coder-0.1.399.dist-info → auto_coder-0.1.400.dist-info}/WHEEL +0 -0
  36. {auto_coder-0.1.399.dist-info → auto_coder-0.1.400.dist-info}/entry_points.txt +0 -0
  37. {auto_coder-0.1.399.dist-info → auto_coder-0.1.400.dist-info}/licenses/LICENSE +0 -0
  38. {auto_coder-0.1.399.dist-info → auto_coder-0.1.400.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,211 @@
1
+ import os
2
+ import concurrent.futures
3
+ from typing import List, Dict, Optional, Union, Callable
4
+ from pathlib import Path
5
+ import time
6
+ import re
7
+
8
+ from autocoder.rag.variable_holder import VariableHolder
9
+ from .models import TokenResult, DirectoryTokenResult
10
+ from .file_detector import FileTypeDetector
11
+ from .filters import FileFilter
12
+
13
+
14
class TokenCounter:
    """Token counter: computes token statistics for files and directories.

    Tokenization is delegated to the shared tokenizer stored in
    ``VariableHolder.TOKENIZER_MODEL``, which must be loaded before a
    ``TokenCounter`` is constructed.
    """

    def __init__(self,
                 timeout: int = 30,
                 parallel: bool = True,
                 max_workers: int = 4):
        """Initialize the token counter.

        Args:
            timeout: Per-file processing timeout in seconds.
                NOTE(review): stored but not currently enforced anywhere in
                this class — confirm whether enforcement is planned.
            parallel: Whether to process multiple files in parallel threads.
            max_workers: Maximum number of worker threads.

        Raises:
            RuntimeError: If the global tokenizer model has not been loaded.
        """
        self.timeout = timeout
        self.parallel = parallel
        self.max_workers = max_workers

        # Fail fast: every count depends on the shared tokenizer being loaded.
        if VariableHolder.TOKENIZER_MODEL is None:
            raise RuntimeError("Tokenizer model not initialized. Please call load_tokenizer() first.")

    @staticmethod
    def _failure(file_path: str, error: str) -> TokenResult:
        """Build a zeroed, unsuccessful TokenResult carrying *error*."""
        return TokenResult(
            file_path=file_path,
            token_count=0,
            char_count=0,
            line_count=0,
            success=False,
            error=error
        )

    def count_file(self, file_path: str) -> TokenResult:
        """Count tokens in a single file.

        Args:
            file_path: Path of the file to analyze.

        Returns:
            TokenResult: Statistics for the file. Never raises: any failure
            (missing file, binary file, read/tokenize error) is reported via
            ``success=False`` and ``error``.
        """
        try:
            if not os.path.isfile(file_path):
                return self._failure(file_path, "File does not exist")

            # Only text files are tokenized; binaries are reported as errors.
            if not FileTypeDetector.is_text_file(file_path):
                return self._failure(file_path, "Not a text file")

            # Detect encoding, then read with errors='replace' so undecodable
            # bytes degrade to replacement chars instead of raising.
            encoding = FileTypeDetector.detect_encoding(file_path)
            with open(file_path, 'r', encoding=encoding, errors='replace') as f:
                content = f.read()

            # Count the final line even when it lacks a trailing newline;
            # empty content counts as zero lines.
            line_count = content.count('\n') + (0 if content == "" or content.endswith('\n') else 1)
            char_count = len(content)
            token_count = len(VariableHolder.TOKENIZER_MODEL.encode(content))

            return TokenResult(
                file_path=file_path,
                token_count=token_count,
                char_count=char_count,
                line_count=line_count
            )
        except Exception as e:
            return self._failure(file_path, str(e))

    def count_files(self, file_paths: List[str]) -> List[TokenResult]:
        """Count tokens for multiple files.

        Args:
            file_paths: List of file paths to analyze.

        Returns:
            List[TokenResult]: One result per input path, in the same order
            as ``file_paths`` (fixed: previously results were returned in
            nondeterministic thread-completion order).
        """
        if not self.parallel or len(file_paths) <= 1:
            return [self.count_file(file_path) for file_path in file_paths]

        # Submit all files, then place each result back at its input index so
        # the output order is deterministic regardless of completion order.
        results: List[Optional[TokenResult]] = [None] * len(file_paths)
        with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            future_to_index = {
                executor.submit(self.count_file, file_path): index
                for index, file_path in enumerate(file_paths)
            }
            for future in concurrent.futures.as_completed(future_to_index):
                results[future_to_index[future]] = future.result()

        return results

    def count_directory(self,
                        dir_path: str,
                        pattern: Optional[str] = None,
                        exclude_pattern: Optional[str] = None,
                        recursive: bool = True,
                        max_depth: Optional[int] = None) -> DirectoryTokenResult:
        """Count tokens for every matching file under a directory.

        Args:
            dir_path: Directory to scan.
            pattern: Regex that file paths must match to be included.
            exclude_pattern: Regex that excludes matching file paths.
            recursive: Whether to descend into subdirectories.
            max_depth: Maximum recursion depth (0 = only ``dir_path`` itself).

        Returns:
            DirectoryTokenResult: Aggregate statistics; per-file failures are
            collected into ``errors`` rather than raised.
        """
        if not os.path.isdir(dir_path):
            return DirectoryTokenResult(
                directory_path=dir_path,
                total_tokens=0,
                file_count=0,
                skipped_count=0,
                files=[],
                errors=["Directory does not exist"]
            )

        # Build the file filter from the optional include/exclude regexes.
        patterns = [pattern] if pattern else []
        exclude_patterns = [exclude_pattern] if exclude_pattern else []
        file_filter = FileFilter(patterns=patterns, exclude_patterns=exclude_patterns)

        # Collect all matching files, honoring recursion and depth limits.
        all_files = []
        skipped_count = 0

        for root, dirs, files in os.walk(dir_path):
            if max_depth is not None:
                # Depth of `root` relative to `dir_path` (0 for dir_path
                # itself). os.path.relpath makes this robust to a trailing
                # separator in dir_path, which the previous slice-based
                # computation (`root[len(dir_path):]`) mishandled.
                rel = os.path.relpath(root, dir_path)
                current_depth = 0 if rel == os.curdir else rel.count(os.sep) + 1
                if current_depth >= max_depth:
                    dirs.clear()  # prune: do not descend any further

            for file_name in files:
                file_path = os.path.join(root, file_name)
                if file_filter.matches(file_path):
                    all_files.append(file_path)
                else:
                    skipped_count += 1

            if not recursive:
                break  # only the top-level directory was requested

        # Tokenize every collected file (parallel when enabled).
        file_results = self.count_files(all_files)

        # Only successful results contribute to the total.
        total_tokens = sum(result.token_count for result in file_results if result.success)

        # Surface per-file failures as "path: error" strings.
        errors = [
            f"{result.file_path}: {result.error}"
            for result in file_results if not result.success
        ]

        return DirectoryTokenResult(
            directory_path=dir_path,
            total_tokens=total_tokens,
            file_count=len(file_results),
            skipped_count=skipped_count,
            files=file_results,
            errors=errors
        )

    def set_tokenizer(self, tokenizer_name: str) -> None:
        """Change the tokenizer (not supported yet; interface placeholder).

        Args:
            tokenizer_name: Name of the tokenizer to switch to.
        """
        # Only the default tokenizer is currently supported.
        pass
@@ -0,0 +1,105 @@
1
+ import os
2
+ import mimetypes
3
+ from pathlib import Path
4
+
5
+
6
class FileTypeDetector:
    """File type detector: decides whether a file is text and what encoding it uses."""

    # MIME-type prefixes that are treated as text.
    TEXT_MIME_PREFIXES = ('text/', 'application/json', 'application/xml', 'application/javascript')

    # File extensions that are always treated as text (checked first,
    # without touching the file contents).
    TEXT_EXTENSIONS = {
        '.txt', '.md', '.py', '.js', '.jsx', '.ts', '.tsx', '.html', '.css', '.scss', '.sass',
        '.json', '.xml', '.yaml', '.yml', '.ini', '.conf', '.sh', '.bash', '.zsh', '.c', '.cpp',
        '.h', '.hpp', '.java', '.kt', '.rs', '.go', '.rb', '.php', '.pl', '.swift', '.dart',
        '.vue', '.svelte', '.lua', '.r', '.sql', '.graphql', '.toml', '.csv'
    }

    @staticmethod
    def is_text_file(file_path: str) -> bool:
        """Return whether *file_path* looks like a text file.

        Checks, in order: known text extension, text-like MIME type, then a
        content sniff of the first 4 KB (null byte => binary; otherwise try
        decoding).

        Args:
            file_path: Path of the file to classify.

        Returns:
            bool: True if the file is considered text. A file that cannot be
            opened (and has no text extension/MIME type) yields False.
        """
        # 1. Known text extension — no file access needed.
        ext = os.path.splitext(file_path)[1].lower()
        if ext in FileTypeDetector.TEXT_EXTENSIONS:
            return True

        # 2. Text-like MIME type guessed from the file name.
        mime_type = FileTypeDetector.get_mime_type(file_path)
        if any(mime_type.startswith(prefix) for prefix in FileTypeDetector.TEXT_MIME_PREFIXES):
            return True

        # 3. Content sniff on the first 4 KB.
        try:
            with open(file_path, 'rb') as f:
                chunk = f.read(4096)
            # Binary files typically contain null bytes.
            if b'\x00' in chunk:
                return False
            try:
                chunk.decode('utf-8')
                return True
            except UnicodeDecodeError:
                # Fallback encodings. Note: 'latin1' maps every byte value,
                # so it always succeeds — any null-free chunk that reaches
                # this point is classified as text. ('ascii' was removed
                # from this list as dead code: ASCII is a subset of UTF-8,
                # so it can never succeed once UTF-8 has failed.)
                for encoding in ['gbk', 'latin1']:
                    try:
                        chunk.decode(encoding)
                        return True
                    except UnicodeDecodeError:
                        continue
                return False
        except (IOError, OSError):
            pass

        return False

    @staticmethod
    def detect_encoding(file_path: str) -> str:
        """Detect the text encoding of *file_path*.

        Args:
            file_path: Path of the file to probe.

        Returns:
            str: The first encoding that decodes a sample of the file;
            'utf-8' when the file cannot be read at all. 'latin1' acts as a
            catch-all since it decodes any byte sequence. ('ascii' was
            removed as dead code: it is a subset of UTF-8 and could never be
            reached, let alone succeed after UTF-8 failed.)
        """
        encodings = ['utf-8', 'gbk', 'latin1']

        for encoding in encodings:
            try:
                with open(file_path, 'r', encoding=encoding) as f:
                    f.read(100)  # decode a small sample only
                return encoding
            except UnicodeDecodeError:
                continue
            except (IOError, OSError):
                break  # unreadable file: fall through to the default

        return 'utf-8'  # default encoding

    @staticmethod
    def get_mime_type(file_path: str) -> str:
        """Return the MIME type guessed from the file name.

        Args:
            file_path: Path of the file.

        Returns:
            str: The guessed MIME type, or 'application/octet-stream' when
            the extension is unknown.
        """
        mime_type, _ = mimetypes.guess_type(file_path)
        return mime_type or 'application/octet-stream'
@@ -0,0 +1,111 @@
1
+ import os
2
+ import re
3
+ from pathlib import Path
4
+ from typing import List, Optional, Tuple
5
+
6
+
7
class FileFilter:
    """File filter: decides which files are included in token statistics."""

    def __init__(self,
                 patterns: List[str] = None,
                 exclude_patterns: List[str] = None,
                 min_size: int = None,
                 max_size: int = None,
                 only_text_files: bool = True):
        """Initialize the file filter.

        Args:
            patterns: Regex patterns a file path must match to be included.
            exclude_patterns: Regex patterns that exclude a file path.
            min_size: Minimum file size in bytes.
            max_size: Maximum file size in bytes.
            only_text_files: Whether to restrict matching to text files.
                NOTE(review): stored but never consulted by ``matches`` —
                confirm whether text-only filtering is enforced elsewhere.
        """
        self.patterns = []
        self.exclude_patterns = []
        self.min_size = min_size
        self.max_size = max_size
        self.only_text_files = only_text_files

        for pat in (patterns or []):
            self.add_pattern(pat)
        for pat in (exclude_patterns or []):
            self.add_exclude_pattern(pat)

    @staticmethod
    def _compile(pattern: str):
        """Compile *pattern*, translating re.error into ValueError."""
        try:
            return re.compile(pattern)
        except re.error:
            raise ValueError(f"Invalid regex pattern: {pattern}")

    def add_pattern(self, pattern: str) -> None:
        """Add an include pattern.

        Args:
            pattern: Regular-expression pattern.

        Raises:
            ValueError: If *pattern* is not a valid regular expression.
        """
        self.patterns.append(self._compile(pattern))

    def add_exclude_pattern(self, pattern: str) -> None:
        """Add an exclude pattern.

        Args:
            pattern: Regular-expression pattern.

        Raises:
            ValueError: If *pattern* is not a valid regular expression.
        """
        self.exclude_patterns.append(self._compile(pattern))

    def set_size_range(self, min_size: Optional[int] = None, max_size: Optional[int] = None) -> None:
        """Set the accepted file-size range.

        Args:
            min_size: Minimum file size in bytes (None = no lower bound).
            max_size: Maximum file size in bytes (None = no upper bound).
        """
        self.min_size = min_size
        self.max_size = max_size

    def matches(self, file_path: str) -> bool:
        """Return whether *file_path* passes every filter condition.

        Args:
            file_path: Path to test.

        Returns:
            bool: True when the path is an existing file, within the size
            bounds, not excluded, and (if include patterns exist) matched by
            at least one of them.
        """
        # Guard: only existing regular files can match.
        if not os.path.isfile(file_path):
            return False

        # Guard: size bounds (checked only when at least one bound is set).
        if self.min_size is not None or self.max_size is not None:
            size = os.path.getsize(file_path)
            if self.min_size is not None and size < self.min_size:
                return False
            if self.max_size is not None and size > self.max_size:
                return False

        # Guard: any exclude pattern vetoes the file.
        if any(pat.search(file_path) for pat in self.exclude_patterns):
            return False

        # No include patterns means everything else is accepted.
        if not self.patterns:
            return True

        # Otherwise at least one include pattern must match.
        return any(pat.search(file_path) for pat in self.patterns)
@@ -0,0 +1,28 @@
1
+ from typing import List, Dict, Optional
2
+ from dataclasses import dataclass
3
+
4
+
5
+ @dataclass
6
+ class TokenResult:
7
+ """单个文件的 token 统计结果"""
8
+ file_path: str
9
+ token_count: int
10
+ char_count: int
11
+ line_count: int
12
+ success: bool = True
13
+ error: Optional[str] = None
14
+
15
+
16
+ @dataclass
17
+ class DirectoryTokenResult:
18
+ """目录的 token 统计结果"""
19
+ directory_path: str
20
+ total_tokens: int
21
+ file_count: int
22
+ skipped_count: int
23
+ files: List[TokenResult]
24
+ errors: List[str] = None
25
+
26
+ def __post_init__(self):
27
+ if self.errors is None:
28
+ self.errors = []