auto-coder 0.1.287__py3-none-any.whl → 0.1.289__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of auto-coder might be problematic.
@@ -0,0 +1,343 @@
+"""
+Token Helper Plugin for Chat Auto Coder.
+Provides token counting functionality for files and projects.
+"""
+
+import os
+from typing import Any, Callable, Dict, List, Optional, Tuple
+from dataclasses import dataclass
+from collections import defaultdict
+
+from autocoder.plugins import Plugin, PluginManager
+from autocoder.rag.token_counter import count_tokens
+from autocoder.suffixproject import SuffixProject
+from autocoder.common import AutoCoderArgs, SourceCode
+from loguru import logger
+import json
+
+
+@dataclass
+class TokenCount:
+    """Represents token count information for a file."""
+    filename: str
+    tokens: int
+    relative_path: Optional[str] = None
+    file_size: Optional[int] = None
+
+
+class TokenHelperPlugin(Plugin):
+    """Token helper plugin for the Chat Auto Coder."""
+
+    name = "token_helper"
+    description = "Token helper plugin providing token counting for files and projects"
+    version = "0.1.0"
+
+    def __init__(self, manager: PluginManager, config: Optional[Dict[str, Any]] = None, config_path: Optional[str] = None):
+        """Initialize the Token helper plugin."""
+        super().__init__(manager, config, config_path)
+        self.token_counts = {}
+        self.project_dir = os.getcwd()
+        self.base_persist_dir = os.path.join(".auto-coder", "plugins", "chat-auto-coder")
+        self.auto_coder_config = {}
+        self.exclude_files = []
+
+    def load_auto_coder_config(self):
+        memory_path = os.path.join(self.base_persist_dir, "memory.json")
+        if os.path.exists(memory_path):
+            with open(memory_path, "r", encoding="utf-8") as f:
+                _memory = json.load(f)
+            self.auto_coder_config = _memory.get("conf", {})
+            self.exclude_files = _memory.get("exclude_files", [])
+
+    def initialize(self) -> bool:
+        """Initialize the plugin.
+
+        Returns:
+            True if initialization was successful
+        """
+        self.load_auto_coder_config()
+        print(f"[{self.name}] Token Helper plugin initialized")
+        return True
+
+    def get_commands(self) -> Dict[str, Tuple[Callable, str]]:
+        """Get commands provided by this plugin.
+
+        Returns:
+            A dictionary of command name to handler and description
+        """
+        return {
+            "token/count": (self.count_tokens_in_project, "Count tokens in all project files"),
+            "token/top": (self.show_top_token_files, "Show top N files by token count"),
+            "token/file": (self.count_tokens_in_file, "Count tokens in a specific file"),
+            "token/summary": (self.show_token_summary, "Show token count summary for the project"),
+        }
+
+    def get_completions(self) -> Dict[str, List[str]]:
+        """Get completions provided by this plugin.
+
+        Returns:
+            A dictionary mapping command prefixes to completion options
+        """
+        completions = {
+            "/token/count": [],
+            "/token/top": ["5", "10", "20", "50", "100"],
+            "/token/file": [],
+            "/token/summary": [],
+        }
+        return completions
+
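Each command maps a name to a (handler, description) tuple, so the host can dispatch with a plain dictionary lookup. A minimal sketch of how a command string such as `/token/top 10` might be routed (the `manager` instance and the dispatch itself are hypothetical host-side code; only `get_commands` comes from the plugin):

    plugin = TokenHelperPlugin(manager)  # `manager` supplied by the host application
    handler, description = plugin.get_commands()["token/top"]
    handler("10")  # prints the top 10 files, once /token/count has populated the cache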
+    def count_tokens_in_project(self, args: str) -> None:
+        """Count tokens in all project files.
+
+        Args:
+            args: Optional arguments including project directory and file types
+        """
+        args_dict = self._parse_args(args)
+        project_dir = args_dict.get("dir", self.auto_coder_config.get("project_dir", os.getcwd()))
+        project_type = args_dict.get("type", self.auto_coder_config.get("project_type", ".py"))
+        # _parse_args exposes this option as "exclude" (--exclude/-e); also accept
+        # the key=value spelling "exclude_files" for compatibility.
+        exclude = args_dict.get("exclude", args_dict.get("exclude_files", ""))
+
+        self.project_dir = project_dir
+        print(f"Counting tokens in project: {project_dir}")
+        print(f"File types: {project_type}")
+
+        try:
+            # Create AutoCoderArgs with necessary parameters
+            coder_args = AutoCoderArgs(
+                source_dir=project_dir,
+                project_type=project_type,
+                exclude_files=[f"regex://{exclude}"] + self.exclude_files if exclude else self.exclude_files
+            )
+
+            # Use SuffixProject to get all source files
+            project = SuffixProject(args=coder_args)
+
+            total_tokens = 0
+            file_count = 0
+            self.token_counts = {}
+
+            print("Scanning files and counting tokens...")
+
+            for source_code in project.get_source_codes():
+                file_count += 1
+                if file_count % 10 == 0:
+                    print(f"Processed {file_count} files...")
+
+                tokens = count_tokens(source_code.source_code)
+                file_path = source_code.module_name
+                relative_path = os.path.relpath(file_path, project_dir)
+                file_size = len(source_code.source_code)
+
+                self.token_counts[file_path] = TokenCount(
+                    filename=file_path,
+                    tokens=tokens,
+                    relative_path=relative_path,
+                    file_size=file_size
+                )
+
+                total_tokens += tokens
+
+            print("\nToken count complete!")
+            print(f"Total files: {file_count}")
+            print(f"Total tokens: {total_tokens:,}")
+            print("Use /token/top N to see the top N files by token count")
+            print("Use /token/summary to see a summary by file type")
+
+        except Exception as e:
+            logger.error(f"Error counting tokens in project: {str(e)}")
+            print(f"Error: {str(e)}")
+
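Since `_parse_args` (below) accepts both key=value and command-line styles, the following invocations are equivalent (paths illustrative):

    /token/count --dir . --type .py,.md --exclude tests
    /token/count dir=. type=.py,.md exclude=tests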
+    def _parse_args(self, args: str) -> Dict[str, Any]:
+        """Parse command arguments.
+
+        Args:
+            args: Command arguments string. Supports both:
+                - key=value format: dir=. type=.py,.java
+                - command-line format: --dir . --type .py,.java
+
+        Returns:
+            Dictionary of parsed arguments
+        """
+        result = {}
+        if not args:
+            return result
+
+        # Try using argparse first
+        try:
+            import argparse
+            import shlex
+
+            # Create parser with arguments expected by the plugin
+            parser = argparse.ArgumentParser(description='Token counter options')
+            parser.add_argument('--dir', '-d', help='Project directory')
+            parser.add_argument('--type', '-t', help='File types (comma separated)')
+            parser.add_argument('--exclude', '-e', help='Exclude pattern')
+            # default=None keeps the flag out of the result unless it was passed;
+            # store_true's implicit default of False would otherwise always be included below.
+            parser.add_argument('--verbose', '-v', action='store_true', default=None, help='Verbose output')
+
+            # Parse with shlex to handle quoted arguments properly
+            parsed_args, unknown = parser.parse_known_args(shlex.split(args))
+
+            # Convert namespace to dictionary, ignoring None values
+            for key, value in vars(parsed_args).items():
+                if value is not None:
+                    result[key] = value
+
+            # Handle any unknown arguments as key=value pairs
+            for arg in unknown:
+                if '=' in arg:
+                    key, value = arg.split('=', 1)
+                    # Strip leading dashes if present
+                    if key.startswith('--'):
+                        key = key[2:]
+                    elif key.startswith('-'):
+                        key = key[1:]
+                    result[key] = value
+
+            return result
+
+        except Exception as e:
+            # Fall back to the hand-rolled parser if argparse fails
+            logger.debug(f"Argparse failed, using fallback parser: {str(e)}")
+
+            parts = args.split()
+            i = 0
+            while i < len(parts):
+                part = parts[i]
+
+                # Handle key=value format
+                if "=" in part:
+                    key, value = part.split("=", 1)
+                    # Strip leading dashes if present
+                    if key.startswith("--"):
+                        key = key[2:]
+                    elif key.startswith("-"):
+                        key = key[1:]
+                    result[key] = value
+                    i += 1
+                    continue
+
+                # Handle --key value or -key value format
+                if part.startswith("--"):
+                    key = part[2:]
+                    # Check if there's a value following this key
+                    if i + 1 < len(parts) and not parts[i + 1].startswith("-"):
+                        result[key] = parts[i + 1]
+                        i += 2
+                    else:
+                        # Flag option without value
+                        result[key] = "true"
+                        i += 1
+                elif part.startswith("-"):
+                    key = part[1:]
+                    # Check if there's a value following this key
+                    if i + 1 < len(parts) and not parts[i + 1].startswith("-"):
+                        result[key] = parts[i + 1]
+                        i += 2
+                    else:
+                        # Flag option without value
+                        result[key] = "true"
+                        i += 1
+                else:
+                    # Standalone argument without a key
+                    i += 1
+
+            return result
+
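A quick sanity check of the two supported styles, assuming the argparse path succeeds:

    plugin._parse_args("dir=. type=.py,.java")
    # -> {"dir": ".", "type": ".py,.java"}
    plugin._parse_args("--dir . --type .py,.java --verbose")
    # -> {"dir": ".", "type": ".py,.java", "verbose": True}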
+    def show_top_token_files(self, args: str) -> None:
+        """Show top N files by token count.
+
+        Args:
+            args: Number of files to show
+        """
+        if not self.token_counts:
+            print("No token count data available. Run /token/count first.")
+            return
+
+        try:
+            n = int(args.strip()) if args.strip() else 10
+        except ValueError:
+            print(f"Invalid value: {args}. Using default of 10.")
+            n = 10
+
+        print(f"\nTop {n} files by token count:")
+        print(f"{'Tokens':<10} {'Size (bytes)':<15} {'File'}")
+        print(f"{'-'*10} {'-'*15} {'-'*50}")
+
+        sorted_files = sorted(
+            self.token_counts.values(),
+            key=lambda x: x.tokens,
+            reverse=True
+        )
+
+        for token_count in sorted_files[:n]:
+            relative_path = token_count.relative_path or token_count.filename
+            # file_size is Optional; guard against None before comma-formatting
+            print(f"{token_count.tokens:<10,} {token_count.file_size or 0:<15,} {relative_path}")
+
+    def count_tokens_in_file(self, args: str) -> None:
+        """Count tokens in a specific file.
+
+        Args:
+            args: Path to the file
+        """
+        if not args:
+            print("Please specify a file path.")
+            return
+
+        file_path = args.strip()
+
+        try:
+            with open(file_path, 'r', encoding='utf-8') as f:
+                content = f.read()
+
+            tokens = count_tokens(content)
+            print(f"\nFile: {file_path}")
+            print(f"Tokens: {tokens:,}")
+            print(f"File size: {len(content):,} bytes")
+            if tokens > 0:
+                # Guard against division by zero for empty files
+                print(f"Avg bytes per token: {len(content)/tokens:.2f}")
+
+        except Exception as e:
+            print(f"Error counting tokens in file: {str(e)}")
+
+    def show_token_summary(self, args: str) -> None:
+        """Show token count summary by file type.
+
+        Args:
+            args: Optional arguments
+        """
+        if not self.token_counts:
+            print("No token count data available. Run /token/count first.")
+            return
+
+        by_extension = defaultdict(lambda: {"files": 0, "tokens": 0, "size": 0})
+
+        for token_count in self.token_counts.values():
+            filename = token_count.filename
+            ext = os.path.splitext(filename)[1].lower() or "no_extension"
+
+            by_extension[ext]["files"] += 1
+            by_extension[ext]["tokens"] += token_count.tokens
+            by_extension[ext]["size"] += token_count.file_size or 0
+
+        total_tokens = sum(data["tokens"] for data in by_extension.values())
+        total_files = sum(data["files"] for data in by_extension.values())
+        total_size = sum(data["size"] for data in by_extension.values())
+
+        print("\nToken count summary by file type:")
+        print(f"{'Extension':<12} {'Files':<8} {'Tokens':<12} {'% of Total':<12} {'Size (KB)':<12}")
+        print(f"{'-'*12} {'-'*8} {'-'*12} {'-'*12} {'-'*12}")
+
+        for ext, data in sorted(by_extension.items(), key=lambda x: x[1]["tokens"], reverse=True):
+            percent = (data["tokens"] / total_tokens * 100) if total_tokens > 0 else 0
+            size_kb = data["size"] / 1024
+            print(f"{ext:<12} {data['files']:<8} {data['tokens']:<12,} {percent:<12.2f} {size_kb:<12.2f}")
+
+        print(f"\nTotal Files: {total_files:,}")
+        print(f"Total Tokens: {total_tokens:,}")
+        print(f"Total Size: {total_size/1024/1024:.2f} MB")
+
+        if self.project_dir:
+            print(f"Project Directory: {self.project_dir}")
+
+    def shutdown(self) -> None:
+        """Shutdown the plugin."""
+        print(f"[{self.name}] Token Helper plugin shutdown")
@@ -0,0 +1,9 @@
+import json
+
+def load_json_file(file_path: str) -> dict:
+    with open(file_path, 'r', encoding='utf-8') as f:
+        return json.load(f)
+
+def save_json_file(file_path: str, data: dict):
+    with open(file_path, 'w', encoding='utf-8') as f:
+        json.dump(data, f, ensure_ascii=False, indent=4)
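These helpers match the persistence format read by `load_auto_coder_config` in the token helper plugin. A minimal memory.json of the expected shape (the keys come from the code above; the values are illustrative):

    {
        "conf": {"project_type": ".py"},
        "exclude_files": ["regex://.*/node_modules/.*"]
    }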
@@ -38,7 +38,7 @@ from pydantic import BaseModel
 from byzerllm.utils.types import SingleOutputMeta
 from autocoder.rag.lang import get_message_with_format_and_newline
 from autocoder.rag.qa_conversation_strategy import get_qa_strategy
-
+from autocoder.rag.searchable import SearchableResults
 try:
     from autocoder_pro.rag.llm_compute import LLMComputeEngine
     pro_version = version("auto-coder-pro")
@@ -257,7 +257,7 @@ class LongContextRAG:
 请根据提供的文档内容、用户对话历史以及最后一个问题,提取并总结文档中与问题相关的重要信息。
 如果文档中没有相关信息,请回复"该文档中没有与问题相关的信息"。
 提取的信息尽量保持和原文中的一样,并且只输出这些信息。
-        """
+        """
 
     def _get_document_retriever_class(self):
         """Get the document retriever class based on configuration."""
@@ -500,6 +500,9 @@ class LongContextRAG:
             except json.JSONDecodeError:
                 pass
 
+        if not only_contexts and extra_request_params.get("only_contexts", False):
+            only_contexts = True
+
         logger.info(f"Query: {query} only_contexts: {only_contexts}")
         start_time = time.time()
 
@@ -593,10 +596,19 @@ class LongContextRAG:
         )
 
         if only_contexts:
-            final_docs = []
-            for doc in relevant_docs:
-                final_docs.append(doc.model_dump())
-            return [json.dumps(final_docs, ensure_ascii=False)], []
+            try:
+                searcher = SearchableResults()
+                result = searcher.reorder(docs=relevant_docs)
+                yield (json.dumps(result.model_dump(), ensure_ascii=False),
+                       SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                                        generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens))
+            except Exception as e:
+                yield (str(e),
+                       SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
+                                        generated_tokens_count=rag_stat.recall_stat.total_generated_tokens + rag_stat.chunk_stat.total_generated_tokens))
+            return
 
         if not relevant_docs:
             yield ("没有找到可以回答你问题的相关文档", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
@@ -816,12 +828,13 @@ class LongContextRAG:
 
                 self._print_rag_stats(rag_stat)
             else:
-
-                qa_strategy = get_qa_strategy(self.args.rag_qa_conversation_strategy)
+
+                qa_strategy = get_qa_strategy(
+                    self.args.rag_qa_conversation_strategy)
                 new_conversations = qa_strategy.create_conversation(
                     documents=[doc.source_code for doc in relevant_docs],
                     conversations=conversations
-                )
+                )
 
                 chunks = target_llm.stream_chat_oai(
                     conversations=new_conversations,
@@ -19,7 +19,7 @@ class TaskTiming(BaseModel):
 
 class FilterDoc(BaseModel):
     source_code: SourceCode
-    relevance: DocRelevance
+    relevance: Optional[DocRelevance]
    task_timing: TaskTiming
 
 
@@ -0,0 +1,58 @@
+import json
+from collections import Counter
+from typing import Dict, List, Any, Optional, Tuple, Set
+from pydantic import BaseModel
+from autocoder.rag.relevant_utils import FilterDoc
+
+
+class FileOccurrence(BaseModel):
+    """Represents a file and its occurrence count in search results"""
+    file_path: str
+    count: int
+    score: float = 0.0  # Optional relevance score
+
+
+class FileResult(BaseModel):
+    files: List[FileOccurrence]
+
+
+class SearchableResults:
+    """Class to process and organize search results by file frequency"""
+
+    def __init__(self):
+        """Initialize the SearchableResults instance"""
+        pass
+
+    def extract_original_docs(self, docs: List[FilterDoc]) -> List[str]:
+        """Extract all original_docs from a list of document metadata"""
+        all_files = []
+
+        for doc in docs:
+            # Extract from metadata if available
+            metadata = doc.source_code.metadata
+            if "original_docs" in metadata:
+                all_files.extend(metadata["original_docs"])
+            # Also include the module_name from source_code as a fallback
+            else:
+                all_files.append(doc.source_code.module_name)
+
+        return all_files
+
+    def count_file_occurrences(self, files: List[str]) -> List[FileOccurrence]:
+        """Count occurrences of each file and return sorted list"""
+        # Count occurrences
+        counter = Counter(files)
+
+        # Convert to FileOccurrence objects
+        occurrences = [
+            FileOccurrence(file_path=file_path, count=count)
+            for file_path, count in counter.items()
+        ]
+
+        # Sort by count (descending)
+        return sorted(occurrences, key=lambda x: x.count, reverse=True)
+
+    def reorder(self, docs: List[FilterDoc]) -> FileResult:
+        """Process search results to extract and rank files by occurrence (main entry point)"""
+        all_files = self.extract_original_docs(docs)
+        return FileResult(files=self.count_file_occurrences(all_files))
+
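A minimal usage sketch (file names illustrative; in the RAG flow above, `reorder` receives the `FilterDoc` list directly):

    searcher = SearchableResults()
    occurrences = searcher.count_file_occurrences(["a.md", "b.md", "a.md", "c.md", "a.md"])
    # -> [FileOccurrence(file_path='a.md', count=3, score=0.0), FileOccurrence(file_path='b.md', count=1, score=0.0), ...]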
autocoder/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.1.287"
+__version__ = "0.1.289"