auto-coder 0.1.287__py3-none-any.whl → 0.1.289__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic; consult the registry's advisory page for more details.
- {auto_coder-0.1.287.dist-info → auto_coder-0.1.289.dist-info}/METADATA +1 -1
- {auto_coder-0.1.287.dist-info → auto_coder-0.1.289.dist-info}/RECORD +26 -17
- autocoder/chat_auto_coder.py +265 -82
- autocoder/chat_auto_coder_lang.py +25 -21
- autocoder/commands/auto_web.py +1062 -0
- autocoder/common/__init__.py +1 -2
- autocoder/common/anything2img.py +113 -43
- autocoder/common/auto_coder_lang.py +40 -1
- autocoder/common/computer_use.py +931 -0
- autocoder/common/mcp_hub.py +99 -77
- autocoder/common/mcp_server.py +162 -61
- autocoder/index/filter/quick_filter.py +373 -3
- autocoder/plugins/__init__.py +1123 -0
- autocoder/plugins/dynamic_completion_example.py +148 -0
- autocoder/plugins/git_helper_plugin.py +252 -0
- autocoder/plugins/sample_plugin.py +160 -0
- autocoder/plugins/token_helper_plugin.py +343 -0
- autocoder/plugins/utils.py +9 -0
- autocoder/rag/long_context_rag.py +22 -9
- autocoder/rag/relevant_utils.py +1 -1
- autocoder/rag/searchable.py +58 -0
- autocoder/version.py +1 -1
- {auto_coder-0.1.287.dist-info → auto_coder-0.1.289.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.287.dist-info → auto_coder-0.1.289.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.287.dist-info → auto_coder-0.1.289.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.287.dist-info → auto_coder-0.1.289.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,343 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Token Helper Plugin for Chat Auto Coder.
|
|
3
|
+
Provides token counting functionality for files and projects.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import os
|
|
7
|
+
from typing import Any, Callable, Dict, List, Optional, Tuple
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from collections import defaultdict
|
|
10
|
+
|
|
11
|
+
from autocoder.plugins import Plugin, PluginManager
|
|
12
|
+
from autocoder.rag.token_counter import count_tokens
|
|
13
|
+
from autocoder.suffixproject import SuffixProject
|
|
14
|
+
from autocoder.common import AutoCoderArgs, SourceCode
|
|
15
|
+
from loguru import logger
|
|
16
|
+
import json
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class TokenCount:
    """Represents token count information for a file."""
    # Path of the file as reported by the project scanner.
    filename: str
    # Number of tokens counted in the file's content.
    tokens: int
    # Path relative to the scanned project directory, when known.
    relative_path: Optional[str] = None
    # Size of the file content in bytes, when known.
    file_size: Optional[int] = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TokenHelperPlugin(Plugin):
    """Token helper plugin for the Chat Auto Coder.

    Registers /token/* commands that count tokens across a whole project or
    a single file and report per-file and per-extension summaries. The
    results of the most recent /token/count run are cached in
    ``self.token_counts`` so /token/top and /token/summary can reuse them.
    """

    name = "token_helper"
    description = "Token helper plugin providing token counting for files and projects"
    version = "0.1.0"

    def __init__(self, manager: PluginManager, config: Optional[Dict[str, Any]] = None, config_path: Optional[str] = None):
        """Initialize the Token helper plugin.

        Args:
            manager: The plugin manager that owns this plugin.
            config: Optional plugin configuration dictionary.
            config_path: Optional path the configuration was loaded from.
        """
        super().__init__(manager, config, config_path)
        # Maps file path -> TokenCount for the last /token/count run.
        self.token_counts = {}
        self.project_dir = os.getcwd()
        self.base_persist_dir = os.path.join(".auto-coder", "plugins", "chat-auto-coder")
        # Mirrors auto-coder's persisted "conf" section (see load_auto_coder_config).
        self.auto_coder_config = {}
        # Exclude patterns persisted by auto-coder; merged into every project scan.
        self.exclude_files = []

    def load_auto_coder_config(self):
        """Load auto-coder's persisted memory.json (if present) to pick up
        the project configuration and exclude-file patterns."""
        memory_path = os.path.join(self.base_persist_dir, "memory.json")
        if os.path.exists(memory_path):
            with open(memory_path, "r", encoding="utf-8") as f:
                _memory = json.load(f)
                self.auto_coder_config = _memory.get("conf", {})
                self.exclude_files = _memory.get("exclude_files", [])

    def initialize(self) -> bool:
        """Initialize the plugin.

        Returns:
            True if initialization was successful
        """
        self.load_auto_coder_config()
        print(f"[{self.name}] Token Helper plugin initialized")
        return True

    def get_commands(self) -> Dict[str, Tuple[Callable, str]]:
        """Get commands provided by this plugin.

        Returns:
            A dictionary of command name to handler and description
        """
        return {
            "token/count": (self.count_tokens_in_project, "Count tokens in all project files"),
            "token/top": (self.show_top_token_files, "Show top N files by token count"),
            "token/file": (self.count_tokens_in_file, "Count tokens in a specific file"),
            "token/summary": (self.show_token_summary, "Show token count summary for the project"),
        }

    def get_completions(self) -> Dict[str, List[str]]:
        """Get completions provided by this plugin.

        Returns:
            A dictionary mapping command prefixes to completion options
        """
        completions = {
            "/token/count": [],
            "/token/top": ["5", "10", "20", "50", "100"],
            "/token/file": [],
            "/token/summary": [],
        }
        return completions

    def count_tokens_in_project(self, args: str) -> None:
        """Count tokens in all project files.

        Args:
            args: Optional arguments including project directory (--dir),
                file types (--type) and an exclude regex (--exclude)
        """
        args_dict = self._parse_args(args)
        project_dir = args_dict.get("dir", self.auto_coder_config.get("project_dir", os.getcwd()))
        project_type = args_dict.get("type", self.auto_coder_config.get("project_type", ".py"))
        # BUG FIX: _parse_args stores the --exclude option under the key
        # "exclude" (argparse dest), not "exclude_files", so the previous
        # lookup never matched and the option was silently ignored.
        exclude = args_dict.get("exclude", "")

        self.project_dir = project_dir
        print(f"Counting tokens in project: {project_dir}")
        print(f"File types: {project_type}")

        try:
            # Create AutoCoderArgs with necessary parameters; the user's
            # exclude pattern is prepended as a regex:// rule ahead of the
            # persisted exclude patterns.
            coder_args = AutoCoderArgs(
                source_dir=project_dir,
                project_type=project_type,
                exclude_files=[f"regex://{exclude}"] + self.exclude_files if exclude else self.exclude_files
            )

            # Use SuffixProject to get all source files
            project = SuffixProject(args=coder_args)

            total_tokens = 0
            file_count = 0
            self.token_counts = {}

            print("Scanning files and counting tokens...")

            for source_code in project.get_source_codes():
                file_count += 1
                if file_count % 10 == 0:
                    print(f"Processed {file_count} files...")

                tokens = count_tokens(source_code.source_code)
                file_path = source_code.module_name
                relative_path = os.path.relpath(file_path, project_dir)
                file_size = len(source_code.source_code)

                self.token_counts[file_path] = TokenCount(
                    filename=file_path,
                    tokens=tokens,
                    relative_path=relative_path,
                    file_size=file_size
                )

                total_tokens += tokens

            print(f"\nToken count complete!")
            print(f"Total files: {file_count}")
            print(f"Total tokens: {total_tokens:,}")
            print(f"Use /token/top N to see the top N files by token count")
            print(f"Use /token/summary to see a summary by file type")

        except Exception as e:
            logger.error(f"Error counting tokens in project: {str(e)}")
            print(f"Error: {str(e)}")

    def _parse_args(self, args: str) -> Dict[str, str]:
        """Parse command arguments.

        Args:
            args: Command arguments string. Supports both:
                - Key=value format: dir=. type=.py,.java
                - Command line format: --dir . --type .py,.java

        Returns:
            Dictionary of parsed arguments
        """
        result = {}
        if not args:
            return result

        # Try using argparse first
        try:
            import argparse
            import shlex

            # Create parser with arguments expected by the plugin
            parser = argparse.ArgumentParser(description='Token counter options')
            parser.add_argument('--dir', '-d', help='Project directory')
            parser.add_argument('--type', '-t', help='File types (comma separated)')
            parser.add_argument('--exclude', '-e', help='Exclude pattern')
            parser.add_argument('--verbose', '-v', action='store_true', help='Verbose output')

            # Parse with shlex to handle quoted arguments properly
            parsed_args, unknown = parser.parse_known_args(shlex.split(args))

            # Convert namespace to dictionary, ignoring None values
            for key, value in vars(parsed_args).items():
                if value is not None:
                    result[key] = value

            # Handle any unknown arguments as key=value pairs
            for arg in unknown:
                if '=' in arg:
                    key, value = arg.split('=', 1)
                    # Strip leading dashes if present
                    if key.startswith('--'):
                        key = key[2:]
                    elif key.startswith('-'):
                        key = key[1:]
                    result[key] = value

            return result

        except Exception as e:
            # Fallback to original implementation if argparse fails
            logger.debug(f"Argparse failed, using fallback parser: {str(e)}")

            parts = args.split()
            i = 0
            while i < len(parts):
                part = parts[i]

                # Handle key=value format
                if "=" in part:
                    key, value = part.split("=", 1)
                    # Strip leading dashes if present
                    if key.startswith("--"):
                        key = key[2:]
                    elif key.startswith("-"):
                        key = key[1:]
                    result[key] = value
                    i += 1
                    continue

                # Handle --key value or -key value format
                if part.startswith("--"):
                    key = part[2:]
                    # Check if there's a value following this key
                    if i + 1 < len(parts) and not parts[i + 1].startswith("-"):
                        result[key] = parts[i + 1]
                        i += 2
                    else:
                        # Flag option without value
                        result[key] = "true"
                        i += 1
                elif part.startswith("-"):
                    key = part[1:]
                    # Check if there's a value following this key
                    if i + 1 < len(parts) and not parts[i + 1].startswith("-"):
                        result[key] = parts[i + 1]
                        i += 2
                    else:
                        # Flag option without value
                        result[key] = "true"
                        i += 1
                else:
                    # Standalone argument without a key
                    i += 1

            return result

    def show_top_token_files(self, args: str) -> None:
        """Show top N files by token count.

        Args:
            args: Number of files to show
        """
        if not self.token_counts:
            print("No token count data available. Run /token/count first.")
            return

        try:
            n = int(args.strip()) if args.strip() else 10
        except ValueError:
            print(f"Invalid value: {args}. Using default of 10.")
            n = 10

        print(f"\nTop {n} files by token count:")
        print(f"{'Tokens':<10} {'Size (bytes)':<15} {'File'}")
        print(f"{'-'*10} {'-'*15} {'-'*50}")

        sorted_files = sorted(
            self.token_counts.values(),
            key=lambda x: x.tokens,
            reverse=True
        )

        for i, token_count in enumerate(sorted_files[:n], 1):
            relative_path = token_count.relative_path or token_count.filename
            # ROBUSTNESS: file_size is Optional[int]; default to 0 so the
            # thousands-separator format never receives None.
            print(f"{token_count.tokens:<10,} {(token_count.file_size or 0):<15,} {relative_path}")

    def count_tokens_in_file(self, args: str) -> None:
        """Count tokens in a specific file.

        Args:
            args: Path to the file
        """
        if not args:
            print("Please specify a file path.")
            return

        file_path = args.strip()

        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()

            tokens = count_tokens(content)
            print(f"\nFile: {file_path}")
            print(f"Tokens: {tokens:,}")
            print(f"File size: {len(content):,} bytes")
            # BUG FIX: guard against division by zero on empty files
            # (count_tokens can return 0).
            if tokens > 0:
                print(f"Avg bytes per token: {len(content)/tokens:.2f}")

        except Exception as e:
            print(f"Error counting tokens in file: {str(e)}")

    def show_token_summary(self, args: str) -> None:
        """Show token count summary by file type.

        Args:
            args: Optional arguments
        """
        if not self.token_counts:
            print("No token count data available. Run /token/count first.")
            return

        # Aggregate file count, tokens and size per file extension.
        by_extension = defaultdict(lambda: {"files": 0, "tokens": 0, "size": 0})

        for token_count in self.token_counts.values():
            filename = token_count.filename
            ext = os.path.splitext(filename)[1].lower() or "no_extension"

            by_extension[ext]["files"] += 1
            by_extension[ext]["tokens"] += token_count.tokens
            by_extension[ext]["size"] += token_count.file_size or 0

        total_tokens = sum(data["tokens"] for data in by_extension.values())
        total_files = sum(data["files"] for data in by_extension.values())
        total_size = sum(data["size"] for data in by_extension.values())

        print(f"\nToken count summary by file type:")
        print(f"{'Extension':<12} {'Files':<8} {'Tokens':<12} {'% of Total':<12} {'Size (KB)':<12}")
        print(f"{'-'*12} {'-'*8} {'-'*12} {'-'*12} {'-'*12}")

        for ext, data in sorted(by_extension.items(), key=lambda x: x[1]["tokens"], reverse=True):
            percent = (data["tokens"] / total_tokens * 100) if total_tokens > 0 else 0
            size_kb = data["size"] / 1024
            print(f"{ext:<12} {data['files']:<8} {data['tokens']:<12,} {percent:<12.2f} {size_kb:<12.2f}")

        print(f"\nTotal Files: {total_files:,}")
        print(f"Total Tokens: {total_tokens:,}")
        print(f"Total Size: {total_size/1024/1024:.2f} MB")

        if self.project_dir:
            print(f"Project Directory: {self.project_dir}")

    def shutdown(self) -> None:
        """Shutdown the plugin."""
        print(f"[{self.name}] Token Helper plugin shutdown")
|
|
@@ -38,7 +38,7 @@ from pydantic import BaseModel
|
|
|
38
38
|
from byzerllm.utils.types import SingleOutputMeta
|
|
39
39
|
from autocoder.rag.lang import get_message_with_format_and_newline
|
|
40
40
|
from autocoder.rag.qa_conversation_strategy import get_qa_strategy
|
|
41
|
-
|
|
41
|
+
from autocoder.rag.searchable import SearchableResults
|
|
42
42
|
try:
|
|
43
43
|
from autocoder_pro.rag.llm_compute import LLMComputeEngine
|
|
44
44
|
pro_version = version("auto-coder-pro")
|
|
@@ -257,7 +257,7 @@ class LongContextRAG:
|
|
|
257
257
|
请根据提供的文档内容、用户对话历史以及最后一个问题,提取并总结文档中与问题相关的重要信息。
|
|
258
258
|
如果文档中没有相关信息,请回复"该文档中没有与问题相关的信息"。
|
|
259
259
|
提取的信息尽量保持和原文中的一样,并且只输出这些信息。
|
|
260
|
-
"""
|
|
260
|
+
"""
|
|
261
261
|
|
|
262
262
|
def _get_document_retriever_class(self):
|
|
263
263
|
"""Get the document retriever class based on configuration."""
|
|
@@ -500,6 +500,9 @@ class LongContextRAG:
|
|
|
500
500
|
except json.JSONDecodeError:
|
|
501
501
|
pass
|
|
502
502
|
|
|
503
|
+
if not only_contexts and extra_request_params.get("only_contexts", False):
|
|
504
|
+
only_contexts = True
|
|
505
|
+
|
|
503
506
|
logger.info(f"Query: {query} only_contexts: {only_contexts}")
|
|
504
507
|
start_time = time.time()
|
|
505
508
|
|
|
@@ -593,10 +596,19 @@ class LongContextRAG:
|
|
|
593
596
|
)
|
|
594
597
|
|
|
595
598
|
if only_contexts:
|
|
596
|
-
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
|
|
599
|
+
try:
|
|
600
|
+
searcher = SearchableResults()
|
|
601
|
+
result = searcher.reorder(docs=relevant_docs)
|
|
602
|
+
yield (json.dumps(result.model_dump(), ensure_ascii=False), SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
|
|
603
|
+
generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
|
|
604
|
+
rag_stat.chunk_stat.total_generated_tokens,
|
|
605
|
+
))
|
|
606
|
+
except Exception as e:
|
|
607
|
+
yield (str(e), SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
|
|
608
|
+
generated_tokens_count=rag_stat.recall_stat.total_generated_tokens +
|
|
609
|
+
rag_stat.chunk_stat.total_generated_tokens,
|
|
610
|
+
))
|
|
611
|
+
return
|
|
600
612
|
|
|
601
613
|
if not relevant_docs:
|
|
602
614
|
yield ("没有找到可以回答你问题的相关文档", SingleOutputMeta(input_tokens_count=rag_stat.recall_stat.total_input_tokens + rag_stat.chunk_stat.total_input_tokens,
|
|
@@ -816,12 +828,13 @@ class LongContextRAG:
|
|
|
816
828
|
|
|
817
829
|
self._print_rag_stats(rag_stat)
|
|
818
830
|
else:
|
|
819
|
-
|
|
820
|
-
qa_strategy = get_qa_strategy(
|
|
831
|
+
|
|
832
|
+
qa_strategy = get_qa_strategy(
|
|
833
|
+
self.args.rag_qa_conversation_strategy)
|
|
821
834
|
new_conversations = qa_strategy.create_conversation(
|
|
822
835
|
documents=[doc.source_code for doc in relevant_docs],
|
|
823
836
|
conversations=conversations
|
|
824
|
-
)
|
|
837
|
+
)
|
|
825
838
|
|
|
826
839
|
chunks = target_llm.stream_chat_oai(
|
|
827
840
|
conversations=new_conversations,
|
autocoder/rag/relevant_utils.py
CHANGED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import json
|
|
2
|
+
from collections import Counter
|
|
3
|
+
from typing import Dict, List, Any, Optional, Tuple, Set
|
|
4
|
+
from pydantic import BaseModel
|
|
5
|
+
from autocoder.rag.relevant_utils import FilterDoc
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class FileOccurrence(BaseModel):
    """Represents a file and its occurrence count in search results"""
    # Path of the file as reported by the retriever.
    file_path: str
    # Number of times the file occurred across the retrieved documents.
    count: int
    score: float = 0.0  # Optional relevance score
|
|
13
|
+
|
|
14
|
+
class FileResult(BaseModel):
    """Container for a ranked list of file occurrences."""
    # Occurrences sorted by count (descending) — see SearchableResults.reorder.
    files: List[FileOccurrence]
|
|
16
|
+
|
|
17
|
+
class SearchableResults:
    """Class to process and organize search results by file frequency.

    Extracts the original file paths backing each retrieved document and
    ranks them by how many documents reference them.
    """

    def extract_original_docs(self, docs: List[FilterDoc]) -> List[str]:
        """Extract all original_docs from a list of document metadata.

        Prefers the "original_docs" metadata entry when present; otherwise
        falls back to the document's module_name.
        """
        all_files = []

        for doc in docs:
            # Extract from metadata if available
            metadata = doc.source_code.metadata
            if "original_docs" in metadata:
                all_files.extend(metadata["original_docs"])
            # Also include the module_name from source_code as a fallback
            else:
                all_files.append(doc.source_code.module_name)

        return all_files

    def count_file_occurrences(self, files: List[str]) -> List[FileOccurrence]:
        """Count occurrences of each file and return a list sorted by
        count in descending order."""
        counter = Counter(files)

        # Convert to FileOccurrence objects
        occurrences = [
            FileOccurrence(file_path=file_path, count=count)
            for file_path, count in counter.items()
        ]

        # Sort by count (descending)
        return sorted(occurrences, key=lambda x: x.count, reverse=True)

    def reorder(self, docs: List[FilterDoc]) -> FileResult:
        """Process search results to extract and rank files by occurrence
        (main entry point).

        BUG FIX: the return annotation previously claimed
        List[FileOccurrence], but the method has always returned a
        FileResult wrapping that list; the annotation now matches.
        """
        all_files = self.extract_original_docs(docs)
        return FileResult(files=self.count_file_occurrences(all_files))
|
|
57
|
+
|
|
58
|
+
|
autocoder/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.
|
|
1
|
+
# Package version string; bumped on every release.
__version__ = "0.1.289"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|