auto-coder 0.1.219__py3-none-any.whl → 0.1.220__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {auto_coder-0.1.219.dist-info → auto_coder-0.1.220.dist-info}/METADATA +1 -1
- {auto_coder-0.1.219.dist-info → auto_coder-0.1.220.dist-info}/RECORD +9 -9
- autocoder/rag/doc_filter.py +68 -165
- autocoder/rag/long_context_rag.py +6 -4
- autocoder/version.py +1 -1
- {auto_coder-0.1.219.dist-info → auto_coder-0.1.220.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.219.dist-info → auto_coder-0.1.220.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.219.dist-info → auto_coder-0.1.220.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.219.dist-info → auto_coder-0.1.220.dist-info}/top_level.txt +0 -0
{auto_coder-0.1.219.dist-info → auto_coder-0.1.220.dist-info}/RECORD
CHANGED

@@ -8,7 +8,7 @@ autocoder/chat_auto_coder.py,sha256=NTCWQKBQJluEhay5NGoTl5tdv00Zmu8oFioCjCpJCd8,
 autocoder/chat_auto_coder_lang.py,sha256=ReWukXKVvuzVvpbYk5O9kc1ev7XNmAv3DnuQhmpLmnc,8717
 autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
 autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
-autocoder/version.py,sha256=
+autocoder/version.py,sha256=AkV-rQzPFI1YGmw3_1zhV77j25aK6xZpk7snIDE9-KU,24
 autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
 autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -75,10 +75,10 @@ autocoder/index/symbols_utils.py,sha256=CjcjUVajmJZB75Ty3a7kMv1BZphrm-tIBAdOJv6u
 autocoder/pyproject/__init__.py,sha256=7ZuIxD2QBYIwhjmpva8eL2knorKo03yNqUhSyecpt7c,14448
 autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/rag/api_server.py,sha256=dRbhAZVRAOlZ64Cnxf4_rKb4iJwHnrWS9Zr67IVORw0,7288
-autocoder/rag/doc_filter.py,sha256=
+autocoder/rag/doc_filter.py,sha256=B99Qcy3tcNLuSz2kWbpgfBj2_Igme91zWKOJ2Niq2UY,6652
 autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
 autocoder/rag/llm_wrapper.py,sha256=sbDxCANiZyWb_ocqNgqu2oy3c2t8orPNRGleEs-Uwl8,2649
-autocoder/rag/long_context_rag.py,sha256=
+autocoder/rag/long_context_rag.py,sha256=jEhil33y2ryAF393zKdUhrk_QEHUlZ30gZcvPH0Z3MY,25074
 autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
 autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
 autocoder/rag/raw_rag.py,sha256=yS2Ur6kG0IRjhCj2_VonwxjY_xls_E62jO5Gz5j2nqE,2952
@@ -122,9 +122,9 @@ autocoder/utils/request_event_queue.py,sha256=r3lo5qGsB1dIjzVQ05dnr0z_9Z3zOkBdP1
 autocoder/utils/request_queue.py,sha256=nwp6PMtgTCiuwJI24p8OLNZjUiprC-TsefQrhMI-yPE,3889
 autocoder/utils/rest.py,sha256=HawagAap3wMIDROGhY1730zSZrJR_EycODAA5qOj83c,8807
 autocoder/utils/tests.py,sha256=BqphrwyycGAvs-5mhH8pKtMZdObwhFtJ5MC_ZAOiLq8,1340
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
+auto_coder-0.1.220.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+auto_coder-0.1.220.dist-info/METADATA,sha256=uQi6g28X2Lzv4rVNmWu0j3yEQ0W2uo_YQOqdXmpSHwc,2615
+auto_coder-0.1.220.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+auto_coder-0.1.220.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+auto_coder-0.1.220.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+auto_coder-0.1.220.dist-info/RECORD,,
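Note on the entries above: each RECORD line has the form path,sha256=<hash>,<size>, where the hash is an unpadded URL-safe base64 SHA-256 digest of the file's bytes and the size is in bytes. A minimal sketch of how such an entry is derived (standard library only; the exact byte content used for autocoder/version.py is an assumption based on the one-line module at the bottom of this diff):

import base64
import hashlib

def record_entry(path: str, data: bytes) -> str:
    # RECORD format: "<path>,sha256=<urlsafe base64 digest, no padding>,<size in bytes>"
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b"=")
    return f"{path},sha256={digest.decode()},{len(data)}"

# '__version__ = "0.1.220"\n' is exactly 24 bytes, matching the size recorded above.
print(record_entry("autocoder/version.py", b'__version__ = "0.1.220"\n'))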
autocoder/rag/doc_filter.py
CHANGED
@@ -1,24 +1,11 @@
 import time
 from typing import List, Dict, Optional
-from pydantic import BaseModel
-import ray
 from loguru import logger
-import os
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from rich.progress import (
-    Progress,
-    SpinnerColumn,
-    TextColumn,
-    BarColumn,
-    TaskProgressColumn,
-    TimeElapsedColumn,
-)
-from rich.console import Console
 
 from autocoder.rag.relevant_utils import (
     parse_relevance,
-    FilterDoc,
-    DocRelevance,
+    FilterDoc,
     TaskTiming,
 )
 
@@ -38,14 +25,18 @@ def _check_relevance_with_conversation(
     使用以下文档和对话历史来回答问题。如果文档中没有相关信息,请说"我没有足够的信息来回答这个问题"。
 
     文档:
+    <documents>
     {% for doc in documents %}
     {{ doc }}
     {% endfor %}
+    </documents>
 
     对话历史:
+    <conversations>
     {% for msg in conversations %}
     <{{ msg.role }}>: {{ msg.content }}
     {% endfor %}
+    </conversations>
 
     {% if filter_config %}
     一些提示:
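The change above wraps the injected documents and conversation history in explicit <documents> and <conversations> tags, giving the model unambiguous section boundaries when many documents are concatenated. A minimal sketch of how such a Jinja2 fragment renders (the real template appears to live in the function's docstring and is rendered through byzerllm's prompt machinery; the inputs here are made up):

from jinja2 import Template

fragment = Template(
    "<documents>\n"
    "{% for doc in documents %}{{ doc }}\n{% endfor %}"
    "</documents>\n"
    "<conversations>\n"
    "{% for msg in conversations %}<{{ msg.role }}>: {{ msg.content }}\n{% endfor %}"
    "</conversations>"
)
print(fragment.render(
    documents=["##File: hello.py\nprint('hi')"],
    conversations=[{"role": "user", "content": "What does hello.py do?"}],
))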
@@ -58,32 +49,6 @@ def _check_relevance_with_conversation(
     """
 
 
-@ray.remote
-class DocFilterWorker:
-    def __init__(self, llm: ByzerLLM):
-        self.llm = llm
-        if self.llm.get_sub_client("recall_model"):
-            self.recall_llm = self.llm.get_sub_client("recall_model")
-        else:
-            self.recall_llm = self.llm
-
-    def filter_doc(
-        self, conversations: List[Dict[str, str]], docs: List[str]
-    ) -> Optional[FilterDoc]:
-        submit_time_1 = time.time()
-        try:
-            v = _check_relevance_with_conversation.with_llm(self.llm).run(
-                conversations=conversations, documents=docs
-            )
-        except Exception as e:
-            logger.error(
-                f"Error in _check_relevance_with_conversation: {str(e)}")
-            return (None, submit_time_1, time.time())
-
-        end_time_2 = time.time()
-        return (v, submit_time_1, end_time_2)
-
-
 class DocFilter:
     def __init__(
         self,
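With the @ray.remote DocFilterWorker gone, per-document scoring now runs only on a ThreadPoolExecutor, as the rewritten filter_docs_with_threads in the next hunk shows. The fan-out/fan-in shape is the standard one; here is a self-contained sketch with a stand-in scoring function in place of the LLM call (names other than ThreadPoolExecutor and as_completed are illustrative):

from concurrent.futures import ThreadPoolExecutor, as_completed

def score(doc: str) -> int:
    # Stand-in for the per-document LLM relevance check.
    return len(doc)

docs = ["##File: a.py\nprint(1)", "##File: b.py\nprint(2); print(3)"]
with ThreadPoolExecutor(max_workers=5) as executor:
    future_to_doc = {executor.submit(score, d): d for d in docs}
    results = []
    for future in as_completed(future_to_doc):
        results.append((future_to_doc[future], future.result()))

# Highest score first, mirroring the relevant_docs.sort(...) at the end of the real method.
results.sort(key=lambda x: x[1], reverse=True)
print(results)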
@@ -101,144 +66,67 @@ class DocFilter:
         self.args = args
         self.relevant_score = self.args.rag_doc_filter_relevance or 5
         self.on_ray = on_ray
-        self.path = path
-        if self.on_ray:
-            cpu_count = os.cpu_count() or 1
-            self.workers = [
-                DocFilterWorker.options(
-                    max_concurrency=1000, num_cpus=0).remote(llm)
-                for _ in range(cpu_count)
-            ]
+        self.path = path
 
     def filter_docs(
         self, conversations: List[Dict[str, str]], documents: List[SourceCode]
     ) -> List[FilterDoc]:
-        if self.on_ray:
-            return self.filter_docs_with_ray(conversations, documents)
-        else:
-            return self.filter_docs_with_threads(conversations, documents)
+        return self.filter_docs_with_threads(conversations, documents)
 
     def filter_docs_with_threads(
         self, conversations: List[Dict[str, str]], documents: List[SourceCode]
     ) -> List[FilterDoc]:
-
-        console = Console()
+
         rag_manager = RagConfigManager(path=self.path)
         rag_config = rag_manager.load_config()
-        documents = list(documents)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-                        llm.setup_default_model_name(self.recall_llm.default_model_name)
-
-                        v = (
-                            _check_relevance_with_conversation.with_llm(
-                                llm)
-                            .options({"llm_config": {"max_length": 10}})
-                            .run(
-                                conversations=conversations,
-                                documents=docs,
-                                filter_config=rag_config.filter_config,
-                            )
-                        )
-                    except Exception as e:
-                        logger.error(
-                            f"Error in _check_relevance_with_conversation: {str(e)}"
-                        )
-                        return (None, submit_time_1, time.time())
-
-                    end_time_2 = time.time()
-                    return (v, submit_time_1, end_time_2)
-
-                m = executor.submit(
-                    _run,
-                    conversations,
-                    [f"##File: {doc.module_name}\n{doc.source_code}"],
-                )
-                future_to_doc[m] = (doc, submit_time)
-
-            relevant_docs = []
-            for future in as_completed(list(future_to_doc.keys())):
-                try:
-                    doc, submit_time = future_to_doc[future]
-                    end_time = time.time()
-                    v, submit_time_1, end_time_2 = future.result()
-                    task_timing = TaskTiming(
-                        submit_time=submit_time,
-                        end_time=end_time,
-                        duration=end_time - submit_time,
-                        real_start_time=submit_time_1,
-                        real_end_time=end_time_2,
-                        real_duration=end_time_2 - submit_time_1,
-                    )
-                    progress.update(task, advance=1)
-
-                    relevance = parse_relevance(v)
-                    if (
-                        relevance
-                        and relevance.is_relevant
-                        and relevance.relevant_score >= self.relevant_score
-                    ):
-                        relevant_docs.append(
-                            FilterDoc(
-                                source_code=doc,
-                                relevance=relevance,
-                                task_timing=task_timing,
+        documents = list(documents)
+        logger.info(f"Filtering {len(documents)} documents....")
+        with ThreadPoolExecutor(
+            max_workers=self.args.index_filter_workers or 5
+        ) as executor:
+            future_to_doc = {}
+            for doc in documents:
+                submit_time = time.time()
+
+                def _run(conversations, docs):
+                    submit_time_1 = time.time()
+                    try:
+                        llm = ByzerLLM()
+                        llm.skip_nontext_check = True
+                        llm.setup_default_model_name(self.recall_llm.default_model_name)
+
+                        v = (
+                            _check_relevance_with_conversation.with_llm(
+                                llm)
+                            .options({"llm_config": {"max_length": 10}})
+                            .run(
+                                conversations=conversations,
+                                documents=docs,
+                                filter_config=rag_config.filter_config,
                             )
                         )
-
-
-
+                    except Exception as e:
+                        logger.error(
+                            f"Error in _check_relevance_with_conversation: {str(e)}"
+                        )
+                        return (None, submit_time_1, time.time())
 
-
-
-            key=lambda x: x.relevance.relevant_score, reverse=True)
-        return relevant_docs
+                    end_time_2 = time.time()
+                    return (v, submit_time_1, end_time_2)
 
-
-
-
-
-
-
-        for doc in documents:
-            worker = self.workers[count % len(self.workers)]
-            count += 1
-            future = worker.filter_doc.remote(
-                conversations, [
-                    f"##File: {doc.module_name}\n{doc.source_code}"]
-            )
-            futures.append((future, doc))
+                m = executor.submit(
+                    _run,
+                    conversations,
+                    [f"##File: {doc.module_name}\n{doc.source_code}"],
+                )
+                future_to_doc[m] = (doc, submit_time)
 
             relevant_docs = []
-            for future
+            for future in as_completed(list(future_to_doc.keys())):
                 try:
-
+                    doc, submit_time = future_to_doc[future]
                     end_time = time.time()
-
-                    if v is None:
-                        continue
-
+                    v, submit_time_1, end_time_2 = future.result()
                     task_timing = TaskTiming(
                         submit_time=submit_time,
                         end_time=end_time,
@@ -246,11 +134,20 @@ class DocFilter:
                         real_start_time=submit_time_1,
                         real_end_time=end_time_2,
                         real_duration=end_time_2 - submit_time_1,
-                    )
+                    )
+
+                    relevance = parse_relevance(v)
                     logger.info(
-                        f"Document
+                        f"Document filtering progress:\n"
+                        f" - File: {doc.module_name}\n"
+                        f" - Relevance: {'Relevant' if relevance and relevance.is_relevant else 'Not Relevant'}\n"
+                        f" - Score: {relevance.relevant_score if relevance else 'N/A'}\n"
+                        f" - Raw Response: {v}\n"
+                        f" - Timing:\n"
+                        f" * Total Duration: {task_timing.duration:.2f}s\n"
+                        f" * Real Duration: {task_timing.real_duration:.2f}s\n"
+                        f" * Queue Time: {(task_timing.real_start_time - task_timing.submit_time):.2f}s"
                     )
-                    relevance = parse_relevance(v)
                     if (
                         relevance
                         and relevance.is_relevant
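The new log block reports three timings per document, all derived from the TaskTiming fields: Total Duration spans submission to result collection, Real Duration spans only the LLM call inside _run, and Queue Time is the gap between the two start stamps. An illustrative sketch of the bookkeeping (the sleep calls stand in for real work):

import time

submit_time = time.time()       # task handed to the executor
time.sleep(0.01)                # waiting for a free worker thread
real_start_time = time.time()   # _run actually begins
time.sleep(0.02)                # the LLM relevance call
real_end_time = time.time()     # _run returns
end_time = time.time()          # result collected via as_completed

print(f"Total Duration: {end_time - submit_time:.2f}s")
print(f"Real Duration:  {real_end_time - real_start_time:.2f}s")
print(f"Queue Time:     {real_start_time - submit_time:.2f}s")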
@@ -264,10 +161,16 @@
                             )
                         )
                 except Exception as exc:
-
-
+                    try:
+                        doc, submit_time = future_to_doc[future]
+                        logger.error(
+                            f"Filtering document generated an exception (doc: {doc.module_name}): {exc}")
+                    except Exception as e:
+                        logger.error(
+                            f"Filtering document generated an exception: {exc}")
 
         # Sort relevant_docs by relevance score in descending order
         relevant_docs.sort(
             key=lambda x: x.relevance.relevant_score, reverse=True)
         return relevant_docs
+

autocoder/rag/long_context_rag.py
CHANGED
@@ -428,6 +428,7 @@ class LongContextRAG:
             if "only_contexts" in v:
                 query = v["query"]
                 only_contexts = v["only_contexts"]
+                conversations[-1]["content"] = query
         except json.JSONDecodeError:
             pass
 
@@ -463,9 +464,10 @@ class LongContextRAG:
         )
 
         if only_contexts:
-
-
-
+            final_docs = []
+            for doc in relevant_docs:
+                final_docs.append(doc.model_dump())
+            return [json.dumps(final_docs,ensure_ascii=False)], []
 
         if not relevant_docs:
             return ["没有找到相关的文档来回答这个问题。"], []
@@ -612,7 +614,7 @@ class LongContextRAG:
             ),
         }
     ]
-
+
     chunks = target_llm.stream_chat_oai(
         conversations=new_conversations,
         model=model,
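With this change, the only_contexts path returns the filtered documents themselves rather than an answer: each FilterDoc is a pydantic model, so model_dump() turns it into a plain dict and the whole list is serialized as one JSON string (ensure_ascii=False keeps Chinese text readable). A sketch with a simplified stand-in model (the real FilterDoc nests source_code, relevance, and task_timing, as the doc_filter.py diff shows):

import json
from pydantic import BaseModel

class FilterDoc(BaseModel):
    # Simplified stand-in for autocoder's FilterDoc.
    module_name: str
    relevant_score: int

relevant_docs = [
    FilterDoc(module_name="docs/guide.md", relevant_score=8),
    FilterDoc(module_name="docs/faq.md", relevant_score=6),
]
final_docs = [doc.model_dump() for doc in relevant_docs]
print(json.dumps(final_docs, ensure_ascii=False))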
autocoder/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.1.219"
+__version__ = "0.1.220"
{auto_coder-0.1.219.dist-info → auto_coder-0.1.220.dist-info}/LICENSE
File without changes

{auto_coder-0.1.219.dist-info → auto_coder-0.1.220.dist-info}/WHEEL
File without changes

{auto_coder-0.1.219.dist-info → auto_coder-0.1.220.dist-info}/entry_points.txt
File without changes

{auto_coder-0.1.219.dist-info → auto_coder-0.1.220.dist-info}/top_level.txt
File without changes