auto-coder 0.1.175__py3-none-any.whl → 0.1.177__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of auto-coder might be problematic.
- {auto_coder-0.1.175.dist-info → auto_coder-0.1.177.dist-info}/METADATA +1 -1
- {auto_coder-0.1.175.dist-info → auto_coder-0.1.177.dist-info}/RECORD +12 -12
- autocoder/auto_coder_rag.py +5 -0
- autocoder/common/__init__.py +3 -2
- autocoder/rag/document_retriever.py +3 -1
- autocoder/rag/long_context_rag.py +50 -17
- autocoder/rag/token_limiter.py +45 -7
- autocoder/version.py +1 -1
- {auto_coder-0.1.175.dist-info → auto_coder-0.1.177.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.175.dist-info → auto_coder-0.1.177.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.175.dist-info → auto_coder-0.1.177.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.175.dist-info → auto_coder-0.1.177.dist-info}/top_level.txt +0 -0
{auto_coder-0.1.175.dist-info → auto_coder-0.1.177.dist-info}/RECORD
CHANGED
@@ -1,13 +1,13 @@
 autocoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/auto_coder.py,sha256=HmgKa_ZApFlCsqo6BvuVeCPuncBT_Dh29ayZxxGR6lo,32216
 autocoder/auto_coder_lang.py,sha256=4qIS1tbEI8mpbtt6ThppTwKOM6MLuJTWJdgs5jIDGE0,2301
-autocoder/auto_coder_rag.py,sha256=…
+autocoder/auto_coder_rag.py,sha256=V82EyeslAO2Z8qkMrwkyC11f1Cz6Ccjo9c867f0J_x8,11455
 autocoder/auto_coder_server.py,sha256=qRY88mkBnqSGFDcwYE5gwpe2WPhIw1nEH6LdbjCQhQk,20306
 autocoder/chat_auto_coder.py,sha256=i5xIuWlTqF0pJz8kXoa-_bW3Ic3SfCFvU2WJIMxrUHU,81798
 autocoder/chat_auto_coder_lang.py,sha256=QYtu5gWEQmWKVovR_qUZ8plySZarNFX_Onk-1vN9IiA,8524
 autocoder/command_args.py,sha256=ftWw6HnFUZPiQPt1oV-SfpHQe69XN3knaFy1lpROBcU,26854
 autocoder/lang.py,sha256=e-07rYTgimpxS8sm-AxKSmH4kKQX4N05YFHJBg9trVs,12598
-autocoder/version.py,sha256=…
+autocoder/version.py,sha256=jOsxKVrfpzRExZ84ji63BWpk4HGAHjIouu90Hi2jqGI,23
 autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/agent/auto_tool.py,sha256=DBzip-P_T6ZtT2eHexPcusmKYD0h7ufzp7TLwXAY10E,11554
 autocoder/agent/coder.py,sha256=dnITYHqkcOip8zV4lywbkYNH9w7Q3qyYaUArJ4WPrTs,866
@@ -17,7 +17,7 @@ autocoder/agent/project_reader.py,sha256=-MWRqsr7O4mvU0PIpAhOUBb29htZAvA37pa_GeE
 autocoder/chat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/common/JupyterClient.py,sha256=O-wi6pXeAEYhAY24kDa0BINrLYvKS6rKyWe98pDClS0,2816
 autocoder/common/ShellClient.py,sha256=fM1q8t_XMSbLBl2zkCNC2J9xuyKN3eXzGm6hHhqL2WY,2286
-autocoder/common/__init__.py,sha256=…
+autocoder/common/__init__.py,sha256=wKrFLZk9BMl755nL1gvPjXU-3uWKEnYBP8xsObIjM4g,10156
 autocoder/common/anything2images.py,sha256=0ILBbWzY02M-CiWB-vzuomb_J1hVdxRcenAfIrAXq9M,25283
 autocoder/common/audio.py,sha256=Kn9nWKQddWnUrAz0a_ZUgjcu4VUU_IcZBigT7n3N3qc,7439
 autocoder/common/cleaner.py,sha256=NU72i8C6o9m0vXExab7nao5bstBUsfJFcj11cXa9l4U,1089
@@ -60,9 +60,9 @@ autocoder/pyproject/__init__.py,sha256=-2-ImQVw6e3NQZQOyDlHEP5b4xVs5ur2G5izB-JCa
 autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/rag/api_server.py,sha256=zokIlDJlk7ucRorSLQm80uICO1mecfmn4J2zVqEBskE,6786
 autocoder/rag/doc_filter.py,sha256=LqU8Wi6klwpY9WTHVtkioSHpmo9IWhRz39dzV1gvp6E,9315
-autocoder/rag/document_retriever.py,sha256=…
+autocoder/rag/document_retriever.py,sha256=itypkUdY9vUITMAGhxHvWe-IZpxLi5h0A1mJuIVW6QA,23406
 autocoder/rag/llm_wrapper.py,sha256=xRbTBpLUH43Ah5jplL8WWWU-kjKfNgEJoUntLGBq5F4,2484
-autocoder/rag/long_context_rag.py,sha256=…
+autocoder/rag/long_context_rag.py,sha256=n3HkjIYZqjrHHLcGDb1u0SVEae7qChu6py30x-Jp-KQ,20160
 autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
 autocoder/rag/rag_entry.py,sha256=V1RJ8RGqM30DNPmzymv64rZjNRGWn6kfc8sRy_LECg0,2451
 autocoder/rag/raw_rag.py,sha256=yS2Ur6kG0IRjhCj2_VonwxjY_xls_E62jO5Gz5j2nqE,2952
@@ -71,7 +71,7 @@ autocoder/rag/simple_directory_reader.py,sha256=LkKreCkNdEOoL4fNhc3_hDoyyWTQUte4
 autocoder/rag/simple_rag.py,sha256=I902EUqOK1WM0Y2WFd7RzDJYofElvTZNLVCBtX5A9rc,14885
 autocoder/rag/token_checker.py,sha256=jc76x6KWmvVxds6W8juZfQGaoErudc2HenG3sNQfSLs,2819
 autocoder/rag/token_counter.py,sha256=9ujfI5xQvwzKpN9XFWQGnXpm0h1sL7kgIJxgposcxNo,2096
-autocoder/rag/token_limiter.py,sha256=…
+autocoder/rag/token_limiter.py,sha256=4cGy2kFCvbsM5CtONfuvLmXgpK_1HZTHehLTob08eks,10959
 autocoder/rag/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/rag/variable_holder.py,sha256=pDayuCnlKj7-bkn4iUHX5gea9UObddbi3ZnXotmxCs4,45
 autocoder/rag/loaders/__init__.py,sha256=EQHEZ5Cmz-mGP2SllUTvcIbYCnF7W149dNpNItfs0yE,304
@@ -95,9 +95,9 @@ autocoder/utils/request_event_queue.py,sha256=r3lo5qGsB1dIjzVQ05dnr0z_9Z3zOkBdP1
 autocoder/utils/request_queue.py,sha256=nwp6PMtgTCiuwJI24p8OLNZjUiprC-TsefQrhMI-yPE,3889
 autocoder/utils/rest.py,sha256=3tXA8KZG6jKz_tddHNLGx77Icee88WcUeesfNsgPno4,8790
 autocoder/utils/tests.py,sha256=BqphrwyycGAvs-5mhH8pKtMZdObwhFtJ5MC_ZAOiLq8,1340
-auto_coder-0.1.…
-auto_coder-0.1.…
-auto_coder-0.1.…
-auto_coder-0.1.…
-auto_coder-0.1.…
-auto_coder-0.1.…
+auto_coder-0.1.177.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+auto_coder-0.1.177.dist-info/METADATA,sha256=IBRPsgVVwqup2c8xX7d5xeLh2ay5kIiPBrxQthG2JBM,2352
+auto_coder-0.1.177.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+auto_coder-0.1.177.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+auto_coder-0.1.177.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+auto_coder-0.1.177.dist-info/RECORD,,
autocoder/auto_coder_rag.py
CHANGED
@@ -225,6 +225,11 @@ def main(input_args: Optional[List[str]] = None):
         action="store_true",
         help="Disable automatic window adaptation for documents",
     )
+    serve_parser.add_argument(
+        "--disable_segment_reorder",
+        action="store_true",
+        help="Disable reordering of document segments after retrieval",
+    )

     # Tools command
     tools_parser = subparsers.add_parser("tools", help="Various tools")
autocoder/common/__init__.py
CHANGED
@@ -285,8 +285,9 @@ class AutoCoderArgs(pydantic.BaseModel):

     monitor_mode: bool = False
     disable_auto_window: bool = False
-    …
-    …
+    disable_segment_reorder: bool = False
+    rag_doc_filter_relevance: int = 5
+    tokenizer_path: Optional[str] = None
     skip_confirm: Optional[bool] = False
     silence: Optional[bool] = False
     exclude_files: Optional[Union[str, List[str]]] = ""
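
A quick sketch of the new fields in isolation, assuming only what the hunk shows (a pydantic BaseModel with these defaults); handy for confirming that unset flags keep reordering enabled:

from typing import Optional
import pydantic

class AutoCoderArgsSubset(pydantic.BaseModel):
    # subset of AutoCoderArgs, limited to the fields visible in this hunk
    monitor_mode: bool = False
    disable_auto_window: bool = False
    disable_segment_reorder: bool = False
    rag_doc_filter_relevance: int = 5
    tokenizer_path: Optional[str] = None

args = AutoCoderArgsSubset()
print(args.disable_segment_reorder)  # False -> segments are reordered by default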
autocoder/rag/document_retriever.py
CHANGED
@@ -159,6 +159,8 @@ def process_file_local(file_path: str) -> List[SourceCode]:
         return v
     except Exception as e:
         logger.error(f"Error processing file {file_path}: {str(e)}")
+        import traceback
+        traceback.print_exc()
         return []


@@ -392,7 +394,7 @@ class AutoCoderRAGAsyncUpdateQueue:
         elif isinstance(file_list, AddOrUpdateEvent):
             for file_info in file_list.file_infos:
                 logger.info(f"{file_info[0]} is detected to be updated")
-                result = process_file_local(file_info)
+                result = process_file_local(file_info[0])
                 self.update_cache(file_info, result)

         self.write_cache()
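
The second hunk fixes a type bug: file_infos holds tuples whose first element is the path, but the whole tuple was being passed to process_file_local. A toy reproduction with hypothetical stand-ins (only the file_info[0]-is-the-path convention comes from the diff):

from typing import List, Tuple

def process_file_local(file_path: str) -> List[str]:
    # stand-in for the real loader, which treats its argument as a path string
    if not isinstance(file_path, str):
        raise TypeError(f"expected a path string, got {type(file_path).__name__}")
    return [file_path]

file_infos: List[Tuple[str, float]] = [("docs/a.md", 1718000000.0)]  # (path, ...) tuples

for file_info in file_infos:
    # before the fix: process_file_local(file_info) passed the whole tuple
    result = process_file_local(file_info[0])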
autocoder/rag/long_context_rag.py
CHANGED
@@ -7,7 +7,6 @@ import byzerllm
 import pandas as pd
 import pathspec
 from byzerllm import ByzerLLM
-from jinja2 import Template
 from loguru import logger
 from openai import OpenAI
 from rich.console import Console
@@ -30,6 +29,11 @@ from autocoder.rag.token_limiter import TokenLimiter
 from tokenizers import Tokenizer
 from autocoder.rag import variable_holder

+try:
+    from autocoder_pro.rag.llm_compute import LLMComputeEngine
+except ImportError:
+    LLMComputeEngine = None
+

 class LongContextRAG:
     def __init__(
@@ -119,13 +123,13 @@ class LongContextRAG:
             self.monitor_mode,
             ## Make sure the full-text area can hold at least one file
             single_file_token_limit=self.full_text_limit - 100,
-            disable_auto_window=self.args.disable_auto_window
+            disable_auto_window=self.args.disable_auto_window,
         )

         self.doc_filter = DocFilter(
             self.index_model, self.args, on_ray=self.on_ray, path=self.path
         )
-
+
         doc_num = 0
         token_num = 0
         token_counts = []
@@ -151,7 +155,7 @@ class LongContextRAG:
             f" Max doc tokens: {max(token_counts) if token_counts else 0}\n"
             f" Min doc tokens: {min(token_counts) if token_counts else 0}\n"
             f" Avg doc tokens: {avg_tokens:.2f}\n"
-            f" Median doc tokens: {median_tokens:.2f}\n"
+            f" Median doc tokens: {median_tokens:.2f}\n"
         )

     def count_tokens(self, text: str) -> int:
@@ -391,8 +395,13 @@ class LongContextRAG:
         relevant_docs_info = []
         for doc in relevant_docs:
             info = f"- {doc.module_name.replace(self.path,'',1)}"
-            if …
-                original_docs = ", ".join(…
+            if "original_docs" in doc.metadata:
+                original_docs = ", ".join(
+                    [
+                        doc.replace(self.path, "", 1)
+                        for doc in doc.metadata["original_docs"]
+                    ]
+                )
                 info += f" (Original docs: {original_docs})"
             relevant_docs_info.append(info)

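The restored branch formats per-document provenance by stripping the repository prefix from each original document path. A standalone rerun of that formatting with invented sample paths (note the comprehension reuses the name doc; this is safe in Python 3 because comprehensions have their own scope, though it shadows the loop variable for readers):

path = "/repo"
module_name = "/repo/docs/guide.md"
metadata = {"original_docs": ["/repo/docs/part1.md", "/repo/docs/part2.md"]}

info = f"- {module_name.replace(path, '', 1)}"
if "original_docs" in metadata:
    original_docs = ", ".join(
        [doc.replace(path, "", 1) for doc in metadata["original_docs"]]
    )
    info += f" (Original docs: {original_docs})"

print(info)  # - /docs/guide.md (Original docs: /docs/part1.md, /docs/part2.md)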
@@ -411,6 +420,7 @@ class LongContextRAG:
             segment_limit=self.segment_limit,
             buff_limit=self.buff_limit,
             llm=self.llm,
+            disable_segment_reorder=self.args.disable_segment_reorder,
         )
         final_relevant_docs = token_limiter.limit_tokens(
             relevant_docs=relevant_docs,
@@ -442,13 +452,20 @@ class LongContextRAG:

         # Add relevant docs information
         final_relevant_docs_info = []
-        for doc in relevant_docs:
+        for doc in relevant_docs:
             info = f"- {doc.module_name.replace(self.path,'',1)}"
-            if …
-                original_docs = ", ".join(…
+            if "original_docs" in doc.metadata:
+                original_docs = ", ".join(
+                    [
+                        doc.replace(self.path, "", 1)
+                        for doc in doc.metadata["original_docs"]
+                    ]
+                )
                 info += f" (Original docs: {original_docs})"
             if "chunk_ranges" in doc.metadata:
-                chunk_ranges = json.dumps(…
+                chunk_ranges = json.dumps(
+                    doc.metadata["chunk_ranges"], ensure_ascii=False
+                )
                 info += f" (Chunk ranges: {chunk_ranges})"
             final_relevant_docs_info.append(info)

@@ -464,10 +481,29 @@ class LongContextRAG:

         # Log the panel using rich
         console.print(panel)
-
+
         request_tokens = sum([doc.tokens for doc in relevant_docs])
-        target_model = model or self.llm.default_model_name
-        logger.info(…
+        target_model = model or self.llm.default_model_name
+        logger.info(
+            f"Start to send to model {target_model} with {request_tokens} tokens"
+        )
+
+        if LLMComputeEngine is not None:
+            llm_compute_engine = LLMComputeEngine(llm=self.llm)
+            new_conversations = llm_compute_engine.process_conversation(
+                conversations, query, [doc.source_code for doc in relevant_docs]
+            )
+
+            return (
+                llm_compute_engine.stream_chat_oai(
+                    conversations=new_conversations,
+                    model=model,
+                    role_mapping=role_mapping,
+                    llm_config=llm_config,
+                    delta_mode=True,
+                ),
+                context,
+            )

         new_conversations = conversations[:-1] + [
             {
@@ -479,10 +515,6 @@ class LongContextRAG:
             }
         ]

-        # # Convert new_conversations to JSON and write it to a file
-        # with open('/tmp/rag.json', 'w', encoding='utf-8') as f:
-        #     json.dump(new_conversations, f, ensure_ascii=False, indent=2)
-
         chunks = self.llm.stream_chat_oai(
             conversations=new_conversations,
             model=model,
@@ -490,4 +522,5 @@ class LongContextRAG:
             llm_config=llm_config,
             delta_mode=True,
         )
+
         return (chunk[0] for chunk in chunks), context
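The guarded import at the top of the file plus the `if LLMComputeEngine is not None` branch is a standard optional-dependency pattern: the pro engine handles the request when its package is installed, otherwise the stock stream_chat_oai path runs. A minimal sketch of the same pattern with an invented optional module:

try:
    from fast_engine import Engine  # hypothetical optional extra, not a real package
except ImportError:
    Engine = None  # absence of the extra selects the built-in path

def answer(query: str) -> str:
    if Engine is not None:
        return Engine().run(query)  # enhanced path when the extra is installed
    return f"fallback answer for {query!r}"  # default path otherwise

print(answer("hello"))  # prints the fallback unless fast_engine is importable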
autocoder/rag/token_limiter.py
CHANGED
@@ -17,6 +17,7 @@ class TokenLimiter:
         segment_limit: int,
         buff_limit: int,
         llm,
+        disable_segment_reorder: bool,
     ):
         self.count_tokens = count_tokens
         self.full_text_limit = full_text_limit
@@ -26,6 +27,7 @@ class TokenLimiter:
         self.first_round_full_docs = []
         self.second_round_extracted_docs = []
         self.sencond_round_time = 0
+        self.disable_segment_reorder = disable_segment_reorder

     @byzerllm.prompt()
     def extract_relevance_range_from_docs_with_conversation(
@@ -93,8 +95,46 @@ class TokenLimiter:
         token_count = 0
         doc_num_count = 0

+        reorder_relevant_docs = []
+
+        ## Segment splitting (for oversized documents) and reordering logic
+        ## 1. Background: during retrieval, many documents are split into multiple segments.
+        ## 2. Problem: recalled segments are ranked by relevance score, so they can arrive out of their original order, which reinforces LLM hallucination.
+        ## 3. Goal: reorder the segments so that segments from the same document stay contiguous and in the correct order.
+        ## 4. Implementation options:
+        ##    a) Option 1 (keep positions): swap a document's segments into place by chunk_index.
+        ##    b) Option 2 (current implementation): walk the documents; when segment A of some document is found, immediately collect all other segments of that document,
+        ##       sort them, and insert the sorted segments at segment A's position.
+        ## TODO:
+        ## 1. Decide via parameters whether to enable reordering and which strategy to use.
+        if not self.disable_segment_reorder:
+            num_count = 0
+            for doc in relevant_docs:
+                num_count += 1
+                reorder_relevant_docs.append(doc)
+                if "original_doc" in doc.metadata and "chunk_index" in doc.metadata:
+                    original_doc_name = doc.metadata["original_doc"].module_name
+
+                    temp_docs = []
+                    for temp_doc in relevant_docs[num_count:]:
+                        if (
+                            "original_doc" in temp_doc.metadata
+                            and "chunk_index" in temp_doc.metadata
+                        ):
+                            if (
+                                temp_doc.metadata["original_doc"].module_name
+                                == original_doc_name
+                            ):
+                                if temp_doc not in reorder_relevant_docs:
+                                    temp_docs.append(temp_doc)
+
+                    temp_docs.sort(key=lambda x: x.metadata["chunk_index"])
+                    reorder_relevant_docs.extend(temp_docs)
+        else:
+            reorder_relevant_docs = relevant_docs
+
         ## Non-windowed partition pass
-        for doc in …
+        for doc in reorder_relevant_docs:
             doc_tokens = self.count_tokens(doc.source_code)
             doc_num_count += 1
             if token_count + doc_tokens <= self.full_text_limit + self.segment_limit:
@@ -104,12 +144,12 @@ class TokenLimiter:
                 break

         ## If the window cannot hold all the relevant docs, they must be partitioned
-        if len(final_relevant_docs) < len(…
+        if len(final_relevant_docs) < len(reorder_relevant_docs):
             ## Fill the full_text partition first
             token_count = 0
             new_token_limit = self.full_text_limit
             doc_num_count = 0
-            for doc in …
+            for doc in reorder_relevant_docs:
                 doc_tokens = self.count_tokens(doc.source_code)
                 doc_num_count += 1
                 if token_count + doc_tokens <= new_token_limit:
@@ -130,7 +170,7 @@ class TokenLimiter:

             ## Continue filling the segment partition
             sencond_round_start_time = time.time()
-            remaining_docs = …
+            remaining_docs = reorder_relevant_docs[len(self.first_round_full_docs) :]
             logger.info(
                 f"first round docs: {len(self.first_round_full_docs)} remaining docs: {len(remaining_docs)} index_filter_workers: {index_filter_workers}"
             )
@@ -219,7 +259,5 @@ class TokenLimiter:
                     f"Failed to process doc {doc.module_name} after {max_retries} attempts: {str(e)}"
                 )
                 return SourceCode(
-                    module_name=doc.module_name,
-                    source_code="",
-                    tokens= 0
+                    module_name=doc.module_name, source_code="", tokens=0
                 )
autocoder/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.1.175"
+__version__ = "0.1.177"
{auto_coder-0.1.175.dist-info → auto_coder-0.1.177.dist-info}/LICENSE
File without changes
{auto_coder-0.1.175.dist-info → auto_coder-0.1.177.dist-info}/WHEEL
File without changes
{auto_coder-0.1.175.dist-info → auto_coder-0.1.177.dist-info}/entry_points.txt
File without changes
{auto_coder-0.1.175.dist-info → auto_coder-0.1.177.dist-info}/top_level.txt
File without changes