auto-coder 0.1.274__py3-none-any.whl → 0.1.276__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic.
- {auto_coder-0.1.274.dist-info → auto_coder-0.1.276.dist-info}/METADATA +1 -1
- {auto_coder-0.1.274.dist-info → auto_coder-0.1.276.dist-info}/RECORD +22 -22
- autocoder/auto_coder_rag.py +30 -5
- autocoder/auto_coder_runner.py +1 -1
- autocoder/commands/auto_command.py +3 -1
- autocoder/common/__init__.py +1 -0
- autocoder/common/code_auto_merge.py +1 -2
- autocoder/common/code_auto_merge_diff.py +1 -1
- autocoder/common/code_auto_merge_editblock.py +2 -1
- autocoder/common/code_auto_merge_strict_diff.py +3 -1
- autocoder/common/command_generator.py +3 -1
- autocoder/common/git_utils.py +7 -3
- autocoder/common/shells.py +23 -1
- autocoder/rag/cache/base_cache.py +16 -1
- autocoder/rag/cache/byzer_storage_cache.py +74 -47
- autocoder/rag/long_context_rag.py +20 -13
- autocoder/rag/utils.py +1 -1
- autocoder/version.py +1 -1
- {auto_coder-0.1.274.dist-info → auto_coder-0.1.276.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.274.dist-info → auto_coder-0.1.276.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.274.dist-info → auto_coder-0.1.276.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.274.dist-info → auto_coder-0.1.276.dist-info}/top_level.txt +0 -0
{auto_coder-0.1.274.dist-info → auto_coder-0.1.276.dist-info}/RECORD
CHANGED
@@ -1,10 +1,10 @@
 autocoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/auto_coder.py,sha256=SgAAoSpUaPZiiNnEWRYIULEsKBJaKuQK_b7pEl_mKY8,65516
 autocoder/auto_coder_lang.py,sha256=Rtupq6N3_HT7JRhDKdgCBcwRaiAnyCOR_Gsp4jUomrI,3229
-autocoder/auto_coder_rag.py,sha256=
+autocoder/auto_coder_rag.py,sha256=mX-szIG9T7Mzwoc4QwKp_GyYBcVf6dfsNJnKzYHHl6U,30329
 autocoder/auto_coder_rag_client_mcp.py,sha256=QRxUbjc6A8UmDMQ8lXgZkjgqtq3lgKYeatJbDY6rSo0,6270
 autocoder/auto_coder_rag_mcp.py,sha256=-RrjNwFaS2e5v8XDIrKR-zlUNUE8UBaeOtojffBrvJo,8521
-autocoder/auto_coder_runner.py,sha256=
+autocoder/auto_coder_runner.py,sha256=1M74XhbmmJg0z5rC_TjjlPaqyEPfg2z6N7mLN7z8qFw,100773
 autocoder/auto_coder_server.py,sha256=6YQweNEKUrGAZ3yPvw8_qlNZJYLVSVUXGrn1K6udLts,20413
 autocoder/benchmark.py,sha256=Ypomkdzd1T3GE6dRICY3Hj547dZ6_inqJbBJIp5QMco,4423
 autocoder/chat_auto_coder.py,sha256=G7_CIgDOTdGGPzRQDo0hEOh5p8A36oJQaYJc514xBkk,16842
@@ -12,7 +12,7 @@ autocoder/chat_auto_coder_lang.py,sha256=ShOQVOnMA-WlT-fB9OrOer-xQkbcWxJGl-WMPuZ
 autocoder/command_args.py,sha256=9aYJ-AmPxP1sQh6ciw04FWHjSn31f2W9afXFwo8wgx4,30441
 autocoder/lang.py,sha256=U6AjVV8Rs1uLyjFCZ8sT6WWuNUxMBqkXXIOs4S120uk,14511
 autocoder/models.py,sha256=PlG1tKHSHwB57cKLOl5gTl5yTzFUDzCgeHPJU3N9F6Q,9106
-autocoder/version.py,sha256=
+autocoder/version.py,sha256=P8t0JVsPFr7ktiosnhBO0A_KC2ivastmjzH6pWwESMU,23
 autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/agent/auto_demand_organizer.py,sha256=NWSAEsEk94vT3lGjfo25kKLMwYdPcpy9e-i21txPasQ,6942
 autocoder/agent/auto_filegroup.py,sha256=CW7bqp0FW1GIEMnl-blyAc2UGT7O9Mom0q66ITz1ckM,6635
@@ -26,11 +26,11 @@ autocoder/agent/planner.py,sha256=SZTSZHxHzDmuWZo3K5fs79RwvJLWurg-nbJRRNbX65o,91
 autocoder/agent/project_reader.py,sha256=tWLaPoLw1gI6kO_NzivQj28KbobU2ceOLuppHMbfGl8,18234
 autocoder/chat/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/commands/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autocoder/commands/auto_command.py,sha256=
+autocoder/commands/auto_command.py,sha256=Qv5hObiIioIOAVkucFB2na6aLUuxSIgLVO9PfnXwyIo,52496
 autocoder/commands/tools.py,sha256=lanjoBGR6H8HDJSY3KrM6ibrtHZbgKX6mKJHSSE66dg,20493
 autocoder/common/JupyterClient.py,sha256=O-wi6pXeAEYhAY24kDa0BINrLYvKS6rKyWe98pDClS0,2816
 autocoder/common/ShellClient.py,sha256=fM1q8t_XMSbLBl2zkCNC2J9xuyKN3eXzGm6hHhqL2WY,2286
-autocoder/common/__init__.py,sha256=
+autocoder/common/__init__.py,sha256=Z6gvzhzLWYnXGVCnek2UoWic5DRiqWGQh4AiGZL3XVQ,12989
 autocoder/common/anything2images.py,sha256=0ILBbWzY02M-CiWB-vzuomb_J1hVdxRcenAfIrAXq9M,25283
 autocoder/common/anything2img.py,sha256=4TREa-sOA-iargieUy7MpyCYVUE-9Mmq0wJtwomPqnE,7662
 autocoder/common/audio.py,sha256=Kn9nWKQddWnUrAz0a_ZUgjcu4VUU_IcZBigT7n3N3qc,7439
@@ -44,13 +44,13 @@ autocoder/common/code_auto_generate.py,sha256=i0f0BPTvt8QN5-arC2PqfMs4sukGJqFw58
 autocoder/common/code_auto_generate_diff.py,sha256=EYGHf24fVAk3M8kXllXukdUvzo2I2Fb8qNrTiU_Qtug,18759
 autocoder/common/code_auto_generate_editblock.py,sha256=1XmoLZl-XrrPOyHkSfqe0pOxi0Lp4d69oDQX6zzqs_c,20523
 autocoder/common/code_auto_generate_strict_diff.py,sha256=68fAks4S6Hz0wAeXrdI1PjdeCiy0gXRhgEeIlgxB04E,17539
-autocoder/common/code_auto_merge.py,sha256=
-autocoder/common/code_auto_merge_diff.py,sha256=
-autocoder/common/code_auto_merge_editblock.py,sha256=
-autocoder/common/code_auto_merge_strict_diff.py,sha256=
+autocoder/common/code_auto_merge.py,sha256=wFxv0iHciqTwzy8osGPmyF0LcdK7ysFwnzXnsWr-gwM,8642
+autocoder/common/code_auto_merge_diff.py,sha256=clVxvDlo-SN3QxARipUfOFiSEomTrC41ha7_AOmufI4,17948
+autocoder/common/code_auto_merge_editblock.py,sha256=JzVTAqvqxVMkXOOPY5Ti2Wc_nL4YABYi470sCHKTOgg,20335
+autocoder/common/code_auto_merge_strict_diff.py,sha256=oYVEs1dmObBqK_1PgFX0LeuVti7es2IZtD00iNOn91g,12116
 autocoder/common/code_modification_ranker.py,sha256=e1i8oNPN_PfG4O8HQnQWxh9tyOd-ur6jFTXfkHPVXYo,13152
 autocoder/common/command_completer.py,sha256=Nw_EFXLDuVXbOKGlmzTODNRJUV9mSUEWV73GUmO_WLw,35166
-autocoder/common/command_generator.py,sha256=
+autocoder/common/command_generator.py,sha256=t1o1d7xEyfnPvY_MpG5usyx0eKW7WCUR0urKoqCex60,2761
 autocoder/common/command_templates.py,sha256=WAixVjue5QmCFAD13K4ElfcOEjdeGr8tFb0atDAbEoo,8658
 autocoder/common/conf_import_export.py,sha256=w__WsIobe6nmsGns2pV-laU7R5ZvtQNuIbXebxhbY7A,3967
 autocoder/common/conf_validator.py,sha256=EzSmadpZ22D9e8iWmfeWodUeYJt0IgMoaAOmCleXliI,8795
@@ -58,7 +58,7 @@ autocoder/common/const.py,sha256=eTjhjh4Aj4CUzviJ81jaf3Y5cwqsLATySn2wJxaS6RQ,291
 autocoder/common/context_pruner.py,sha256=_JUpN8naoGNF2JupirM-5xn8Bl9uQgIVmZP3dXH4F1g,21725
 autocoder/common/conversation_pruner.py,sha256=pzmrQEa7pFzA66eYSS_h7VqP6ZwUABeooDQzm0PGu0A,5770
 autocoder/common/files.py,sha256=nPiKcnUcYZbSUn3TskKeTVnAxCJRtuehPuB_5d2imX8,4618
-autocoder/common/git_utils.py,sha256=
+autocoder/common/git_utils.py,sha256=EK8gekbXsG6BNDVrd1Nsan_7kJ71dd8_w9FiOFxjsVI,26276
 autocoder/common/global_cancel.py,sha256=hT7J7J5ChThIhk2x11_v4v9ASIn4HtwyPD26t2s-fwc,418
 autocoder/common/image_to_page.py,sha256=yWiTJQ49Lm3j0FngiJhQ9u7qayqE_bOGb8Rk0TmSWx0,14123
 autocoder/common/index_import_export.py,sha256=h758AYY1df6JMTKUXYmMkSgxItfymDt82XT7O-ygEuw,4565
@@ -75,7 +75,7 @@ autocoder/common/result_manager.py,sha256=nBcFRj5reBC7vp13M91f4B8iPW8B8OehayHlUd
 autocoder/common/screenshots.py,sha256=_gA-z1HxGjPShBrtgkdideq58MG6rqFB2qMUJKjrycs,3769
 autocoder/common/search.py,sha256=245iPFgWhMldoUK3CqCP89ltaxZiNPK73evoG6Fp1h8,16518
 autocoder/common/search_replace.py,sha256=GphFkc57Hb673CAwmbiocqTbw8vrV7TrZxtOhD0332g,22147
-autocoder/common/shells.py,sha256
+autocoder/common/shells.py,sha256=-5j45qb1SVmkZaORqDZ5EM2zJ16b5QGM1wHDfBfGejk,18944
 autocoder/common/stats_panel.py,sha256=wGl9O45pjVVDxhNumLv4_NfLYSlUP_18Tw4hcJSjw50,4596
 autocoder/common/sys_prompt.py,sha256=JlexfjZt554faqbgkCmzOJqYUzDHfbnxly5ugFfHfEE,26403
 autocoder/common/text.py,sha256=KGRQq314GHBmY4MWG8ossRoQi1_DTotvhxchpn78c-k,1003
@@ -111,7 +111,7 @@ autocoder/rag/api_server.py,sha256=dRbhAZVRAOlZ64Cnxf4_rKb4iJwHnrWS9Zr67IVORw0,7
 autocoder/rag/doc_filter.py,sha256=yEXaBw1XJH57Gtvk4-RFQtd5eawA6SBjzxeRZrIsQew,11623
 autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
 autocoder/rag/llm_wrapper.py,sha256=wf56ofQNOaBkLhnoxK9VoVnHWD0gsj0pP8mUBfS92RI,2737
-autocoder/rag/long_context_rag.py,sha256=
+autocoder/rag/long_context_rag.py,sha256=qFlNmbgQnstCSCb0SxfkMEYtZRr8p6YEc6u0jpve4Q0,32002
 autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
 autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
 autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
@@ -123,11 +123,11 @@ autocoder/rag/token_counter.py,sha256=C-Lwc4oIjJpZDEqp9WLHGOe6hb4yhrdJpMtkrtp_1q
 autocoder/rag/token_limiter.py,sha256=3VgJF4may3ESyATmBIiOe05oc3VsidJcJTJ5EhoSvH8,18854
 autocoder/rag/token_limiter_utils.py,sha256=FATNEXBnFJy8IK3PWNt1pspIv8wuTgy3F_ACNvqoc4I,404
 autocoder/rag/types.py,sha256=WPgLpUTwbk0BAikyDOc0NOEwV5k73myF38zWdOuYdC4,2499
-autocoder/rag/utils.py,sha256=
+autocoder/rag/utils.py,sha256=f21ybCAlYVgr3tJP9MkVoM9d82-uG5NHu2gsv2oaVBQ,4961
 autocoder/rag/variable_holder.py,sha256=PFvBjFcR7-fNDD4Vcsc8CpH2Te057vcpwJMxtrfUgKI,75
 autocoder/rag/cache/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autocoder/rag/cache/base_cache.py,sha256=
-autocoder/rag/cache/byzer_storage_cache.py,sha256=
+autocoder/rag/cache/base_cache.py,sha256=EaYYYbclMBvnlOUoM7qonnluwZX5oSvUjdvGvFun8_8,742
+autocoder/rag/cache/byzer_storage_cache.py,sha256=gK90pf741CgccCzQ73urBorCqVyAfwU1FAqMtSorWVk,17232
 autocoder/rag/cache/file_monitor_cache.py,sha256=2TnOW8Y81Zc0WA1upRrkmQH18IMdv40CeNccmnTvd3c,4981
 autocoder/rag/cache/simple_cache.py,sha256=8FMmBAfhAPcdSNUWC6Ga43LBFGXD-klwabVbzm_bciI,9347
 autocoder/rag/loaders/__init__.py,sha256=EQHEZ5Cmz-mGP2SllUTvcIbYCnF7W149dNpNItfs0yE,304
@@ -165,9 +165,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=lkJ_A-sYU36JMzjFWkk3pR6uos8oZHYt9GPsPe_CPAo,11766
 autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
-auto_coder-0.1.
+auto_coder-0.1.276.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+auto_coder-0.1.276.dist-info/METADATA,sha256=515XHnua8SHruzQVf5S4pH2wboUIiasDzY92WnETZoc,2643
+auto_coder-0.1.276.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+auto_coder-0.1.276.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+auto_coder-0.1.276.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+auto_coder-0.1.276.dist-info/RECORD,,
autocoder/auto_coder_rag.py
CHANGED
@@ -17,6 +17,7 @@ import shlex
 from rich.console import Console
 from rich.table import Table
 import os
+import hashlib
 from loguru import logger
 import asyncio
 from datetime import datetime
@@ -32,6 +33,21 @@ if platform.system() == "Windows":
     init()


+def generate_unique_name_from_path(path: str) -> str:
+    """
+    Generate a unique name (MD5 hash) from a path after normalizing it.
+    For Linux/Unix systems, trailing path separators are removed.
+    """
+    if not path:
+        return ""
+
+    # Normalize the path (resolve absolute path and remove trailing separators)
+    normalized_path = os.path.normpath(os.path.abspath(path))
+
+    # Generate MD5 hash from the normalized path
+    return hashlib.md5(normalized_path.encode("utf-8")).hexdigest()
+
+
 def initialize_system(args):
     if args.product_mode == "lite":
         return
@@ -491,6 +507,10 @@ def main(input_args: Optional[List[str]] = None):
             }
         )

+        # Generate unique name for RAG build if doc_dir exists
+        if server_args.doc_dir:
+            auto_coder_args.rag_build_name = generate_unique_name_from_path(server_args.doc_dir)
+            logger.info(f"Generated RAG build name: {auto_coder_args.rag_build_name}")

         if auto_coder_args.enable_hybrid_index and args.product_mode == "lite":
             raise Exception("Hybrid index is not supported in lite mode")
@@ -500,7 +520,7 @@ def main(input_args: Optional[List[str]] = None):
             try:
                 from byzerllm.apps.byzer_storage.simple_api import ByzerStorage

-                storage = ByzerStorage("byzerai_store", "rag",
+                storage = ByzerStorage("byzerai_store", "rag", auto_coder_args.rag_build_name)
                 storage.retrieval.cluster_info("byzerai_store")
             except Exception as e:
                 logger.error(
@@ -611,6 +631,7 @@ def main(input_args: Optional[List[str]] = None):

         if server_args.doc_dir:
             auto_coder_args.rag_type = "simple"
+            auto_coder_args.rag_build_name = generate_unique_name_from_path(server_args.doc_dir)
             rag = RAGFactory.get_rag(
                 llm=llm,
                 args=auto_coder_args,
@@ -618,6 +639,7 @@ def main(input_args: Optional[List[str]] = None):
                 tokenizer_path=server_args.tokenizer_path,
             )
         else:
+            auto_coder_args.rag_build_name = generate_unique_name_from_path("")
             rag = RAGFactory.get_rag(llm=llm, args=auto_coder_args, path="")

         llm_wrapper = LLWrapper(llm=llm, rag=rag)
@@ -636,10 +658,7 @@ def main(input_args: Optional[List[str]] = None):
                 logger.warning(f"Failed to save service info: {str(e)}")

         serve(llm=llm_wrapper, args=server_args)
-    elif args.command == "build_hybrid_index":
-        if not args.quick:
-            initialize_system()
-
+    elif args.command == "build_hybrid_index":
         auto_coder_args = AutoCoderArgs(
             **{
                 arg: getattr(args, arg)
@@ -648,6 +667,11 @@ def main(input_args: Optional[List[str]] = None):
             }
         )

+        # Generate unique name for RAG build if doc_dir exists
+        if args.doc_dir:
+            auto_coder_args.rag_build_name = generate_unique_name_from_path(args.doc_dir)
+            logger.info(f"Generated RAG build name: {auto_coder_args.rag_build_name}")
+
         auto_coder_args.enable_hybrid_index = True
         auto_coder_args.rag_type = "simple"

@@ -675,6 +699,7 @@ def main(input_args: Optional[List[str]] = None):
             return
         llm.setup_default_emb_model_name("emb")

+        auto_coder_args.rag_build_name = generate_unique_name_from_path(args.doc_dir)
         rag = RAGFactory.get_rag(
             llm=llm,
             args=auto_coder_args,
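The helper added above derives a stable build name from doc_dir, so the same documentation directory always maps to the same ByzerStorage table. A minimal sketch of the expected behavior, with illustrative paths that are not from the package:

import hashlib
import os

def unique_name(path: str) -> str:
    # Same normalize-then-hash idea as generate_unique_name_from_path above.
    if not path:
        return ""
    normalized = os.path.normpath(os.path.abspath(path))
    return hashlib.md5(normalized.encode("utf-8")).hexdigest()

# Trailing separators and redundant segments collapse to one name,
# so repeated builds of the same doc_dir reuse the same storage table.
assert unique_name("/data/docs/") == unique_name("/data/docs")
assert unique_name("/data/docs/./") == unique_name("/data/docs")
assert unique_name("") == ""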
autocoder/auto_coder_runner.py
CHANGED
@@ -1384,7 +1384,7 @@ def commit(query: str):
             md5 = hashlib.md5(file_content.encode("utf-8")).hexdigest()
             file_name = os.path.basename(execute_file)
             commit_result = git_utils.commit_changes(
-                ".", f"
+                ".", f"{commit_message}\nauto_coder_{file_name}_{md5}"
             )
             git_utils.print_commit_info(commit_result=commit_result)
             if commit_message:
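The commit message now keeps the user-facing message on the first line and appends the auto_coder_{file_name}_{md5} marker on a second line, which the git_utils lookups below search for. A small illustration with made-up values:

import hashlib
import os

commit_message = "fix: handle empty doc_dir"            # hypothetical user message
execute_file = "actions/000000000001_chat_action.yml"   # hypothetical action file
file_content = "query: ..."                             # its contents

md5 = hashlib.md5(file_content.encode("utf-8")).hexdigest()
file_name = os.path.basename(execute_file)
print(f"{commit_message}\nauto_coder_{file_name}_{md5}")
# First line: the message; second line: the auto_coder marker used for lookup.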
autocoder/commands/auto_command.py
CHANGED
@@ -175,6 +175,7 @@ class CommandAutoTuner:
     Python版本: {{ env_info.python_version }}
     终端类型: {{ env_info.shell_type }}
     终端编码: {{ env_info.shell_encoding }}
+    当前用户: {{ current_user }}

     {%- if shell_type %}
     脚本类型:{{ shell_type }}
@@ -284,7 +285,8 @@ class CommandAutoTuner:
             "shell_type": shell_type,
             "shell_encoding": shells.get_terminal_encoding(),
             "conversation_safe_zone_tokens": self.args.conversation_prune_safe_zone_tokens,
-            "os_distribution": shells.get_os_distribution()
+            "os_distribution": shells.get_os_distribution(),
+            "current_user": shells.get_current_username()
         }

     @byzerllm.prompt()
autocoder/common/__init__.py
CHANGED
@@ -320,6 +320,7 @@ class AutoCoderArgs(pydantic.BaseModel):

     monitor_mode: bool = False
     enable_hybrid_index: bool = False
+    rag_build_name: Optional[str] = None
     disable_auto_window: bool = False
     filter_batch_size: Optional[int] = 5
     disable_segment_reorder: bool = False
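rag_build_name is optional, so existing configurations keep working; it is only populated when auto_coder_rag.py derives it from doc_dir. A minimal sketch, with the model abbreviated to just the relevant fields:

from typing import Optional
import pydantic

class AutoCoderArgs(pydantic.BaseModel):
    # Abbreviated for illustration; only rag_build_name is new in this release.
    enable_hybrid_index: bool = False
    rag_build_name: Optional[str] = None

print(AutoCoderArgs().rag_build_name)                          # None by default
print(AutoCoderArgs(rag_build_name="3a7b...").rag_build_name)  # set per doc_dir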
autocoder/common/code_auto_merge.py
CHANGED
@@ -1,4 +1,3 @@
-
 import os
 from byzerllm.utils.client import code_utils
 from autocoder.common import AutoCoderArgs, git_utils
@@ -197,5 +196,5 @@ class CodeAutoMerge:

         self.printer.print_in_terminal("files_merged", total=total)
         if not force_skip_git and not self.args.skip_commit:
-            commit_result = git_utils.commit_changes(self.args.source_dir, f"
+            commit_result = git_utils.commit_changes(self.args.source_dir, f"{self.args.query}\nauto_coder_{file_name}_{md5}")
             git_utils.print_commit_info(commit_result=commit_result)
autocoder/common/code_auto_merge_diff.py
CHANGED
@@ -579,7 +579,7 @@ class CodeAutoMergeDiff:

         self.printer.print_in_terminal("files_merged_total", total=total)
         if not force_skip_git and not self.args.skip_commit:
-            commit_result = git_utils.commit_changes(self.args.source_dir, f"
+            commit_result = git_utils.commit_changes(self.args.source_dir, f"{self.args.query}\nauto_coder_{file_name}_{md5}")
             git_utils.print_commit_info(commit_result=commit_result)
         else:
             # Print edits for review
autocoder/common/code_auto_merge_editblock.py
CHANGED
@@ -425,7 +425,8 @@ class CodeAutoMergeEditBlock:
         if not force_skip_git and not self.args.skip_commit:
             try:
                 commit_result = git_utils.commit_changes(
-                    self.args.source_dir,
+                    self.args.source_dir,
+                    f"{self.args.query}\nauto_coder_{file_name}_{md5}",
                 )
                 git_utils.print_commit_info(commit_result=commit_result)
             except Exception as e:
autocoder/common/code_auto_merge_strict_diff.py
CHANGED
@@ -285,7 +285,9 @@ class CodeAutoMergeStrictDiff:

         self.printer.print_in_terminal("files_merged_total", total=total)
         if not force_skip_git and not self.args.skip_commit:
-            commit_result = git_utils.commit_changes(
+            commit_result = git_utils.commit_changes(
+                self.args.source_dir, f"{self.args.query}\nauto_coder_{file_name}_{md5}"
+            )
             git_utils.print_commit_info(commit_result=commit_result)
         else:
             # Print diff blocks for review
autocoder/common/command_generator.py
CHANGED
@@ -16,6 +16,7 @@ def _generate_shell_script(user_input: str) -> str:
     Python版本: {{ env_info.python_version }}
     终端类型: {{ env_info.shell_type }}
     终端编码: {{ env_info.shell_encoding }}
+    当前用户: {{ current_user }}

     {%- if shell_type %}
     脚本类型:{{ shell_type }}
@@ -51,7 +52,8 @@ def _generate_shell_script(user_input: str) -> str:
         "env_info": env_info,
         "shell_type": shell_type,
         "shell_encoding": shells.get_terminal_encoding(),
-        "os_distribution": shells.get_os_distribution()
+        "os_distribution": shells.get_os_distribution(),
+        "current_user": shells.get_current_username()
     }

autocoder/common/git_utils.py
CHANGED
@@ -118,7 +118,8 @@ def revert_changes(repo_path: str, message: str) -> bool:
         return False

     # 通过message定位到commit_hash
-
+    # --grep 默认只搜索第一行 -F 参数将搜索模式视为固定字符串而非正则表达式
+    commit = repo.git.log("--all", f"--grep={message}", "-F", "--format=%H", "-n", "1")
     if not commit:
         logger.warning(f"No commit found with message: {message}")
         return False
@@ -170,7 +171,8 @@ def revert_change(repo_path: str, message: str) -> bool:
     repo = get_repo(repo_path)
     if repo is None:
         return False
-
+    # --grep 默认只搜索第一行 -F 参数将搜索模式视为固定字符串而非正则表达式
+    commit = repo.git.log("--all", f"--grep={message}", "-F", "--format=%H", "-n", "1")
     if commit:
         repo.git.revert(commit, no_edit=True)
         logger.info(f"Reverted changes with commit message: {message}")
@@ -618,7 +620,9 @@ def generate_commit_message(changes_report: str) -> str:
 def get_commit_by_message(repo_path: str, message: str):
     repo = get_repo(repo_path)
     try:
-        commit_hash = repo.git.log(
+        commit_hash = repo.git.log(
+            "--all", f"--grep={message}", "-F", "--format=%H", "-n", "1"
+        )
         if not commit_hash:
             return None
         return repo.commit(commit_hash.strip())
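Both revert paths and get_commit_by_message now locate the target commit through the same GitPython call, which shells out to roughly `git log --all --grep=<message> -F --format=%H -n 1`. A hedged sketch of the lookup in isolation:

from git import Repo

def find_commit_by_message(repo_path: str, message: str):
    # -F treats the message as a fixed string rather than a regex;
    # --format=%H with -n 1 yields a single commit hash (or an empty string).
    repo = Repo(repo_path)
    commit_hash = repo.git.log(
        "--all", f"--grep={message}", "-F", "--format=%H", "-n", "1"
    )
    return repo.commit(commit_hash.strip()) if commit_hash else None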
autocoder/common/shells.py
CHANGED
@@ -10,6 +10,7 @@ from rich.console import Console
 from rich.panel import Panel
 from rich.text import Text
 from rich.live import Live
+import getpass

 from autocoder.common.result_manager import ResultManager

@@ -547,4 +548,25 @@ set PYTHONIOENCODING=utf-8
     try:
         os.unlink(temp_file.name)
     except Exception:
-        pass
+        pass
+
+def get_current_username():
+    """
+    Get the current username across different operating systems.
+
+    Returns:
+        str: The current username.
+    """
+    try:
+        # getpass.getuser() works on Windows, macOS, and Linux
+        username = getpass.getuser()
+        return username
+    except Exception:
+        # Fallback methods if getpass.getuser() fails
+        try:
+            if platform.system() == 'Windows':
+                return os.environ.get('USERNAME', '')
+            else:  # macOS/Linux
+                return os.environ.get('USER', '')
+        except Exception:
+            return ''
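get_current_username feeds the new {{ current_user }} variable in the auto_command and command_generator prompts. A standalone sketch of the same strategy, getpass first with environment variables as a fallback:

import getpass
import os
import platform

def current_username() -> str:
    try:
        return getpass.getuser()
    except Exception:
        var = "USERNAME" if platform.system() == "Windows" else "USER"
        return os.environ.get(var, "")

print(current_username())  # e.g. 'alice'; the value depends on the machine running it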
autocoder/rag/cache/base_cache.py
CHANGED
@@ -2,11 +2,26 @@ from pydantic import BaseModel
 from typing import List, Tuple,Dict,Optional,Any
 from abc import ABC, abstractmethod

+# New model class for file information
+class FileInfo(BaseModel):
+    file_path: str
+    relative_path: str
+    modify_time: float
+    file_md5: str
+
+# New model class for cache items
+class CacheItem(BaseModel):
+    file_path: str
+    relative_path: str
+    content: List[Dict[str, Any]]  # Serialized SourceCode objects
+    modify_time: float
+    md5: str
+
 class DeleteEvent(BaseModel):
     file_paths: List[str]

 class AddOrUpdateEvent(BaseModel):
-    file_infos: List[
+    file_infos: List[FileInfo]

 class BaseCacheManager(ABC):
     @abstractmethod
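FileInfo and CacheItem replace the tuples and plain dicts the cache layer used before; byzer_storage_cache.py serializes them with model_dump() and restores them with model_validate(). A minimal round-trip sketch, where the field values and the keys inside content are illustrative only:

import json
from typing import Any, Dict, List
from pydantic import BaseModel

class CacheItem(BaseModel):
    file_path: str
    relative_path: str
    content: List[Dict[str, Any]]  # serialized SourceCode objects
    modify_time: float
    md5: str

item = CacheItem(
    file_path="/repo/docs/guide.md",
    relative_path="docs/guide.md",
    content=[{"module_name": "docs/guide.md", "tokens": 42}],
    modify_time=1700000000.0,
    md5="d41d8cd98f00b204e9800998ecf8427e",
)

line = json.dumps(item.model_dump(), ensure_ascii=False)   # one JSONL line per file
restored = CacheItem.model_validate(json.loads(line))      # typed object on reload
assert restored.md5 == item.md5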
autocoder/rag/cache/byzer_storage_cache.py
CHANGED
@@ -2,6 +2,8 @@ from autocoder.rag.cache.base_cache import (
     BaseCacheManager,
     DeleteEvent,
     AddOrUpdateEvent,
+    FileInfo,
+    CacheItem
 )
 from typing import Generator, List, Dict, Any, Optional, Tuple
 from autocoder.common import SourceCode
@@ -25,12 +27,15 @@ from autocoder.rag.variable_holder import VariableHolder
 import platform
 import hashlib
 from typing import Union
+from pydantic import BaseModel

 if platform.system() != "Windows":
     import fcntl
 else:
     fcntl = None

+
+
 def generate_file_md5(file_path: str) -> str:
     md5_hash = hashlib.md5()
     with open(file_path, "rb") as f:
@@ -62,7 +67,8 @@ class ByzerStorageCache(BaseCacheManager):
         self.path = path
         self.ignore_spec = ignore_spec
         self.required_exts = required_exts
-        self.
+        self.rag_build_name = extra_params.rag_build_name
+        self.storage = ByzerStorage("byzerai_store", "rag", self.rag_build_name)
         self.queue = []
         self.chunk_size = 1000
         self._init_schema()
@@ -71,11 +77,12 @@ class ByzerStorageCache(BaseCacheManager):
             raise ValueError("extra_params is required for ByzerStorageCache")

         self.max_output_tokens = extra_params.hybrid_index_max_output_tokens
+

         # 设置缓存文件路径
         self.cache_dir = os.path.join(self.path, ".cache")
         self.cache_file = os.path.join(self.cache_dir, "byzer_storage_speedup.jsonl")
-        self.cache = {}
+        self.cache: Dict[str, CacheItem] = {}

         self.lock = threading.Lock()
         self.stop_event = threading.Event()
@@ -122,18 +129,20 @@ class ByzerStorageCache(BaseCacheManager):
             .execute()
         )

-    def _load_cache(self) ->
+    def _load_cache(self) -> Dict[str, CacheItem]:
         """Load cache from file"""
         if os.path.exists(self.cache_file):
             try:
-                with open(self.cache_file, "r",encoding="utf-8") as f:
+                with open(self.cache_file, "r", encoding="utf-8") as f:
                     lines = f.readlines()
                     cache = {}
                     for line in lines:
                         try:
                             data = json.loads(line.strip())
                             if isinstance(data, dict) and "file_path" in data:
-
+                                # 转换为 CacheItem 对象
+                                cache_item = CacheItem.model_validate(data)
+                                cache[data["file_path"]] = cache_item
                         except json.JSONDecodeError:
                             continue
                 return cache
@@ -147,38 +156,42 @@ class ByzerStorageCache(BaseCacheManager):

         if not fcntl:
             try:
-                with open(cache_file, "w",encoding="utf-8") as f:
-                    for
-
+                with open(cache_file, "w", encoding="utf-8") as f:
+                    for cache_item in self.cache.values():
+                        # 确保序列化 Pydantic 模型
+                        json.dump(cache_item.model_dump(), f, ensure_ascii=False)
                         f.write("\n")
             except IOError as e:
                 logger.error(f"Error writing cache file: {str(e)}")
         else:
             lock_file = cache_file + ".lock"
-            with open(lock_file, "w",encoding="utf-8") as lockf:
+            with open(lock_file, "w", encoding="utf-8") as lockf:
                 try:
                     # 获取文件锁
                     fcntl.flock(lockf, fcntl.LOCK_EX | fcntl.LOCK_NB)
                     # 写入缓存文件
-                    with open(cache_file, "w",encoding="utf-8") as f:
-                        for
-
+                    with open(cache_file, "w", encoding="utf-8") as f:
+                        for cache_item in self.cache.values():
+                            # 确保序列化 Pydantic 模型
+                            json.dump(cache_item.model_dump(), f, ensure_ascii=False)
                             f.write("\n")

                 finally:
                     # 释放文件锁
                     fcntl.flock(lockf, fcntl.LOCK_UN)

+    def fileinfo_to_tuple(self, file_info: FileInfo) -> Tuple[str, str, float, str]:
+        return (file_info.file_path, file_info.relative_path, file_info.modify_time, file_info.file_md5)
+
     def build_cache(self):
         """Build the cache by reading files and storing in Byzer Storage"""
         logger.info(f"Building cache for path: {self.path}")

         files_to_process = []
-        for file_info in self.get_all_files():
-            file_path, _, modify_time, file_md5 = file_info
+        for file_info in self.get_all_files():
             if (
-                file_path not in self.cache
-                or self.cache[file_path]
+                file_info.file_path not in self.cache
+                or self.cache[file_info.file_path].md5 != file_info.file_md5
             ):
                 files_to_process.append(file_info)
@@ -192,19 +205,21 @@ class ByzerStorageCache(BaseCacheManager):
             initializer=initialize_tokenizer,
             initargs=(VariableHolder.TOKENIZER_PATH,),
         ) as pool:
-
+            target_files_to_process = []
+            for file_info in files_to_process:
+                target_files_to_process.append(self.fileinfo_to_tuple(file_info))
+            results = pool.map(process_file_in_multi_process, target_files_to_process)

         items = []
         for file_info, result in zip(files_to_process, results):
-            file_path, relative_path, modify_time, file_md5 = file_info
             content: List[SourceCode] = result
-            self.cache[file_path] =
-
-
-
-
-
+            self.cache[file_info.file_path] = CacheItem(
+                file_path=file_info.file_path,
+                relative_path=file_info.relative_path,
+                content=[c.model_dump() for c in content],
+                modify_time=file_info.modify_time,
+                md5=file_info.file_md5,
+            )

             for doc in content:
                 logger.info(f"Processing file: {doc.module_name}")
@@ -213,11 +228,11 @@ class ByzerStorageCache(BaseCacheManager):
                 for chunk_idx, chunk in enumerate(chunks):
                     chunk_item = {
                         "_id": f"{doc.module_name}_{chunk_idx}",
-                        "file_path": file_path,
+                        "file_path": file_info.file_path,
                         "content": chunk,
                         "raw_content": chunk,
                         "vector": chunk,
-                        "mtime": modify_time,
+                        "mtime": file_info.modify_time,
                     }
                     items.append(chunk_item)
@@ -260,9 +275,9 @@ class ByzerStorageCache(BaseCacheManager):

         self.storage.commit()

-    def update_storage(self,
+    def update_storage(self, file_info: FileInfo, is_delete: bool):
         query = self.storage.query_builder()
-        query.and_filter().add_condition("file_path", file_path).build()
+        query.and_filter().add_condition("file_path", file_info.file_path).build()
         results = query.execute()
         if results:
             for result in results:
@@ -272,9 +287,9 @@ class ByzerStorageCache(BaseCacheManager):
         if not is_delete:
             content = [
                 SourceCode.model_validate(doc)
-                for doc in self.cache[file_path]
+                for doc in self.cache[file_info.file_path].content
             ]
-            modify_time = self.cache[file_path]
+            modify_time = self.cache[file_info.file_path].modify_time
             for doc in content:
                 logger.info(f"Processing file: {doc.module_name}")
                 doc.module_name
@@ -282,7 +297,7 @@ class ByzerStorageCache(BaseCacheManager):
             for chunk_idx, chunk in enumerate(chunks):
                 chunk_item = {
                     "_id": f"{doc.module_name}_{chunk_idx}",
-                    "file_path": file_path,
+                    "file_path": file_info.file_path,
                     "content": chunk,
                     "raw_content": chunk,
                     "vector": chunk,
@@ -302,26 +317,34 @@ class ByzerStorageCache(BaseCacheManager):
             for item in file_list.file_paths:
                 logger.info(f"{item} is detected to be removed")
                 del self.cache[item]
-
+                # 创建一个临时的 FileInfo 对象
+                file_info = FileInfo(file_path=item, relative_path="", modify_time=0, file_md5="")
+                self.update_storage(file_info, is_delete=True)

         elif isinstance(file_list, AddOrUpdateEvent):
             for file_info in file_list.file_infos:
-                logger.info(f"{file_info
-
-                self.
-                self.
+                logger.info(f"{file_info.file_path} is detected to be updated")
+                # 处理文件并创建 CacheItem
+                content = process_file_local(self.fileinfo_to_tuple(file_info))
+                self.cache[file_info.file_path] = CacheItem(
+                    file_path=file_info.file_path,
+                    relative_path=file_info.relative_path,
+                    content=[c.model_dump() for c in content],
+                    modify_time=file_info.modify_time,
+                    md5=file_info.file_md5,
+                )
+                self.update_storage(file_info, is_delete=False)
         self.write_cache()

     def trigger_update(self):
         logger.info("检查文件是否有更新.....")
         files_to_process = []
         current_files = set()
-        for file_info in self.get_all_files():
-            file_path
-            current_files.add(file_path)
+        for file_info in self.get_all_files():
+            current_files.add(file_info.file_path)
             if (
-                file_path not in self.cache
-                or self.cache[file_path]
+                file_info.file_path not in self.cache
+                or self.cache[file_info.file_path].md5 != file_info.file_md5
             ):
                 files_to_process.append(file_info)
@@ -341,7 +364,7 @@ class ByzerStorageCache(BaseCacheManager):
         self.trigger_update()

         if options is None or "query" not in options:
-            return self.cache
+            return {file_path: self.cache[file_path].model_dump() for file_path in self.cache}

         query = options.get("query", "")
         total_tokens = 0
@@ -361,6 +384,7 @@ class ByzerStorageCache(BaseCacheManager):
         results = query_builder.execute()

         # Group results by file_path and reconstruct documents while preserving order
+        # 这里还可以有排序优化,综合考虑一篇内容出现的次数以及排序位置
         file_paths = []
         seen = set()
         for result in results:
@@ -374,17 +398,17 @@ class ByzerStorageCache(BaseCacheManager):
         for file_path in file_paths:
             if file_path in self.cache:
                 cached_data = self.cache[file_path]
-                for doc in cached_data
+                for doc in cached_data.content:
                     if total_tokens + doc["tokens"] > self.max_output_tokens:
                         return result
                     total_tokens += doc["tokens"]
-                    result[file_path] = cached_data
+                    result[file_path] = cached_data.model_dump()

         return result


-    def get_all_files(self) -> List[
+    def get_all_files(self) -> List[FileInfo]:
         all_files = []
         for root, dirs, files in os.walk(self.path,followlinks=True):
             dirs[:] = [d for d in dirs if not d.startswith(".") and d not in default_ignore_dirs]
@@ -412,6 +436,9 @@ class ByzerStorageCache(BaseCacheManager):
                 relative_path = os.path.relpath(file_path, self.path)
                 modify_time = os.path.getmtime(file_path)
                 file_md5 = generate_file_md5(file_path)
-                all_files.append((file_path,
+                all_files.append(FileInfo(file_path=file_path,
+                                          relative_path=relative_path,
+                                          modify_time=modify_time,
+                                          file_md5=file_md5))

         return all_files
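With the typed cache in place, both build_cache and trigger_update decide whether a file needs reprocessing by comparing the stored md5 against the freshly computed one. The check, extracted as a sketch that assumes the FileInfo and CacheItem models from base_cache.py:

def needs_processing(cache, file_info) -> bool:
    # cache: Dict[str, CacheItem]; file_info: FileInfo
    return (
        file_info.file_path not in cache
        or cache[file_info.file_path].md5 != file_info.file_md5
    )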
autocoder/rag/long_context_rag.py
CHANGED
@@ -71,15 +71,21 @@ class LongContextRAG:
         tokenizer_path: Optional[str] = None,
     ) -> None:
         self.llm = llm
-        self.
-
-
-
-
-        )
-
-
+        self.recall_llm = self.llm
+        self.chunk_llm = self.llm
+        self.qa_llm = self.llm
+
+        if self.llm.get_sub_client("qa_model"):
+            self.qa_llm = self.llm.get_sub_client("qa_model")
+
+        if self.llm.get_sub_client("recall_model"):
+            self.recall_llm = self.llm.get_sub_client("recall_model")

+        if self.llm.get_sub_client("chunk_model"):
+            self.chunk_llm = self.llm.get_sub_client("chunk_model")
+
+        self.args = args
+
         self.path = path
         self.relevant_score = self.args.rag_doc_filter_relevance or 5
@@ -162,7 +168,7 @@ class LongContextRAG:
         )

         self.doc_filter = DocFilter(
-            self.
+            self.llm, self.args, on_ray=self.on_ray, path=self.path
         )

         doc_num = 0
@@ -459,22 +465,23 @@ class LongContextRAG:

         logger.info(f"Query: {query} only_contexts: {only_contexts}")
         start_time = time.time()
+

         rag_stat = RAGStat(
             recall_stat=RecallStat(
                 total_input_tokens=0,
                 total_generated_tokens=0,
-                model_name=self.
+                model_name=self.recall_llm.default_model_name,
             ),
             chunk_stat=ChunkStat(
                 total_input_tokens=0,
                 total_generated_tokens=0,
-                model_name=self.
+                model_name=self.chunk_llm.default_model_name,
             ),
             answer_stat=AnswerStat(
                 total_input_tokens=0,
                 total_generated_tokens=0,
-                model_name=self.
+                model_name=self.qa_llm.default_model_name,
             ),
         )
@@ -624,7 +631,7 @@ class LongContextRAG:

         # 记录令牌统计
         request_tokens = sum([doc.tokens for doc in relevant_docs])
-        target_model =
+        target_model = target_llm.default_model_name
         logger.info(
             f"=== LLM Request ===\n"
             f" * Target model: {target_model}\n"
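LongContextRAG now keeps separate recall, chunk, and QA clients, each falling back to the main llm when no dedicated sub client is configured. The selection logic as a hedged sketch, not the package's exact code:

def resolve_stage_llms(llm):
    # Mirrors the qa_model / recall_model / chunk_model handling in __init__:
    # use the sub client when one is registered, otherwise reuse the main client.
    return {
        "recall": llm.get_sub_client("recall_model") or llm,
        "chunk": llm.get_sub_client("chunk_model") or llm,
        "qa": llm.get_sub_client("qa_model") or llm,
    }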
autocoder/rag/utils.py
CHANGED
autocoder/version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.1.
+__version__ = "0.1.276"
{auto_coder-0.1.274.dist-info → auto_coder-0.1.276.dist-info}/LICENSE
File without changes
{auto_coder-0.1.274.dist-info → auto_coder-0.1.276.dist-info}/WHEEL
File without changes
{auto_coder-0.1.274.dist-info → auto_coder-0.1.276.dist-info}/entry_points.txt
File without changes
{auto_coder-0.1.274.dist-info → auto_coder-0.1.276.dist-info}/top_level.txt
File without changes