auto-coder 0.1.375__py3-none-any.whl → 0.1.377__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- {auto_coder-0.1.375.dist-info → auto_coder-0.1.377.dist-info}/METADATA +1 -1
- {auto_coder-0.1.375.dist-info → auto_coder-0.1.377.dist-info}/RECORD +18 -52
- autocoder/agent/base_agentic/base_agent.py +9 -8
- autocoder/auto_coder_rag.py +12 -0
- autocoder/models.py +2 -2
- autocoder/rag/agentic_rag.py +7 -20
- autocoder/rag/cache/local_duckdb_storage_cache.py +63 -33
- autocoder/rag/conversation_to_queries.py +37 -5
- autocoder/rag/long_context_rag.py +161 -41
- autocoder/rag/tools/recall_tool.py +2 -1
- autocoder/rag/tools/search_tool.py +2 -1
- autocoder/rag/types.py +36 -0
- autocoder/utils/_markitdown.py +59 -13
- autocoder/version.py +1 -1
- autocoder/agent/agentic_edit.py +0 -833
- autocoder/agent/agentic_edit_tools/__init__.py +0 -28
- autocoder/agent/agentic_edit_tools/ask_followup_question_tool_resolver.py +0 -32
- autocoder/agent/agentic_edit_tools/attempt_completion_tool_resolver.py +0 -29
- autocoder/agent/agentic_edit_tools/base_tool_resolver.py +0 -29
- autocoder/agent/agentic_edit_tools/execute_command_tool_resolver.py +0 -84
- autocoder/agent/agentic_edit_tools/list_code_definition_names_tool_resolver.py +0 -75
- autocoder/agent/agentic_edit_tools/list_files_tool_resolver.py +0 -62
- autocoder/agent/agentic_edit_tools/plan_mode_respond_tool_resolver.py +0 -30
- autocoder/agent/agentic_edit_tools/read_file_tool_resolver.py +0 -36
- autocoder/agent/agentic_edit_tools/replace_in_file_tool_resolver.py +0 -95
- autocoder/agent/agentic_edit_tools/search_files_tool_resolver.py +0 -70
- autocoder/agent/agentic_edit_tools/use_mcp_tool_resolver.py +0 -55
- autocoder/agent/agentic_edit_tools/write_to_file_tool_resolver.py +0 -98
- autocoder/agent/agentic_edit_types.py +0 -124
- autocoder/auto_coder_lang.py +0 -60
- autocoder/auto_coder_rag_client_mcp.py +0 -170
- autocoder/auto_coder_rag_mcp.py +0 -193
- autocoder/common/llm_rerank.py +0 -84
- autocoder/common/model_speed_test.py +0 -392
- autocoder/common/v2/agent/agentic_edit_conversation.py +0 -188
- autocoder/common/v2/agent/ignore_utils.py +0 -50
- autocoder/dispacher/actions/plugins/action_translate.py +0 -214
- autocoder/ignorefiles/__init__.py +0 -4
- autocoder/ignorefiles/ignore_file_utils.py +0 -63
- autocoder/ignorefiles/test_ignore_file_utils.py +0 -91
- autocoder/linters/code_linter.py +0 -588
- autocoder/rag/loaders/test_image_loader.py +0 -209
- autocoder/rag/raw_rag.py +0 -96
- autocoder/rag/simple_directory_reader.py +0 -646
- autocoder/rag/simple_rag.py +0 -404
- autocoder/regex_project/__init__.py +0 -162
- autocoder/utils/coder.py +0 -125
- autocoder/utils/tests.py +0 -37
- {auto_coder-0.1.375.dist-info → auto_coder-0.1.377.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.375.dist-info → auto_coder-0.1.377.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.375.dist-info → auto_coder-0.1.377.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.375.dist-info → auto_coder-0.1.377.dist-info}/top_level.txt +0 -0
|
@@ -1,9 +1,6 @@
|
|
|
1
1
|
autocoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
2
|
autocoder/auto_coder.py,sha256=7602L3tG0JErNxh8vkLAmGUgv2c-DGPzPCkmWIQt9bs,69757
|
|
3
|
-
autocoder/
|
|
4
|
-
autocoder/auto_coder_rag.py,sha256=MqIYTuBtrosl7u-jzpyVg7SGUI7pOOV2AonPHLD-XLY,40530
|
|
5
|
-
autocoder/auto_coder_rag_client_mcp.py,sha256=QRxUbjc6A8UmDMQ8lXgZkjgqtq3lgKYeatJbDY6rSo0,6270
|
|
6
|
-
autocoder/auto_coder_rag_mcp.py,sha256=-RrjNwFaS2e5v8XDIrKR-zlUNUE8UBaeOtojffBrvJo,8521
|
|
3
|
+
autocoder/auto_coder_rag.py,sha256=tRAKfo3jIhcaQKN_3g7DZRKtDJSZXJxMRdT6Zz8W9nw,41173
|
|
7
4
|
autocoder/auto_coder_runner.py,sha256=VktQIEWjVMmraVjD7W73eZmYtdfm9Ma2w_Ib-cWZYhM,112263
|
|
8
5
|
autocoder/auto_coder_server.py,sha256=bLORGEclcVdbBVfM140JCI8WtdrU0jbgqdJIVVupiEU,20578
|
|
9
6
|
autocoder/benchmark.py,sha256=Ypomkdzd1T3GE6dRICY3Hj547dZ6_inqJbBJIp5QMco,4423
|
|
@@ -12,12 +9,10 @@ autocoder/chat_auto_coder_lang.py,sha256=ylLr1GskchU6kIUJY2TiznrBg-ckc1o-8fDsKZZ
|
|
|
12
9
|
autocoder/command_args.py,sha256=HxflngkYtTrV17Vfgk6lyUyiG68jP2ftSc7FYr9AXwY,30585
|
|
13
10
|
autocoder/command_parser.py,sha256=fx1g9E6GaM273lGTcJqaFQ-hoksS_Ik2glBMnVltPCE,10013
|
|
14
11
|
autocoder/lang.py,sha256=PFtATuOhHRnfpqHQkXr6p4C893JvpsgwTMif3l-GEi0,14321
|
|
15
|
-
autocoder/models.py,sha256=
|
|
12
|
+
autocoder/models.py,sha256=pD5u6gcMKRwWaLxeVin18g25k-ERyeHOFsRpOgO_Ae0,13788
|
|
16
13
|
autocoder/run_context.py,sha256=IUfSO6_gp2Wt1blFWAmOpN0b0nDrTTk4LmtCYUBIoro,1643
|
|
17
|
-
autocoder/version.py,sha256=
|
|
14
|
+
autocoder/version.py,sha256=CVgMQwI3mpcERFwTAKlHgwo2B5O4X7-d5vYnrO6uBkE,25
|
|
18
15
|
autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
-
autocoder/agent/agentic_edit.py,sha256=XsfePZ-t6M-uBSdG1VLZXk1goqXk2HPeJ_A8IYyBuWQ,58896
|
|
20
|
-
autocoder/agent/agentic_edit_types.py,sha256=oFcDd_cxJ2yH9Ed1uTpD3BipudgoIEWDMPb5pAkq4gI,3288
|
|
21
16
|
autocoder/agent/agentic_filter.py,sha256=zlInIRhawKIYTJjCiJBWqPCOV5UtMbh5VnvszfTy2vo,39824
|
|
22
17
|
autocoder/agent/auto_demand_organizer.py,sha256=URAq0gSEiHeV_W4zwhOI_83kHz0Ryfj1gcfh5jwCv_w,6501
|
|
23
18
|
autocoder/agent/auto_filegroup.py,sha256=pBsAkBcpFTff-9L5OwI8xhf2xPKpl-aZwz-skF2B6dc,6296
|
|
@@ -30,24 +25,11 @@ autocoder/agent/coder.py,sha256=x6bdJwDuETGg9ebQnYlUWCxCtQcDGg73LtI6McpWslQ,7203
|
|
|
30
25
|
autocoder/agent/designer.py,sha256=EpRbzO58Xym3GrnppIT1Z8ZFAlnNfgzHbIzZ3PX-Yv8,27037
|
|
31
26
|
autocoder/agent/planner.py,sha256=2OgJsPVGmp_koEZsdcp2pvtdDzegiMAwxraPc_5GYvo,9215
|
|
32
27
|
autocoder/agent/project_reader.py,sha256=WVl-xvrzseWmv-YBg-bDbdy_OtLdqz0xdadkHc70Lf8,17747
|
|
33
|
-
autocoder/agent/agentic_edit_tools/__init__.py,sha256=wGICCc1dYh07osB21j62zOQ9Ws0PyyOQ12UYRHmHrtI,1229
|
|
34
|
-
autocoder/agent/agentic_edit_tools/ask_followup_question_tool_resolver.py,sha256=8yQq9ypKpq8eU-P-8-p8Pia_9mLRdrqj7s9joRjGDog,1468
|
|
35
|
-
autocoder/agent/agentic_edit_tools/attempt_completion_tool_resolver.py,sha256=Y4E6KTnalEABKmSwWTA_isRITVlBz1LvDHBAOh-rkWM,1297
|
|
36
|
-
autocoder/agent/agentic_edit_tools/base_tool_resolver.py,sha256=BUvm1s0MjYAPOAapEmBHtS8SNH3HLbxf2V7KOmsszAw,909
|
|
37
|
-
autocoder/agent/agentic_edit_tools/execute_command_tool_resolver.py,sha256=XX0EawKIv8uMf9M8U84pmmOHxfh_zBfihqueJTw1GNQ,3886
|
|
38
|
-
autocoder/agent/agentic_edit_tools/list_code_definition_names_tool_resolver.py,sha256=5m9uYX2GEVzD5_2xbx9IjnykILjIemGr8SjwOArDZBo,3635
|
|
39
|
-
autocoder/agent/agentic_edit_tools/list_files_tool_resolver.py,sha256=jJUhqCqUVeaQMCOu9cNZQdJmk7NaYrORfq_NnxbtmzY,3529
|
|
40
|
-
autocoder/agent/agentic_edit_tools/plan_mode_respond_tool_resolver.py,sha256=CPQgNwx0ztBklWAXYT8rX6NoZOfega6oFL3YGP2Oh1s,1279
|
|
41
|
-
autocoder/agent/agentic_edit_tools/read_file_tool_resolver.py,sha256=YFAcudkhotv8vkiRukuOlKLTuqcas97z976dZmuzXB8,1895
|
|
42
|
-
autocoder/agent/agentic_edit_tools/replace_in_file_tool_resolver.py,sha256=A2ODUC5YQ2K3ze-Mw5QqriWYlEe8Z3pXW8nJacizpNk,5095
|
|
43
|
-
autocoder/agent/agentic_edit_tools/search_files_tool_resolver.py,sha256=Xx0nqrgBUP7MZuigdy_hjO89v1CgQEDE75O9la3VJbE,3754
|
|
44
|
-
autocoder/agent/agentic_edit_tools/use_mcp_tool_resolver.py,sha256=5WWH8-lEZO29dLOby9GNjkr-JH5rW8UT-7JZ1LuUtM8,2531
|
|
45
|
-
autocoder/agent/agentic_edit_tools/write_to_file_tool_resolver.py,sha256=HPX1g7Nh7H8I3P2MNmIK4HXLh8IZYaedQgRZAeGxNdc,5135
|
|
46
28
|
autocoder/agent/base_agentic/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
29
|
autocoder/agent/base_agentic/agent_hub.py,sha256=3Al9rCKu-SRgAs1kvnMe4VIIErTIw85QTkhfFu5omO8,5605
|
|
48
30
|
autocoder/agent/base_agentic/agentic_lang.py,sha256=UCq1NY9USaYJakTWc-3cv_MeHxAb6se1PI4lsSwGrPM,3657
|
|
49
31
|
autocoder/agent/base_agentic/agentic_tool_display.py,sha256=UnAq8ovvpu88KLk19Ff0TW-Dq-k7YiRwZiIJgcYPwiY,6989
|
|
50
|
-
autocoder/agent/base_agentic/base_agent.py,sha256=
|
|
32
|
+
autocoder/agent/base_agentic/base_agent.py,sha256=fEmyoN1CGtioT1BlgrHWO4F4UyAnr_4IUqO7cEvijvc,93163
|
|
51
33
|
autocoder/agent/base_agentic/default_tools.py,sha256=gCzw6pJA95ERXWwSlAm1SVxmzAvJmi-O6nrmLcNStCc,34200
|
|
52
34
|
autocoder/agent/base_agentic/test_base_agent.py,sha256=jok9f-DoEagzZRWjk-Zpy3gKw2ztZrsNzEc0XlvE7HU,2804
|
|
53
35
|
autocoder/agent/base_agentic/tool_registry.py,sha256=YFnUXJ78y7g3pm3yGgrhZ-0mx-C1ctdcA0r_ljGiE6o,14292
|
|
@@ -115,14 +97,12 @@ autocoder/common/global_cancel.py,sha256=EYMIzdIJHQjoYP4grxhBxSIT3tCJOy3ESULNd-c
|
|
|
115
97
|
autocoder/common/image_to_page.py,sha256=yWiTJQ49Lm3j0FngiJhQ9u7qayqE_bOGb8Rk0TmSWx0,14123
|
|
116
98
|
autocoder/common/index_import_export.py,sha256=h758AYY1df6JMTKUXYmMkSgxItfymDt82XT7O-ygEuw,4565
|
|
117
99
|
autocoder/common/interpreter.py,sha256=62-dIakOunYB4yjmX8SHC0Gdy2h8NtxdgbpdqRZJ5vk,2833
|
|
118
|
-
autocoder/common/llm_rerank.py,sha256=FbvtCzaR661Mt2wn0qsuiEL1Y3puD6jeIJS4zg_e7Bs,3260
|
|
119
100
|
autocoder/common/mcp_hub.py,sha256=grf51bZbZDXQIqlruIxXZjfat8MoVwK7NvHTHMaxKrg,23614
|
|
120
101
|
autocoder/common/mcp_server.py,sha256=Aj6snmB4XXEcLpcm7SC-KBbNLOlEmiNW6hFtMLltpt8,17624
|
|
121
102
|
autocoder/common/mcp_server_install.py,sha256=vQOWWZsl6MZ2qz3b7Y2zctKOEGO69Ph2Nrof4p_1SOg,11599
|
|
122
103
|
autocoder/common/mcp_server_types.py,sha256=ijGnMID7Egq3oOn2t7_BJj7JUisDwhUyClZCUsEMsdY,4393
|
|
123
104
|
autocoder/common/mcp_tools.py,sha256=YdEhDzRnwAr2J3D-23ExIQFWbrNO-EUpIxg179qs9Sw,12666
|
|
124
105
|
autocoder/common/memory_manager.py,sha256=Xx6Yv0ULxVfcFfmD36hdHFFhxCgRAs-5fTd0fLHJrpQ,3773
|
|
125
|
-
autocoder/common/model_speed_test.py,sha256=U48xUUpOnbwUal1cdij4YAn_H2PD2pNaqrMHaYtQRfI,15200
|
|
126
106
|
autocoder/common/model_speed_tester.py,sha256=U48xUUpOnbwUal1cdij4YAn_H2PD2pNaqrMHaYtQRfI,15200
|
|
127
107
|
autocoder/common/openai_content.py,sha256=M_V_UyHrqNVWjgrYvxfAupZw2I0Nr3iilYv6SxSvfLA,8091
|
|
128
108
|
autocoder/common/printer.py,sha256=T4XTAcQp5w1ZWYx5NAUXlIGd-9500Vl0JaG1JJXMdkg,2030
|
|
@@ -193,10 +173,8 @@ autocoder/common/v2/code_manager.py,sha256=C403bS-f6urixwitlKHcml-J03hci-UyNwHJO
|
|
|
193
173
|
autocoder/common/v2/code_strict_diff_manager.py,sha256=Bys7tFAq4G03R1zUZuxrszBTvP4QB96jIw2y5BDLyRM,9424
|
|
194
174
|
autocoder/common/v2/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
195
175
|
autocoder/common/v2/agent/agentic_edit.py,sha256=GqzwD-YIuftxz6IGPonE8spxcPBCAiImHPsKuVLQIJE,116459
|
|
196
|
-
autocoder/common/v2/agent/agentic_edit_conversation.py,sha256=pFgWPWHKhZ4J9EcFmIdiGsrSolTZuYcH1qkgKdD8nwk,7726
|
|
197
176
|
autocoder/common/v2/agent/agentic_edit_types.py,sha256=nEcZc2MOZ_fQLaJX-YDha_x9Iim22ao4tykYM2iIy4k,4908
|
|
198
177
|
autocoder/common/v2/agent/agentic_tool_display.py,sha256=-a-JTQLc4q03E_rdIILKMI0B6DHN-5gcGlrqq-mBYK4,7239
|
|
199
|
-
autocoder/common/v2/agent/ignore_utils.py,sha256=gnUchRzKMLbUm_jvnKL-r-K9MWKPtt-6iiuzijY7Es0,1717
|
|
200
178
|
autocoder/common/v2/agent/agentic_edit_tools/__init__.py,sha256=RbPZZcZg_VnGssL577GxSyFrYrxQ_LopJ4G_-mY3z_Q,1337
|
|
201
179
|
autocoder/common/v2/agent/agentic_edit_tools/ask_followup_question_tool_resolver.py,sha256=-HFXo3RR6CH8xXjDaE2mYV4XasTLAmvXe6WutL7qbwA,3208
|
|
202
180
|
autocoder/common/v2/agent/agentic_edit_tools/attempt_completion_tool_resolver.py,sha256=82ZGKeRBSDKeead_XVBW4FxpiE-5dS7tBOk_3RZ6B5s,1511
|
|
@@ -236,7 +214,6 @@ autocoder/dispacher/actions/action.py,sha256=lszFrNZOmmFMJC0QaIjS-OEE2du5i6a81Nj
|
|
|
236
214
|
autocoder/dispacher/actions/copilot.py,sha256=2nQzKt8Sr40mIDOizZWyl4ekCwaHYklvgGlVfvhOlFM,13106
|
|
237
215
|
autocoder/dispacher/actions/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
238
216
|
autocoder/dispacher/actions/plugins/action_regex_project.py,sha256=mziGcOtGafeABVITyNdHzCuI_-rorUlWoo1JfsXnPuk,7863
|
|
239
|
-
autocoder/dispacher/actions/plugins/action_translate.py,sha256=GEn7dZA22jy5WyzINomjmzzB795p2Olg-CJla97lRF8,7744
|
|
240
217
|
autocoder/events/__init__.py,sha256=1x_juwr9Ows2RADDa2LyI4QlmPxOVOXZeLO1cht-slM,1443
|
|
241
218
|
autocoder/events/event_content.py,sha256=eLHf5M1BifSqhzzEBgAWKn3JD5_z_1mWeNdZ53TpMqk,12240
|
|
242
219
|
autocoder/events/event_manager.py,sha256=--V3sEdoSmYDCXqJXRMaMa1qWR9umuv9Cjd5Czjpavc,11887
|
|
@@ -246,9 +223,6 @@ autocoder/events/event_types.py,sha256=W_S6PTDIBdufcuPosgz64iITzQy79flL8s3hWB-vZ
|
|
|
246
223
|
autocoder/helper/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
247
224
|
autocoder/helper/project_creator.py,sha256=sgXrZyAaGc84aUT7K7d7N1ztT-mSNGoLnsT-uKMUvVw,21472
|
|
248
225
|
autocoder/helper/rag_doc_creator.py,sha256=A3lB_jr1KU4bxLbBTX9-nxyylwDirxSi1NXmbPTnp90,4386
|
|
249
|
-
autocoder/ignorefiles/__init__.py,sha256=P0hq7Avu1IeXBYEkPBZLsJhFzhzyktUWTqaRIXiAFLY,75
|
|
250
|
-
autocoder/ignorefiles/ignore_file_utils.py,sha256=atJ_LEhRn-3NamBFl0Y9hJPG0cEt3nL9lVGHBweEOW0,1782
|
|
251
|
-
autocoder/ignorefiles/test_ignore_file_utils.py,sha256=961_5ilCgzyo09Luj457A4694OzZDggmQEoiAkldMcU,3104
|
|
252
226
|
autocoder/index/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
253
227
|
autocoder/index/entry.py,sha256=Eb0SmwrgshQte3_IGL4GAB4_HXC2jbatrgrd2uPxQng,15306
|
|
254
228
|
autocoder/index/for_command.py,sha256=BFvljE4t6VaMBGboZAuhUCzVK0EitCy_n5D_7FEnihw,3204
|
|
@@ -260,7 +234,6 @@ autocoder/index/filter/normal_filter.py,sha256=W8UD2a8yWRx41PBx-GzlLETEkU9uhDnQc
|
|
|
260
234
|
autocoder/index/filter/quick_filter.py,sha256=ozESEgy506FQ5ecjOumyo4D_KMrterB1QLmnVtiyOiM,43264
|
|
261
235
|
autocoder/linters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
262
236
|
autocoder/linters/base_linter.py,sha256=1_0DPESnSyF3ZcQhoFkBYJylT5w-B61Rx-3A9uhuPlg,3066
|
|
263
|
-
autocoder/linters/code_linter.py,sha256=JylTj-Mj4jl9-XSH3PVlbQ4l55Y6E1FG-glv860CGSs,22462
|
|
264
237
|
autocoder/linters/linter_factory.py,sha256=BgGeXPdli7BgiN9BifWoosyn9BGeJnRwSqX0G1R8qvU,10471
|
|
265
238
|
autocoder/linters/models.py,sha256=GBdayu_p50KBxoRms4X68zrDK-OsKDEKKjo926FevwE,9838
|
|
266
239
|
autocoder/linters/normal_linter.py,sha256=ezToVW33psvBXsGhE7y1ng7ucf7yT_1YuIULns6TXYM,13011
|
|
@@ -283,22 +256,19 @@ autocoder/privacy/__init__.py,sha256=LnIVvGu_K66zCE-yhN_-dPO8R80pQyedCsXJ7wRqQaI
|
|
|
283
256
|
autocoder/privacy/model_filter.py,sha256=RyGh_uWWE6hHqvaYZGjFylDJldDLxBz5LDZP7CG3sTo,14178
|
|
284
257
|
autocoder/pyproject/__init__.py,sha256=qn0_-6O_LP-ZH91nneYrn3yaIMYCYYRD1Z3MSNhXUXI,13754
|
|
285
258
|
autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
286
|
-
autocoder/rag/agentic_rag.py,sha256=
|
|
259
|
+
autocoder/rag/agentic_rag.py,sha256=sXs1qo696zp6ZFaueYDa6AKBzy7O8x9D3wgTkC0zjr4,7851
|
|
287
260
|
autocoder/rag/api_server.py,sha256=TNN5CmR1nlMgeuZVYZ1U3a48XBp647Io9P-VvCkdI9o,13936
|
|
288
|
-
autocoder/rag/conversation_to_queries.py,sha256=
|
|
261
|
+
autocoder/rag/conversation_to_queries.py,sha256=QUeRacDZVVd5XiDvKZ1G71h2QpfmfZldc27Is6sTMdU,6508
|
|
289
262
|
autocoder/rag/doc_filter.py,sha256=UduVO2mlrngwJICrefjDJTYfdmQ4GcRXrfWDQ7xXksk,14206
|
|
290
263
|
autocoder/rag/document_retriever.py,sha256=rFwbAuHTvEFJq16HQNlmRLyJp2ddn2RNFslw_ncU7NI,8847
|
|
291
264
|
autocoder/rag/lang.py,sha256=HvcMeu6jReEJOGxyLMn4rwBoD-myFwmykS3VLceBJLs,3364
|
|
292
265
|
autocoder/rag/llm_wrapper.py,sha256=LsNv8maCnvazyXjjtkO9aN3OT7Br20V1ilHV8Lt45Os,4245
|
|
293
|
-
autocoder/rag/long_context_rag.py,sha256=
|
|
266
|
+
autocoder/rag/long_context_rag.py,sha256=s00w7Ep6nWjRS0Xy8m2qUCvAaH3CgqLLvh6N5d5ssII,50029
|
|
294
267
|
autocoder/rag/qa_conversation_strategy.py,sha256=4CiMK88apKbJ2YM4HHq1KGpr5jUkTh0_m_aCyt-JYgc,10568
|
|
295
268
|
autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
|
|
296
269
|
autocoder/rag/rag_entry.py,sha256=QOdUX_nd1Qak2NyOW0CYcLRDB26AZ6MeByHJaMMGgqs,2316
|
|
297
|
-
autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
|
|
298
270
|
autocoder/rag/relevant_utils.py,sha256=25wRiX-CrBsratASLGHsZE3ux7VjwaQoDNtl74UlV5U,1749
|
|
299
271
|
autocoder/rag/searchable.py,sha256=miO2U-3J9JDYFOEv85vs9JExDQ0goLIeI20Ob2rNqU4,2067
|
|
300
|
-
autocoder/rag/simple_directory_reader.py,sha256=LkKreCkNdEOoL4fNhc3_hDoyyWTQUte4uqextISRz4U,24485
|
|
301
|
-
autocoder/rag/simple_rag.py,sha256=I902EUqOK1WM0Y2WFd7RzDJYofElvTZNLVCBtX5A9rc,14885
|
|
302
272
|
autocoder/rag/test_doc_filter.py,sha256=eE6IiMknCHDMVbdyOBQQVTKiyAnX1tAm7qFyfxmXSN0,15301
|
|
303
273
|
autocoder/rag/test_long_context_rag.py,sha256=hn50GKhXyRrlJ1mP9RI4bnObR6pZb2yDmJRcZmVSzVU,18365
|
|
304
274
|
autocoder/rag/test_token_limiter.py,sha256=rJmLUqPalkznL7PskKabuqu5dC6Yj-kWa498TaVMjAY,13744
|
|
@@ -306,7 +276,7 @@ autocoder/rag/token_checker.py,sha256=jc76x6KWmvVxds6W8juZfQGaoErudc2HenG3sNQfSL
|
|
|
306
276
|
autocoder/rag/token_counter.py,sha256=C-Lwc4oIjJpZDEqp9WLHGOe6hb4yhrdJpMtkrtp_1qc,2125
|
|
307
277
|
autocoder/rag/token_limiter.py,sha256=3VgJF4may3ESyATmBIiOe05oc3VsidJcJTJ5EhoSvH8,18854
|
|
308
278
|
autocoder/rag/token_limiter_utils.py,sha256=FATNEXBnFJy8IK3PWNt1pspIv8wuTgy3F_ACNvqoc4I,404
|
|
309
|
-
autocoder/rag/types.py,sha256=
|
|
279
|
+
autocoder/rag/types.py,sha256=G6A3P5YN-VBUgUX2TFSWRCKRDJq42fUxVIf46DL8fNI,3280
|
|
310
280
|
autocoder/rag/utils.py,sha256=FPK3Vvk9X9tUuOu4_LctZN5WnRVuEjFiffRtE-pHn0s,6318
|
|
311
281
|
autocoder/rag/variable_holder.py,sha256=PFvBjFcR7-fNDD4Vcsc8CpH2Te057vcpwJMxtrfUgKI,75
|
|
312
282
|
autocoder/rag/cache/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -316,7 +286,7 @@ autocoder/rag/cache/cache_result_merge.py,sha256=VnTdbT2OMBmWl_83bqds97d9_M33IhP
|
|
|
316
286
|
autocoder/rag/cache/failed_files_utils.py,sha256=kITguXANLC3EEJy5JoKzNXrtwvTkmZT-ANPwcno42Ck,1183
|
|
317
287
|
autocoder/rag/cache/file_monitor_cache.py,sha256=lwNrm8epdA3ubc3X3q_BCU1zr_Ul5gEOaM5X5ICeeeQ,9580
|
|
318
288
|
autocoder/rag/cache/local_byzer_storage_cache.py,sha256=iekuUFacUZOVeIqp9Sjgm0DuAeBOmmW3eMk3HQtG0fE,31197
|
|
319
|
-
autocoder/rag/cache/local_duckdb_storage_cache.py,sha256=
|
|
289
|
+
autocoder/rag/cache/local_duckdb_storage_cache.py,sha256=8nViKW4oBtDXmoWGgi97jQ3KGWYJzdWfWzxWp3yCZzI,36753
|
|
320
290
|
autocoder/rag/cache/rag_file_meta.py,sha256=RQ3n4wfkHlB-1ljS3sFSi8ijbsUPeIqBSgjmmbRuwRI,20521
|
|
321
291
|
autocoder/rag/cache/simple_cache.py,sha256=oT5tBSsBkUQ5DNVBzz5P3QJih-wKWdPyCAjmtFgl90Q,18270
|
|
322
292
|
autocoder/rag/loaders/__init__.py,sha256=EQHEZ5Cmz-mGP2SllUTvcIbYCnF7W149dNpNItfs0yE,304
|
|
@@ -326,23 +296,20 @@ autocoder/rag/loaders/filter_utils.py,sha256=Y-m8ckhCQvwTaPtcnlY66dhaHBzNtyhXbXE
|
|
|
326
296
|
autocoder/rag/loaders/image_loader.py,sha256=953bS8u6CeZzFF8CAH4M38N9ZjwG8ghsWOGNAyBnX5I,23318
|
|
327
297
|
autocoder/rag/loaders/pdf_loader.py,sha256=S9hYCC-4XAKliKVbCrVkuNLetOvJVRtIzef_gjbNJpM,779
|
|
328
298
|
autocoder/rag/loaders/ppt_loader.py,sha256=7VEYc-bqgK8VHCoGC3DIUcqbpda-E5jQF9lYLqP256I,1681
|
|
329
|
-
autocoder/rag/loaders/test_image_loader.py,sha256=oy_j8xkFgPZ0vhz6czTVPh7IwdVWkgh5gE-UA2xCATQ,5753
|
|
330
299
|
autocoder/rag/stream_event/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
331
300
|
autocoder/rag/stream_event/event_writer.py,sha256=l7kq_LnDDE8E5dZ-73C7J2MgzSL7WrozdXk0eV-k55Q,409
|
|
332
301
|
autocoder/rag/stream_event/types.py,sha256=rtLwOE8rShmi1dJdxyBpAV5ZjLBGG9vptMiSzMxGuIA,318
|
|
333
302
|
autocoder/rag/tools/__init__.py,sha256=3U91ZI4wZh8UYYl_D11IyLxBLseemmPVfBnmh4ZzNgw,376
|
|
334
|
-
autocoder/rag/tools/recall_tool.py,sha256=
|
|
335
|
-
autocoder/rag/tools/search_tool.py,sha256=
|
|
336
|
-
autocoder/regex_project/__init__.py,sha256=EBZeCL5ORyD_9_5u_UuG4s7XtpXOu0y1sWDmxWFtufE,6781
|
|
303
|
+
autocoder/rag/tools/recall_tool.py,sha256=z-EEyfeD1JR5Mdk9Obi3FJyW-5Q7ljH97Rtd_vlk-Tk,6487
|
|
304
|
+
autocoder/rag/tools/search_tool.py,sha256=HaIA-H66oJwUisk_mpDJQw28TgVHzNxA5JOBbX1y6q8,4702
|
|
337
305
|
autocoder/regexproject/__init__.py,sha256=QMXphSxjuv_LDIx0L_0jnnvCzMfmrOl0VMTVmXabeL0,8976
|
|
338
306
|
autocoder/shadows/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
339
307
|
autocoder/shadows/shadow_manager.py,sha256=atY6d9Si4twe3pqQ56SjMje5W1VxamPSZs_WzQtAQvM,19165
|
|
340
308
|
autocoder/suffixproject/__init__.py,sha256=Rew-M9W4pgO9cvw9UCdrc6QVCPdBhVcIpPBnJxrLJ3M,10374
|
|
341
309
|
autocoder/tsproject/__init__.py,sha256=e_TWVyXQQxYKsXqdQZuFVqNCQLdtBVNJRTs0fgLXVdA,11055
|
|
342
310
|
autocoder/utils/__init__.py,sha256=W47ac6IOZhNR1rdbho9fvhHnPI_N1i4oMcZOwxLelbU,1123
|
|
343
|
-
autocoder/utils/_markitdown.py,sha256=
|
|
311
|
+
autocoder/utils/_markitdown.py,sha256=zrhWztp3cwChtOZ66aCtq7xX-w25cT_eaX5qX-7OaVo,54730
|
|
344
312
|
autocoder/utils/auto_project_type.py,sha256=9_-wE9aavjbPiNSUVKxttJAdu5i5fu-zHyPYHr5XtWk,4422
|
|
345
|
-
autocoder/utils/coder.py,sha256=rK8e0svQBe0NOP26dIGToUXgha_hUDgxlWoC_p_r7oc,5698
|
|
346
313
|
autocoder/utils/conversation_store.py,sha256=esd9zLarKYe0ZsYqjjwHc_ksmVQDDEhVt-Ejul2oyys,1178
|
|
347
314
|
autocoder/utils/llm_client_interceptors.py,sha256=FEHNXoFZlCjAHQcjPRyX8FOMjo6rPXpO2AJ2zn2KTTo,901
|
|
348
315
|
autocoder/utils/llms.py,sha256=CQRNsX8SJBpeHl_zJ1N-Nj-MYyqkFCi3zETYSurCMkU,4021
|
|
@@ -357,15 +324,14 @@ autocoder/utils/request_event_queue.py,sha256=r3lo5qGsB1dIjzVQ05dnr0z_9Z3zOkBdP1
|
|
|
357
324
|
autocoder/utils/request_queue.py,sha256=nwp6PMtgTCiuwJI24p8OLNZjUiprC-TsefQrhMI-yPE,3889
|
|
358
325
|
autocoder/utils/rest.py,sha256=hLBhr78y-WVnV0oQf9Rxc22EwqF78KINkScvYa1MuYA,6435
|
|
359
326
|
autocoder/utils/stream_thinking.py,sha256=vbDObflBFW53eWEjMTEHf3nyL167_cqpDLh9zRx7Yk8,7015
|
|
360
|
-
autocoder/utils/tests.py,sha256=BqphrwyycGAvs-5mhH8pKtMZdObwhFtJ5MC_ZAOiLq8,1340
|
|
361
327
|
autocoder/utils/thread_utils.py,sha256=1x17W-xkbUCInrdXDBStbkJbpGHLbWHhCpZLjz95Lrw,4579
|
|
362
328
|
autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
363
329
|
autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
364
330
|
autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=t902pKxQ5xM7zgIHiAOsTPLwxhE6VuvXAqPy751S7fg,14096
|
|
365
331
|
autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
366
|
-
auto_coder-0.1.
|
|
367
|
-
auto_coder-0.1.
|
|
368
|
-
auto_coder-0.1.
|
|
369
|
-
auto_coder-0.1.
|
|
370
|
-
auto_coder-0.1.
|
|
371
|
-
auto_coder-0.1.
|
|
332
|
+
auto_coder-0.1.377.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
|
|
333
|
+
auto_coder-0.1.377.dist-info/METADATA,sha256=6Hj45-oZTXQUYpO8mJZPUeKTSYje453MdHbFLQA2gtc,2775
|
|
334
|
+
auto_coder-0.1.377.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
|
|
335
|
+
auto_coder-0.1.377.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
|
|
336
|
+
auto_coder-0.1.377.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
|
|
337
|
+
auto_coder-0.1.377.dist-info/RECORD,,
|
|
@@ -511,15 +511,16 @@ class BaseAgent(ABC):
|
|
|
511
511
|
{%endif%}
|
|
512
512
|
|
|
513
513
|
# Tool Use Examples
|
|
514
|
-
{
|
|
515
|
-
{
|
|
516
|
-
{
|
|
517
|
-
{
|
|
514
|
+
{% set example_count = 0 %}
|
|
515
|
+
{% for tool_tag, example in tool_examples.items() %}
|
|
516
|
+
{% if example %}
|
|
517
|
+
{% set example_count = example_count + 1 %}
|
|
518
518
|
## Example {{ example_count }}: {{ example.title }}
|
|
519
519
|
{{ example.body }}
|
|
520
|
-
{
|
|
521
|
-
{
|
|
520
|
+
{% endif %}
|
|
521
|
+
{% endfor %}
|
|
522
522
|
|
|
523
|
+
|
|
523
524
|
# Tool Use Guidelines
|
|
524
525
|
|
|
525
526
|
1. In <thinking> tags, assess what information you already have and what information you need to proceed with the task.
|
|
@@ -532,9 +533,9 @@ class BaseAgent(ABC):
|
|
|
532
533
|
- New terminal output in reaction to the changes, which you may need to consider or act upon.
|
|
533
534
|
- Any other relevant feedback or information related to the tool use.
|
|
534
535
|
6. ALWAYS wait for user confirmation after each tool use before proceeding. Never assume the success of a tool use without explicit confirmation of the result from the user.
|
|
535
|
-
{
|
|
536
|
+
{% for tool_name, guideline in tool_guidelines.items() %}
|
|
536
537
|
{{ loop.index + 6 }}. **{{ tool_name }}**: {{ guideline }}
|
|
537
|
-
{
|
|
538
|
+
{% endfor %}
|
|
538
539
|
|
|
539
540
|
It is crucial to proceed step-by-step, waiting for the user's message after each tool use before moving forward with the task. This approach allows you to:
|
|
540
541
|
1. Confirm the success of each step before proceeding.
|
autocoder/auto_coder_rag.py
CHANGED
|
@@ -34,6 +34,7 @@ from autocoder.rag.utils import process_file_local
|
|
|
34
34
|
import pkg_resources
|
|
35
35
|
from autocoder.rag.token_counter import TokenCounter
|
|
36
36
|
from autocoder.rag.types import RAGServiceInfo
|
|
37
|
+
from autocoder.version import __version__
|
|
37
38
|
|
|
38
39
|
if platform.system() == "Windows":
|
|
39
40
|
from colorama import init
|
|
@@ -169,6 +170,17 @@ def initialize_system(args):
|
|
|
169
170
|
|
|
170
171
|
|
|
171
172
|
def main(input_args: Optional[List[str]] = None):
|
|
173
|
+
print(
|
|
174
|
+
f"""
|
|
175
|
+
\033[1;32m
|
|
176
|
+
_ _ __ __ _ _ _ _____ _____ _______ ____ _ ____
|
|
177
|
+
| | | | | \/ | | \ | | / \|_ _|_ _\ \ / / ____| | _ \ / \ / ___|
|
|
178
|
+
| | | | | |\/| |_____| \| | / _ \ | | | | \ \ / /| _| | |_) | / _ \| | _
|
|
179
|
+
| |___| |___| | | |_____| |\ |/ ___ \| | | | \ V / | |___ | _ < / ___ \ |_| |
|
|
180
|
+
|_____|_____|_| |_| |_| \_/_/ \_\_| |___| \_/ |_____| |_| \_\/_/ \_\____|
|
|
181
|
+
v{__version__}
|
|
182
|
+
\033[0m"""
|
|
183
|
+
)
|
|
172
184
|
|
|
173
185
|
try:
|
|
174
186
|
tokenizer_path = pkg_resources.resource_filename(
|
autocoder/models.py
CHANGED
|
@@ -165,8 +165,8 @@ def load_models() -> List[Dict]:
|
|
|
165
165
|
if model.get("api_key_path",""):
|
|
166
166
|
api_key_file = os.path.join(api_key_dir, model["api_key_path"])
|
|
167
167
|
if os.path.exists(api_key_file):
|
|
168
|
-
with open(api_key_file, "r",encoding="utf-8") as f:
|
|
169
|
-
model["api_key"] = f.read()
|
|
168
|
+
with open(api_key_file, "r", encoding="utf-8") as f:
|
|
169
|
+
model["api_key"] = f.read().strip()
|
|
170
170
|
return target_models
|
|
171
171
|
|
|
172
172
|
def save_models(models: List[Dict]) -> None:
|
autocoder/rag/agentic_rag.py
CHANGED
|
@@ -40,7 +40,7 @@ class RAGAgent(BaseAgent):
|
|
|
40
40
|
args: AutoCoderArgs,
|
|
41
41
|
rag: LongContextRAG,
|
|
42
42
|
conversation_history: Optional[List[Dict[str, Any]]] = None):
|
|
43
|
-
|
|
43
|
+
self.llm = llm
|
|
44
44
|
self.default_llm = self.llm
|
|
45
45
|
self.context_prune_llm = self.default_llm
|
|
46
46
|
if self.default_llm.get_sub_client("context_prune_model"):
|
|
@@ -125,37 +125,24 @@ class AgenticRAG:
|
|
|
125
125
|
"query":message["content"]
|
|
126
126
|
}
|
|
127
127
|
|
|
128
|
-
|
|
128
|
+
@byzerllm.prompt()
|
|
129
129
|
def system_prompt(self):
|
|
130
130
|
'''
|
|
131
131
|
你是一个基于知识库的智能助手,我的核心能力是通过检索增强生成(RAG)技术来回答用户问题。
|
|
132
132
|
|
|
133
133
|
你的工作流程如下:
|
|
134
|
-
1.
|
|
134
|
+
1. 当用户提出问题时,你首先理解问题的核心意图和关键信息需求
|
|
135
135
|
2. 你会从多个角度分析问题,确定最佳的检索策略和关键词,然后召回工具 recall 获取与问题最相关的详细内容,只有在特别有必要的情况下,你才回使用 read_file 来获得相关文件更详细的信息。
|
|
136
136
|
5. 如果获得的信息足够回答用户问题,你会直接生成回答。
|
|
137
137
|
6. 如果获得的信息不足以回答用户问题,你会继续使用 recall 工具,直到你确信已经获取了足够的信息来回答用户问题。
|
|
138
138
|
7. 有的问题可能需要拆解成多个问题,分别进行recall,然后最终得到的结果才是完整信息,最后才能进行回答。
|
|
139
|
-
|
|
140
|
-
此外,你回答会遵循以下要求:
|
|
141
|
-
|
|
142
|
-
1. 严格基于召回的文档内容回答
|
|
143
|
-
- 如果召回的文档提供的信息无法回答问题,请明确回复:"抱歉,文档中没有足够的信息来回答这个问题。"
|
|
144
|
-
- 不要添加、推测或扩展文档未提及的信息
|
|
145
|
-
|
|
146
|
-
2. 格式如  的 Markdown 图片处理
|
|
147
|
-
- 根据Markdown 图片前后文本内容推测改图片与问题的相关性,有相关性则在回答中输出该Markdown图片路径
|
|
148
|
-
- 根据相关图片在文档中的位置,自然融入答复内容,保持上下文连贯
|
|
149
|
-
- 完整保留原始图片路径,不省略任何部分
|
|
150
|
-
|
|
151
|
-
3. 回答格式要求
|
|
152
|
-
- 使用markdown格式提升可读性
|
|
139
|
+
8. 当你遇到图片的时候,请根据图片前后文本内容推测改图片与问题的相关性,有相关性则在回答中使用 ![]()格式输出该Markdown图片路径,否则不输出。
|
|
153
140
|
{% if local_image_host %}
|
|
154
|
-
|
|
141
|
+
9. 图片路径处理
|
|
155
142
|
- 图片地址需返回绝对路径,
|
|
156
|
-
- 对于Windows风格的路径,需要转换为Linux风格,
|
|
143
|
+
- 对于Windows风格的路径,需要转换为Linux风格, 例如: 转换为 
|
|
157
144
|
- 为请求图片资源 需增加 http://{{ local_image_host }}/static/ 作为前缀
|
|
158
|
-
|
|
145
|
+
举个例子:, 返回 
|
|
159
146
|
{% endif %}
|
|
160
147
|
'''
|
|
161
148
|
return {
|
|
@@ -10,7 +10,6 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
|
10
10
|
from typing import List, Dict, Any, Optional, Tuple, Union
|
|
11
11
|
import numpy as np
|
|
12
12
|
from loguru import logger
|
|
13
|
-
from typing import Union
|
|
14
13
|
from byzerllm import SimpleByzerLLM, ByzerLLM
|
|
15
14
|
from autocoder.utils.llms import get_llm_names
|
|
16
15
|
|
|
@@ -31,10 +30,12 @@ from autocoder.rag.cache.base_cache import (
|
|
|
31
30
|
FileInfo,
|
|
32
31
|
CacheItem,
|
|
33
32
|
)
|
|
34
|
-
from autocoder.rag.utils import
|
|
33
|
+
from autocoder.rag.utils import (
|
|
34
|
+
process_file_in_multi_process,
|
|
35
|
+
process_file_local,
|
|
36
|
+
)
|
|
35
37
|
from autocoder.rag.variable_holder import VariableHolder
|
|
36
|
-
from
|
|
37
|
-
from .failed_files_utils import save_failed_files, load_failed_files
|
|
38
|
+
from .failed_files_utils import save_failed_files
|
|
38
39
|
|
|
39
40
|
if platform.system() != "Windows":
|
|
40
41
|
import fcntl
|
|
@@ -66,7 +67,8 @@ class DuckDBLocalContext:
|
|
|
66
67
|
def __enter__(self) -> "duckdb.DuckDBPyConnection":
|
|
67
68
|
if not os.path.exists(os.path.dirname(self.database_path)):
|
|
68
69
|
raise ValueError(
|
|
69
|
-
f"Directory {os.path.dirname(self.database_path)}
|
|
70
|
+
f"Directory {os.path.dirname(self.database_path)} "
|
|
71
|
+
f"does not exist."
|
|
70
72
|
)
|
|
71
73
|
|
|
72
74
|
self._conn = duckdb.connect(self.database_path)
|
|
@@ -97,10 +99,12 @@ class LocalDuckdbStorage:
|
|
|
97
99
|
self.persist_dir = persist_dir
|
|
98
100
|
self.cache_dir = os.path.join(self.persist_dir, ".cache")
|
|
99
101
|
self.args = args
|
|
100
|
-
logger.info(
|
|
102
|
+
logger.info("正在启动 DuckDBVectorStore.")
|
|
101
103
|
|
|
102
104
|
if self.database_name != ":memory:":
|
|
103
|
-
self.database_path = os.path.join(
|
|
105
|
+
self.database_path = os.path.join(
|
|
106
|
+
self.cache_dir, self.database_name
|
|
107
|
+
)
|
|
104
108
|
|
|
105
109
|
if self.database_name == ":memory:":
|
|
106
110
|
self._conn = duckdb.connect(self.database_name)
|
|
@@ -114,7 +118,8 @@ class LocalDuckdbStorage:
|
|
|
114
118
|
self._conn = None
|
|
115
119
|
logger.info(
|
|
116
120
|
f"DuckDBVectorStore 初始化完成, 存储目录: {self.cache_dir}, "
|
|
117
|
-
f"数据库名称: {self.database_name},
|
|
121
|
+
f"数据库名称: {self.database_name}, "
|
|
122
|
+
f"数据表名称: {self.table_name}"
|
|
118
123
|
)
|
|
119
124
|
|
|
120
125
|
@classmethod
|
|
@@ -167,15 +172,17 @@ class LocalDuckdbStorage:
|
|
|
167
172
|
retry_count += 1
|
|
168
173
|
if retry_count >= max_retries:
|
|
169
174
|
logger.error(
|
|
170
|
-
f"Failed to get embedding after {max_retries}
|
|
175
|
+
f"Failed to get embedding after {max_retries} "
|
|
176
|
+
f"attempts: {str(e)}"
|
|
171
177
|
)
|
|
172
178
|
raise
|
|
173
179
|
|
|
174
180
|
# Sleep between 1-5 seconds before retrying
|
|
175
181
|
sleep_time = 1 + (retry_count * 1.5)
|
|
176
182
|
logger.warning(
|
|
177
|
-
f"Embedding API call failed (attempt {retry_count}/
|
|
178
|
-
f"Error: {str(e)}. Retrying in
|
|
183
|
+
f"Embedding API call failed (attempt {retry_count}/"
|
|
184
|
+
f"{max_retries}). Error: {str(e)}. Retrying in "
|
|
185
|
+
f"{sleep_time:.1f} seconds..."
|
|
179
186
|
)
|
|
180
187
|
time.sleep(sleep_time)
|
|
181
188
|
|
|
@@ -244,7 +251,9 @@ class LocalDuckdbStorage:
|
|
|
244
251
|
|
|
245
252
|
if not context_chunk["raw_content"]:
|
|
246
253
|
context_chunk["raw_content"] = "empty"
|
|
247
|
-
context_chunk["raw_content"] = context_chunk["raw_content"][
|
|
254
|
+
context_chunk["raw_content"] = context_chunk["raw_content"][
|
|
255
|
+
: self.args.rag_emb_text_size
|
|
256
|
+
]
|
|
248
257
|
|
|
249
258
|
return (
|
|
250
259
|
context_chunk["_id"],
|
|
@@ -343,11 +352,14 @@ class LocalDuckDBStorageCache(BaseCacheManager):
|
|
|
343
352
|
)
|
|
344
353
|
self.queue = []
|
|
345
354
|
self.chunk_size = 1000
|
|
346
|
-
self.max_output_tokens =
|
|
355
|
+
self.max_output_tokens = (
|
|
356
|
+
extra_params.hybrid_index_max_output_tokens
|
|
357
|
+
)
|
|
347
358
|
|
|
348
359
|
# 设置缓存文件路径
|
|
349
360
|
self.cache_dir = os.path.join(self.path, ".cache")
|
|
350
|
-
self.cache_file = os.path.join(self.cache_dir,
|
|
361
|
+
self.cache_file = os.path.join(self.cache_dir,
|
|
362
|
+
"duckdb_storage_speedup.jsonl")
|
|
351
363
|
self.cache: Dict[str, CacheItem] = {}
|
|
352
364
|
# 创建缓存目录
|
|
353
365
|
if not os.path.exists(self.cache_dir):
|
|
@@ -356,7 +368,9 @@ class LocalDuckDBStorageCache(BaseCacheManager):
|
|
|
356
368
|
# failed files support
|
|
357
369
|
from .failed_files_utils import load_failed_files
|
|
358
370
|
|
|
359
|
-
self.failed_files_path = os.path.join(
|
|
371
|
+
self.failed_files_path = os.path.join(
|
|
372
|
+
self.cache_dir, "failed_files.json"
|
|
373
|
+
)
|
|
360
374
|
self.failed_files = load_failed_files(self.failed_files_path)
|
|
361
375
|
|
|
362
376
|
self.lock = threading.Lock()
|
|
@@ -406,7 +420,8 @@ class LocalDuckDBStorageCache(BaseCacheManager):
|
|
|
406
420
|
continue
|
|
407
421
|
return cache
|
|
408
422
|
except Exception as e:
|
|
409
|
-
logger.
|
|
423
|
+
logger.warning(f"Error loading cache file: {str(e)}")
|
|
424
|
+
logger.exception(e)
|
|
410
425
|
return {}
|
|
411
426
|
return {}
|
|
412
427
|
|
|
@@ -421,7 +436,8 @@ class LocalDuckDBStorageCache(BaseCacheManager):
|
|
|
421
436
|
json.dump(cache_item.model_dump(), f, ensure_ascii=False)
|
|
422
437
|
f.write("\n")
|
|
423
438
|
except IOError as e:
|
|
424
|
-
logger.
|
|
439
|
+
logger.warning(f"Error writing cache file: {str(e)}")
|
|
440
|
+
logger.exception(e)
|
|
425
441
|
else:
|
|
426
442
|
lock_file = cache_file + ".lock"
|
|
427
443
|
with open(lock_file, "w", encoding="utf-8") as lockf:
|
|
@@ -510,11 +526,12 @@ class LocalDuckDBStorageCache(BaseCacheManager):
|
|
|
510
526
|
self.write_cache()
|
|
511
527
|
|
|
512
528
|
if items:
|
|
513
|
-
logger.info("[BUILD CACHE] Clearing
|
|
529
|
+
logger.info("[BUILD CACHE] Clearing DuckDB Storage cache")
|
|
514
530
|
self.storage.truncate_table()
|
|
531
|
+
logger.info(f"[BUILD CACHE] Preparing to write to DuckDB Storage.")
|
|
515
532
|
logger.info(
|
|
516
|
-
f"[BUILD CACHE]
|
|
517
|
-
f"
|
|
533
|
+
f"[BUILD CACHE] Total chunks: {len(items)}, "
|
|
534
|
+
f"Total files: {len(files_to_process)}"
|
|
518
535
|
)
|
|
519
536
|
|
|
520
537
|
# Use a fixed optimal batch size instead of dividing by worker count
|
|
@@ -526,9 +543,10 @@ class LocalDuckDBStorageCache(BaseCacheManager):
|
|
|
526
543
|
total_batches = len(item_batches)
|
|
527
544
|
completed_batches = 0
|
|
528
545
|
|
|
546
|
+
logger.info(f"[BUILD CACHE] Writing to DuckDB Storage.")
|
|
529
547
|
logger.info(
|
|
530
|
-
f"[BUILD CACHE]
|
|
531
|
-
f"
|
|
548
|
+
f"[BUILD CACHE] Batch size: {batch_size}, "
|
|
549
|
+
f"Total batches: {total_batches}"
|
|
532
550
|
)
|
|
533
551
|
start_time = time.time()
|
|
534
552
|
|
|
@@ -569,18 +587,26 @@ class LocalDuckDBStorageCache(BaseCacheManager):
|
|
|
569
587
|
or (completed_batches == total_batches)
|
|
570
588
|
or (completed_batches % max(1, total_batches // 10) == 0)
|
|
571
589
|
):
|
|
590
|
+
progress_percent = (
|
|
591
|
+
completed_batches / total_batches * 100
|
|
592
|
+
if total_batches > 0
|
|
593
|
+
else 0
|
|
594
|
+
)
|
|
572
595
|
logger.info(
|
|
573
|
-
f"[BUILD CACHE] Progress: {completed_batches}/
|
|
574
|
-
f"
|
|
575
|
-
f"
|
|
596
|
+
f"[BUILD CACHE] Progress: {completed_batches}/"
|
|
597
|
+
f"{total_batches} ({progress_percent:.1f}%). "
|
|
598
|
+
f"ETA: {remaining:.1f}s"
|
|
576
599
|
)
|
|
577
600
|
except Exception as e:
|
|
578
601
|
logger.error(f"[BUILD CACHE] Error saving batch: {str(e)}")
|
|
579
602
|
# Add more detailed error information
|
|
603
|
+
batch_len_info = (
|
|
604
|
+
len(batch) if "batch" in locals() else "unknown"
|
|
605
|
+
)
|
|
580
606
|
logger.error(
|
|
581
|
-
f"[BUILD CACHE] Error details: batch size: "
|
|
582
|
-
f"{len(batch) if 'batch' in locals() else 'unknown'}"
|
|
607
|
+
f"[BUILD CACHE] Error details: batch size: {batch_len_info}"
|
|
583
608
|
)
|
|
609
|
+
logger.exception(e)
|
|
584
610
|
|
|
585
611
|
total_time = time.time() - start_time
|
|
586
612
|
logger.info(
|
|
@@ -622,6 +648,7 @@ class LocalDuckDBStorageCache(BaseCacheManager):
|
|
|
622
648
|
time.sleep(self.extra_params.anti_quota_limit)
|
|
623
649
|
except Exception as err:
|
|
624
650
|
logger.error(f"Error in saving chunk: {str(err)}")
|
|
651
|
+
logger.exception(err)
|
|
625
652
|
|
|
626
653
|
def process_queue(self):
|
|
627
654
|
while self.queue:
|
|
@@ -671,7 +698,8 @@ class LocalDuckDBStorageCache(BaseCacheManager):
|
|
|
671
698
|
self.failed_files.add(file_info.file_path)
|
|
672
699
|
save_failed_files(self.failed_files_path, self.failed_files)
|
|
673
700
|
except Exception as e:
|
|
674
|
-
logger.error(f"Error in process_queue: {e}")
|
|
701
|
+
logger.error(f"Error in process_queue: {str(e)}")
|
|
702
|
+
logger.exception(e)
|
|
675
703
|
self.failed_files.add(file_info.file_path)
|
|
676
704
|
save_failed_files(self.failed_files_path, self.failed_files)
|
|
677
705
|
|
|
@@ -816,17 +844,18 @@ class LocalDuckDBStorageCache(BaseCacheManager):
|
|
|
816
844
|
for doc in cached_data.content:
|
|
817
845
|
if total_tokens + doc["tokens"] > self.max_output_tokens:
|
|
818
846
|
logger.info(
|
|
819
|
-
f"当前检索已超出用户设置 Hybrid Index Max Tokens:
|
|
820
|
-
f"
|
|
821
|
-
f"经过向量搜索共检索出 {len(result.keys())} 个文档,
|
|
847
|
+
f"当前检索已超出用户设置 Hybrid Index Max Tokens:"
|
|
848
|
+
f"{self.max_output_tokens},累计tokens: {total_tokens}, "
|
|
849
|
+
f"经过向量搜索共检索出 {len(result.keys())} 个文档, "
|
|
850
|
+
f"共 {len(self.cache.keys())} 个文档"
|
|
822
851
|
)
|
|
823
852
|
return result
|
|
824
853
|
total_tokens += doc["tokens"]
|
|
825
854
|
result[file_path] = cached_data.model_dump()
|
|
826
855
|
logger.info(
|
|
827
856
|
f"用户Hybrid Index Max Tokens设置为:{self.max_output_tokens},"
|
|
828
|
-
f"累计tokens: {total_tokens}, "
|
|
829
|
-
f"
|
|
857
|
+
f"累计tokens: {total_tokens}, 经过向量搜索共检索出 "
|
|
858
|
+
f"{len(result.keys())} 个文档, 共 {len(self.cache.keys())} 个文档"
|
|
830
859
|
)
|
|
831
860
|
return result
|
|
832
861
|
|
|
@@ -904,6 +933,7 @@ class LocalDuckDBStorageCache(BaseCacheManager):
|
|
|
904
933
|
query_results.append((query, query_result))
|
|
905
934
|
except Exception as e:
|
|
906
935
|
logger.error(f"处理查询 '{query}' 时出错: {str(e)}")
|
|
936
|
+
logger.exception(e)
|
|
907
937
|
|
|
908
938
|
logger.info(f"所有查询共返回 {sum(len(r) for _, r in query_results)} 条记录")
|
|
909
939
|
|