auto-coder 0.1.346__py3-none-any.whl → 0.1.347__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

The registry has flagged this release of auto-coder as potentially problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: auto-coder
-Version: 0.1.346
+Version: 0.1.347
 Summary: AutoCoder: AutoCoder
 Author: allwefantasy
 Classifier: Programming Language :: Python :: 3.10
@@ -14,7 +14,7 @@ autocoder/command_parser.py,sha256=fx1g9E6GaM273lGTcJqaFQ-hoksS_Ik2glBMnVltPCE,1
 autocoder/lang.py,sha256=PFtATuOhHRnfpqHQkXr6p4C893JvpsgwTMif3l-GEi0,14321
 autocoder/models.py,sha256=_SCar82QIeBFTZZBdM2jPS6atKVhHnvE0gX3V0CsxD4,11590
 autocoder/run_context.py,sha256=IUfSO6_gp2Wt1blFWAmOpN0b0nDrTTk4LmtCYUBIoro,1643
-autocoder/version.py,sha256=FUAij2FADNMtAmQCx7M7pqiOpTP65zxKY6khe4EGQUs,23
+autocoder/version.py,sha256=R1KcSahF3DMgAzBREOFaR3Pjcqm9a3u4b36Hb2RFm50,23
 autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/agent/agentic_edit.py,sha256=XsfePZ-t6M-uBSdG1VLZXk1goqXk2HPeJ_A8IYyBuWQ,58896
 autocoder/agent/agentic_edit_types.py,sha256=oFcDd_cxJ2yH9Ed1uTpD3BipudgoIEWDMPb5pAkq4gI,3288
@@ -54,7 +54,7 @@ autocoder/common/action_yml_file_manager.py,sha256=DdF5P1R_B_chCnnqoA2IgogakWLZk
 autocoder/common/anything2images.py,sha256=0ILBbWzY02M-CiWB-vzuomb_J1hVdxRcenAfIrAXq9M,25283
 autocoder/common/anything2img.py,sha256=iZQmg8srXlD7N5uGl5b_ONKJMBjYoW8kPmokkG6ISF0,10118
 autocoder/common/audio.py,sha256=Kn9nWKQddWnUrAz0a_ZUgjcu4VUU_IcZBigT7n3N3qc,7439
-autocoder/common/auto_coder_lang.py,sha256=ozoGTy4ZFn3YsO5zWhvAGCu54mK4LtnRfC2yCvrMc_8,42462
+autocoder/common/auto_coder_lang.py,sha256=bqBoICLIvi9l8jRCwcNLWR6n5pI3ix7YDPGpmqQDmgc,42677
 autocoder/common/auto_configure.py,sha256=D4N-fl9v8bKM5-Ds-uhkC2uGDmHH_ZjLJ759F8KXMKs,13129
 autocoder/common/buildin_tokenizer.py,sha256=L7d5t39ZFvUd6EoMPXUhYK1toD0FHlRH1jtjKRGokWU,1236
 autocoder/common/chunk_validation.py,sha256=BrR_ZWavW8IANuueEE7hS8NFAwEvm8TX34WnPx_1hs8,3030
@@ -126,18 +126,19 @@ autocoder/common/v2/code_editblock_manager.py,sha256=G0CIuV9Ki0FqMLnpA8nBT4pnkCN
 autocoder/common/v2/code_manager.py,sha256=C403bS-f6urixwitlKHcml-J03hci-UyNwHJOqBiY6Q,9182
 autocoder/common/v2/code_strict_diff_manager.py,sha256=v-J1kDyLg7tLGg_6_lbO9S4fNkx7M_L8Xr2G7fPptiU,9347
 autocoder/common/v2/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-autocoder/common/v2/agent/agentic_edit.py,sha256=Ghx6HTCdjIYj96zoB5xkL6aGLizftgCmy5ZcUSaFXtQ,92599
+autocoder/common/v2/agent/agentic_edit.py,sha256=pxgKD-FZaaRQkPgWsxvLleUXDRFC8qSlYfNUHU6ZENU,94794
 autocoder/common/v2/agent/agentic_edit_conversation.py,sha256=qLLhTegH619JQTp3s1bj5FVn2hAcoV-DlhGO3UyIOMc,7338
-autocoder/common/v2/agent/agentic_edit_types.py,sha256=6qBLLmvdlcsbzrpMHsYQVIHqbOWubMXOnmkqTs1pBWQ,4629
+autocoder/common/v2/agent/agentic_edit_types.py,sha256=VJMrictg6hJ3mC45VgQGRd43DyDUPDUvPV1Rf3z72NI,4776
 autocoder/common/v2/agent/agentic_tool_display.py,sha256=WKirt-2V346KLnbHgH3NVJiK3xvriD9oaCWj2IdvzLU,7309
 autocoder/common/v2/agent/ignore_utils.py,sha256=gnUchRzKMLbUm_jvnKL-r-K9MWKPtt-6iiuzijY7Es0,1717
-autocoder/common/v2/agent/agentic_edit_tools/__init__.py,sha256=wGICCc1dYh07osB21j62zOQ9Ws0PyyOQ12UYRHmHrtI,1229
+autocoder/common/v2/agent/agentic_edit_tools/__init__.py,sha256=RbPZZcZg_VnGssL577GxSyFrYrxQ_LopJ4G_-mY3z_Q,1337
 autocoder/common/v2/agent/agentic_edit_tools/ask_followup_question_tool_resolver.py,sha256=bwtf4m9N82TCP3piK5UglJk1FVFFm7ZX59XerA2qxko,3131
 autocoder/common/v2/agent/agentic_edit_tools/attempt_completion_tool_resolver.py,sha256=82ZGKeRBSDKeead_XVBW4FxpiE-5dS7tBOk_3RZ6B5s,1511
 autocoder/common/v2/agent/agentic_edit_tools/base_tool_resolver.py,sha256=Zid2m1uZd-2wVFGc_n_KAViXZyNjbdLSpI5n7ut1RUQ,1036
 autocoder/common/v2/agent/agentic_edit_tools/execute_command_tool_resolver.py,sha256=GueQfIY2hVu2R5j9R5rBtn2znl5MlmEdGtsa6snsMHs,4112
 autocoder/common/v2/agent/agentic_edit_tools/list_code_definition_names_tool_resolver.py,sha256=8QoMsADUDWliqiDt_dpguz31403syB8eeW0Pcw-qfb8,3842
 autocoder/common/v2/agent/agentic_edit_tools/list_files_tool_resolver.py,sha256=d0LzGPA3zsIHK5s1-arPry6ddWFSymRsMY3VbkV6v5A,5795
+autocoder/common/v2/agent/agentic_edit_tools/list_package_info_tool_resolver.py,sha256=dIdV12VuczHpHuHgx2B1j_3BZYc9PL0jfHCuBk9ryk8,2005
 autocoder/common/v2/agent/agentic_edit_tools/plan_mode_respond_tool_resolver.py,sha256=lGT4_QYJK6Fa9f6HVSGo0cSsGK7qCsDYgJGUowNxPzk,1499
 autocoder/common/v2/agent/agentic_edit_tools/read_file_tool_resolver.py,sha256=9Bh0KVbL0qiIqwChlb77biiBiETQ3zekxGe5Fj7hXAg,2800
 autocoder/common/v2/agent/agentic_edit_tools/replace_in_file_tool_resolver.py,sha256=lpD4fCbVR8GTrynqXON69IjM94nPy3nuUL62Ashm5O4,7988
@@ -211,11 +212,11 @@ autocoder/rag/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/rag/api_server.py,sha256=StGyxrM-7-W2vYHJq-i_Fv-MHrl9UgVWY272Hd-6VJ4,13090
 autocoder/rag/conversation_to_queries.py,sha256=xwmErn4WbdADnhK1me-h_6fV3KYrl_y1qPNQl1aoI6o,4810
 autocoder/rag/doc_filter.py,sha256=UduVO2mlrngwJICrefjDJTYfdmQ4GcRXrfWDQ7xXksk,14206
-autocoder/rag/document_retriever.py,sha256=5BDqKVJqLPScEnua5S5suXhWuCaALIfPf5obXeJoWfs,8461
+autocoder/rag/document_retriever.py,sha256=rFwbAuHTvEFJq16HQNlmRLyJp2ddn2RNFslw_ncU7NI,8847
 autocoder/rag/lang.py,sha256=HvcMeu6jReEJOGxyLMn4rwBoD-myFwmykS3VLceBJLs,3364
 autocoder/rag/llm_wrapper.py,sha256=Ht5GF5yJtrztoliujsZzx_ooWZmHkd5xLZKcGEiicZw,4303
-autocoder/rag/long_context_rag.py,sha256=84haMjfbPd2Rw_8Zdj-Azf_v9tbYST_OUPW0NZlmeJY,42189
-autocoder/rag/qa_conversation_strategy.py,sha256=vv62JhmdZvLJ3U1gG4ZZ3m9thIlGvU1H0RXHtrRTJ6s,11788
+autocoder/rag/long_context_rag.py,sha256=syPIxO_TQJpBgjZ0taF-G7xVGvkNjKWL65KTI-sy4io,42234
+autocoder/rag/qa_conversation_strategy.py,sha256=N4zcLstmTEZH4iGTnmNhzlRSRPfdZnTlKOXPqnqSEUQ,11727
 autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
 autocoder/rag/rag_entry.py,sha256=6TKtErZ0Us9XSV6HgRKXA6yR3SiZGPHpynOKSaR1wgE,2463
 autocoder/rag/raw_rag.py,sha256=BOr0YGf3umjqXOIDVO1LXQ0bIHx8hzBdiubND2ezyxc,2946
@@ -232,13 +233,14 @@ autocoder/rag/utils.py,sha256=f21ybCAlYVgr3tJP9MkVoM9d82-uG5NHu2gsv2oaVBQ,4961
 autocoder/rag/variable_holder.py,sha256=PFvBjFcR7-fNDD4Vcsc8CpH2Te057vcpwJMxtrfUgKI,75
 autocoder/rag/cache/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/rag/cache/base_cache.py,sha256=EaYYYbclMBvnlOUoM7qonnluwZX5oSvUjdvGvFun8_8,742
-autocoder/rag/cache/byzer_storage_cache.py,sha256=S_afj4_fFt88A0KJ23N-Gc3mMwPTDjzv9E3J9O_IiT4,29319
+autocoder/rag/cache/byzer_storage_cache.py,sha256=buUrsY7-gWf6BXtzhFVKRJ-ME4LGrr9EEzjoSVu9G6g,27937
 autocoder/rag/cache/cache_result_merge.py,sha256=VnTdbT2OMBmWl_83bqds97d9_M33IhPNX8tF7KH2GMM,10556
-autocoder/rag/cache/file_monitor_cache.py,sha256=OdSXTH3vo6inAzkN5d55I0RN03GUlSlnUEKmXpjFl78,9443
-autocoder/rag/cache/local_byzer_storage_cache.py,sha256=1xskK7X_hFEAsHHoT_F9lFYhQOTrpQtsFyFCIeI2Mvk,31964
-autocoder/rag/cache/local_duckdb_storage_cache.py,sha256=37WH2mlOgykBUrp6ow43kcaaKFZc1CRTkNLSFdELPcE,32714
+autocoder/rag/cache/failed_files_utils.py,sha256=kITguXANLC3EEJy5JoKzNXrtwvTkmZT-ANPwcno42Ck,1183
+autocoder/rag/cache/file_monitor_cache.py,sha256=TelBRzw2uD9QLUIvyrdD6XWmgvb3MyTgx63ZrpptaXI,9512
+autocoder/rag/cache/local_byzer_storage_cache.py,sha256=X7EVGa4xcmUWC7WLnyGPx17I1l9NvW3OHfkErO4Ww0M,30784
+autocoder/rag/cache/local_duckdb_storage_cache.py,sha256=6NsNlqA8-nGSD8ILXQgO-6QbgE8eUy4uT3eM1t69nic,34447
 autocoder/rag/cache/rag_file_meta.py,sha256=RQ3n4wfkHlB-1ljS3sFSi8ijbsUPeIqBSgjmmbRuwRI,20521
-autocoder/rag/cache/simple_cache.py,sha256=yGmt8iXY2ZVif4_2-_DqWGl_zG0fzgSI3tUOxvdqHZU,14230
+autocoder/rag/cache/simple_cache.py,sha256=yrGgRXGcMNrWSQOviPshm3Qlo2QDNVFoRZPLNTV_nQs,15997
 autocoder/rag/loaders/__init__.py,sha256=EQHEZ5Cmz-mGP2SllUTvcIbYCnF7W149dNpNItfs0yE,304
 autocoder/rag/loaders/docx_loader.py,sha256=ZswPqiiLngUEpzLhNNm1nmwEYV7ZHFEfIoXoG7c5GDU,614
 autocoder/rag/loaders/excel_loader.py,sha256=Ue8YB1z_kBs8SjIPuBskyM08Q1JiONs_BJZPrzi59oo,896
@@ -277,9 +279,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=KW0mlmcHlStXi8-_6fXZ2-ifeJ5mgP0OV7DQFzCtIsw,14008
 autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-auto_coder-0.1.346.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
-auto_coder-0.1.346.dist-info/METADATA,sha256=pXxlRR0gvLNboAcWQtU9VzpKpiaCTj4evVC9Ztas7R4,2728
-auto_coder-0.1.346.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-auto_coder-0.1.346.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
-auto_coder-0.1.346.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
-auto_coder-0.1.346.dist-info/RECORD,,
+auto_coder-0.1.347.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+auto_coder-0.1.347.dist-info/METADATA,sha256=-wG5ilXMo-fYflOC9zJZY2N0JdHBluyyxYNqxhzNVwQ,2728
+auto_coder-0.1.347.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+auto_coder-0.1.347.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+auto_coder-0.1.347.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+auto_coder-0.1.347.dist-info/RECORD,,
@@ -833,6 +833,14 @@ MESSAGES = {
     "/agent/edit/user_query":{
         "en":"User Query",
         "zh":"用户查询"
+    },
+    "/agent/edit/apply_pre_changes":{
+        "en":"Commit user changes",
+        "zh":"提交用户修改"
+    },
+    "/agent/edit/apply_changes":{
+        "en":"Commit changes",
+        "zh":"提交修改"
     }
 }
 
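The two added keys back the event results emitted by the new apply_pre_changes and the updated apply_changes methods further down in this diff. As a minimal sketch of how such a bilingual message table resolves (the lookup helper below is illustrative; the package's own get_message/get_message_from_key implementations are not part of this diff):

# Illustrative lookup over the MESSAGES layout shown above.
MESSAGES = {
    "/agent/edit/apply_pre_changes": {"en": "Commit user changes", "zh": "提交用户修改"},
    "/agent/edit/apply_changes": {"en": "Commit changes", "zh": "提交修改"},
}

def get_message(key: str, lang: str = "en") -> str:
    # Fall back to the key itself when the key or language is missing.
    return MESSAGES.get(key, {}).get(lang, key)

assert get_message("/agent/edit/apply_changes") == "Commit changes"
assert get_message("/agent/edit/apply_pre_changes", lang="zh") == "提交用户修改"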
@@ -52,8 +52,10 @@ from autocoder.common.v2.agent.agentic_edit_tools import ( # Import specific re
     ExecuteCommandToolResolver, ReadFileToolResolver, WriteToFileToolResolver,
     ReplaceInFileToolResolver, SearchFilesToolResolver, ListFilesToolResolver,
     ListCodeDefinitionNamesToolResolver, AskFollowupQuestionToolResolver,
-    AttemptCompletionToolResolver, PlanModeRespondToolResolver, UseMcpToolResolver
+    AttemptCompletionToolResolver, PlanModeRespondToolResolver, UseMcpToolResolver,
+    ListPackageInfoToolResolver
 )
+
 from autocoder.common.v2.agent.agentic_edit_types import (AgenticEditRequest, ToolResult,
     MemoryConfig, CommandConfig, BaseTool,
     ExecuteCommandTool, ReadFileTool,
@@ -63,6 +65,7 @@ from autocoder.common.v2.agent.agentic_edit_types import (AgenticEditRequest, To
     ListFilesTool,
     ListCodeDefinitionNamesTool, AskFollowupQuestionTool,
     AttemptCompletionTool, PlanModeRespondTool, UseMcpTool,
+    ListPackageInfoTool,
     TOOL_MODEL_MAP,
     # Event Types
     LLMOutputEvent, LLMThinkingEvent, ToolCallEvent,
@@ -83,6 +86,7 @@ TOOL_RESOLVER_MAP: Dict[Type[BaseTool], Type[BaseToolResolver]] = {
     SearchFilesTool: SearchFilesToolResolver,
     ListFilesTool: ListFilesToolResolver,
     ListCodeDefinitionNamesTool: ListCodeDefinitionNamesToolResolver,
+    ListPackageInfoTool: ListPackageInfoToolResolver,
     AskFollowupQuestionTool: AskFollowupQuestionToolResolver,
     AttemptCompletionTool: AttemptCompletionToolResolver,  # Will stop the loop anyway
     PlanModeRespondTool: PlanModeRespondToolResolver,
@@ -245,6 +249,7 @@ class AgenticEdit:
 
 Always adhere to this format for the tool use to ensure proper parsing and execution.
 
+
 # Tools
 
 ## execute_command
@@ -258,6 +263,15 @@ class AgenticEdit:
 <requires_approval>true or false</requires_approval>
 </execute_command>
 
+## list_package_info
+Description: Request to retrieve information about a source code package, such as recent changes or documentation summary, to better understand the code context. It accepts a directory path (absolute or relative to the current project).
+Parameters:
+- path: (required) The source code package directory path.
+Usage:
+<list_package_info>
+<path>relative/or/absolute/package/path</path>
+</list_package_info>
+
 ## read_file
 Description: Request to read the contents of a file at the specified path. Use this when you need to examine the contents of an existing file you do not know the contents of, for example to analyze code, review text files, or extract information from configuration files. Automatically extracts raw text from PDF and DOCX files. May not be suitable for other types of binary files, as it returns the raw content as a string.
 Parameters:
@@ -670,7 +684,7 @@ class AgenticEdit:
 
 {% if extra_docs %}
 ====
-
+
 RULES PROVIDED BY USER
 
 The following rules are provided by the user, and you must follow them strictly.
@@ -683,7 +697,8 @@ class AgenticEdit:
         """
         import os
         extra_docs = {}
-        rules_dir = os.path.join(self.args.source_dir,".auto-coder", "autocoderrules")
+        rules_dir = os.path.join(self.args.source_dir,
+                                 ".auto-coder", "autocoderrules")
         if os.path.isdir(rules_dir):
             for fname in os.listdir(rules_dir):
                 if fname.endswith(".md"):
@@ -1137,6 +1152,7 @@ class AgenticEdit:
         standard event system format and writing them using the event manager.
         """
         event_manager = get_event_manager(self.args.event_file)
+        self.apply_pre_changes()
 
         try:
             event_stream = self.analyze(request)
@@ -1280,6 +1296,25 @@ class AgenticEdit:
             # Re-raise the exception if needed, or handle appropriately
             raise e
 
+    def apply_pre_changes(self):
+        # get the file name
+        file_name = os.path.basename(self.args.file)
+        if not self.args.skip_commit:
+            try:
+                get_event_manager(self.args.event_file).write_result(
+                    EventContentCreator.create_result(
+                        content=self.printer.get_message_from_key("/agent/edit/apply_pre_changes")), metadata=EventMetadata(
+                        action_file=self.args.file,
+                        is_streaming=False,
+                        path="/agent/edit/apply_pre_changes",
+                        stream_out_type="/agent/edit").to_dict())
+                git_utils.commit_changes(
+                    self.args.source_dir, f"auto_coder_pre_{file_name}")
+            except Exception as e:
+                self.printer.print_in_terminal("git_init_required",
+                                               source_dir=self.args.source_dir, error=str(e))
+                return
+
     def apply_changes(self):
         """
         Apply all tracked file changes to the original project directory.
@@ -1297,6 +1332,12 @@ class AgenticEdit:
                 f"{self.args.query}\nauto_coder_{file_name}",
             )
 
+            get_event_manager(self.args.event_file).write_result(
+                EventContentCreator.create_result(
+                    content=self.printer.get_message_from_key("/agent/edit/apply_changes")), metadata=EventMetadata(
+                    action_file=self.args.file,
+                    is_streaming=False,
+                    stream_out_type="/agent/edit").to_dict())
         action_yml_file_manager = ActionYmlFileManager(
             self.args.source_dir)
         action_file_name = os.path.basename(self.args.file)
@@ -1343,6 +1384,7 @@ class AgenticEdit:
             f"[bold]{get_message('/agent/edit/user_query')}:[/bold]\n{request.user_input}", title=get_message("/agent/edit/objective"), border_style="blue"))
 
         try:
+            self.apply_pre_changes()
             event_stream = self.analyze(request)
             for event in event_stream:
                 if isinstance(event, LLMThinkingEvent):
@@ -1434,7 +1476,8 @@ class AgenticEdit:
                 logger.warning(
                     f"Error formatting tool result content: {e}")
                 panel_content.append(
-                    _format_content(str(result.content)))  # Fallback
+                    # Fallback
+                    _format_content(str(result.content)))
 
         # Print the base info panel
         console.print(Panel("\n".join(
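Taken together, the agentic_edit.py hunks above make the agent snapshot any uncommitted user edits before it starts (commit "auto_coder_pre_<action-file>") and commit its own edits when done. A sketch of that sequencing, using plain git for illustration; the package itself routes through git_utils.commit_changes and wraps each step in an event, and the post-edit commit message in the diff also embeds the user query:

import subprocess

def commit_all(source_dir: str, message: str) -> None:
    # Stage and commit everything; --allow-empty keeps the sketch robust
    # when there is nothing to snapshot.
    subprocess.run(["git", "-C", source_dir, "add", "-A"], check=True)
    subprocess.run(["git", "-C", source_dir, "commit", "-m", message,
                    "--allow-empty"], check=True)

def run_agentic_edit(source_dir: str, action_file: str, skip_commit: bool) -> None:
    if not skip_commit:
        # 1. apply_pre_changes: snapshot the user's own edits first.
        commit_all(source_dir, f"auto_coder_pre_{action_file}")
    # ... the agent analyzes the request and edits files here ...
    if not skip_commit:
        # 2. apply_changes: commit what the agent changed.
        commit_all(source_dir, f"auto_coder_{action_file}")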
@@ -11,6 +11,7 @@ from .ask_followup_question_tool_resolver import AskFollowupQuestionToolResolver
 from .attempt_completion_tool_resolver import AttemptCompletionToolResolver
 from .plan_mode_respond_tool_resolver import PlanModeRespondToolResolver
 from .use_mcp_tool_resolver import UseMcpToolResolver
+from .list_package_info_tool_resolver import ListPackageInfoToolResolver
 
 __all__ = [
     "BaseToolResolver",
@@ -25,4 +26,5 @@ __all__ = [
     "AttemptCompletionToolResolver",
     "PlanModeRespondToolResolver",
     "UseMcpToolResolver",
+    "ListPackageInfoToolResolver",
 ]
@@ -0,0 +1,42 @@
+
+import os
+from typing import Optional
+from autocoder.common.v2.agent.agentic_edit_tools.base_tool_resolver import BaseToolResolver
+from autocoder.common.v2.agent.agentic_edit_types import ListPackageInfoTool, ToolResult
+from loguru import logger
+import typing
+
+if typing.TYPE_CHECKING:
+    from autocoder.common.v2.agent.agentic_edit import AgenticEdit
+
+class ListPackageInfoToolResolver(BaseToolResolver):
+    def __init__(self, agent: Optional['AgenticEdit'], tool: ListPackageInfoTool, args):
+        super().__init__(agent, tool, args)
+        self.tool: ListPackageInfoTool = tool
+
+    def resolve(self) -> ToolResult:
+        source_dir = self.args.source_dir or "."
+        abs_source_dir = os.path.abspath(source_dir)
+
+        input_path = self.tool.path.strip()
+        abs_input_path = os.path.abspath(os.path.join(source_dir, input_path)) if not os.path.isabs(input_path) else input_path
+
+        # Verify the input directory is inside the project directory
+        if not abs_input_path.startswith(abs_source_dir):
+            return ToolResult(success=False, message=f"Error: Access denied. Path outside project: {self.tool.path}")
+
+        rel_package_path = os.path.relpath(abs_input_path, abs_source_dir)
+        active_md_path = os.path.join(abs_source_dir, ".auto-coder", "active-context", rel_package_path, "active.md")
+
+        logger.info(f"Looking for package info at: {active_md_path}")
+
+        if not os.path.exists(active_md_path):
+            return ToolResult(success=True, message="No package info found for this path.", content="没有相关包信息。")
+
+        try:
+            with open(active_md_path, 'r', encoding='utf-8', errors='replace') as f:
+                content = f.read()
+            return ToolResult(success=True, message="Successfully retrieved package info.", content=content)
+        except Exception as e:
+            logger.error(f"Error reading package info file: {e}")
+            return ToolResult(success=False, message=f"Error reading package info file: {e}")
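The resolver never reads the package sources themselves; it maps the requested directory onto a pre-generated active.md under .auto-coder/active-context/. A standalone sketch of that path resolution, runnable outside the package (only the directory layout and the prefix-based containment guard are taken from the code above):

import os
from typing import Optional

def resolve_active_md(source_dir: str, package_path: str) -> Optional[str]:
    abs_source_dir = os.path.abspath(source_dir)
    abs_input = package_path if os.path.isabs(package_path) \
        else os.path.abspath(os.path.join(source_dir, package_path))
    # Refuse paths outside the project (a plain startswith check, as above).
    if not abs_input.startswith(abs_source_dir):
        return None
    rel = os.path.relpath(abs_input, abs_source_dir)
    return os.path.join(abs_source_dir, ".auto-coder", "active-context",
                        rel, "active.md")

print(resolve_active_md("/repo", "autocoder/rag"))
# -> /repo/.auto-coder/active-context/autocoder/rag/active.md
print(resolve_active_md("/repo", "/etc/passwd"))  # outside the project -> None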
@@ -57,6 +57,9 @@ class UseMcpTool(BaseTool):
     tool_name: str
     query:str
 
+class ListPackageInfoTool(BaseTool):
+    path: str  # source package directory, relative or absolute path
+
 # Event Types for Rich Output Streaming
 class LLMOutputEvent(BaseModel):
     """Represents plain text output from the LLM."""
@@ -112,6 +115,7 @@ TOOL_MODEL_MAP: Dict[str, Type[BaseTool]] = {
     "attempt_completion": AttemptCompletionTool,
     "plan_mode_respond": PlanModeRespondTool,
     "use_mcp_tool": UseMcpTool,
+    "list_package_info": ListPackageInfoTool,
 }
 
 class FileChangeEntry(BaseModel):
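With the pydantic model and the "list_package_info" registry entry above, an XML tool block can be turned into a typed tool object and then looked up in TOOL_RESOLVER_MAP. A minimal sketch of that dispatch with a deliberately naive parser (the package's real streaming XML parsing is not part of this diff):

import re
from typing import Dict, Type
from pydantic import BaseModel

class BaseTool(BaseModel):
    pass

class ListPackageInfoTool(BaseTool):
    path: str  # source package directory, relative or absolute path

TOOL_MODEL_MAP: Dict[str, Type[BaseTool]] = {
    "list_package_info": ListPackageInfoTool,
}

def parse_tool_call(block: str) -> BaseTool:
    # The outer tag picks the model; inner tags become field values.
    outer = re.match(r"\s*<(\w+)>(.*)</\1>\s*$", block, re.S)
    tool_cls = TOOL_MODEL_MAP[outer.group(1)]
    params = dict(re.findall(r"<(\w+)>(.*?)</\1>", outer.group(2), re.S))
    return tool_cls(**params)

tool = parse_tool_call(
    "<list_package_info><path>autocoder/rag</path></list_package_info>")
assert isinstance(tool, ListPackageInfoTool) and tool.path == "autocoder/rag"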
@@ -29,6 +29,7 @@ import hashlib
 from typing import Union
 from pydantic import BaseModel
 from autocoder.rag.cache.cache_result_merge import CacheResultMerger, MergeStrategy
+from .failed_files_utils import save_failed_files, load_failed_files
 import time
 
 if platform.system() != "Windows":
@@ -65,74 +66,18 @@ class ByzerStorageCache(BaseCacheManager):
         ignore_spec,
         required_exts,
         extra_params: Optional[AutoCoderArgs] = None,
+        args=None,
+        llm=None,
     ):
         """
         Initialize the cloud Byzer Storage based RAG cache manager.
-
-        Parameters:
-            path: root directory of the codebase to index
-            ignore_spec: rules specifying which files/directories should be ignored
-            required_exts: list of file extensions to process
-            extra_params: extra configuration, including vector-index settings
-
-        Cache structure (self.cache):
-        self.cache is a dict keyed by file path, with CacheItem values:
-        {
-            "file_path1": CacheItem(
-                file_path: str,       # absolute path of the file
-                relative_path: str,   # path relative to the project root
-                content: List[Dict],  # structured file content; each element is a serialized SourceCode object
-                modify_time: float,   # timestamp of the file's last modification
-                md5: str              # MD5 hash of the file content, used to detect changes
-            ),
-            "file_path2": CacheItem(...),
-            ...
-        }
-
-        The cache has two storage layers:
-        1. Local file cache: .cache/byzer_storage_speedup.jsonl under the project root
-           - used to track file changes and to load quickly
-           - stored as JSONL, one CacheItem JSON per line
-
-        2. Cloud Byzer Storage vector database:
-           - stores content chunks and their vector embeddings
-           - each file is split into text chunks of size chunk_size
-           - every chunk gets a vector embedding for semantic search
-           - stored fields: file path, content chunk, raw content, embedding, modify time
-
-        Source-code processing flow:
-        Two key functions are used during cache updates:
-
-        1. process_file_in_multi_process: processes files across multiple processes
-           - argument: file_info (file info tuple)
-           - returns: List[SourceCode] or None
-           - used to process many files in parallel during the initial cache build
-
-        2. process_file_local: processes a single file in the current process
-           - argument: file_path
-           - returns: List[SourceCode] or None
-           - used when an update to a single file is detected
-
-        After a file is processed:
-        1. the in-memory cache (self.cache) is updated
-        2. the cache is persisted to the local file
-        3. the content is chunked and pushed to the Byzer Storage vector database
-
-        Update mechanism:
-        - file changes are handled asynchronously in a separate thread
-        - MD5 hashes detect whether a file has changed
-        - add, update, and delete events are supported
-        - the vector database enables semantic retrieval and similarity search
-
-        Differences from LocalByzerStorageCache:
-        - uses cloud ByzerStorage instead of local storage
-        - suited to scenarios that need remote access and shared indexes
-        - supports large-scale distributed retrieval and more advanced queries
         """
         self.path = path
         self.ignore_spec = ignore_spec
         self.required_exts = required_exts
         self.extra_params = extra_params
+        self.args = args
+        self.llm = llm
         self.rag_build_name = extra_params.rag_build_name
         self.storage = ByzerStorage("byzerai_store", "rag", self.rag_build_name)
         self.queue = []
@@ -150,16 +95,20 @@ class ByzerStorageCache(BaseCacheManager):
         self.cache_file = os.path.join(self.cache_dir, "byzer_storage_speedup.jsonl")
         self.cache: Dict[str, CacheItem] = {}
 
+        # create the cache directory
+        if not os.path.exists(self.cache_dir):
+            os.makedirs(self.cache_dir)
+
+        # failed files support
+        self.failed_files_path = os.path.join(self.cache_dir, "failed_files.json")
+        self.failed_files = load_failed_files(self.failed_files_path)
+
         self.lock = threading.Lock()
         self.stop_event = threading.Event()
         self.thread = threading.Thread(target=self.process_queue)
         self.thread.daemon = True
         self.thread.start()
 
-        # create the cache directory
-        if not os.path.exists(self.cache_dir):
-            os.makedirs(self.cache_dir)
-
         # load the cache
         self.cache = self._load_cache()
@@ -455,6 +404,10 @@ class ByzerStorageCache(BaseCacheManager):
             for item in file_list.file_paths:
                 logger.info(f"[QUEUE PROCESSING] Processing file deletion: {item}")
                 del self.cache[item]
+                # remove from failed files if present
+                if item in self.failed_files:
+                    self.failed_files.remove(item)
+                    save_failed_files(self.failed_files_path, self.failed_files)
                 # Create a temporary FileInfo object
                 file_info = FileInfo(file_path=item, relative_path="", modify_time=0, file_md5="")
                 self.update_storage(file_info, is_delete=True)
@@ -463,16 +416,29 @@ class ByzerStorageCache(BaseCacheManager):
             logger.info(f"[QUEUE PROCESSING] Processing add/update event, total files: {len(file_list.file_infos)}")
             for file_info in file_list.file_infos:
                 logger.info(f"[QUEUE PROCESSING] Processing file update: {file_info.file_path}")
-                # Process file and create CacheItem
-                content = process_file_local(self.fileinfo_to_tuple(file_info))
-                self.cache[file_info.file_path] = CacheItem(
-                    file_path=file_info.file_path,
-                    relative_path=file_info.relative_path,
-                    content=[c.model_dump() for c in content],
-                    modify_time=file_info.modify_time,
-                    md5=file_info.file_md5,
-                )
-                self.update_storage(file_info, is_delete=False)
+                try:
+                    content = process_file_local(self.fileinfo_to_tuple(file_info))
+                    if content:
+                        self.cache[file_info.file_path] = CacheItem(
+                            file_path=file_info.file_path,
+                            relative_path=file_info.relative_path,
+                            content=[c.model_dump() for c in content],
+                            modify_time=file_info.modify_time,
+                            md5=file_info.file_md5,
+                        )
+                        self.update_storage(file_info, is_delete=False)
+                        # remove from failed files if present
+                        if file_info.file_path in self.failed_files:
+                            self.failed_files.remove(file_info.file_path)
+                            save_failed_files(self.failed_files_path, self.failed_files)
+                    else:
+                        logger.warning(f"Empty result for file: {file_info.file_path}, treat as parse failed, skipping cache update")
+                        self.failed_files.add(file_info.file_path)
+                        save_failed_files(self.failed_files_path, self.failed_files)
+                except Exception as e:
+                    logger.error(f"Error in process_queue: {e}")
+                    self.failed_files.add(file_info.file_path)
+                    save_failed_files(self.failed_files_path, self.failed_files)
             self.write_cache()
 
         elapsed = time.time() - start_time
@@ -486,6 +452,10 @@ class ByzerStorageCache(BaseCacheManager):
         current_files = set()
         for file_info in self.get_all_files():
             current_files.add(file_info.file_path)
+            # skip failed files
+            if file_info.file_path in self.failed_files:
+                logger.info(f"文件 {file_info.file_path} 之前解析失败,跳过此次更新")
+                continue
            if (
                 file_info.file_path not in self.cache
                 or self.cache[file_info.file_path].md5 != file_info.file_md5
@@ -0,0 +1,39 @@
+
+import os
+import json
+from loguru import logger
+
+def load_failed_files(failed_files_path: str) -> set:
+    """
+    Load the set of failed file paths from a JSON file.
+
+    Args:
+        failed_files_path: Path to the JSON file storing failed files.
+
+    Returns:
+        A set of failed file paths.
+    """
+    directory = os.path.dirname(failed_files_path)
+    if not os.path.exists(directory):
+        os.makedirs(directory, exist_ok=True)
+    if os.path.exists(failed_files_path):
+        try:
+            with open(failed_files_path, "r", encoding="utf-8") as f:
+                return set(json.load(f))
+        except Exception:
+            return set()
+    return set()
+
+def save_failed_files(failed_files_path: str, failed_files: set) -> None:
+    """
+    Save the set of failed file paths to a JSON file.
+
+    Args:
+        failed_files_path: Path to the JSON file.
+        failed_files: A set of failed file paths.
+    """
+    try:
+        with open(failed_files_path, "w", encoding="utf-8") as f:
+            json.dump(list(failed_files), f, ensure_ascii=False, indent=2)
+    except Exception as e:
+        logger.error(f"Error saving failed files list: {e}")
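All four cache managers in this release wire these helpers the same way: load the set at startup, add a path when parsing raises or returns an empty result, and drop it again once the file parses successfully or is deleted. A usage sketch (the .cache location mirrors the caches above; the file paths are made up):

import os
from autocoder.rag.cache.failed_files_utils import load_failed_files, save_failed_files

failed_files_path = os.path.join(".", ".cache", "failed_files.json")

failed_files = load_failed_files(failed_files_path)  # a set; empty on first run

failed_files.add("docs/broken.pdf")                  # a parse failure
save_failed_files(failed_files_path, failed_files)

failed_files.discard("docs/broken.pdf")              # the file parses again
save_failed_files(failed_files_path, failed_files)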
@@ -48,7 +48,7 @@ class AutoCoderRAGDocListener(BaseCacheManager):
         r"^test.*$",
     ]
 
-    def __init__(self, path: str, ignore_spec, required_exts: List) -> None:
+    def __init__(self, path: str, ignore_spec, required_exts: List, args=None, llm=None) -> None:
         """
         Initialize the file-monitoring cache manager.
 
@@ -89,6 +89,8 @@ class AutoCoderRAGDocListener(BaseCacheManager):
         self.path = path
         self.ignore_spec = ignore_spec
         self.required_exts = required_exts
+        self.args = args
+        self.llm = llm
         self.stop_event = threading.Event()
 
         # connect list
@@ -30,6 +30,7 @@ from typing import Union
 from byzerllm import SimpleByzerLLM, ByzerLLM
 from autocoder.rag.cache.cache_result_merge import CacheResultMerger, MergeStrategy
 import time
+from .failed_files_utils import save_failed_files, load_failed_files
 
 if platform.system() != "Windows":
     import fcntl
@@ -70,71 +71,17 @@ class LocalByzerStorageCache(BaseCacheManager):
         emb_llm: Union[ByzerLLM, SimpleByzerLLM] = None,
         host: str = "127.0.0.1",
         port: int = 33333,
+        args=None,
+        llm=None,
     ):
         """
         Initialize the Byzer Storage based RAG cache manager.
-
-        Parameters:
-            path: root directory of the codebase to index
-            ignore_spec: rules specifying which files/directories should be ignored
-            required_exts: list of file extensions to process
-            extra_params: extra configuration, including vector-index settings
-            emb_llm: ByzerLLM instance used to generate text embeddings
-            host: host address of the Byzer Storage service
-            port: port of the Byzer Storage service
-
-        Cache structure (self.cache):
-        self.cache is a dict keyed by file path, with CacheItem values:
-        {
-            "file_path1": CacheItem(
-                file_path: str,       # absolute path of the file
-                relative_path: str,   # path relative to the project root
-                content: List[Dict],  # structured file content; each element is a serialized SourceCode object
-                modify_time: float,   # timestamp of the file's last modification
-                md5: str              # MD5 hash of the file content, used to detect changes
-            ),
-            "file_path2": CacheItem(...),
-            ...
-        }
-
-        The cache has two storage layers:
-        1. Local file cache: .cache/byzer_storage_speedup.jsonl under the project root
-           - used to track file changes and to load quickly
-           - stored as JSONL, one CacheItem JSON per line
-
-        2. Byzer Storage vector database:
-           - stores content chunks and their vector embeddings
-           - each file is split into text chunks of size chunk_size
-           - every chunk gets a vector embedding for semantic search
-           - stored fields: file path, content chunk, raw content, embedding, modify time
-
-        Source-code processing flow:
-        Two key functions are used during cache updates:
-
-        1. process_file_in_multi_process: processes files across multiple processes
-           - argument: file_info (file info tuple)
-           - returns: List[SourceCode] or None
-           - used to process many files in parallel during the initial cache build
-
-        2. process_file_local: processes a single file in the current process
-           - argument: file_path
-           - returns: List[SourceCode] or None
-           - used when an update to a single file is detected
-
-        After a file is processed:
-        1. the in-memory cache (self.cache) is updated
-        2. the cache is persisted to the local file
-        3. the content is chunked and pushed to the Byzer Storage vector database
-
-        Update mechanism:
-        - file changes are handled asynchronously in a separate thread
-        - MD5 hashes detect whether a file has changed
-        - add, update, and delete events are supported
-        - the vector database enables semantic retrieval and similarity search
         """
         self.path = path
         self.ignore_spec = ignore_spec
         self.required_exts = required_exts
+        self.args = args
+        self.llm = llm
         self.rag_build_name = extra_params.rag_build_name
         self.storage = LocalByzerStorage("byzerai_store",
                                          "rag_test", self.rag_build_name, host=host, port=port,emb_llm=emb_llm)
@@ -153,16 +100,20 @@ class LocalByzerStorageCache(BaseCacheManager):
             self.cache_dir, "byzer_storage_speedup.jsonl")
         self.cache: Dict[str, CacheItem] = {}
 
+        # create the cache directory
+        if not os.path.exists(self.cache_dir):
+            os.makedirs(self.cache_dir)
+
+        # failed files support
+        self.failed_files_path = os.path.join(self.cache_dir, "failed_files.json")
+        self.failed_files = load_failed_files(self.failed_files_path)
+
         self.lock = threading.Lock()
         self.stop_event = threading.Event()
         self.thread = threading.Thread(target=self.process_queue)
         self.thread.daemon = True
         self.thread.start()
 
-        # create the cache directory
-        if not os.path.exists(self.cache_dir):
-            os.makedirs(self.cache_dir)
-
         # load the cache
         self.cache = self._load_cache()
@@ -485,6 +436,10 @@ class LocalByzerStorageCache(BaseCacheManager):
             for item in file_list.file_paths:
                 logger.info(f"[QUEUE PROCESSING] Processing file deletion: {item}")
                 del self.cache[item]
+                # remove from failed files if present
+                if item in self.failed_files:
+                    self.failed_files.remove(item)
+                    save_failed_files(self.failed_files_path, self.failed_files)
                 # Create a temporary FileInfo object
                 file_info = FileInfo(
                     file_path=item, relative_path="", modify_time=0, file_md5="")
@@ -495,17 +450,30 @@ class LocalByzerStorageCache(BaseCacheManager):
             for file_info in file_list.file_infos:
                 logger.info(
                     f"[QUEUE PROCESSING] Processing file update: {file_info.file_path}")
-                # Process file and create CacheItem
-                content = process_file_local(
-                    self.fileinfo_to_tuple(file_info))
-                self.cache[file_info.file_path] = CacheItem(
-                    file_path=file_info.file_path,
-                    relative_path=file_info.relative_path,
-                    content=[c.model_dump() for c in content],
-                    modify_time=file_info.modify_time,
-                    md5=file_info.file_md5,
-                )
-                self.update_storage(file_info, is_delete=False)
+                try:
+                    content = process_file_local(
+                        self.fileinfo_to_tuple(file_info))
+                    if content:
+                        self.cache[file_info.file_path] = CacheItem(
+                            file_path=file_info.file_path,
+                            relative_path=file_info.relative_path,
+                            content=[c.model_dump() for c in content],
+                            modify_time=file_info.modify_time,
+                            md5=file_info.file_md5,
+                        )
+                        self.update_storage(file_info, is_delete=False)
+                        # remove from failed files if present
+                        if file_info.file_path in self.failed_files:
+                            self.failed_files.remove(file_info.file_path)
+                            save_failed_files(self.failed_files_path, self.failed_files)
+                    else:
+                        logger.warning(f"Empty result for file: {file_info.file_path}, treat as parse failed, skipping cache update")
+                        self.failed_files.add(file_info.file_path)
+                        save_failed_files(self.failed_files_path, self.failed_files)
+                except Exception as e:
+                    logger.error(f"Error in process_queue: {e}")
+                    self.failed_files.add(file_info.file_path)
+                    save_failed_files(self.failed_files_path, self.failed_files)
             self.write_cache()
 
         elapsed = time.time() - start_time
@@ -519,6 +487,10 @@ class LocalByzerStorageCache(BaseCacheManager):
         current_files = set()
         for file_info in self.get_all_files():
             current_files.add(file_info.file_path)
+            # skip failed files
+            if file_info.file_path in self.failed_files:
+                logger.info(f"文件 {file_info.file_path} 之前解析失败,跳过此次更新")
+                continue
            if (
                 file_info.file_path not in self.cache
                 or self.cache[file_info.file_path].md5 != file_info.file_md5
@@ -28,6 +28,7 @@ from autocoder.rag.cache.base_cache import (
 from autocoder.rag.utils import process_file_in_multi_process, process_file_local
 from autocoder.rag.variable_holder import VariableHolder
 from byzerllm import SimpleByzerLLM, ByzerLLM
+from .failed_files_utils import save_failed_files, load_failed_files
 
 if platform.system() != "Windows":
     import fcntl
@@ -300,12 +301,16 @@ class LocalDuckDBStorageCache(BaseCacheManager):
         ignore_spec,
         required_exts,
         extra_params: Optional[AutoCoderArgs] = None,
-        emb_llm: Union[ByzerLLM, SimpleByzerLLM] = None
+        emb_llm: Union[ByzerLLM, SimpleByzerLLM] = None,
+        args=None,
+        llm=None
     ):
         self.path = path
         self.ignore_spec = ignore_spec
         self.required_exts = required_exts
         self.extra_params = extra_params
+        self.args = args
+        self.llm = llm
 
         self.storage = LocalDuckdbStorage(
             llm=emb_llm,
@@ -325,6 +330,11 @@ class LocalDuckDBStorageCache(BaseCacheManager):
         if not os.path.exists(self.cache_dir):
             os.makedirs(self.cache_dir)
 
+        # failed files support
+        from .failed_files_utils import load_failed_files
+        self.failed_files_path = os.path.join(self.cache_dir, "failed_files.json")
+        self.failed_files = load_failed_files(self.failed_files_path)
+
         self.lock = threading.Lock()
         self.stop_event = threading.Event()
         self.thread = threading.Thread(target=self.process_queue)
@@ -569,6 +579,10 @@ class LocalDuckDBStorageCache(BaseCacheManager):
             for item in file_list.file_paths:
                 logger.info(f"{item} is detected to be removed")
                 del self.cache[item]
+                # remove from failed files if present
+                if item in self.failed_files:
+                    self.failed_files.remove(item)
+                    save_failed_files(self.failed_files_path, self.failed_files)
                 # create a temporary FileInfo object
                 file_info = FileInfo(
                     file_path=item, relative_path="", modify_time=0, file_md5="")
@@ -578,18 +592,30 @@ class LocalDuckDBStorageCache(BaseCacheManager):
             for file_info in file_list.file_infos:
                 logger.info(
                     f"{file_info.file_path} is detected to be updated")
-                # process the file and create a CacheItem
-                # content = process_file_local(
-                #     self.fileinfo_to_tuple(file_info))
-                content = process_file_local(file_info.file_path)
-                self.cache[file_info.file_path] = CacheItem(
-                    file_path=file_info.file_path,
-                    relative_path=file_info.relative_path,
-                    content=[c.model_dump() for c in content],
-                    modify_time=file_info.modify_time,
-                    md5=file_info.file_md5,
-                )
-                self.update_storage(file_info, is_delete=False)
+                try:
+                    content = process_file_local(file_info.file_path)
+                    if content:
+                        self.cache[file_info.file_path] = CacheItem(
+                            file_path=file_info.file_path,
+                            relative_path=file_info.relative_path,
+                            content=[c.model_dump() for c in content],
+                            modify_time=file_info.modify_time,
+                            md5=file_info.file_md5,
+                        )
+                        self.update_storage(file_info, is_delete=False)
+                        # remove from failed files if present
+                        if file_info.file_path in self.failed_files:
+                            self.failed_files.remove(file_info.file_path)
+                            save_failed_files(self.failed_files_path, self.failed_files)
+                    else:
+                        logger.warning(f"Empty result for file: {file_info.file_path}, treat as parse failed, skipping cache update")
+                        self.failed_files.add(file_info.file_path)
+                        save_failed_files(self.failed_files_path, self.failed_files)
+                except Exception as e:
+                    logger.error(f"Error in process_queue: {e}")
+                    self.failed_files.add(file_info.file_path)
+                    save_failed_files(self.failed_files_path, self.failed_files)
+
             self.write_cache()
 
     def trigger_update(self):
@@ -598,6 +624,10 @@ class LocalDuckDBStorageCache(BaseCacheManager):
         current_files = set()
         for file_info in self.get_all_files():
             current_files.add(file_info.file_path)
+            # skip failed files
+            if file_info.file_path in self.failed_files:
+                logger.info(f"文件 {file_info.file_path} 之前解析失败,跳过此次更新")
+                continue
            if (
                 file_info.file_path not in self.cache
                 or self.cache[file_info.file_path].md5 != file_info.file_md5
@@ -19,6 +19,7 @@ from loguru import logger
 from autocoder.rag.utils import process_file_in_multi_process, process_file_local
 from autocoder.rag.variable_holder import VariableHolder
 import hashlib
+from .failed_files_utils import load_failed_files, save_failed_files
 
 
 default_ignore_dirs = [
@@ -45,7 +46,7 @@ def generate_content_md5(content: Union[str, bytes]) -> str:
 
 
 class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
-    def __init__(self, path: str, ignore_spec, required_exts: list, update_interval: int = 5):
+    def __init__(self, path: str, ignore_spec, required_exts: list, update_interval: int = 5, args=None, llm=None):
         """
         Initialize the async update queue that manages the code-file cache.
@@ -91,24 +92,31 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
         self.path = path
         self.ignore_spec = ignore_spec
         self.required_exts = required_exts
+        self.args = args
+        self.llm = llm
         self.update_interval = update_interval
         self.queue = []
         self.cache = {}  # starts empty; filled later by read_cache()
         self.lock = threading.Lock()
         self.stop_event = threading.Event()
-
+
+        # set of file paths whose parsing previously failed
+        self.failed_files_path = os.path.join(self.path, ".cache", "failed_files.json")
+        self.failed_files = load_failed_files(self.failed_files_path)
+
         # start the thread that processes the queue
         self.queue_thread = threading.Thread(target=self._process_queue)
         self.queue_thread.daemon = True
         self.queue_thread.start()
-
+
         # start the thread that periodically triggers updates
         self.update_thread = threading.Thread(target=self._periodic_update)
         self.update_thread.daemon = True
         self.update_thread.start()
-
+
         self.cache = self.read_cache()
 
+
     def _process_queue(self):
         while not self.stop_event.is_set():
             try:
@@ -183,13 +191,18 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
         files_to_process = []
         current_files = set()
         for file_info in self.get_all_files():
-            file_path, _, _, file_md5 = file_info
+            file_path, relative_path, modify_time, file_md5 = file_info
             current_files.add(file_path)
+            # skip this incremental update if the file failed to parse before
+            if file_path in self.failed_files:
+                logger.info(f"文件 {file_path} 之前解析失败,跳过此次更新")
+                continue
+            # change detection
             if (
                 file_path not in self.cache
-                or self.cache[file_path].get("md5","") != file_md5
+                or self.cache[file_path].get("md5", "") != file_md5
             ):
-                files_to_process.append(file_info)
+                files_to_process.append((file_path, relative_path, modify_time, file_md5))
 
         deleted_files = set(self.cache.keys()) - current_files
         logger.info(f"files_to_process: {files_to_process}")
@@ -213,19 +226,34 @@ class AutoCoderRAGAsyncUpdateQueue(BaseCacheManager):
         if isinstance(file_list, DeleteEvent):
             for item in file_list.file_paths:
                 logger.info(f"{item} is detected to be removed")
-                del self.cache[item]
+                if item in self.cache:
+                    del self.cache[item]
+                # also drop the failure record on delete (the file may have been fixed)
+                if item in self.failed_files:
+                    self.failed_files.remove(item)
+                    save_failed_files(self.failed_files_path, self.failed_files)
         elif isinstance(file_list, AddOrUpdateEvent):
             for file_info in file_list.file_infos:
                 logger.info(f"{file_info.file_path} is detected to be updated")
                 try:
                     result = process_file_local(file_info.file_path)
-                    if result:  # only update the cache when the result is non-empty
+                    if result:
+                        # parsed successfully with a non-empty result
                         self.update_cache(self.fileinfo_to_tuple(file_info), result)
+                        # clear the failure record if this file failed before
+                        if file_info.file_path in self.failed_files:
+                            self.failed_files.remove(file_info.file_path)
+                            save_failed_files(self.failed_files_path, self.failed_files)
                     else:
-                        logger.warning(f"Empty result for file: {file_info.file_path}, skipping cache update")
+                        # an empty result also counts as a parse failure
+                        logger.warning(f"Empty result for file: {file_info.file_path}, treat as parse failed, skipping cache update")
+                        self.failed_files.add(file_info.file_path)
+                        save_failed_files(self.failed_files_path, self.failed_files)
                 except Exception as e:
-                    logger.error(
-                        f"SimpleCache Error in process_queue: {e}")
+                    logger.error(f"SimpleCache Error in process_queue: {e}")
+                    # record the parse failure
+                    self.failed_files.add(file_info.file_path)
+                    save_failed_files(self.failed_files_path, self.failed_files)
 
         self.write_cache()
 
@@ -2,7 +2,6 @@ import threading
 from typing import Dict, Generator, List, Tuple, Any, Optional,Union
 
 from byzerllm import ByzerLLM, SimpleByzerLLM
-
 from loguru import logger
 from autocoder.common import SourceCode
 from uuid import uuid4
@@ -37,6 +36,8 @@ class LocalDocumentRetriever(BaseDocumentRetriever):
 
     def __init__(
         self,
+        args: AutoCoderArgs,
+        llm: Union[ByzerLLM,SimpleByzerLLM],
         path: str,
         ignore_spec,
         required_exts: list,
@@ -45,9 +46,12 @@ class LocalDocumentRetriever(BaseDocumentRetriever):
         single_file_token_limit: int = 60000,
         disable_auto_window: bool = False,
         enable_hybrid_index: bool = False,
-        extra_params: Optional[AutoCoderArgs] = None,
-        emb_llm: Union[ByzerLLM, SimpleByzerLLM] = None,
+        extra_params: Optional['AutoCoderArgs'] = None,
+        emb_llm: Union['ByzerLLM', 'SimpleByzerLLM'] = None,
     ) -> None:
+        self.args = args
+        self.llm = llm
+
         self.path = path
         self.ignore_spec = ignore_spec
         self.required_exts = required_exts
@@ -65,27 +69,32 @@ class LocalDocumentRetriever(BaseDocumentRetriever):
         if self.enable_hybrid_index:
             if self.on_ray:
                 self.cacher = ByzerStorageCache(
-                    path, ignore_spec, required_exts, extra_params
+                    path, ignore_spec, required_exts, extra_params,
+                    args=self.args, llm=self.llm
                 )
             else:
                 if extra_params.rag_storage_type == "duckdb":
                     self.cacher = LocalDuckDBStorageCache(
                         path, ignore_spec, required_exts, extra_params,
-                        emb_llm=emb_llm
+                        emb_llm=emb_llm,
+                        args=self.args, llm=self.llm
                     )
                 elif extra_params.rag_storage_type in ["byzer-storage", "byzer_storage"]:
                     self.cacher = LocalByzerStorageCache(
                         path, ignore_spec, required_exts, extra_params,
-                        emb_llm=emb_llm
+                        emb_llm=emb_llm,
+                        args=self.args, llm=self.llm
                     )
         else:
             if self.monitor_mode:
                 self.cacher = AutoCoderRAGDocListener(
-                    path, ignore_spec, required_exts
+                    path, ignore_spec, required_exts,
+                    args=self.args, llm=self.llm
                 )
             else:
                 self.cacher = AutoCoderRAGAsyncUpdateQueue(
-                    path, ignore_spec, required_exts
+                    path, ignore_spec, required_exts,
+                    args=self.args, llm=self.llm
                 )
 
         logger.info(f"DocumentRetriever initialized with:")
@@ -183,6 +183,8 @@ class LongContextRAG:
                 "emb_llm is required for local byzer storage cache")
 
         self.document_retriever = retriever_class(
+            self.args,
+            self.llm,
             self.path,
             self.ignore_spec,
             self.required_exts,
@@ -92,18 +92,6 @@ class MultiRoundStrategy(QAConversationStrategy):
 {% endfor %}
 </documents>
 
-====
-
-{% if extra_docs %}
-AUTO EXTENSION DOCS
-
-The following extension documents are loaded dynamically to enhance your understanding or provide special instructions, rules, or context.
-
-{% for key, value in extra_docs.items() %}
-### {{ key }}
-{{ value }}
-{% endfor %}
-
 ====
 {% endif %}
 
@@ -124,6 +112,19 @@ class MultiRoundStrategy(QAConversationStrategy):
 - Format your answer with Markdown for readability.
 - Always use the language used by the user in their question.
 
+{% if extra_docs %}
+====
+
+RULES PROVIDED BY USER
+
+The following rules are provided by the user, and you must follow them strictly.
+
+{% for key, value in extra_docs.items() %}
+### {{ key }}
+{{ value }}
+{% endfor %}
+{% endif %}
+
 """
 
 import os
@@ -213,17 +214,6 @@ class SingleRoundStrategy(QAConversationStrategy):
 {% endfor %}
 </documents>
 
-====
-{% if extra_docs %}
-AUTO EXTENSION DOCS
-
-The following extension documents are loaded dynamically to enhance your understanding or provide special instructions, rules, or context.
-
-{% for key, value in extra_docs.items() %}
-### {{ key }}
-{{ value }}
-{% endfor %}
-
 ====
 {% endif %}
 
@@ -252,6 +242,19 @@ class SingleRoundStrategy(QAConversationStrategy):
 - Format your answer with Markdown for readability.
 - Always use the language used by the user in their question.
 
+{% if extra_docs %}
+====
+
+RULES PROVIDED BY USER
+
+The following rules are provided by the user, and you must follow them strictly.
+
+{% for key, value in extra_docs.items() %}
+### {{ key }}
+{{ value }}
+{% endfor %}
+{% endif %}
+
 """
 import os
 extra_docs = {}
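Both strategies now render the user's rules at the end of the system prompt instead of an "AUTO EXTENSION DOCS" block in the middle of it. A minimal sketch of how the relocated block renders, assuming the templates are Jinja2 (the {% if %}/{% for %} syntax above is Jinja-style); the rule content here is made up:

from jinja2 import Template

section = Template("""{% if extra_docs %}
====

RULES PROVIDED BY USER

The following rules are provided by the user, and you must follow them strictly.

{% for key, value in extra_docs.items() %}
### {{ key }}
{{ value }}
{% endfor %}
{% endif %}""")

print(section.render(extra_docs={"style.md": "Prefer small, focused functions."}))
print(section.render(extra_docs={}))  # empty when no rules are configured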
autocoder/version.py CHANGED
@@ -1 +1 @@
-__version__ = "0.1.346"
+__version__ = "0.1.347"