auto-coder 0.1.354__py3-none-any.whl → 0.1.356__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

Files changed (40)
  1. {auto_coder-0.1.354.dist-info → auto_coder-0.1.356.dist-info}/METADATA +1 -1
  2. {auto_coder-0.1.354.dist-info → auto_coder-0.1.356.dist-info}/RECORD +40 -35
  3. autocoder/agent/agentic_filter.py +1 -1
  4. autocoder/agent/auto_learn.py +631 -0
  5. autocoder/auto_coder.py +8 -0
  6. autocoder/auto_coder_runner.py +59 -87
  7. autocoder/chat/conf_command.py +270 -0
  8. autocoder/chat/models_command.py +485 -0
  9. autocoder/chat/rules_command.py +458 -0
  10. autocoder/chat_auto_coder.py +34 -24
  11. autocoder/chat_auto_coder_lang.py +156 -2
  12. autocoder/commands/auto_command.py +1 -1
  13. autocoder/commands/auto_web.py +1 -1
  14. autocoder/common/__init__.py +2 -0
  15. autocoder/common/auto_coder_lang.py +9 -1
  16. autocoder/common/command_completer.py +58 -12
  17. autocoder/common/command_completer_v2.py +615 -0
  18. autocoder/common/global_cancel.py +53 -16
  19. autocoder/common/rulefiles/autocoderrules_utils.py +83 -0
  20. autocoder/common/v2/agent/agentic_edit.py +4 -4
  21. autocoder/common/v2/code_agentic_editblock_manager.py +9 -9
  22. autocoder/common/v2/code_diff_manager.py +2 -2
  23. autocoder/common/v2/code_editblock_manager.py +11 -10
  24. autocoder/common/v2/code_strict_diff_manager.py +3 -2
  25. autocoder/dispacher/actions/action.py +6 -6
  26. autocoder/dispacher/actions/plugins/action_regex_project.py +2 -2
  27. autocoder/events/event_manager_singleton.py +1 -1
  28. autocoder/index/index.py +2 -2
  29. autocoder/rag/cache/local_byzer_storage_cache.py +1 -1
  30. autocoder/rag/cache/local_duckdb_storage_cache.py +8 -0
  31. autocoder/rag/loaders/image_loader.py +25 -13
  32. autocoder/rag/long_context_rag.py +2 -2
  33. autocoder/utils/auto_coder_utils/chat_stream_out.py +3 -4
  34. autocoder/utils/model_provider_selector.py +14 -2
  35. autocoder/utils/thread_utils.py +9 -27
  36. autocoder/version.py +1 -1
  37. {auto_coder-0.1.354.dist-info → auto_coder-0.1.356.dist-info}/LICENSE +0 -0
  38. {auto_coder-0.1.354.dist-info → auto_coder-0.1.356.dist-info}/WHEEL +0 -0
  39. {auto_coder-0.1.354.dist-info → auto_coder-0.1.356.dist-info}/entry_points.txt +0 -0
  40. {auto_coder-0.1.354.dist-info → auto_coder-0.1.356.dist-info}/top_level.txt +0 -0
@@ -14,32 +14,41 @@ class GlobalCancel:
14
14
  self._token_flags: Dict[str, bool] = {}
15
15
  self._lock = threading.Lock()
16
16
  self._context: Dict[str, Any] = {} # 存储与取消相关的上下文信息
17
+ self._active_tokens: set[str] = set() # 存储当前正在运行的token
17
18
 
18
- @property
19
- def requested(self) -> bool:
20
- """检查是否请求了全局取消(向后兼容)"""
19
+ def register_token(self, token: str) -> None:
20
+ """注册一个 token,表示一个操作开始,但尚未请求取消"""
21
+ with self._lock:
22
+ self._token_flags[token] = False
23
+ self._active_tokens.add(token)
24
+
25
+ def get_active_tokens(self) -> set[str]:
26
+ """获取当前正在运行的token"""
21
27
  with self._lock:
22
- return self._global_flag
28
+ return self._active_tokens.copy()
23
29
 
24
30
  def is_requested(self, token: Optional[str] = None) -> bool:
25
- """检查是否请求了特定token或全局的取消"""
26
- with self._lock:
27
- # 全局标志总是优先
28
- if self._global_flag:
29
- return True
30
- # 如果提供了token,检查该token的标志
31
- if token is not None and token in self._token_flags:
32
- return self._token_flags[token]
33
- return False
34
-
31
+ """检查是否请求了特定token或全局的取消"""
32
+ if token is not None and token in self._token_flags:
33
+ return self._token_flags[token]
34
+
35
+ if self._global_flag:
36
+ return True
37
+ return False
38
+
39
+ def set_active_tokens(self) -> None:
40
+ """启用所有活跃的token"""
41
+ for token in self._active_tokens:
42
+ self.set(token)
43
+
35
44
  def set(self, token: Optional[str] = None, context: Optional[Dict[str, Any]] = None) -> None:
36
45
  """设置特定token或全局的取消标志"""
37
46
  with self._lock:
38
47
  if token is None:
39
48
  self._global_flag = True
40
49
  else:
41
- self._token_flags[token] = True
42
-
50
+ self._token_flags[token] = True
51
+
43
52
  # 存储上下文
44
53
  if context:
45
54
  if token is None:
@@ -49,6 +58,21 @@ class GlobalCancel:
49
58
  self._context["tokens"] = {}
50
59
  self._context["tokens"][token] = context
51
60
 
61
+ def reset_global(self) -> None:
62
+ """重置全局取消标志"""
63
+ with self._lock:
64
+ self._global_flag = False
65
+
66
+ def reset_token(self, token: str) -> None:
67
+ """重置特定token的取消标志"""
68
+ with self._lock:
69
+ if token in self._token_flags:
70
+ del self._token_flags[token]
71
+ if "tokens" in self._context and token in self._context["tokens"]:
72
+ del self._context["tokens"][token]
73
+ if token:
74
+ self._active_tokens.discard(token) # 从活跃集合中移除
75
+
52
76
  def reset(self, token: Optional[str] = None) -> None:
53
77
  """重置特定token或全局的取消标志"""
54
78
  with self._lock:
@@ -57,12 +81,21 @@ class GlobalCancel:
57
81
  self._global_flag = False
58
82
  self._token_flags.clear()
59
83
  self._context.clear()
84
+ self._active_tokens.clear() # 清空活跃集合
60
85
  else:
61
86
  # 特定token重置
62
87
  if token in self._token_flags:
63
88
  del self._token_flags[token]
64
89
  if "tokens" in self._context and token in self._context["tokens"]:
65
90
  del self._context["tokens"][token]
91
+ if token:
92
+ self._active_tokens.discard(token) # 从活跃集合中移除
93
+
94
+ def reset_active_tokens(self) -> None:
95
+ """重置所有活跃的token"""
96
+ with self._lock:
97
+ for token in self._active_tokens.copy():
98
+ self.reset_token(token)
66
99
 
67
100
  def get_context(self, token: Optional[str] = None) -> Dict[str, Any]:
68
101
  """获取与取消相关的上下文信息"""
@@ -77,6 +110,10 @@ class GlobalCancel:
77
110
  """检查是否请求了取消,如果是则抛出异常"""
78
111
  if self.is_requested(token):
79
112
  context = self.get_context(token)
113
+ if token:
114
+ self.reset_token(token)
115
+ else:
116
+ self.reset_global()
80
117
  raise CancelRequestedException(token, context.get("message", "Operation was cancelled"))
81
118
 
82
119
  global_cancel = GlobalCancel()
@@ -4,6 +4,9 @@ from threading import Lock
4
4
  import threading
5
5
  from typing import Dict, List, Optional
6
6
  from loguru import logger
7
+ import re
8
+ import yaml
9
+ from pydantic import BaseModel, Field
7
10
 
8
11
  # 尝试导入 FileMonitor
9
12
  try:
@@ -15,6 +18,15 @@ except ImportError:
15
18
  Change = None
16
19
 
17
20
 
21
+ class RuleFile(BaseModel):
22
+ """规则文件的Pydantic模型"""
23
+ description: str = Field(default="", description="规则的描述")
24
+ globs: List[str] = Field(default_factory=list, description="文件匹配模式列表")
25
+ always_apply: bool = Field(default=False, alias="alwaysApply", description="是否总是应用规则")
26
+ content: str = Field(default="", description="规则文件的正文内容")
27
+ file_path: str = Field(default="", description="规则文件的路径")
28
+
29
+
18
30
  class AutocoderRulesManager:
19
31
  """
20
32
  管理和监控 autocoderrules 目录中的规则文件。
@@ -157,9 +169,66 @@ class AutocoderRulesManager:
157
169
  self._load_rules()
158
170
  logger.info("已重新加载规则")
159
171
 
172
+ def parse_rule_file(self, file_path: str) -> RuleFile:
173
+ """
174
+ 解析规则文件并返回结构化的Pydantic模型对象
175
+
176
+ Args:
177
+ file_path: 规则文件的路径
178
+
179
+ Returns:
180
+ RuleFile: 包含规则文件结构化内容的Pydantic模型
181
+ """
182
+ if not os.path.exists(file_path) or not file_path.endswith('.md'):
183
+ logger.warning(f"无效的规则文件路径: {file_path}")
184
+ return RuleFile(file_path=file_path)
185
+
186
+ try:
187
+ with open(file_path, 'r', encoding='utf-8') as f:
188
+ content = f.read()
189
+
190
+ # 解析YAML头部和Markdown内容
191
+ yaml_pattern = re.compile(r'^---\s*\n(.*?)\n---\s*\n', re.DOTALL)
192
+ yaml_match = yaml_pattern.search(content)
193
+
194
+ metadata = {}
195
+ markdown_content = content
196
+
197
+ if yaml_match:
198
+ yaml_content = yaml_match.group(1)
199
+ try:
200
+ metadata = yaml.safe_load(yaml_content)
201
+ # 移除YAML部分,仅保留Markdown内容
202
+ markdown_content = content[yaml_match.end():]
203
+ except Exception as e:
204
+ logger.warning(f"解析规则文件YAML头部时出错: {e}")
205
+
206
+ # 创建并返回Pydantic模型
207
+ rule = RuleFile(
208
+ description=metadata.get('description', ''),
209
+ globs=metadata.get('globs', []),
210
+ always_apply=metadata.get('alwaysApply', False),
211
+ content=markdown_content.strip(),
212
+ file_path=file_path
213
+ )
214
+
215
+ return rule
216
+
217
+ except Exception as e:
218
+ logger.warning(f"解析规则文件时出错: {file_path}, 错误: {e}")
219
+ return RuleFile(file_path=file_path)
220
+
160
221
  def get_rules(self) -> Dict[str, str]:
161
222
  """获取所有规则文件内容"""
162
223
  return self._rules.copy()
224
+
225
+ def get_parsed_rules(self) -> List[RuleFile]:
226
+ """获取所有解析后的规则文件"""
227
+ parsed_rules = []
228
+ for file_path in self._rules:
229
+ parsed_rule = self.parse_rule_file(file_path)
230
+ parsed_rules.append(parsed_rule)
231
+ return parsed_rules
163
232
 
164
233
 
165
234
  # 对外提供单例
@@ -171,3 +240,17 @@ def get_rules(project_root: Optional[str] = None) -> Dict[str, str]:
171
240
  if _rules_manager is None:
172
241
  _rules_manager = AutocoderRulesManager(project_root=project_root)
173
242
  return _rules_manager.get_rules()
243
+
244
+ def get_parsed_rules(project_root: Optional[str] = None) -> List[RuleFile]:
245
+ """获取所有解析后的规则文件,可指定项目根目录"""
246
+ global _rules_manager
247
+ if _rules_manager is None:
248
+ _rules_manager = AutocoderRulesManager(project_root=project_root)
249
+ return _rules_manager.get_parsed_rules()
250
+
251
+ def parse_rule_file(file_path: str, project_root: Optional[str] = None) -> RuleFile:
252
+ """解析指定的规则文件,可指定项目根目录"""
253
+ global _rules_manager
254
+ if _rules_manager is None:
255
+ _rules_manager = AutocoderRulesManager(project_root=project_root)
256
+ return _rules_manager.parse_rule_file(file_path)
@@ -785,7 +785,7 @@ Below are some files the user is focused on, and the content is up to date. Thes
785
785
  while True:
786
786
  iteration_count += 1
787
787
  logger.info(f"Starting LLM interaction cycle #{iteration_count}")
788
- global_cancel.check_and_raise()
788
+ global_cancel.check_and_raise(token=self.args.event_file)
789
789
  last_message = conversations[-1]
790
790
  if last_message["role"] == "assistant":
791
791
  logger.info(f"Last message is assistant, skipping LLM interaction cycle")
@@ -814,8 +814,8 @@ Below are some files the user is focused on, and the content is up to date. Thes
814
814
  event_count = 0
815
815
  for event in parsed_events:
816
816
  event_count += 1
817
- logger.info(f"Processing event #{event_count}: {type(event).__name__}")
818
- global_cancel.check_and_raise()
817
+ logger.info(f"Processing event #{event_count}: {type(event).__name__}")
818
+ global_cancel.check_and_raise(token=self.args.event_file)
819
819
  if isinstance(event, (LLMOutputEvent, LLMThinkingEvent)):
820
820
  assistant_buffer += event.text
821
821
  logger.debug(f"Accumulated {len(assistant_buffer)} chars in assistant buffer")
@@ -1033,7 +1033,7 @@ Below are some files the user is focused on, and the content is up to date. Thes
1033
1033
  return None
1034
1034
 
1035
1035
  for content_chunk, metadata in generator:
1036
- global_cancel.check_and_raise()
1036
+ global_cancel.check_and_raise(token=self.args.event_file)
1037
1037
  meta_holder.meta = metadata
1038
1038
  if not content_chunk:
1039
1039
  continue
@@ -347,7 +347,7 @@ class CodeEditBlockManager:
347
347
 
348
348
  # 计算这次修复缺失上下文花费的token情况
349
349
  token_cost_calculator.track_token_usage_by_generate(
350
- llm=self.llm,
350
+ llm=self.code_generator.llms[0],
351
351
  generate=generation_result,
352
352
  operation_name="code_generation_complete",
353
353
  start_time=start_time,
@@ -433,7 +433,7 @@ class CodeEditBlockManager:
433
433
  return (unmerged_formatted_text, merged_formatted_text)
434
434
 
435
435
  for attempt in range(self.args.auto_fix_merge_max_attempts):
436
- global_cancel.check_and_raise()
436
+ global_cancel.check_and_raise(token=self.args.event_file)
437
437
  unmerged_formatted_text, merged_formatted_text = _format_blocks(
438
438
  merge)
439
439
  fix_prompt = self.fix_unmerged_blocks.prompt(
@@ -470,7 +470,7 @@ class CodeEditBlockManager:
470
470
 
471
471
  # 计算这次修复未合并块花费的token情况
472
472
  token_cost_calculator.track_token_usage_by_generate(
473
- llm=self.llm,
473
+ llm=self.code_generator.llms[0],
474
474
  generate=generation_result,
475
475
  operation_name="code_generation_complete",
476
476
  start_time=start_time,
@@ -544,7 +544,7 @@ class CodeEditBlockManager:
544
544
  token_cost_calculator = TokenCostCalculator(args=self.args)
545
545
 
546
546
  for attempt in range(self.auto_fix_lint_max_attempts):
547
- global_cancel.check_and_raise()
547
+ global_cancel.check_and_raise(token=self.args.event_file)
548
548
  # 代码生成结果更新到影子文件里去
549
549
  self.shadow_manager.clean_shadows()
550
550
  shadow_files = self._create_shadow_files_from_edits(
@@ -611,7 +611,7 @@ class CodeEditBlockManager:
611
611
 
612
612
  # 计算这次修复lint问题花费的token情况
613
613
  token_cost_calculator.track_token_usage_by_generate(
614
- llm=self.llm,
614
+ llm=self.code_generator.llms[0],
615
615
  generate=generation_result,
616
616
  operation_name="code_generation_complete",
617
617
  start_time=start_time,
@@ -657,7 +657,7 @@ class CodeEditBlockManager:
657
657
  token_cost_calculator = TokenCostCalculator(args=self.args)
658
658
 
659
659
  for attempt in range(self.auto_fix_compile_max_attempts):
660
- global_cancel.check_and_raise()
660
+ global_cancel.check_and_raise(token=self.args.event_file)
661
661
  # 先更新增量影子系统的文件
662
662
  self.shadow_manager.clean_shadows()
663
663
  shadow_files = self._create_shadow_files_from_edits(
@@ -710,7 +710,7 @@ class CodeEditBlockManager:
710
710
 
711
711
  # 计算这次修复compile问题花费的token情况
712
712
  token_cost_calculator.track_token_usage_by_generate(
713
- llm=self.llm,
713
+ llm=self.code_generator.llms[0],
714
714
  generate=generation_result,
715
715
  operation_name="code_generation_complete",
716
716
  start_time=start_time,
@@ -749,7 +749,7 @@ class CodeEditBlockManager:
749
749
 
750
750
  token_cost_calculator = TokenCostCalculator(args=self.args)
751
751
  token_cost_calculator.track_token_usage_by_generate(
752
- llm=self.llm,
752
+ llm=self.code_generator.llms[0],
753
753
  generate=generation_result,
754
754
  operation_name="code_generation_complete",
755
755
  start_time=start_time,
@@ -804,7 +804,7 @@ class CodeEditBlockManager:
804
804
  # 生成代码并自动修复lint错误
805
805
 
806
806
  generation_result = self.generate_and_fix(query, source_code_list)
807
- global_cancel.check_and_raise()
807
+ global_cancel.check_and_raise(token=self.args.event_file)
808
808
 
809
809
  # 合并代码
810
810
  self.code_merger.merge_code(generation_result)
@@ -251,7 +251,7 @@ class CodeDiffManager:
251
251
 
252
252
  # 最多尝试修复5次
253
253
  for attempt in range(self.max_correction_attempts):
254
- global_cancel.check_and_raise()
254
+ global_cancel.check_and_raise(token=self.args.event_file)
255
255
  # 代码生成结果更新到影子文件里去
256
256
  shadow_files = self._create_shadow_files_from_edits(generation_result)
257
257
 
@@ -326,7 +326,7 @@ class CodeDiffManager:
326
326
  """
327
327
  # 生成代码并自动修复lint错误
328
328
  generation_result = self.generate_and_fix(query, source_code_list)
329
- global_cancel.check_and_raise()
329
+ global_cancel.check_and_raise(token=self.args.event_file)
330
330
  # 合并代码
331
331
  self.code_merger.merge_code(generation_result)
332
332
 
@@ -24,6 +24,7 @@ from autocoder.shadows.shadow_manager import ShadowManager
24
24
  from autocoder.linters.shadow_linter import ShadowLinter
25
25
  from autocoder.linters.models import IssueSeverity
26
26
  from loguru import logger
27
+ from autocoder.utils.llms import get_llm_names
27
28
  from autocoder.common.global_cancel import global_cancel
28
29
  from autocoder.linters.models import ProjectLintResult
29
30
  from autocoder.common.token_cost_caculate import TokenCostCalculator
@@ -347,7 +348,7 @@ class CodeEditBlockManager:
347
348
 
348
349
  # 计算这次修复缺失上下文花费的token情况
349
350
  token_cost_calculator.track_token_usage_by_generate(
350
- llm=self.llm,
351
+ llm=self.code_generator.llms[0],
351
352
  generate=generation_result,
352
353
  operation_name="code_generation_complete",
353
354
  start_time=start_time,
@@ -445,7 +446,7 @@ class CodeEditBlockManager:
445
446
  return (unmerged_formatted_text, merged_formatted_text)
446
447
 
447
448
  for attempt in range(self.args.auto_fix_merge_max_attempts):
448
- global_cancel.check_and_raise()
449
+ global_cancel.check_and_raise(token=self.args.event_file)
449
450
  unmerged_formatted_text, merged_formatted_text = _format_blocks(
450
451
  merge)
451
452
  fix_prompt = self.fix_unmerged_blocks.prompt(
@@ -482,7 +483,7 @@ class CodeEditBlockManager:
482
483
 
483
484
  # 计算这次修复未合并块花费的token情况
484
485
  token_cost_calculator.track_token_usage_by_generate(
485
- llm=self.llm,
486
+ llm=self.code_generator.llms[0],
486
487
  generate=generation_result,
487
488
  operation_name="code_generation_complete",
488
489
  start_time=start_time,
@@ -556,7 +557,7 @@ class CodeEditBlockManager:
556
557
  token_cost_calculator = TokenCostCalculator(args=self.args)
557
558
 
558
559
  for attempt in range(self.auto_fix_lint_max_attempts):
559
- global_cancel.check_and_raise()
560
+ global_cancel.check_and_raise(token=self.args.event_file)
560
561
  # 代码生成结果更新到影子文件里去
561
562
  self.shadow_manager.clean_shadows()
562
563
  shadow_files = self._create_shadow_files_from_edits(
@@ -623,7 +624,7 @@ class CodeEditBlockManager:
623
624
 
624
625
  # 计算这次修复lint问题花费的token情况
625
626
  token_cost_calculator.track_token_usage_by_generate(
626
- llm=self.llm,
627
+ llm=self.code_generator.llms[0],
627
628
  generate=generation_result,
628
629
  operation_name="code_generation_complete",
629
630
  start_time=start_time,
@@ -669,7 +670,7 @@ class CodeEditBlockManager:
669
670
  token_cost_calculator = TokenCostCalculator(args=self.args)
670
671
 
671
672
  for attempt in range(self.auto_fix_compile_max_attempts):
672
- global_cancel.check_and_raise()
673
+ global_cancel.check_and_raise(token=self.args.event_file)
673
674
  # 先更新增量影子系统的文件
674
675
  self.shadow_manager.clean_shadows()
675
676
  shadow_files = self._create_shadow_files_from_edits(
@@ -722,7 +723,7 @@ class CodeEditBlockManager:
722
723
 
723
724
  # 计算这次修复compile问题花费的token情况
724
725
  token_cost_calculator.track_token_usage_by_generate(
725
- llm=self.llm,
726
+ llm=self.code_generator.llms[0],
726
727
  generate=generation_result,
727
728
  operation_name="code_generation_complete",
728
729
  start_time=start_time,
@@ -759,9 +760,9 @@ class CodeEditBlockManager:
759
760
  generation_result = self.code_generator.single_round_run(
760
761
  query, source_code_list)
761
762
 
762
- token_cost_calculator = TokenCostCalculator(args=self.args)
763
+ token_cost_calculator = TokenCostCalculator(args=self.args)
763
764
  token_cost_calculator.track_token_usage_by_generate(
764
- llm=self.llm,
765
+ llm=self.code_generator.llms[0],
765
766
  generate=generation_result,
766
767
  operation_name="code_generation_complete",
767
768
  start_time=start_time,
@@ -816,7 +817,7 @@ class CodeEditBlockManager:
816
817
  # 生成代码并自动修复lint错误
817
818
 
818
819
  generation_result = self.generate_and_fix(query, source_code_list)
819
- global_cancel.check_and_raise()
820
+ global_cancel.check_and_raise(token=self.args.event_file)
820
821
 
821
822
  # 合并代码
822
823
  self.code_merger.merge_code(generation_result)
@@ -32,6 +32,7 @@ class CodeStrictDiffManager:
32
32
  self.code_merger = CodeAutoMergeStrictDiff(llm, args)
33
33
  self.shadow_manager = ShadowManager(args.source_dir, args.event_file)
34
34
  self.shadow_linter = ShadowLinter(self.shadow_manager, verbose=False)
35
+ self.args = args
35
36
 
36
37
  @byzerllm.prompt()
37
38
  def fix_linter_errors(self, query: str, lint_issues: str) -> str:
@@ -159,7 +160,7 @@ class CodeStrictDiffManager:
159
160
 
160
161
  # 最多尝试修复5次
161
162
  for attempt in range(self.max_correction_attempts):
162
- global_cancel.check_and_raise()
163
+ global_cancel.check_and_raise(token=self.args.event_file)
163
164
  # 代码生成结果更新到影子文件里去
164
165
  shadow_files = self._create_shadow_files_from_edits(generation_result)
165
166
 
@@ -234,7 +235,7 @@ class CodeStrictDiffManager:
234
235
  """
235
236
  # 生成代码并自动修复lint错误
236
237
  generation_result = self.generate_and_fix(query, source_code_list)
237
- global_cancel.check_and_raise()
238
+ global_cancel.check_and_raise(token=self.args.event_file)
238
239
  # 合并代码
239
240
  self.code_merger.merge_code(generation_result)
240
241
 
@@ -113,7 +113,7 @@ class ActionTSProject(BaseAction):
113
113
  f"Content(send to model) is {content_length} tokens, which is larger than the maximum input length {self.args.model_max_input_length}"
114
114
  )
115
115
 
116
- global_cancel.check_and_raise()
116
+ global_cancel.check_and_raise(token=self.args.event_file)
117
117
 
118
118
  if (args.enable_auto_fix_merge or args.enable_auto_fix_lint) and args.execute and args.auto_merge=="editblock":
119
119
  code_merge_manager = CodeEditBlockManager(llm=self.llm, args=self.args,action=self)
@@ -179,7 +179,7 @@ class ActionTSProject(BaseAction):
179
179
  action_file=self.args.file
180
180
  ).to_dict())
181
181
 
182
- global_cancel.check_and_raise()
182
+ global_cancel.check_and_raise(token=self.args.event_file)
183
183
 
184
184
  merge_result = None
185
185
  if args.execute and args.auto_merge:
@@ -267,7 +267,7 @@ class ActionPyProject(BaseAction):
267
267
  max_length=self.args.model_max_input_length
268
268
  )
269
269
 
270
- global_cancel.check_and_raise()
270
+ global_cancel.check_and_raise(token=self.args.event_file)
271
271
 
272
272
  if (args.enable_auto_fix_merge or args.enable_auto_fix_lint) and args.execute and args.auto_merge=="editblock":
273
273
  code_merge_manager = CodeEditBlockManager(llm=self.llm, args=self.args,action=self)
@@ -334,7 +334,7 @@ class ActionPyProject(BaseAction):
334
334
  action_file=self.args.file
335
335
  ).to_dict())
336
336
 
337
- global_cancel.check_and_raise()
337
+ global_cancel.check_and_raise(token=self.args.event_file)
338
338
 
339
339
  merge_result = None
340
340
  if args.execute and args.auto_merge:
@@ -415,7 +415,7 @@ class ActionSuffixProject(BaseAction):
415
415
  f"Content(send to model) is {content_length} tokens, which is larger than the maximum input length {self.args.model_max_input_length}"
416
416
  )
417
417
 
418
- global_cancel.check_and_raise()
418
+ global_cancel.check_and_raise(token=self.args.event_file)
419
419
 
420
420
  if (args.enable_auto_fix_merge or args.enable_auto_fix_lint) and args.execute and args.auto_merge=="editblock":
421
421
  code_merge_manager = CodeEditBlockManager(llm=self.llm, args=self.args,action=self)
@@ -481,7 +481,7 @@ class ActionSuffixProject(BaseAction):
481
481
  action_file=self.args.file
482
482
  ).to_dict())
483
483
 
484
- global_cancel.check_and_raise()
484
+ global_cancel.check_and_raise(token=self.args.event_file)
485
485
 
486
486
  merge_result = None
487
487
  if args.execute and args.auto_merge:
@@ -67,7 +67,7 @@ class ActionRegexProject:
67
67
 
68
68
  start_time = time.time()
69
69
 
70
- global_cancel.check_and_raise()
70
+ global_cancel.check_and_raise(token=self.args.event_file)
71
71
 
72
72
  if (args.enable_auto_fix_merge or args.enable_auto_fix_lint) and args.execute and args.auto_merge=="editblock":
73
73
  code_merge_manager = CodeEditBlockManager(llm=self.llm, args=self.args,action=self)
@@ -128,7 +128,7 @@ class ActionRegexProject:
128
128
  action_file=self.args.file
129
129
  ).to_dict())
130
130
 
131
- global_cancel.check_and_raise()
131
+ global_cancel.check_and_raise(token=self.args.event_file)
132
132
 
133
133
  merge_result = None
134
134
  if args.execute and args.auto_merge:
@@ -71,7 +71,7 @@ class EventManagerSingleton:
71
71
 
72
72
  def get_event_file_path(file_id:str,project_path: Optional[str] = None) -> str:
73
73
  if project_path is None:
74
- return os.path.join(".auto-coder", "events", f"{file_id}.jsonl")
74
+ return os.path.join(os.getcwd(),".auto-coder", "events", f"{file_id}.jsonl")
75
75
  else:
76
76
  return os.path.join(project_path, ".auto-coder", "events", f"{file_id}.jsonl")
77
77
 
autocoder/index/index.py CHANGED
@@ -298,7 +298,7 @@ class IndexManager:
298
298
  return False
299
299
 
300
300
  def build_index_for_single_source(self, source: SourceCode):
301
- global_cancel.check_and_raise()
301
+ global_cancel.check_and_raise(token=self.args.event_file)
302
302
 
303
303
  file_path = source.module_name
304
304
  if not os.path.exists(file_path):
@@ -575,7 +575,7 @@ class IndexManager:
575
575
  for source in wait_to_build_files
576
576
  ]
577
577
  for future in as_completed(futures):
578
- global_cancel.check_and_raise()
578
+ global_cancel.check_and_raise(token=self.args.event_file)
579
579
  result = future.result()
580
580
  if result is not None:
581
581
  counter += 1
@@ -269,7 +269,7 @@ class LocalByzerStorageCache(BaseCacheManager):
269
269
  "file_path": file_info.file_path,
270
270
  "content": chunk[0:self.chunk_size*2],
271
271
  "raw_content": chunk[0:self.chunk_size*2],
272
- "vector": chunk[0:self.chunk_size*2],
272
+ "vector": chunk[0:self.args.rag_emb_text_size],
273
273
  "mtime": file_info.modify_time,
274
274
  }
275
275
  items.append(chunk_item)
@@ -88,6 +88,7 @@ class LocalDuckdbStorage:
88
88
  table_name: str = "documents",
89
89
  embed_dim: Optional[int] = None,
90
90
  persist_dir: str = "./storage",
91
+ args: Optional[AutoCoderArgs] = None,
91
92
  ) -> None:
92
93
  self.llm = llm
93
94
  self.database_name = database_name
@@ -95,6 +96,7 @@ class LocalDuckdbStorage:
95
96
  self.embed_dim = embed_dim
96
97
  self.persist_dir = persist_dir
97
98
  self.cache_dir = os.path.join(self.persist_dir, ".cache")
99
+ self.args = args
98
100
  logger.info(f"正在启动 DuckDBVectorStore.")
99
101
 
100
102
  if self.database_name != ":memory:":
@@ -239,6 +241,11 @@ class LocalDuckdbStorage:
239
241
  def _node_to_table_row(
240
242
  self, context_chunk: Dict[str, str | float], dim: int | None = None
241
243
  ) -> Any:
244
+
245
+ if not context_chunk["raw_content"]:
246
+ context_chunk["raw_content"] = "empty"
247
+ context_chunk["raw_content"] = context_chunk["raw_content"][: self.args.rag_emb_text_size]
248
+
242
249
  return (
243
250
  context_chunk["_id"],
244
251
  context_chunk["file_path"],
@@ -332,6 +339,7 @@ class LocalDuckDBStorageCache(BaseCacheManager):
332
339
  database_name="byzerai_store_duckdb.db",
333
340
  table_name="rag_duckdb",
334
341
  persist_dir=self.path,
342
+ args=args,
335
343
  )
336
344
  self.queue = []
337
345
  self.chunk_size = 1000
@@ -33,6 +33,9 @@ class ImageLoader:
33
33
  and converting the content to markdown format.
34
34
  """
35
35
 
36
+ # 存储不同参数组合的PaddleOCR实例
37
+ _ocr_instances = {}
38
+
36
39
  @staticmethod
37
40
  def parse_diff(diff_content: str) -> List[Tuple[str, str]]:
38
41
  """
@@ -106,19 +109,28 @@ class ImageLoader:
106
109
  print("paddleocr not installed")
107
110
  return ""
108
111
 
109
- # 初始化 OCR
110
- try:
111
- ocr = PaddleOCR(
112
- use_angle_cls=use_angle_cls,
113
- lang=lang,
114
- page_num=page_num,
115
- det_model_dir=det_model_dir,
116
- rec_model_dir=rec_model_dir,
117
- **kwargs
118
- )
119
- except Exception:
120
- traceback.print_exc()
121
- return ""
112
+ # 创建一个参数的哈希键,用于在缓存中存储OCR实例
113
+ param_key = f"{lang}_{use_angle_cls}_{page_num}_{det_model_dir}_{rec_model_dir}_{hash(frozenset(kwargs.items()) if kwargs else 0)}"
114
+
115
+ # 检查是否已经有对应参数的OCR实例
116
+ if param_key not in ImageLoader._ocr_instances:
117
+ try:
118
+ # 初始化OCR并缓存
119
+ ImageLoader._ocr_instances[param_key] = PaddleOCR(
120
+ use_angle_cls=use_angle_cls,
121
+ lang=lang,
122
+ page_num=page_num,
123
+ det_model_dir=det_model_dir,
124
+ rec_model_dir=rec_model_dir,
125
+ **kwargs
126
+ )
127
+ logger.info(f"初始化新的PaddleOCR实例,参数:{param_key}")
128
+ except Exception:
129
+ traceback.print_exc()
130
+ return ""
131
+
132
+ # 使用缓存的OCR实例
133
+ ocr = ImageLoader._ocr_instances[param_key]
122
134
 
123
135
  try:
124
136
  ext = os.path.splitext(file_path)[1].lower()