auto-coder 0.1.347__py3-none-any.whl → 0.1.348__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: auto-coder
3
- Version: 0.1.347
3
+ Version: 0.1.348
4
4
  Summary: AutoCoder: AutoCoder
5
5
  Author: allwefantasy
6
6
  Classifier: Programming Language :: Python :: 3.10
@@ -4,7 +4,7 @@ autocoder/auto_coder_lang.py,sha256=Rtupq6N3_HT7JRhDKdgCBcwRaiAnyCOR_Gsp4jUomrI,
4
4
  autocoder/auto_coder_rag.py,sha256=NesRm7sIJrRQL1xxm_lbMtM7gi-KrYv9f26RfBuloZE,35386
5
5
  autocoder/auto_coder_rag_client_mcp.py,sha256=QRxUbjc6A8UmDMQ8lXgZkjgqtq3lgKYeatJbDY6rSo0,6270
6
6
  autocoder/auto_coder_rag_mcp.py,sha256=-RrjNwFaS2e5v8XDIrKR-zlUNUE8UBaeOtojffBrvJo,8521
7
- autocoder/auto_coder_runner.py,sha256=2hI2095L5GWP1J04Lr7xBBTDOcNq_l4-HctcJ8bVaFE,111833
7
+ autocoder/auto_coder_runner.py,sha256=LV2QJZC9CEpDYkQEjVh5Ih5Ga5rhHvoW3J4G5cri8Z8,111857
8
8
  autocoder/auto_coder_server.py,sha256=bLORGEclcVdbBVfM140JCI8WtdrU0jbgqdJIVVupiEU,20578
9
9
  autocoder/benchmark.py,sha256=Ypomkdzd1T3GE6dRICY3Hj547dZ6_inqJbBJIp5QMco,4423
10
10
  autocoder/chat_auto_coder.py,sha256=CthuvdjVjTQOVv-zREsl8OCsZHPSP9OQcIgHULrW2Ro,25842
@@ -14,7 +14,7 @@ autocoder/command_parser.py,sha256=fx1g9E6GaM273lGTcJqaFQ-hoksS_Ik2glBMnVltPCE,1
14
14
  autocoder/lang.py,sha256=PFtATuOhHRnfpqHQkXr6p4C893JvpsgwTMif3l-GEi0,14321
15
15
  autocoder/models.py,sha256=_SCar82QIeBFTZZBdM2jPS6atKVhHnvE0gX3V0CsxD4,11590
16
16
  autocoder/run_context.py,sha256=IUfSO6_gp2Wt1blFWAmOpN0b0nDrTTk4LmtCYUBIoro,1643
17
- autocoder/version.py,sha256=R1KcSahF3DMgAzBREOFaR3Pjcqm9a3u4b36Hb2RFm50,23
17
+ autocoder/version.py,sha256=1h2cvc5WOu9wxtFBJmh-3rclKh09vlAmUylW6SDUges,23
18
18
  autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
19
  autocoder/agent/agentic_edit.py,sha256=XsfePZ-t6M-uBSdG1VLZXk1goqXk2HPeJ_A8IYyBuWQ,58896
20
20
  autocoder/agent/agentic_edit_types.py,sha256=oFcDd_cxJ2yH9Ed1uTpD3BipudgoIEWDMPb5pAkq4gI,3288
@@ -54,7 +54,7 @@ autocoder/common/action_yml_file_manager.py,sha256=DdF5P1R_B_chCnnqoA2IgogakWLZk
54
54
  autocoder/common/anything2images.py,sha256=0ILBbWzY02M-CiWB-vzuomb_J1hVdxRcenAfIrAXq9M,25283
55
55
  autocoder/common/anything2img.py,sha256=iZQmg8srXlD7N5uGl5b_ONKJMBjYoW8kPmokkG6ISF0,10118
56
56
  autocoder/common/audio.py,sha256=Kn9nWKQddWnUrAz0a_ZUgjcu4VUU_IcZBigT7n3N3qc,7439
57
- autocoder/common/auto_coder_lang.py,sha256=bqBoICLIvi9l8jRCwcNLWR6n5pI3ix7YDPGpmqQDmgc,42677
57
+ autocoder/common/auto_coder_lang.py,sha256=TqyxigNAEg_Ouji-cGMx5alkFjboUDgFPSOwYgYAnNM,42757
58
58
  autocoder/common/auto_configure.py,sha256=D4N-fl9v8bKM5-Ds-uhkC2uGDmHH_ZjLJ759F8KXMKs,13129
59
59
  autocoder/common/buildin_tokenizer.py,sha256=L7d5t39ZFvUd6EoMPXUhYK1toD0FHlRH1jtjKRGokWU,1236
60
60
  autocoder/common/chunk_validation.py,sha256=BrR_ZWavW8IANuueEE7hS8NFAwEvm8TX34WnPx_1hs8,3030
@@ -126,7 +126,7 @@ autocoder/common/v2/code_editblock_manager.py,sha256=G0CIuV9Ki0FqMLnpA8nBT4pnkCN
126
126
  autocoder/common/v2/code_manager.py,sha256=C403bS-f6urixwitlKHcml-J03hci-UyNwHJOqBiY6Q,9182
127
127
  autocoder/common/v2/code_strict_diff_manager.py,sha256=v-J1kDyLg7tLGg_6_lbO9S4fNkx7M_L8Xr2G7fPptiU,9347
128
128
  autocoder/common/v2/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
129
- autocoder/common/v2/agent/agentic_edit.py,sha256=pxgKD-FZaaRQkPgWsxvLleUXDRFC8qSlYfNUHU6ZENU,94794
129
+ autocoder/common/v2/agent/agentic_edit.py,sha256=qPEvDvK2iF9eyTlWqJMCL6BDzpS1fCbQsTaG4vExyd8,94942
130
130
  autocoder/common/v2/agent/agentic_edit_conversation.py,sha256=qLLhTegH619JQTp3s1bj5FVn2hAcoV-DlhGO3UyIOMc,7338
131
131
  autocoder/common/v2/agent/agentic_edit_types.py,sha256=VJMrictg6hJ3mC45VgQGRd43DyDUPDUvPV1Rf3z72NI,4776
132
132
  autocoder/common/v2/agent/agentic_tool_display.py,sha256=WKirt-2V346KLnbHgH3NVJiK3xvriD9oaCWj2IdvzLU,7309
@@ -199,7 +199,7 @@ autocoder/memory/active_context_manager.py,sha256=nqWD4lBLNcskXDRERhPpqnmn_i1V7_
199
199
  autocoder/memory/active_package.py,sha256=NHLLnncFSfFcOFLWILwJLuEVd4nOoL0mqzFev6QHgzU,25480
200
200
  autocoder/memory/async_processor.py,sha256=htHzLGupw9IHQAEdLe2AEaALZSItPi3AltDt8FMTRHk,4643
201
201
  autocoder/memory/directory_mapper.py,sha256=BXHblOdRpeZb7URDECALp9uN5oi91KmkW9g_UaWFuZY,2513
202
- autocoder/plugins/__init__.py,sha256=uc8UNSAVb9uQvr5zUSS-Xu_RhrdbO2i3w2NhkrVtODM,43023
202
+ autocoder/plugins/__init__.py,sha256=T71wGXBP24NhFyf9BntKDXWPIvKp5hWhq_-xK7C-ptE,43722
203
203
  autocoder/plugins/dynamic_completion_example.py,sha256=dDTeISsGqcWjxY_PzbFSZ4Q7_QwYUcuHUdWJT8x7q-4,4693
204
204
  autocoder/plugins/git_helper_plugin.py,sha256=nKQWkU-nQ39A4WC8nC8KI-x68VjwVARQvtrtQQ1GTMI,8386
205
205
  autocoder/plugins/sample_plugin.py,sha256=0Hn1SdLovSPwMamooXyfcX5JGzsROt238bMbBhE6aIk,5215
@@ -244,6 +244,7 @@ autocoder/rag/cache/simple_cache.py,sha256=yrGgRXGcMNrWSQOviPshm3Qlo2QDNVFoRZPLN
244
244
  autocoder/rag/loaders/__init__.py,sha256=EQHEZ5Cmz-mGP2SllUTvcIbYCnF7W149dNpNItfs0yE,304
245
245
  autocoder/rag/loaders/docx_loader.py,sha256=ZswPqiiLngUEpzLhNNm1nmwEYV7ZHFEfIoXoG7c5GDU,614
246
246
  autocoder/rag/loaders/excel_loader.py,sha256=Ue8YB1z_kBs8SjIPuBskyM08Q1JiONs_BJZPrzi59oo,896
247
+ autocoder/rag/loaders/image_loader.py,sha256=UMzVDG4wmqpXYlWclNvFWCC0UHr_sWBj36RVBnjaIoA,20218
247
248
  autocoder/rag/loaders/pdf_loader.py,sha256=9cLPdhfSiedvEUFFM7I-PkZ-9klJmj8AtuihETfzO_o,706
248
249
  autocoder/rag/loaders/ppt_loader.py,sha256=7VEYc-bqgK8VHCoGC3DIUcqbpda-E5jQF9lYLqP256I,1681
249
250
  autocoder/rag/stream_event/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -279,9 +280,9 @@ autocoder/utils/types.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
279
280
  autocoder/utils/auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
280
281
  autocoder/utils/auto_coder_utils/chat_stream_out.py,sha256=KW0mlmcHlStXi8-_6fXZ2-ifeJ5mgP0OV7DQFzCtIsw,14008
281
282
  autocoder/utils/chat_auto_coder_utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
282
- auto_coder-0.1.347.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
283
- auto_coder-0.1.347.dist-info/METADATA,sha256=-wG5ilXMo-fYflOC9zJZY2N0JdHBluyyxYNqxhzNVwQ,2728
284
- auto_coder-0.1.347.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
285
- auto_coder-0.1.347.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
286
- auto_coder-0.1.347.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
287
- auto_coder-0.1.347.dist-info/RECORD,,
283
+ auto_coder-0.1.348.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
284
+ auto_coder-0.1.348.dist-info/METADATA,sha256=RSE1DWgb1J9bSnwhqwMngmydulCNkWiEETsz7nB6rwQ,2728
285
+ auto_coder-0.1.348.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
286
+ auto_coder-0.1.348.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
287
+ auto_coder-0.1.348.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
288
+ auto_coder-0.1.348.dist-info/RECORD,,
@@ -2838,11 +2838,12 @@ def auto_command(query: str,extra_args: Dict[str,Any]={}):
2838
2838
  memory_config=MemoryConfig(memory=memory,
2839
2839
  save_memory_func=save_memory), command_config=CommandConfig,
2840
2840
  conversation_name="current"
2841
- )
2842
- if get_run_context() == RunMode.TERMINAL:
2843
- agent.run_in_terminal(AgenticEditRequest(user_input=query))
2844
- else:
2841
+ )
2842
+ if get_run_context().mode == RunMode.WEB:
2845
2843
  agent.run_with_events(AgenticEditRequest(user_input=query))
2844
+ else:
2845
+ agent.run_in_terminal(AgenticEditRequest(user_input=query))
2846
+
2846
2847
  return
2847
2848
 
2848
2849
  args = get_final_config()
@@ -836,11 +836,11 @@ MESSAGES = {
836
836
  },
837
837
  "/agent/edit/apply_pre_changes":{
838
838
  "en":"Commit user changes",
839
- "zh":"提交用户修改"
839
+ "zh":"检查用户是否有手动修改(如有,会自动提交)..."
840
840
  },
841
841
  "/agent/edit/apply_changes":{
842
- "en":"Commit changes",
843
- "zh":"提交修改"
842
+ "en":"Commit the changes in preview steps",
843
+ "zh":"提交前面步骤的修改"
844
844
  }
845
845
  }
846
846
 
@@ -212,18 +212,7 @@ class AgenticEdit:
212
212
  @byzerllm.prompt()
213
213
  def _analyze(self, request: AgenticEditRequest) -> str:
214
214
  """
215
- You are a highly skilled software engineer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices.
216
-
217
- ====
218
-
219
- FILES CONTEXT
220
-
221
- The following files are provided to you as context for the user's task. You can use these files to understand the project structure and codebase, and to make informed decisions about which files to modify.
222
- If you need to read more files, you can use the tools to find and read more files.
223
-
224
- <files>
225
- {{files}}
226
- </files>
215
+ You are a highly skilled software engineer with extensive knowledge in many programming languages, frameworks, design patterns, and best practices.
227
216
 
228
217
  ====
229
218
 
@@ -690,8 +679,10 @@ class AgenticEdit:
690
679
  The following rules are provided by the user, and you must follow them strictly.
691
680
 
692
681
  {% for key, value in extra_docs.items() %}
693
- ### {{ key }}
682
+ <user_rule>
683
+ ##File: {{ key }}
694
684
  {{ value }}
685
+ </user_rule>
695
686
  {% endfor %}
696
687
  {% endif %}
697
688
  """
@@ -706,7 +697,7 @@ class AgenticEdit:
706
697
  try:
707
698
  with open(fpath, "r", encoding="utf-8") as f:
708
699
  content = f.read()
709
- key = os.path.splitext(fname)[0]
700
+ key = fpath
710
701
  extra_docs[key] = content
711
702
  except Exception:
712
703
  continue
@@ -784,11 +775,25 @@ class AgenticEdit:
784
775
  # print(system_prompt)
785
776
  conversations = [
786
777
  {"role": "system", "content": system_prompt},
787
- ] + self.conversation_manager.get_history()
778
+ ]
779
+
780
+ conversations.append({
781
+ "role":"user","content":f'''
782
+ Below are some files the user is focused on, and the content is up to date. These entries show the file paths along with their full text content, which can help you better understand the user's needs. If the information is insufficient, you can use tools such as read_file to retrieve more details.
783
+ <files>
784
+ {self.files.to_str()}
785
+ </files>'''
786
+ })
787
+
788
+ conversations.append({
789
+ "role":"assistant","content":"Ok"
790
+ })
791
+ conversations.extend(self.conversation_manager.get_history())
788
792
  conversations.append({
789
793
  "role": "user", "content": request.user_input
790
794
  })
791
795
  self.conversation_manager.add_user_message(request.user_input)
796
+
792
797
  logger.debug(
793
798
  f"Initial conversation history size: {len(conversations)}")
794
799
 
@@ -1013,9 +1018,7 @@ class AgenticEdit:
1013
1018
 
1014
1019
  for content_chunk, metadata in generator:
1015
1020
  global_cancel.check_and_raise()
1016
-
1017
- meta_holder.meta = metadata
1018
- logger.info(f"metadata: {metadata.input_tokens_count}")
1021
+ meta_holder.meta = metadata
1019
1022
  if not content_chunk:
1020
1023
  continue
1021
1024
  buffer += content_chunk
@@ -256,6 +256,7 @@ class PluginManager:
256
256
  "/plugins/dirs /remove",
257
257
  "/plugins/dirs /clear",
258
258
  ]
259
+ self._wrapped_functions: Dict[str, Callable] = {}
259
260
 
260
261
  @property
261
262
  def cached_discover_plugins(self) -> List[Type[Plugin]]:
@@ -561,6 +562,7 @@ class PluginManager:
561
562
  return result
562
563
  return None
563
564
 
565
+ self._wrapped_functions[func_name] = wrapped
564
566
  return wrapped
565
567
 
566
568
  def register_function_interception(self, plugin_name: str, func_name: str) -> None:
@@ -1107,6 +1109,24 @@ class PluginManager:
1107
1109
 
1108
1110
  return processed_completions
1109
1111
 
1112
def get_wrapped_functions(self) -> Dict[str, Callable]:
    """Return every wrapped function recorded so far.

    Returns:
        Dict[str, Callable]: The ``_wrapped_functions`` mapping itself (not a
        copy), keyed by function name with the wrapped callable as value.
    """
    return self._wrapped_functions
1119
+
1120
def get_wrapped_function(self, func_name: str) -> Callable:
    """Look up a single wrapped function by name.

    Args:
        func_name: Name of the function.

    Returns:
        The wrapped function, or None when no entry exists for ``func_name``.
    """
    # NOTE(review): dict.get yields None for unknown names, so the effective
    # return type is Optional[Callable] even though the annotation says Callable.
    return self._wrapped_functions.get(func_name)
1110
1130
 
1111
1131
  def register_global_plugin_dir(plugin_dir: str) -> None:
1112
1132
  """注册一个全局插件目录。
@@ -0,0 +1,551 @@
1
+ import os
2
+ import traceback
3
+ import re
4
+ from PIL import Image
5
+
6
+ try:
7
+ from paddleocr import PaddleOCR
8
+ except ImportError:
9
+ PaddleOCR = None
10
+
11
+ try:
12
+ import paddlex as paddlex_module
13
+ except ImportError:
14
+ paddlex_module = None
15
+
16
+ import byzerllm
17
+ from byzerllm.utils.client import code_utils
18
+ from autocoder.utils.llms import get_single_llm
19
+ from loguru import logger
20
+ from typing import List, Tuple, Optional
21
+ from pydantic import BaseModel
22
+
23
+
24
class ReplaceInFileTool(BaseModel):
    """One ``<replace_in_file>`` tool call with its two fields, path and diff."""

    # Text found between <path> and </path> in the tool call.
    path: str
    # Text found between <diff> and </diff>: SEARCH/REPLACE blocks.
    diff: str
27
+
28
+
29
class ImageLoader:
    """
    A class for loading and processing images, extracting text and tables from them,
    and converting the content to markdown format.

    Every method is a static method; the class only serves as a namespace.
    """

    @staticmethod
    def parse_diff(diff_content: str) -> List[Tuple[str, str]]:
        """
        Parse diff text into a list of (search_block, replace_block) tuples.

        A block starts at a ``<<<<<<< SEARCH`` line, is split by a ``=======``
        line and ends at a ``>>>>>>> REPLACE`` line. Line endings are kept, so
        every returned block ends with the line terminator of its last line.
        An unterminated block stops parsing; blocks parsed before it are kept.

        Args:
            diff_content: Text containing zero or more SEARCH/REPLACE blocks.
        Returns:
            The parsed (search, replace) pairs in order of appearance.
        """
        blocks = []
        lines = diff_content.splitlines(keepends=True)
        i = 0
        n = len(lines)

        while i < n:
            if lines[i].strip() != "<<<<<<< SEARCH":
                i += 1
                continue

            i += 1
            search_lines = []
            # Accumulate the search block up to the separator.
            while i < n and lines[i].strip() != "=======":
                search_lines.append(lines[i])
                i += 1
            if i >= n:
                logger.warning("Unterminated SEARCH block found in diff content.")
                break
            i += 1  # skip '======='

            replace_lines = []
            # Accumulate the replace block up to the end marker.
            while i < n and lines[i].strip() != ">>>>>>> REPLACE":
                replace_lines.append(lines[i])
                i += 1
            if i >= n:
                logger.warning("Unterminated REPLACE block found in diff content.")
                break
            i += 1  # skip '>>>>>>> REPLACE'

            blocks.append((''.join(search_lines), ''.join(replace_lines)))

        if not blocks and diff_content.strip():
            logger.warning(f"Could not parse any SEARCH/REPLACE blocks from diff: {diff_content}")
        return blocks

    @staticmethod
    def _collect_ocr_lines(result, source_label: str, join_list_text: bool = False) -> List[str]:
        """
        Flatten an OCR result (list of pages, each a list of line entries) into text lines.

        Each line entry is expected to look like ``[points, (text, confidence)]``.
        Empty pages, malformed entries and non-string text are skipped with a warning.

        Args:
            result: Value returned by the OCR call.
            source_label: Label used in warning messages ("PDF" or "Image").
            join_list_text: When True, text that arrives as a list is joined
                with spaces instead of being skipped.
        Returns:
            The recognized text lines in page order.
        """
        lines = []
        if not (result and isinstance(result, list)):
            return lines

        for page in result:
            if not (page and isinstance(page, list)):
                continue
            for line_info in page:
                try:
                    well_formed = (
                        isinstance(line_info, (list, tuple)) and len(line_info) == 2
                        and isinstance(line_info[1], (list, tuple)) and len(line_info[1]) >= 1
                    )
                    if not well_formed:
                        logger.warning(f"Unexpected line_info structure in {source_label}: {line_info}. Skipping.")
                        continue

                    txt = line_info[1][0]
                    if isinstance(txt, str):
                        lines.append(txt)
                    elif join_list_text and isinstance(txt, list):
                        processed_txt = " ".join(map(str, txt))
                        logger.warning(f"Extracted text is a list in {source_label}: {txt}. Joined as: '{processed_txt}'.")
                        lines.append(processed_txt)
                    else:
                        logger.warning(f"Extracted text is not a string in {source_label}: {txt} (type: {type(txt)}). Skipping.")
                except Exception as e:
                    logger.warning(f"Error processing line_info in {source_label}: {line_info}. Error: {e}")
        return lines

    @staticmethod
    def paddleocr_extract_text(
        file_path,
        lang='ch',
        use_angle_cls=True,
        page_num=10,
        slice_params=None,
        det_model_dir=None,
        rec_model_dir=None,
        **kwargs
    ):
        """
        Recognize text with PaddleOCR. Supports images, PDFs and sliding-window
        slicing for very large images.

        Args:
            file_path: Path of the image or PDF.
            lang: Language, Chinese by default.
            use_angle_cls: Whether to enable the angle classifier.
            page_num: Maximum number of pages to recognize for a PDF.
            slice_params: Sliding-window parameters (dict) for very large images.
            det_model_dir: Custom detection model path.
            rec_model_dir: Custom recognition model path.
            kwargs: Other PaddleOCR parameters.
        Returns:
            The recognized plain text, one line per detected text line; an
            empty string when paddleocr is missing or any step fails.
        """
        if PaddleOCR is None:
            print("paddleocr not installed")
            return ""

        # Initialize OCR
        try:
            ocr = PaddleOCR(
                use_angle_cls=use_angle_cls,
                lang=lang,
                page_num=page_num,
                det_model_dir=det_model_dir,
                rec_model_dir=rec_model_dir,
                **kwargs
            )
        except Exception:
            traceback.print_exc()
            return ""

        try:
            ext = os.path.splitext(file_path)[1].lower()

            # Only request angle classification when the classifier was
            # actually enabled at construction time.
            if ext == ".pdf":
                result = ocr.ocr(file_path, cls=use_angle_cls)
                lines = ImageLoader._collect_ocr_lines(result, "PDF")
            else:
                if slice_params is not None:
                    result = ocr.ocr(file_path, cls=use_angle_cls, slice=slice_params)
                else:
                    result = ocr.ocr(file_path, cls=use_angle_cls)
                # A single image is returned wrapped in a one-item list, so it
                # is handled as a list of pages just like a PDF.
                lines = ImageLoader._collect_ocr_lines(result, "Image", join_list_text=True)
            return "\n".join(lines)
        except Exception:
            traceback.print_exc()
            return ""

    @staticmethod
    def paddlex_table_extract_markdown(image_path):
        """
        Extract tables with the PaddleX table recognition pipeline and convert
        them to markdown.

        Args:
            image_path: Path of the image.
        Returns:
            Markdown tables joined by blank lines; an empty string when paddlex
            is missing, nothing is predicted, or any step fails.
        """
        if paddlex_module is None:
            print("paddlex not installed")
            return ""

        try:
            # Create the pipeline and predict
            pipeline = paddlex_module.create_pipeline(pipeline='table_recognition')
            outputs = pipeline.predict([image_path])
            if not outputs:
                return ""

            md_results = []
            for res in outputs:
                # Get the HTML table
                html = None
                try:
                    html = res.to_html() if hasattr(res, "to_html") else None
                except Exception:
                    html = None

                # Without to_html, fall back to whatever res.print() writes.
                if html is None:
                    try:
                        import contextlib
                        import io
                        buffer = io.StringIO()
                        # redirect_stdout restores sys.stdout even when
                        # res.print() raises.
                        with contextlib.redirect_stdout(buffer):
                            res.print()
                        html = buffer.getvalue()
                    except Exception:
                        html = ""

                # Convert to markdown
                md_results.append(ImageLoader.html_table_to_markdown(html))

            return "\n\n".join(md_results)
        except Exception:
            traceback.print_exc()
            return ""

    @staticmethod
    def html_table_to_markdown(html):
        """
        Convert the first HTML table found in ``html`` to a markdown table.

        The first row is used as the header row.

        Args:
            html: HTML text.
        Returns:
            The markdown table; an empty string when BeautifulSoup4 is missing,
            no table or row is found, or conversion fails.
        """
        try:
            from bs4 import BeautifulSoup
        except ImportError:
            print("BeautifulSoup4 not installed, cannot convert HTML to markdown")
            return ""

        try:
            soup = BeautifulSoup(html, "html.parser")
            table = soup.find("table")
            if table is None:
                return ""

            rows = [
                [cell.get_text(strip=True) for cell in tr.find_all(["td", "th"])]
                for tr in table.find_all("tr")
            ]
            if not rows:
                return ""

            # Build the markdown
            header = rows[0]
            md_lines = [
                "| " + " | ".join(header) + " |",
                "|" + "|".join(["---"] * len(header)) + "|",
            ]
            md_lines.extend("| " + " | ".join(row) + " |" for row in rows[1:])

            return "\n".join(md_lines)
        except Exception:
            traceback.print_exc()
            return ""

    @staticmethod
    def _apply_replacements(content: str, blocks: List[Tuple[str, str]]) -> str:
        """
        Apply (search, replace) pairs to ``content`` and return the result.

        Args:
            content: Text to edit.
            blocks: Pairs as produced by ``parse_diff``.
        Returns:
            The edited text; pairs whose search text is not found change nothing.
        """
        for search_block, replace_block in blocks:
            # str.replace with an empty pattern inserts the replacement between
            # every character; a whitespace-only pattern would rewrite every
            # matching blank. Neither can be a table, so skip them.
            if not search_block.strip():
                continue

            if search_block in content:
                content = content.replace(search_block, replace_block)
                continue

            # parse_diff keeps line endings, so a block always ends with a
            # newline. Text that ends the content without a trailing newline
            # can therefore only match once that newline is dropped.
            trimmed_search = search_block.rstrip("\r\n")
            if content.endswith(trimmed_search):
                content = content[:-len(trimmed_search)] + replace_block.rstrip("\r\n")
        return content

    @staticmethod
    def format_table_in_content(content: str, llm=None) -> str:
        """Format table content from OCR results into markdown format.

        The LLM is asked to answer with ``<replace_in_file>`` blocks whose path
        is ``content``; their SEARCH/REPLACE pairs are applied to ``content``
        in memory. No file is modified.

        Args:
            content: The OCR text content that may contain tables
            llm: The language model to use for formatting

        Returns:
            Formatted content with tables converted to markdown
        """

        # The docstring below is the prompt template sent to the model.
        @byzerllm.prompt()
        def _format_table(content: str)->str:
            '''
            # 表格格式化任务

            你是一个专业的OCR后处理专家,擅长将OCR识别出的表格数据转换为规范的Markdown表格。

            ## 输入内容分析

            OCR识别的表格通常会有以下特点:
            1. 每个单元格可能被识别为单独的一行
            2. 表格的行列结构可能不明显
            3. 可能包含非表格的文本内容
            4. 可能存在多个表格

            ## 你的任务

            1. 识别内容中的表格数据
            2. 将表格数据转换为标准Markdown格式
            3. 保留非表格的文本内容
            4. 使用replace_in_file工具格式输出结果

            ## 输出格式

            必须使用以下格式输出结果:

            ```
            <replace_in_file>
            <path>content</path>
            <diff>
            <<<<<<< SEARCH
            [原始表格文本,精确匹配]
            =======
            [转换后的Markdown表格]
            >>>>>>> REPLACE
            </diff>
            </replace_in_file>
            ```

            ## 示例

            原始OCR文本:
            ```
            下面是库存情况:
            产品名称
            价格
            库存
            苹果手机
            8999 352
            华为平板
            4599
            128
            小米电视
            3299
            89
            可以看到在,整体库存和价格是健康的。
            ```

            转换后的输出:
            ```
            <replace_in_file>
            <path>content</path>
            <diff>
            <<<<<<< SEARCH
            产品名称
            价格
            库存
            苹果手机
            8999 352
            华为平板
            4599
            128
            小米电视
            3299
            89
            =======
            | 产品名称 | 价格 | 库存 |
            |---------|------|------|
            | 苹果手机 | 8999 | 352 |
            | 华为平板 | 4599 | 128 |
            | 小米电视 | 3299 | 89 |
            >>>>>>> REPLACE
            </diff>
            </replace_in_file>
            ```

            ## 处理规则

            1. 表格识别:
               - 分析行列结构,识别表头和数据行
               - 如果一行中有多个值,可能是一行表格数据
               - 连续的短行可能是表格的单元格

            2. Markdown格式:
               - 表头行使用`|`分隔各列
               - 在表头下方添加分隔行`|---|---|---|`
               - 对齐各列数据
               - 保持原始数据的完整性

            3. 多表格处理:
               - 为每个表格创建单独的replace_in_file块
               - 保持表格在原文中的相对位置

            4. 非表格内容:
               - 保留原始格式
               - 不要修改非表格文本

            ## 处理以下内容

            {{content}}
            '''

        # Run the prompt with the provided content
        tool_response = _format_table.with_llm(llm).run(content)

        # Parse the tool response to extract replace_in_file tool calls
        def extract_replace_in_file_tools(response):
            pattern = r'<replace_in_file>\s*<path>(.*?)</path>\s*<diff>(.*?)</diff>\s*</replace_in_file>'
            return [
                ReplaceInFileTool(path=match.group(1).strip(), diff=match.group(2).strip())
                for match in re.finditer(pattern, response, re.DOTALL)
            ]

        formatted_content = content
        for tool in extract_replace_in_file_tools(tool_response):
            # Only in-memory replacement is supported (no actual file modification).
            if tool.path == "content":
                blocks = ImageLoader.parse_diff(tool.diff)
                formatted_content = ImageLoader._apply_replacements(formatted_content, blocks)

        return formatted_content

    @staticmethod
    def extract_text_from_image(
        image_path: str,
        llm,
        engine: str = "vl",
        product_mode: str = "lite",
        paddle_kwargs: Optional[dict] = None
    ) -> str:
        """
        Recognize all text in an image or PDF, including tables (as markdown tables).

        Args:
            image_path: Path of the image or PDF.
            llm: LLM object, or a string (model name) resolved through get_single_llm.
            engine: Recognition engine to use
                - "vl": vision-language model
                - "paddle": PaddleOCR
                - "paddle_table": PaddleX table recognition
            product_mode: Argument passed to get_single_llm.
            paddle_kwargs: dict of arguments passed to PaddleOCR.
        Returns:
            The markdown content; an empty string for an unknown engine or when
            the "vl" engine fails or yields nothing.
        """
        if isinstance(llm, str):
            llm = get_single_llm(llm, product_mode=product_mode)

        if engine == "vl":
            try:
                # Prefer the dedicated "vl_model" sub client when one exists.
                sub_client = llm.get_sub_client("vl_model")
                vl_model = sub_client if sub_client else llm

                # The docstring below is the prompt template sent to the model.
                @byzerllm.prompt()
                def analyze_image(image_path):
                    """
                    {{ image }}
                    你是一名图像理解专家,请识别这张图片中的所有内容,优先识别文字和表格。
                    对于普通文字,输出为段落文本。
                    对于表格截图,转换成markdown table格式输出。
                    请根据内容顺序,整合成一份markdown文档。
                    只返回markdown内容,不要添加额外解释。
                    """
                    image = byzerllm.Image.load_image_from_path(image_path)
                    return {"image": image}

                result = analyze_image.with_llm(vl_model).run(image_path)
                md_blocks = code_utils.extract_code(result, language="markdown")
                if md_blocks:
                    markdown_content = md_blocks[-1][1]
                else:
                    markdown_content = result.strip()
                if not markdown_content:
                    # Caught just below: prints a traceback and returns "".
                    raise ValueError("Empty markdown from vl_model")
                return markdown_content

            except Exception:
                traceback.print_exc()
                return ""

        if engine == "paddle":
            return ImageLoader.paddleocr_extract_text(image_path, **(paddle_kwargs or {}))

        if engine == "paddle_table":
            return ImageLoader.paddlex_table_extract_markdown(image_path)

        print(f"Unknown engine type: {engine}. Supported engines are 'vl', 'paddle', and 'paddle_table'.")
        return ""

    @staticmethod
    def image_to_markdown(
        image_path: str,
        llm,
        engine: str = "vl",
        product_mode: str = "lite",
        paddle_kwargs: Optional[dict] = None
    ) -> str:
        """
        Recognize the content of an image or PDF and write it to a markdown file.

        The file is written next to the input, with the extension replaced by
        ``.md``. A failure to write is printed and does not affect the return value.

        Args:
            image_path: Path of the file.
            llm: LLM object or string.
            engine: 'vl', 'paddle' or 'paddle_table'.
            product_mode: LLM argument.
            paddle_kwargs: dict of arguments passed to PaddleOCR.
        Returns:
            The markdown content.
        """
        md_content = ImageLoader.extract_text_from_image(
            image_path,
            llm,
            engine=engine,
            product_mode=product_mode,
            paddle_kwargs=paddle_kwargs
        )

        md_path = os.path.splitext(image_path)[0] + ".md"
        try:
            with open(md_path, "w", encoding="utf-8") as f:
                f.write(md_content)
        except Exception:
            traceback.print_exc()

        return md_content
551
+
autocoder/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.347"
1
+ __version__ = "0.1.348"