auto_coder-0.1.352-py3-none-any.whl → auto_coder-0.1.354-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

Files changed (43)
  1. {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/METADATA +1 -1
  2. {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/RECORD +43 -30
  3. autocoder/auto_coder_rag.py +37 -1
  4. autocoder/auto_coder_runner.py +8 -0
  5. autocoder/commands/auto_command.py +59 -131
  6. autocoder/commands/tools.py +1 -1
  7. autocoder/common/__init__.py +1 -1
  8. autocoder/common/conversations/__init__.py +52 -0
  9. autocoder/common/conversations/compatibility.py +303 -0
  10. autocoder/common/conversations/conversation_manager.py +502 -0
  11. autocoder/common/conversations/example.py +152 -0
  12. autocoder/common/file_monitor/__init__.py +5 -0
  13. autocoder/common/file_monitor/monitor.py +383 -0
  14. autocoder/common/git_utils.py +1 -1
  15. autocoder/common/ignorefiles/__init__.py +4 -0
  16. autocoder/common/ignorefiles/ignore_file_utils.py +103 -0
  17. autocoder/common/ignorefiles/test_ignore_file_utils.py +91 -0
  18. autocoder/common/rulefiles/__init__.py +15 -0
  19. autocoder/common/rulefiles/autocoderrules_utils.py +173 -0
  20. autocoder/common/save_formatted_log.py +54 -0
  21. autocoder/common/v2/agent/agentic_edit.py +40 -36
  22. autocoder/common/v2/agent/agentic_edit_tools/list_files_tool_resolver.py +1 -1
  23. autocoder/common/v2/agent/agentic_edit_tools/search_files_tool_resolver.py +73 -43
  24. autocoder/common/v2/agent/agentic_edit_tools/test_search_files_tool_resolver.py +163 -0
  25. autocoder/common/v2/code_editblock_manager.py +20 -8
  26. autocoder/index/index.py +1 -1
  27. autocoder/models.py +22 -9
  28. autocoder/rag/api_server.py +14 -2
  29. autocoder/rag/cache/simple_cache.py +63 -33
  30. autocoder/rag/loaders/docx_loader.py +1 -1
  31. autocoder/rag/loaders/filter_utils.py +133 -76
  32. autocoder/rag/loaders/image_loader.py +15 -3
  33. autocoder/rag/loaders/pdf_loader.py +2 -2
  34. autocoder/rag/long_context_rag.py +11 -0
  35. autocoder/rag/qa_conversation_strategy.py +5 -31
  36. autocoder/rag/utils.py +21 -2
  37. autocoder/utils/_markitdown.py +66 -25
  38. autocoder/utils/auto_coder_utils/chat_stream_out.py +1 -0
  39. autocoder/version.py +1 -1
  40. {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/LICENSE +0 -0
  41. {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/WHEEL +0 -0
  42. {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/entry_points.txt +0 -0
  43. {auto_coder-0.1.352.dist-info → auto_coder-0.1.354.dist-info}/top_level.txt +0 -0
@@ -1,106 +1,163 @@
1
-
2
1
  import os
3
2
  import json
4
- from typing import Dict, Optional
3
+ import threading
4
+ from typing import Dict, Optional, List
5
5
  from loguru import logger
6
+ from functools import lru_cache
6
7
 
7
8
  class FilterRuleManager:
8
9
  '''
10
+ 单例模式的过滤规则管理器。支持按文件类型定义不同的过滤规则。
11
+
12
+ 支持的规则格式:
9
13
  {
10
- "whitelist": [
11
- "glob:*.png",
12
- "regex:^/tmp/.*hidden.*"
13
- ],
14
- "blacklist": [
15
- "glob:*/private/*",
16
- "regex:.*/secret/.*\\.jpg$"
17
- ]
14
+ "image": {
15
+ "whitelist": ["*.png", "*.jpg"],
16
+ "blacklist": ["*/private/*"]
17
+ },
18
+ "document": {
19
+ "whitelist": ["*.pdf", "*.docx"],
20
+ "blacklist": ["*/tmp/*"]
21
+ },
22
+ "default": {
23
+ "whitelist": [],
24
+ "blacklist": ["*/node_modules/*", "*/.*"]
18
25
  }
19
- '''
20
- _cache_rules: Optional[Dict] = None
21
- _cache_mtime: Optional[float] = None
22
-
23
- def __init__(self, llm, source_dir: str):
24
- """
25
- 初始化过滤规则管理器
26
-
27
- 参数:
28
- llm: 大模型对象,当前未使用,预留
29
- source_dir: 项目根目录路径
30
- """
31
- self.llm = llm
32
- self.source_dir = source_dir
33
- self.filter_rules_path = os.path.join(self.source_dir, ".cache", "filterrules")
26
+ }
27
+ '''
28
+ _instance = None
29
+ _lock = threading.RLock() # 使用可重入锁避免死锁
30
+
31
+ def __new__(cls, *args, **kwargs):
32
+ if cls._instance is None:
33
+ with cls._lock:
34
+ if cls._instance is None: # 双重检查锁定模式
35
+ cls._instance = super(FilterRuleManager, cls).__new__(cls)
36
+ cls._instance._initialized = False
37
+ return cls._instance
38
+
39
+ @classmethod
40
+ def get_instance(cls):
41
+ return cls() # 直接调用__new__,不需要重复加锁
42
+
43
+ def __init__(self):
44
+ with self._lock:
45
+ if hasattr(self, '_initialized') and self._initialized:
46
+ return
47
+
48
+ self.source_dir = os.getcwd()
49
+ self.filter_rules_path = os.path.join(self.source_dir, ".cache", "filterrules")
50
+ self._cache_rules: Optional[Dict] = None
51
+ self._cache_mtime: Optional[float] = None
52
+ self._rule_lock = threading.RLock() # 单独的锁用于规则访问
53
+ self._initialized = True
34
54
 
35
55
  def load_filter_rules(self) -> Dict:
36
- try:
37
- current_mtime = os.path.getmtime(self.filter_rules_path) if os.path.exists(self.filter_rules_path) else None
38
- except Exception:
39
- current_mtime = None
40
-
56
+ # 先检查是否需要重新加载,不持有锁
57
+ current_mtime = self._get_file_mtime()
41
58
  need_reload = False
42
59
 
43
- # 如果缓存为空,或者文件已更新,触发重新加载
44
- if FilterRuleManager._cache_rules is None:
60
+ if self._cache_rules is None:
45
61
  need_reload = True
46
- elif current_mtime is not None and FilterRuleManager._cache_mtime != current_mtime:
62
+ elif current_mtime is not None and self._cache_mtime != current_mtime:
47
63
  need_reload = True
48
64
 
65
+ # 只在需要重新加载时获取锁
49
66
  if need_reload:
50
- FilterRuleManager._cache_rules = {"whitelist": [], "blacklist": []}
51
- try:
52
- if os.path.exists(self.filter_rules_path):
53
- with open(self.filter_rules_path, "r", encoding="utf-8") as f:
54
- FilterRuleManager._cache_rules = json.load(f)
55
- FilterRuleManager._cache_mtime = current_mtime
56
- except Exception as e:
57
- logger.warning(f"Failed to load filterrules: {e}")
58
-
59
- return FilterRuleManager._cache_rules or {"whitelist": [], "blacklist": []}
67
+ with self._rule_lock:
68
+ # 双重检查,避免多线程重复加载
69
+ current_mtime = self._get_file_mtime()
70
+ if self._cache_rules is None or (current_mtime is not None and self._cache_mtime != current_mtime):
71
+ self._load_rules_from_file(current_mtime)
72
+
73
+ # 返回规则副本,避免外部修改影响缓存
74
+ with self._rule_lock:
75
+ return self._cache_rules.copy() if self._cache_rules else self._get_default_rules()
76
+
77
+ def _get_file_mtime(self) -> Optional[float]:
78
+ """获取文件修改时间,与IO相关的操作单独提取出来"""
79
+ try:
80
+ return os.path.getmtime(self.filter_rules_path) if os.path.exists(self.filter_rules_path) else None
81
+ except Exception:
82
+ logger.warning(f"Failed to get mtime for {self.filter_rules_path}")
83
+ return None
84
+
85
+ def _get_default_rules(self) -> Dict:
86
+ """返回默认的规则结构"""
87
+ return {
88
+ "default": {
89
+ "whitelist": [],
90
+ "blacklist": []
91
+ }
92
+ }
60
93
 
61
- def should_parse_image(self, file_path: str) -> bool:
94
+ def _load_rules_from_file(self, current_mtime: Optional[float]) -> None:
95
+ """从文件加载规则,仅在持有锁时调用"""
96
+ self._cache_rules = self._get_default_rules()
97
+ try:
98
+ if os.path.exists(self.filter_rules_path):
99
+ with open(self.filter_rules_path, "r", encoding="utf-8") as f:
100
+ file_rules = json.load(f)
101
+
102
+ # 转换旧格式规则到新格式(如果需要)
103
+ if "whitelist" in file_rules or "blacklist" in file_rules:
104
+ # 旧格式转换为新格式
105
+ self._cache_rules = {
106
+ "default": {
107
+ "whitelist": file_rules.get("whitelist", []),
108
+ "blacklist": file_rules.get("blacklist", [])
109
+ }
110
+ }
111
+ logger.info("Converted old format rules to new format")
112
+ else:
113
+ # 新格式直接使用
114
+ self._cache_rules = file_rules
115
+ self._cache_mtime = current_mtime
116
+ except Exception as e:
117
+ logger.warning(f"Failed to load filterrules: {e}")
118
+
119
+ @lru_cache(maxsize=1024) # 缓存频繁使用的路径判断结果
120
+ def should_parse_file(self, file_path: str, file_type: str = "default") -> bool:
62
121
  """
63
- 判断某个文件是否需要对图片进行解析。
64
-
65
- 支持规则格式:
66
- - glob通配符匹配,示例:"glob:*.png" 或 "*.png"
67
- - 正则表达式匹配,示例:"regex:^/tmp/.*hidden.*"
68
-
122
+ 判断某个文件是否需要进行解析。
123
+
124
+ 参数:
125
+ file_path: 文件路径
126
+ file_type: 文件类型(如"image"、"document"等),默认为"default"
127
+
69
128
  返回:
70
129
  True 表示应该解析
71
130
  False 表示不解析
72
131
  """
73
132
  import fnmatch
74
- import re
75
-
133
+
76
134
  rules = self.load_filter_rules()
77
- whitelist = rules.get("whitelist", [])
78
- blacklist = rules.get("blacklist", [])
79
-
80
- def match_pattern(pattern: str, path: str) -> bool:
81
- if pattern.startswith("glob:"):
82
- pat = pattern[len("glob:"):]
83
- return fnmatch.fnmatch(path, pat)
84
- elif pattern.startswith("regex:"):
85
- pat = pattern[len("regex:"):]
86
- try:
87
- return re.search(pat, path) is not None
88
- except re.error:
89
- logger.warning(f"Invalid regex pattern: {pat}")
90
- return False
91
- else:
92
- # 默认按glob处理
93
- return fnmatch.fnmatch(path, pattern)
94
-
135
+
136
+ # 获取指定类型的规则,如果不存在则使用默认规则
137
+ type_rules = rules.get(file_type, rules.get("default", {"whitelist": [], "blacklist": []}))
138
+ whitelist = type_rules.get("whitelist", [])
139
+ blacklist = type_rules.get("blacklist", [])
140
+
95
141
  # 优先匹配黑名单
96
142
  for pattern in blacklist:
97
- if match_pattern(pattern, file_path):
143
+ if fnmatch.fnmatch(file_path, pattern):
98
144
  return False
99
-
100
- # 再匹配白名单
145
+
146
+ # 如果白名单为空,则默认所有文件都通过(除非被黑名单过滤)
147
+ if not whitelist:
148
+ return True
149
+
150
+ # 匹配白名单
101
151
  for pattern in whitelist:
102
- if match_pattern(pattern, file_path):
152
+ if fnmatch.fnmatch(file_path, pattern):
103
153
  return True
104
-
105
- # 默认不解析
154
+
155
+ # 有白名单但不匹配,不通过
106
156
  return False
157
+
158
+ # 保持向后兼容
159
+ def should_parse_image(self, file_path: str) -> bool:
160
+ """
161
+ 判断某个图片文件是否需要解析(兼容旧版API)
162
+ """
163
+ return self.should_parse_file(file_path, "image")
@@ -538,7 +538,7 @@ class ImageLoader:
538
538
  def image_to_markdown(
539
539
  image_path: str,
540
540
  llm,
541
- engine: str = "vl",
541
+ engine: str = "paddle",
542
542
  product_mode: str = "lite",
543
543
  paddle_kwargs: dict = None
544
544
  ) -> str:
@@ -554,6 +554,13 @@ class ImageLoader:
554
554
  Returns:
555
555
  markdown内容字符串
556
556
  """
557
+ logger.info(f"image_path: {image_path} engine: {engine} product_mode: {product_mode} paddle_kwargs: {paddle_kwargs}")
558
+
559
+ # 新增:如果 engine 为 paddle 且 PaddleOCR 为 None,直接返回空字符串
560
+ if engine == "paddle" and PaddleOCR is None:
561
+ logger.warning("PaddleOCR 未安装,无法识别图片内容,直接返回空字符串。")
562
+ return ""
563
+
557
564
  md_content = ImageLoader.extract_text_from_image(
558
565
  image_path,
559
566
  llm,
@@ -561,8 +568,13 @@ class ImageLoader:
561
568
  product_mode=product_mode,
562
569
  paddle_kwargs=paddle_kwargs
563
570
  )
564
-
565
- md_path = os.path.splitext(image_path)[0] + ".md"
571
+
572
+ # Get directory and filename separately
573
+ dir_name = os.path.dirname(image_path)
574
+ file_name = os.path.basename(image_path)
575
+ base_name = os.path.splitext(file_name)[0]
576
+ # Create new path with dot before filename
577
+ md_path = os.path.join(dir_name, f".{base_name}.md")
566
578
  try:
567
579
  with open(md_path, "w", encoding="utf-8") as f:
568
580
  f.write(md_content)
@@ -1,6 +1,5 @@
1
1
  from io import BytesIO
2
2
  from pypdf import PdfReader
3
- from autocoder.utils._markitdown import MarkItDown
4
3
  import traceback
5
4
 
6
5
 
@@ -15,7 +14,8 @@ def extract_text_from_pdf_old(file_path):
15
14
  return text
16
15
 
17
16
  def extract_text_from_pdf(file_path, llm=None, product_mode="lite"):
18
- try:
17
+ try:
18
+ from autocoder.utils._markitdown import MarkItDown
19
19
  md_converter = MarkItDown(llm=llm, product_mode=product_mode)
20
20
  result = md_converter.convert(file_path)
21
21
  return result.text_content
@@ -41,6 +41,8 @@ from autocoder.rag.qa_conversation_strategy import get_qa_strategy
41
41
  from autocoder.rag.searchable import SearchableResults
42
42
  from autocoder.rag.conversation_to_queries import extract_search_queries
43
43
  from autocoder.common import openai_content as OpenAIContentProcessor
44
+ from autocoder.common.save_formatted_log import save_formatted_log
45
+ import json, os
44
46
  try:
45
47
  from autocoder_pro.rag.llm_compute import LLMComputeEngine
46
48
  pro_version = version("auto-coder-pro")
@@ -849,6 +851,15 @@ class LongContextRAG:
849
851
  conversations=conversations, local_image_host=self.args.local_image_host
850
852
  )
851
853
 
854
+ # 保存 new_conversations
855
+ try:
856
+ logger.info(f"Saving new_conversations log to {self.args.source_dir}/.cache/logs")
857
+ project_root = self.args.source_dir
858
+ json_text = json.dumps(new_conversations, ensure_ascii=False)
859
+ save_formatted_log(project_root, json_text, "rag_conversation")
860
+ except Exception as e:
861
+ logger.warning(f"Failed to save new_conversations log: {e}")
862
+
852
863
  chunks = target_llm.stream_chat_oai(
853
864
  conversations=new_conversations,
854
865
  model=model,
@@ -2,6 +2,7 @@ from abc import ABC, abstractmethod
2
2
  from typing import List, Dict, Any, Generator
3
3
  import byzerllm
4
4
  from autocoder.common import AutoCoderArgs
5
+ from autocoder.common.rulefiles.autocoderrules_utils import get_rules
5
6
 
6
7
  class QAConversationStrategy(ABC):
7
8
  """
@@ -124,22 +125,8 @@ class MultiRoundStrategy(QAConversationStrategy):
124
125
  {% endfor %}
125
126
  {% endif %}
126
127
 
127
- """
128
-
129
- import os
130
- extra_docs = {}
131
- rules_dir = os.path.join(self.args.source_dir, ".autocoderrules")
132
- if os.path.isdir(rules_dir):
133
- for fname in os.listdir(rules_dir):
134
- if fname.endswith(".md"):
135
- fpath = os.path.join(rules_dir, fname)
136
- try:
137
- with open(fpath, "r", encoding="utf-8") as f:
138
- content = f.read()
139
- key = os.path.splitext(fname)[0]
140
- extra_docs[key] = content
141
- except Exception:
142
- continue
128
+ """
129
+ extra_docs = get_rules()
143
130
  return {"extra_docs": extra_docs}
144
131
 
145
132
  class SingleRoundStrategy(QAConversationStrategy):
@@ -253,21 +240,8 @@ class SingleRoundStrategy(QAConversationStrategy):
253
240
  {% endfor %}
254
241
  {% endif %}
255
242
 
256
- """
257
- import os
258
- extra_docs = {}
259
- rules_dir = os.path.join(getattr(self, 'args', None).source_dir if getattr(self, 'args', None) else ".", ".autocoderrules")
260
- if os.path.isdir(rules_dir):
261
- for fname in os.listdir(rules_dir):
262
- if fname.endswith(".md"):
263
- fpath = os.path.join(rules_dir, fname)
264
- try:
265
- with open(fpath, "r", encoding="utf-8") as f:
266
- content = f.read()
267
- key = os.path.splitext(fname)[0]
268
- extra_docs[key] = content
269
- except Exception:
270
- continue
243
+ """
244
+ extra_docs = extra_docs = get_rules()
271
245
  return {"extra_docs": extra_docs}
272
246
 
273
247
  def get_qa_strategy(args: AutoCoderArgs) -> QAConversationStrategy:
autocoder/rag/utils.py CHANGED
@@ -2,8 +2,9 @@ from autocoder.common import SourceCode
2
2
  from autocoder.rag.token_counter import count_tokens_worker, count_tokens
3
3
  from autocoder.rag.loaders.pdf_loader import extract_text_from_pdf
4
4
  from autocoder.rag.loaders.docx_loader import extract_text_from_docx
5
- from autocoder.rag.loaders.excel_loader import extract_text_from_excel
5
+ from autocoder.rag.loaders.excel_loader import extract_text_from_excel
6
6
  from autocoder.rag.loaders.ppt_loader import extract_text_from_ppt
7
+ from autocoder.rag.loaders.image_loader import ImageLoader
7
8
  from typing import List, Tuple, Optional, Union
8
9
  import time
9
10
  from loguru import logger
@@ -21,7 +22,7 @@ def process_file_in_multi_process(
21
22
  llm = get_single_llm(llm,product_mode)
22
23
 
23
24
  start_time = time.time()
24
- file_path, relative_path, _, _ = file_info
25
+ file_path, relative_path, _, _ = file_info
25
26
  try:
26
27
  if file_path.endswith(".pdf"):
27
28
  content = extract_text_from_pdf(file_path, llm, product_mode)
@@ -61,6 +62,15 @@ def process_file_in_multi_process(
61
62
  tokens=count_tokens_worker(content),
62
63
  )
63
64
  ]
65
+ elif file_path.lower().endswith((".png", ".jpg", ".jpeg", ".bmp", ".gif")):
66
+ content = ImageLoader.image_to_markdown(file_path, llm=llm, product_mode=product_mode)
67
+ v = [
68
+ SourceCode(
69
+ module_name=f"##File: {file_path}",
70
+ source_code=content,
71
+ tokens=count_tokens_worker(content),
72
+ )
73
+ ]
64
74
  else:
65
75
  with open(file_path, "r", encoding="utf-8") as f:
66
76
  content = f.read()
@@ -126,6 +136,15 @@ def process_file_local(
126
136
  tokens=count_tokens(content),
127
137
  )
128
138
  ]
139
+ elif file_path.lower().endswith((".png", ".jpg", ".jpeg", ".bmp", ".gif")):
140
+ content = ImageLoader.image_to_markdown(file_path, llm=llm, product_mode=product_mode)
141
+ v = [
142
+ SourceCode(
143
+ module_name=f"##File: {file_path}",
144
+ source_code=content,
145
+ tokens=count_tokens(content),
146
+ )
147
+ ]
129
148
  else:
130
149
  with open(file_path, "r", encoding="utf-8") as f:
131
150
  content = f.read()
@@ -30,18 +30,20 @@ from pdfminer.pdfpage import PDFPage
30
30
  from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
31
31
  import pptx
32
32
  from pdfminer.image import ImageWriter
33
+ import time
33
34
 
34
35
  import numpy as np
35
36
  from PIL import Image
36
37
 
37
38
  # 新增导入
38
- from autocoder.rag.loaders import filter_utils
39
+ from autocoder.rag.loaders.filter_utils import FilterRuleManager
39
40
  from autocoder.rag.loaders.image_loader import ImageLoader
40
41
 
41
42
  # File-format detection
42
43
  import puremagic
43
44
  import requests
44
45
  from bs4 import BeautifulSoup
46
+ from loguru import logger
45
47
 
46
48
  # Optional Transcription support
47
49
  try:
@@ -503,12 +505,16 @@ class PdfConverter(DocumentConverter):
503
505
  Converts PDFs to Markdown with support for extracting and including images.
504
506
  """
505
507
 
508
+ def __init__(self, llm=None, product_mode="lite"):
509
+ super().__init__()
510
+ self.llm = llm
511
+ self.product_mode = product_mode
512
+
506
513
  def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
507
514
  # Bail if not a PDF
508
515
  extension = kwargs.get("file_extension", "")
509
516
  if extension.lower() != ".pdf":
510
- return None
511
-
517
+ return None
512
518
  image_output_dir = None
513
519
  if kwargs.get("image_output_dir", None):
514
520
  image_output_dir = kwargs.get("image_output_dir")
@@ -531,17 +537,18 @@ class PdfConverter(DocumentConverter):
531
537
  rsrcmgr = PDFResourceManager()
532
538
  laparams = LAParams()
533
539
  device = PDFPageAggregator(rsrcmgr, laparams=laparams)
534
- interpreter = PDFPageInterpreter(rsrcmgr, device)
540
+ interpreter = PDFPageInterpreter(rsrcmgr, device)
535
541
 
536
542
  # Process each page
537
543
  for page in PDFPage.create_pages(document):
538
544
  interpreter.process_page(page)
539
- layout = device.get_result()
545
+ layout = device.get_result()
540
546
 
541
547
  # Extract text and images from the page
542
548
  page_content = self._process_layout(
543
549
  layout, image_output_dir, image_count
544
550
  )
551
+
545
552
  text_content.extend(page_content)
546
553
  image_count += len([c for c in page_content if c.startswith("![Image")])
547
554
 
@@ -582,13 +589,12 @@ class PdfConverter(DocumentConverter):
582
589
  image_output_dir, f"image_{local_image_count}{suffix}")
583
590
  os.rename(temp_path, image_path)
584
591
  content.append(f"![Image {local_image_count}]({image_path})")
585
- # ===== 新增:根据filter_utils判断是否需要解析图片
586
- if filter_utils.should_parse_image(image_path):
587
- try:
588
- _ = ImageLoader.image_to_markdown(image_path, llm=None, engine="paddle")
589
- # image_to_markdown会自动生成md文件
590
- except Exception:
591
- import traceback; traceback.print_exc()
592
+ # ===== 修改:通过FilterRuleManager单例实例判断是否需要解析图片
593
+ v = try_parse_image(image_path,self.llm)
594
+ if v:
595
+ content.append("<image_content>")
596
+ content.append(v)
597
+ content.append("</image_content>")
592
598
  # =====
593
599
  local_image_count += 1
594
600
  continue
@@ -618,7 +624,11 @@ class PdfConverter(DocumentConverter):
618
624
  content.append(
619
625
  f"![Image {local_image_count}]({image_path})\n"
620
626
  )
621
- try_parse_image(image_path)
627
+ v = try_parse_image(image_path,self.llm)
628
+ if v:
629
+ content.append("<image_content>")
630
+ content.append(v)
631
+ content.append("</image_content>")
622
632
  local_image_count += 1
623
633
  continue
624
634
  elif colorspace == "DeviceGray":
@@ -629,7 +639,11 @@ class PdfConverter(DocumentConverter):
629
639
  content.append(
630
640
  f"![Image {local_image_count}]({image_path})\n"
631
641
  )
632
- try_parse_image(image_path)
642
+ v = try_parse_image(image_path,self.llm)
643
+ if v:
644
+ content.append("<image_content>")
645
+ content.append(v)
646
+ content.append("</image_content>")
633
647
  local_image_count += 1
634
648
  continue
635
649
  except Exception as e:
@@ -641,8 +655,12 @@ class PdfConverter(DocumentConverter):
641
655
  img_file.write(image_data)
642
656
 
643
657
  content.append(f"![Image {local_image_count}]({image_path})\n")
644
- # ===== 新增:根据filter_utils判断是否需要解析图片
645
- try_parse_image(image_path)
658
+ # ===== 新增:图片解析
659
+ v = try_parse_image(image_path,self.llm)
660
+ if v:
661
+ content.append("<image_content>")
662
+ content.append(v)
663
+ content.append("</image_content>")
646
664
  local_image_count += 1
647
665
 
648
666
  # Handle text
@@ -1089,6 +1107,8 @@ class MarkItDown:
1089
1107
  llm: Optional[Any] = None,
1090
1108
  product_mode: Optional[str] = None,
1091
1109
  ):
1110
+ # 初始化FilterRuleManager单例实例
1111
+ self._filter_rule_manager = FilterRuleManager.get_instance()
1092
1112
  if requests_session is None:
1093
1113
  self._requests_session = requests.Session()
1094
1114
  else:
@@ -1117,7 +1137,7 @@ class MarkItDown:
1117
1137
  self.register_page_converter(WavConverter())
1118
1138
  self.register_page_converter(Mp3Converter())
1119
1139
  self.register_page_converter(ImageConverter())
1120
- self.register_page_converter(PdfConverter())
1140
+ self.register_page_converter(PdfConverter(llm,product_mode))
1121
1141
 
1122
1142
  def convert(
1123
1143
  self, source: Union[str, requests.Response], **kwargs: Any
@@ -1126,8 +1146,7 @@ class MarkItDown:
1126
1146
  Args:
1127
1147
  - source: can be a string representing a path or url, or a requests.response object
1128
1148
  - extension: specifies the file extension to use when interpreting the file. If None, infer from source (path, uri, content-type, etc.)
1129
- """
1130
-
1149
+ """
1131
1150
  # Local path or url
1132
1151
  if isinstance(source, str):
1133
1152
  if (
@@ -1343,14 +1362,36 @@ class MarkItDown:
1343
1362
  self._page_converters.insert(0, converter)
1344
1363
 
1345
1364
 
1346
- def try_parse_image(image_path: str):
1365
+ def try_parse_image(image_path: str, llm=None):
1347
1366
  """
1348
- 根据filter_utils判断是否需要解析图片,如果需要则调用ImageLoader.image_to_markdown。
1367
+ 根据FilterRuleManager单例实例判断是否需要解析图片,如果需要则调用ImageLoader.image_to_markdown。
1349
1368
  解析失败会自动捕获异常。
1350
1369
  """
1351
- if filter_utils.should_parse_image(image_path):
1370
+ import uuid
1371
+ start_time = time.time()
1372
+ req_id = str(uuid.uuid4())[:8]
1373
+ logger.info(f"\n==== [try_parse_image] START | req_id={req_id} ====")
1374
+ logger.info(f"[try_parse_image][{req_id}] image_path: {image_path}, llm: {llm}")
1375
+ if FilterRuleManager.get_instance().should_parse_image(image_path):
1376
+ logger.info(f"[try_parse_image][{req_id}] should_parse_image=True, start parsing...")
1352
1377
  try:
1353
- _ = ImageLoader.image_to_markdown(image_path, llm=None, engine="paddle")
1354
- except Exception:
1355
- import traceback; traceback.print_exc()
1378
+ v = ImageLoader.image_to_markdown(image_path, llm=llm, engine="paddle")
1379
+ logger.info(f"[try_parse_image][{req_id}] image_to_markdown result: {str(v)[:200]}")
1380
+ if llm:
1381
+ v = ImageLoader.format_table_in_content(v, llm)
1382
+ logger.info(f"[try_parse_image][{req_id}] format_table_in_content result: {str(v)[:200]}")
1383
+ elapsed = time.time() - start_time
1384
+ logger.info(f"[try_parse_image][{req_id}] SUCCESS | execution time: {elapsed:.3f} seconds")
1385
+ logger.info(f"==== [try_parse_image] END | req_id={req_id} ====")
1386
+ return v
1387
+ except Exception as e:
1388
+ elapsed = time.time() - start_time
1389
+ logger.error(f"[try_parse_image][{req_id}] EXCEPTION | execution time: {elapsed:.3f} seconds | image_path: {image_path} | llm: {llm}")
1390
+ logger.exception(e)
1391
+ logger.info(f"==== [try_parse_image] END (EXCEPTION) | req_id={req_id} ====")
1392
+ return ""
1393
+ else:
1394
+ logger.info(f"[try_parse_image][{req_id}] should_parse_image=False, skip parsing.")
1395
+ logger.info(f"==== [try_parse_image] END (SKIP) | req_id={req_id} ====")
1396
+ return ""
1356
1397
 
@@ -292,6 +292,7 @@ def stream_out(
292
292
  get_event_manager(args.event_file).write_stream(content.to_dict(),
293
293
  metadata=EventMetadata(
294
294
  stream_out_type=extra_meta.get("stream_out_type", ""),
295
+ path=extra_meta.get("path", ""),
295
296
  is_streaming=True,
296
297
  output="delta",
297
298
  action_file=args.file
autocoder/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.352"
1
+ __version__ = "0.1.354"