auto-coder 0.1.353__py3-none-any.whl → 0.1.355__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of auto-coder might be problematic. Click here for more details.
- {auto_coder-0.1.353.dist-info → auto_coder-0.1.355.dist-info}/METADATA +1 -1
- {auto_coder-0.1.353.dist-info → auto_coder-0.1.355.dist-info}/RECORD +60 -45
- autocoder/agent/agentic_filter.py +1 -1
- autocoder/auto_coder.py +8 -0
- autocoder/auto_coder_rag.py +37 -1
- autocoder/auto_coder_runner.py +58 -77
- autocoder/chat/conf_command.py +270 -0
- autocoder/chat/models_command.py +485 -0
- autocoder/chat_auto_coder.py +29 -24
- autocoder/chat_auto_coder_lang.py +26 -2
- autocoder/commands/auto_command.py +60 -132
- autocoder/commands/auto_web.py +1 -1
- autocoder/commands/tools.py +1 -1
- autocoder/common/__init__.py +3 -1
- autocoder/common/command_completer.py +58 -12
- autocoder/common/command_completer_v2.py +576 -0
- autocoder/common/conversations/__init__.py +52 -0
- autocoder/common/conversations/compatibility.py +303 -0
- autocoder/common/conversations/conversation_manager.py +502 -0
- autocoder/common/conversations/example.py +152 -0
- autocoder/common/file_monitor/__init__.py +5 -0
- autocoder/common/file_monitor/monitor.py +383 -0
- autocoder/common/global_cancel.py +53 -16
- autocoder/common/ignorefiles/__init__.py +4 -0
- autocoder/common/ignorefiles/ignore_file_utils.py +103 -0
- autocoder/common/ignorefiles/test_ignore_file_utils.py +91 -0
- autocoder/common/rulefiles/__init__.py +15 -0
- autocoder/common/rulefiles/autocoderrules_utils.py +173 -0
- autocoder/common/save_formatted_log.py +54 -0
- autocoder/common/v2/agent/agentic_edit.py +10 -39
- autocoder/common/v2/agent/agentic_edit_tools/list_files_tool_resolver.py +1 -1
- autocoder/common/v2/agent/agentic_edit_tools/search_files_tool_resolver.py +73 -43
- autocoder/common/v2/code_agentic_editblock_manager.py +9 -9
- autocoder/common/v2/code_diff_manager.py +2 -2
- autocoder/common/v2/code_editblock_manager.py +31 -18
- autocoder/common/v2/code_strict_diff_manager.py +3 -2
- autocoder/dispacher/actions/action.py +6 -6
- autocoder/dispacher/actions/plugins/action_regex_project.py +2 -2
- autocoder/events/event_manager_singleton.py +1 -1
- autocoder/index/index.py +3 -3
- autocoder/models.py +22 -9
- autocoder/rag/api_server.py +14 -2
- autocoder/rag/cache/local_byzer_storage_cache.py +1 -1
- autocoder/rag/cache/local_duckdb_storage_cache.py +8 -0
- autocoder/rag/cache/simple_cache.py +63 -33
- autocoder/rag/loaders/docx_loader.py +1 -1
- autocoder/rag/loaders/filter_utils.py +133 -76
- autocoder/rag/loaders/image_loader.py +15 -3
- autocoder/rag/loaders/pdf_loader.py +2 -2
- autocoder/rag/long_context_rag.py +11 -0
- autocoder/rag/qa_conversation_strategy.py +5 -31
- autocoder/rag/utils.py +21 -2
- autocoder/utils/_markitdown.py +66 -25
- autocoder/utils/auto_coder_utils/chat_stream_out.py +4 -4
- autocoder/utils/thread_utils.py +9 -27
- autocoder/version.py +1 -1
- {auto_coder-0.1.353.dist-info → auto_coder-0.1.355.dist-info}/LICENSE +0 -0
- {auto_coder-0.1.353.dist-info → auto_coder-0.1.355.dist-info}/WHEEL +0 -0
- {auto_coder-0.1.353.dist-info → auto_coder-0.1.355.dist-info}/entry_points.txt +0 -0
- {auto_coder-0.1.353.dist-info → auto_coder-0.1.355.dist-info}/top_level.txt +0 -0
autocoder/utils/_markitdown.py
CHANGED
|
@@ -30,18 +30,20 @@ from pdfminer.pdfpage import PDFPage
|
|
|
30
30
|
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
|
|
31
31
|
import pptx
|
|
32
32
|
from pdfminer.image import ImageWriter
|
|
33
|
+
import time
|
|
33
34
|
|
|
34
35
|
import numpy as np
|
|
35
36
|
from PIL import Image
|
|
36
37
|
|
|
37
38
|
# 新增导入
|
|
38
|
-
from autocoder.rag.loaders import
|
|
39
|
+
from autocoder.rag.loaders.filter_utils import FilterRuleManager
|
|
39
40
|
from autocoder.rag.loaders.image_loader import ImageLoader
|
|
40
41
|
|
|
41
42
|
# File-format detection
|
|
42
43
|
import puremagic
|
|
43
44
|
import requests
|
|
44
45
|
from bs4 import BeautifulSoup
|
|
46
|
+
from loguru import logger
|
|
45
47
|
|
|
46
48
|
# Optional Transcription support
|
|
47
49
|
try:
|
|
@@ -503,12 +505,16 @@ class PdfConverter(DocumentConverter):
|
|
|
503
505
|
Converts PDFs to Markdown with support for extracting and including images.
|
|
504
506
|
"""
|
|
505
507
|
|
|
508
|
+
def __init__(self, llm=None, product_mode="lite"):
|
|
509
|
+
super().__init__()
|
|
510
|
+
self.llm = llm
|
|
511
|
+
self.product_mode = product_mode
|
|
512
|
+
|
|
506
513
|
def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
|
|
507
514
|
# Bail if not a PDF
|
|
508
515
|
extension = kwargs.get("file_extension", "")
|
|
509
516
|
if extension.lower() != ".pdf":
|
|
510
|
-
return None
|
|
511
|
-
|
|
517
|
+
return None
|
|
512
518
|
image_output_dir = None
|
|
513
519
|
if kwargs.get("image_output_dir", None):
|
|
514
520
|
image_output_dir = kwargs.get("image_output_dir")
|
|
@@ -531,17 +537,18 @@ class PdfConverter(DocumentConverter):
|
|
|
531
537
|
rsrcmgr = PDFResourceManager()
|
|
532
538
|
laparams = LAParams()
|
|
533
539
|
device = PDFPageAggregator(rsrcmgr, laparams=laparams)
|
|
534
|
-
interpreter = PDFPageInterpreter(rsrcmgr, device)
|
|
540
|
+
interpreter = PDFPageInterpreter(rsrcmgr, device)
|
|
535
541
|
|
|
536
542
|
# Process each page
|
|
537
543
|
for page in PDFPage.create_pages(document):
|
|
538
544
|
interpreter.process_page(page)
|
|
539
|
-
layout = device.get_result()
|
|
545
|
+
layout = device.get_result()
|
|
540
546
|
|
|
541
547
|
# Extract text and images from the page
|
|
542
548
|
page_content = self._process_layout(
|
|
543
549
|
layout, image_output_dir, image_count
|
|
544
550
|
)
|
|
551
|
+
|
|
545
552
|
text_content.extend(page_content)
|
|
546
553
|
image_count += len([c for c in page_content if c.startswith("![Image")])
|
|
547
554
|
|
|
@@ -582,13 +589,12 @@ class PdfConverter(DocumentConverter):
|
|
|
582
589
|
image_output_dir, f"image_{local_image_count}{suffix}")
|
|
583
590
|
os.rename(temp_path, image_path)
|
|
584
591
|
content.append(f"")
|
|
585
|
-
# =====
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
import traceback; traceback.print_exc()
|
|
592
|
+
# ===== 修改:通过FilterRuleManager单例实例判断是否需要解析图片
|
|
593
|
+
v = try_parse_image(image_path,self.llm)
|
|
594
|
+
if v:
|
|
595
|
+
content.append("<image_content>")
|
|
596
|
+
content.append(v)
|
|
597
|
+
content.append("</image_content>")
|
|
592
598
|
# =====
|
|
593
599
|
local_image_count += 1
|
|
594
600
|
continue
|
|
@@ -618,7 +624,11 @@ class PdfConverter(DocumentConverter):
|
|
|
618
624
|
content.append(
|
|
619
625
|
f"\n"
|
|
620
626
|
)
|
|
621
|
-
try_parse_image(image_path)
|
|
627
|
+
v = try_parse_image(image_path,self.llm)
|
|
628
|
+
if v:
|
|
629
|
+
content.append("<image_content>")
|
|
630
|
+
content.append(v)
|
|
631
|
+
content.append("</image_content>")
|
|
622
632
|
local_image_count += 1
|
|
623
633
|
continue
|
|
624
634
|
elif colorspace == "DeviceGray":
|
|
@@ -629,7 +639,11 @@ class PdfConverter(DocumentConverter):
|
|
|
629
639
|
content.append(
|
|
630
640
|
f"\n"
|
|
631
641
|
)
|
|
632
|
-
try_parse_image(image_path)
|
|
642
|
+
v = try_parse_image(image_path,self.llm)
|
|
643
|
+
if v:
|
|
644
|
+
content.append("<image_content>")
|
|
645
|
+
content.append(v)
|
|
646
|
+
content.append("</image_content>")
|
|
633
647
|
local_image_count += 1
|
|
634
648
|
continue
|
|
635
649
|
except Exception as e:
|
|
@@ -641,8 +655,12 @@ class PdfConverter(DocumentConverter):
|
|
|
641
655
|
img_file.write(image_data)
|
|
642
656
|
|
|
643
657
|
content.append(f"\n")
|
|
644
|
-
# =====
|
|
645
|
-
try_parse_image(image_path)
|
|
658
|
+
# ===== 新增:图片解析
|
|
659
|
+
v = try_parse_image(image_path,self.llm)
|
|
660
|
+
if v:
|
|
661
|
+
content.append("<image_content>")
|
|
662
|
+
content.append(v)
|
|
663
|
+
content.append("</image_content>")
|
|
646
664
|
local_image_count += 1
|
|
647
665
|
|
|
648
666
|
# Handle text
|
|
@@ -1089,6 +1107,8 @@ class MarkItDown:
|
|
|
1089
1107
|
llm: Optional[Any] = None,
|
|
1090
1108
|
product_mode: Optional[str] = None,
|
|
1091
1109
|
):
|
|
1110
|
+
# 初始化FilterRuleManager单例实例
|
|
1111
|
+
self._filter_rule_manager = FilterRuleManager.get_instance()
|
|
1092
1112
|
if requests_session is None:
|
|
1093
1113
|
self._requests_session = requests.Session()
|
|
1094
1114
|
else:
|
|
@@ -1117,7 +1137,7 @@ class MarkItDown:
|
|
|
1117
1137
|
self.register_page_converter(WavConverter())
|
|
1118
1138
|
self.register_page_converter(Mp3Converter())
|
|
1119
1139
|
self.register_page_converter(ImageConverter())
|
|
1120
|
-
self.register_page_converter(PdfConverter())
|
|
1140
|
+
self.register_page_converter(PdfConverter(llm,product_mode))
|
|
1121
1141
|
|
|
1122
1142
|
def convert(
|
|
1123
1143
|
self, source: Union[str, requests.Response], **kwargs: Any
|
|
@@ -1126,8 +1146,7 @@ class MarkItDown:
|
|
|
1126
1146
|
Args:
|
|
1127
1147
|
- source: can be a string representing a path or url, or a requests.response object
|
|
1128
1148
|
- extension: specifies the file extension to use when interpreting the file. If None, infer from source (path, uri, content-type, etc.)
|
|
1129
|
-
"""
|
|
1130
|
-
|
|
1149
|
+
"""
|
|
1131
1150
|
# Local path or url
|
|
1132
1151
|
if isinstance(source, str):
|
|
1133
1152
|
if (
|
|
@@ -1343,14 +1362,36 @@ class MarkItDown:
|
|
|
1343
1362
|
self._page_converters.insert(0, converter)
|
|
1344
1363
|
|
|
1345
1364
|
|
|
1346
|
-
def try_parse_image(image_path: str):
|
|
1365
|
+
def try_parse_image(image_path: str, llm=None):
|
|
1347
1366
|
"""
|
|
1348
|
-
根据
|
|
1367
|
+
根据FilterRuleManager单例实例判断是否需要解析图片,如果需要则调用ImageLoader.image_to_markdown。
|
|
1349
1368
|
解析失败会自动捕获异常。
|
|
1350
1369
|
"""
|
|
1351
|
-
|
|
1370
|
+
import uuid
|
|
1371
|
+
start_time = time.time()
|
|
1372
|
+
req_id = str(uuid.uuid4())[:8]
|
|
1373
|
+
logger.info(f"\n==== [try_parse_image] START | req_id={req_id} ====")
|
|
1374
|
+
logger.info(f"[try_parse_image][{req_id}] image_path: {image_path}, llm: {llm}")
|
|
1375
|
+
if FilterRuleManager.get_instance().should_parse_image(image_path):
|
|
1376
|
+
logger.info(f"[try_parse_image][{req_id}] should_parse_image=True, start parsing...")
|
|
1352
1377
|
try:
|
|
1353
|
-
|
|
1354
|
-
|
|
1355
|
-
|
|
1378
|
+
v = ImageLoader.image_to_markdown(image_path, llm=llm, engine="paddle")
|
|
1379
|
+
logger.info(f"[try_parse_image][{req_id}] image_to_markdown result: {str(v)[:200]}")
|
|
1380
|
+
if llm:
|
|
1381
|
+
v = ImageLoader.format_table_in_content(v, llm)
|
|
1382
|
+
logger.info(f"[try_parse_image][{req_id}] format_table_in_content result: {str(v)[:200]}")
|
|
1383
|
+
elapsed = time.time() - start_time
|
|
1384
|
+
logger.info(f"[try_parse_image][{req_id}] SUCCESS | execution time: {elapsed:.3f} seconds")
|
|
1385
|
+
logger.info(f"==== [try_parse_image] END | req_id={req_id} ====")
|
|
1386
|
+
return v
|
|
1387
|
+
except Exception as e:
|
|
1388
|
+
elapsed = time.time() - start_time
|
|
1389
|
+
logger.error(f"[try_parse_image][{req_id}] EXCEPTION | execution time: {elapsed:.3f} seconds | image_path: {image_path} | llm: {llm}")
|
|
1390
|
+
logger.exception(e)
|
|
1391
|
+
logger.info(f"==== [try_parse_image] END (EXCEPTION) | req_id={req_id} ====")
|
|
1392
|
+
return ""
|
|
1393
|
+
else:
|
|
1394
|
+
logger.info(f"[try_parse_image][{req_id}] should_parse_image=False, skip parsing.")
|
|
1395
|
+
logger.info(f"==== [try_parse_image] END (SKIP) | req_id={req_id} ====")
|
|
1396
|
+
return ""
|
|
1356
1397
|
|
|
autocoder/utils/auto_coder_utils/chat_stream_out.py
CHANGED
|
@@ -230,9 +230,8 @@ def stream_out(
|
|
|
230
230
|
refresh_per_second=4,
|
|
231
231
|
console=console
|
|
232
232
|
) as live:
|
|
233
|
-
for res in stream_generator:
|
|
234
|
-
global_cancel.check_and_raise(
|
|
235
|
-
|
|
233
|
+
for res in stream_generator:
|
|
234
|
+
global_cancel.check_and_raise(args.event_file)
|
|
236
235
|
last_meta = res[1]
|
|
237
236
|
content = res[0]
|
|
238
237
|
|
|
@@ -241,7 +240,7 @@ def stream_out(
|
|
|
241
240
|
reasoning_content = last_meta.reasoning_content
|
|
242
241
|
|
|
243
242
|
if reasoning_content == "" and content == "":
|
|
244
|
-
continue
|
|
243
|
+
continue
|
|
245
244
|
|
|
246
245
|
if first_token_time == 0.0:
|
|
247
246
|
first_token_time = time.time() - first_token_time_start
|
|
@@ -292,6 +291,7 @@ def stream_out(
|
|
|
292
291
|
get_event_manager(args.event_file).write_stream(content.to_dict(),
|
|
293
292
|
metadata=EventMetadata(
|
|
294
293
|
stream_out_type=extra_meta.get("stream_out_type", ""),
|
|
294
|
+
path=extra_meta.get("path", ""),
|
|
295
295
|
is_streaming=True,
|
|
296
296
|
output="delta",
|
|
297
297
|
action_file=args.file
|
autocoder/utils/thread_utils.py
CHANGED
|
@@ -36,9 +36,7 @@ def run_in_raw_thread(token: Optional[str] = None, context: Optional[Dict[str, A
|
|
|
36
36
|
def wrapper(*args, **kwargs):
|
|
37
37
|
# Store thread results
|
|
38
38
|
result = []
|
|
39
|
-
exception_raised = [None] # 存储工作线程中的异常
|
|
40
|
-
thread_token = token
|
|
41
|
-
thread_context = context or {}
|
|
39
|
+
exception_raised = [None] # 存储工作线程中的异常
|
|
42
40
|
thread_terminated = threading.Event() # 用于标记线程是否已终止
|
|
43
41
|
|
|
44
42
|
def worker():
|
|
@@ -53,9 +51,7 @@ def run_in_raw_thread(token: Optional[str] = None, context: Optional[Dict[str, A
|
|
|
53
51
|
except Exception as e:
|
|
54
52
|
# 存储其他异常
|
|
55
53
|
exception_raised[0] = e
|
|
56
|
-
finally:
|
|
57
|
-
# 无论如何执行完毕后,重置取消标志并标记线程已终止
|
|
58
|
-
global_cancel.reset(thread_token)
|
|
54
|
+
finally:
|
|
59
55
|
thread_terminated.set()
|
|
60
56
|
|
|
61
57
|
# Create and start thread with a meaningful name
|
|
@@ -72,18 +68,7 @@ def run_in_raw_thread(token: Optional[str] = None, context: Optional[Dict[str, A
|
|
|
72
68
|
|
|
73
69
|
while thread.is_alive():
|
|
74
70
|
# 每次等待较短时间,以便能够及时响应中断
|
|
75
|
-
thread.join(0.1)
|
|
76
|
-
|
|
77
|
-
# 检查是否已经超过最大等待时间(仅适用于已取消的情况)
|
|
78
|
-
elapsed_time = time.time() - wait_start_time
|
|
79
|
-
if cancelled_by_keyboard and elapsed_time > max_wait_time:
|
|
80
|
-
printer.print_in_terminal("force_terminating_thread")
|
|
81
|
-
break
|
|
82
|
-
|
|
83
|
-
# 检查线程间的取消请求
|
|
84
|
-
if global_cancel.is_requested(thread_token):
|
|
85
|
-
# 传播取消请求到工作线程
|
|
86
|
-
raise CancelRequestedException(thread_token)
|
|
71
|
+
thread.join(0.1)
|
|
87
72
|
|
|
88
73
|
# 如果工作线程出现了异常,在主线程中重新抛出
|
|
89
74
|
if exception_raised[0] is not None:
|
|
@@ -92,15 +77,12 @@ def run_in_raw_thread(token: Optional[str] = None, context: Optional[Dict[str, A
|
|
|
92
77
|
# 返回结果
|
|
93
78
|
return result[0] if result else None
|
|
94
79
|
|
|
95
|
-
except KeyboardInterrupt:
|
|
96
|
-
#
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
global_cancel.
|
|
100
|
-
printer.print_in_terminal("cancellation_requested")
|
|
101
|
-
|
|
102
|
-
# 标记为键盘中断取消
|
|
103
|
-
cancelled_by_keyboard = True
|
|
80
|
+
except KeyboardInterrupt:
|
|
81
|
+
# 取消所有任务
|
|
82
|
+
for token in global_cancel.get_active_tokens():
|
|
83
|
+
print(f"Cancelling job: {token}")
|
|
84
|
+
global_cancel.set_active_tokens()
|
|
85
|
+
printer.print_in_terminal("cancellation_requested")
|
|
104
86
|
wait_start_time = time.time()
|
|
105
87
|
|
|
106
88
|
# 等待线程终止或检测到取消
|
autocoder/version.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.1.353"
|
|
1
|
+
__version__ = "0.1.355"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|