auto-coder 0.1.201__py3-none-any.whl → 0.1.202__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of auto-coder might be problematic. Click here for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: auto-coder
3
- Version: 0.1.201
3
+ Version: 0.1.202
4
4
  Summary: AutoCoder: AutoCoder
5
5
  Author: allwefantasy
6
6
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
@@ -7,7 +7,7 @@ autocoder/chat_auto_coder.py,sha256=kgcD4HKKmvDd2CI048TqZwdx2hrQRbZKYPO10qyrbA8,
7
7
  autocoder/chat_auto_coder_lang.py,sha256=QYtu5gWEQmWKVovR_qUZ8plySZarNFX_Onk-1vN9IiA,8524
8
8
  autocoder/command_args.py,sha256=MBsVjZpADu5u7CjY_1F8fGUW-9dVGyNDpzWyrDIMxz8,29890
9
9
  autocoder/lang.py,sha256=Ajng6m7towmx-cvQfEHPFp43iEfddPvr8ju5GH4H8qA,13819
10
- autocoder/version.py,sha256=sVvR48V5CyWVyLriw3601idt1dQMOAnWtcO_tdLfmow,24
10
+ autocoder/version.py,sha256=KbKAfOQ1mRxioaKsb1VhjDtdk7RG8UIteq2BswiztKI,24
11
11
  autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  autocoder/agent/auto_tool.py,sha256=DBzip-P_T6ZtT2eHexPcusmKYD0h7ufzp7TLwXAY10E,11554
13
13
  autocoder/agent/coder.py,sha256=x6bdJwDuETGg9ebQnYlUWCxCtQcDGg73LtI6McpWslQ,72034
@@ -63,7 +63,7 @@ autocoder/rag/api_server.py,sha256=dRbhAZVRAOlZ64Cnxf4_rKb4iJwHnrWS9Zr67IVORw0,7
63
63
  autocoder/rag/doc_filter.py,sha256=Ha0Yae_G_hF72YzvrO7NoDZcG18K4hRcqGAEqfrIwAs,9330
64
64
  autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
65
65
  autocoder/rag/llm_wrapper.py,sha256=sbDxCANiZyWb_ocqNgqu2oy3c2t8orPNRGleEs-Uwl8,2649
66
- autocoder/rag/long_context_rag.py,sha256=kB8A8WuJJ7xYeSKtWI7pSmrYZk5Yo5HpFOm6sx7A2zI,23923
66
+ autocoder/rag/long_context_rag.py,sha256=Y0bpO0mNOBGUtQ8WBOSbrjFLKxRUcZnD-Dax1zU7q-I,24436
67
67
  autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
68
68
  autocoder/rag/rag_entry.py,sha256=V1RJ8RGqM30DNPmzymv64rZjNRGWn6kfc8sRy_LECg0,2451
69
69
  autocoder/rag/raw_rag.py,sha256=yS2Ur6kG0IRjhCj2_VonwxjY_xls_E62jO5Gz5j2nqE,2952
@@ -94,7 +94,7 @@ autocoder/regexproject/__init__.py,sha256=lHTpHfYkguCMtczXoH4bMr-IMNZQtXIjmtSvjt
94
94
  autocoder/suffixproject/__init__.py,sha256=bEPW9AyGSQ8kNzrgKEXyRikrUCUEuJ6b6vCLzO8Ja6g,11017
95
95
  autocoder/tsproject/__init__.py,sha256=avSnMA3uSdZmv5MxTilDPFLRFmtfzFr1NPhHaRMFhX4,11625
96
96
  autocoder/utils/__init__.py,sha256=O3n6cpsgkIbbMuwmBHSQ1dls_IBD7_7YKFFaeKNo_tc,1193
97
- autocoder/utils/_markitdown.py,sha256=ZITzuo8D7dH3s9K9rmriYRciqTYpHIqRinS9FZ10pOc,46691
97
+ autocoder/utils/_markitdown.py,sha256=RU88qn4eZfYIy0GDrPxlI8oYXIypbi63VRJjdlnE0VU,47431
98
98
  autocoder/utils/coder.py,sha256=rK8e0svQBe0NOP26dIGToUXgha_hUDgxlWoC_p_r7oc,5698
99
99
  autocoder/utils/conversation_store.py,sha256=sz-hhY7sttPAUOAQU6Pze-5zJc3j0_Emj22dM_0l5ro,1161
100
100
  autocoder/utils/llm_client_interceptors.py,sha256=FEHNXoFZlCjAHQcjPRyX8FOMjo6rPXpO2AJ2zn2KTTo,901
@@ -106,9 +106,9 @@ autocoder/utils/request_event_queue.py,sha256=r3lo5qGsB1dIjzVQ05dnr0z_9Z3zOkBdP1
106
106
  autocoder/utils/request_queue.py,sha256=nwp6PMtgTCiuwJI24p8OLNZjUiprC-TsefQrhMI-yPE,3889
107
107
  autocoder/utils/rest.py,sha256=HawagAap3wMIDROGhY1730zSZrJR_EycODAA5qOj83c,8807
108
108
  autocoder/utils/tests.py,sha256=BqphrwyycGAvs-5mhH8pKtMZdObwhFtJ5MC_ZAOiLq8,1340
109
- auto_coder-0.1.201.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
110
- auto_coder-0.1.201.dist-info/METADATA,sha256=2iejkjbZEUMhfqT1A6k7MTJmTdpc0urt1dGGXtQ3KHA,2575
111
- auto_coder-0.1.201.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
112
- auto_coder-0.1.201.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
113
- auto_coder-0.1.201.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
114
- auto_coder-0.1.201.dist-info/RECORD,,
109
+ auto_coder-0.1.202.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
110
+ auto_coder-0.1.202.dist-info/METADATA,sha256=JU3TFfBNRhy5U-nqB2vFy2gLKLQs-E89lUX3B0MrNj8,2575
111
+ auto_coder-0.1.202.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
112
+ auto_coder-0.1.202.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
113
+ auto_coder-0.1.202.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
114
+ auto_coder-0.1.202.dist-info/RECORD,,
@@ -200,20 +200,27 @@ class LongContextRAG:
200
200
  def _answer_question(
201
201
  self, query: str, relevant_docs: List[str]
202
202
  ) -> Generator[str, None, None]:
203
- """
204
- 使用以下文档来回答问题。如果文档中没有相关信息,请说"我没有足够的信息来回答这个问题"。
205
-
203
+ """
206
204
  文档:
207
205
  {% for doc in relevant_docs %}
208
206
  {{ doc }}
209
207
  {% endfor %}
210
208
 
211
- 问题:{{ query }}
209
+ 使用以上文档来回答用户的问题。回答要求:
210
+
211
+ 1. 严格基于文档内容回答
212
+ - 如果文档提供的信息无法回答问题,请明确回复:"抱歉,文档中没有足够的信息来回答这个问题。"
213
+ - 不要添加、推测或扩展文档未提及的信息
212
214
 
213
- 要求:
214
- 1. 注意相应的markdown图片如果存在也要输出,尽可能图文并茂
215
-
216
- 回答:
215
+ 2. 格式如 ![image](./path.png) 的 Markdown 图片处理
216
+ - 根据Markdown 图片前后文本内容推测改图片与问题的相关性,有相关性则在回答中输出该Markdown图片路径
217
+ - 根据相关图片在文档中的位置,自然融入答复内容,保持上下文连贯
218
+ - 完整保留原始图片路径,不省略任何部分
219
+
220
+ 3. 回答格式要求
221
+ - 使用markdown格式提升可读性
222
+
223
+ 问题:{{ query }}
217
224
  """
218
225
 
219
226
  def _get_document_retriever_class(self):
@@ -68,7 +68,8 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
68
68
  """
69
69
 
70
70
  def __init__(self, **options: Any):
71
- options["heading_style"] = options.get("heading_style", markdownify.ATX)
71
+ options["heading_style"] = options.get(
72
+ "heading_style", markdownify.ATX)
72
73
  # Explicitly cast options to the expected type if necessary
73
74
  super().__init__(**options)
74
75
 
@@ -318,7 +319,7 @@ class YouTubeConverter(DocumentConverter):
318
319
  obj_start = lines[0].find("{")
319
320
  obj_end = lines[0].rfind("}")
320
321
  if obj_start >= 0 and obj_end >= 0:
321
- data = json.loads(lines[0][obj_start : obj_end + 1])
322
+ data = json.loads(lines[0][obj_start: obj_end + 1])
322
323
  attrdesc = self._findKey(
323
324
  data, "attributedDescriptionBodyText"
324
325
  ) # type: ignore
@@ -331,7 +332,8 @@ class YouTubeConverter(DocumentConverter):
331
332
  # Start preparing the page
332
333
  webpage_text = "# YouTube\n"
333
334
 
334
- title = self._get(metadata, ["title", "og:title", "name"]) # type: ignore
335
+ title = self._get(
336
+ metadata, ["title", "og:title", "name"]) # type: ignore
335
337
  assert isinstance(title, str)
336
338
 
337
339
  if title:
@@ -468,7 +470,8 @@ class BingSerpConverter(DocumentConverter):
468
470
 
469
471
  try:
470
472
  # RFC 4648 / Base64URL" variant, which uses "-" and "_"
471
- a["href"] = base64.b64decode(u, altchars="-_").decode("utf-8")
473
+ a["href"] = base64.b64decode(
474
+ u, altchars="-_").decode("utf-8")
472
475
  except UnicodeDecodeError:
473
476
  pass
474
477
  except binascii.Error:
@@ -477,7 +480,8 @@ class BingSerpConverter(DocumentConverter):
477
480
  # Convert to markdown
478
481
  md_result = _markdownify.convert_soup(result).strip()
479
482
  lines = [line.strip() for line in re.split(r"\n+", md_result)]
480
- results.append("\n".join([line for line in lines if len(line) > 0]))
483
+ results.append(
484
+ "\n".join([line for line in lines if len(line) > 0]))
481
485
 
482
486
  webpage_text = (
483
487
  f"## A Bing search for '{query}' found the following results:\n\n"
@@ -507,7 +511,8 @@ class PdfConverter(DocumentConverter):
507
511
  else:
508
512
  # Create output directory for images if it doesn't exist
509
513
  image_output_dir = os.path.join(
510
- os.path.dirname(local_path), "_images", os.path.basename(local_path)
514
+ os.path.dirname(local_path), "_images", os.path.basename(
515
+ local_path).replace(" ", "_")
511
516
  )
512
517
  os.makedirs(image_output_dir, exist_ok=True)
513
518
 
@@ -545,18 +550,17 @@ class PdfConverter(DocumentConverter):
545
550
  self, layout, image_output_dir: str, image_count: int
546
551
  ) -> List[str]:
547
552
  """Process the layout of a PDF page, extracting both text and images."""
548
- content = []
549
- iw = ImageWriter(image_output_dir)
550
-
553
+ content = []
554
+ local_image_count = image_count
551
555
  for lt_obj in layout:
552
556
  # Handle images
553
557
  if isinstance(lt_obj, LTImage) or (
554
558
  isinstance(lt_obj, LTFigure) and lt_obj.name.startswith("Im")
555
- ):
556
- image_count += 1
559
+ ):
557
560
  image_data = None
558
561
  image_meta = {}
559
- image_path = os.path.join(image_output_dir, f"image_{image_count}.png")
562
+ image_path = os.path.join(
563
+ image_output_dir, f"image_{local_image_count}.png")
560
564
 
561
565
  if hasattr(lt_obj, "stream"):
562
566
  image_data = lt_obj.stream.get_data()
@@ -566,12 +570,15 @@ class PdfConverter(DocumentConverter):
566
570
 
567
571
  if image_data:
568
572
  if isinstance(lt_obj, LTImage):
573
+ iw = ImageWriter(image_output_dir)
569
574
  name = iw.export_image(lt_obj)
570
- suffix = os.path.splitext(name)[1]
575
+ suffix = os.path.splitext(name)[1]
571
576
  temp_path = os.path.join(image_output_dir, name)
572
- image_path = os.path.join(image_output_dir, f"image_{image_count}{suffix}")
577
+ image_path = os.path.join(
578
+ image_output_dir, f"image_{local_image_count}{suffix}")
573
579
  os.rename(temp_path, image_path)
574
- content.append(f"![Image {image_count}]({image_path})")
580
+ content.append(f"![Image {local_image_count}]({image_path})")
581
+ local_image_count += 1
575
582
  continue
576
583
  try:
577
584
  # Try to handle raw pixel data
@@ -580,12 +587,14 @@ class PdfConverter(DocumentConverter):
580
587
  height = image_meta["Height"]
581
588
  bits = image_meta["BitsPerComponent"]
582
589
  colorspace = image_meta["ColorSpace"].name
583
- new_image_data = np.frombuffer(image_data, dtype=np.uint8)
590
+ new_image_data = np.frombuffer(
591
+ image_data, dtype=np.uint8)
584
592
  # Normalize to 8-bit if necessary
585
593
  if bits != 8:
586
594
  max_val = (1 << bits) - 1
587
595
  new_image_data = (
588
- new_image_data.astype("float32") * 255 / max_val
596
+ new_image_data.astype(
597
+ "float32") * 255 / max_val
589
598
  ).astype("uint8")
590
599
 
591
600
  if colorspace == "DeviceRGB":
@@ -595,16 +604,19 @@ class PdfConverter(DocumentConverter):
595
604
  img = Image.fromarray(new_image_data, "RGB")
596
605
  img.save(image_path)
597
606
  content.append(
598
- f"![Image {image_count}]({image_path})\n"
607
+ f"![Image {local_image_count}]({image_path})\n"
599
608
  )
609
+ local_image_count += 1
600
610
  continue
601
611
  elif colorspace == "DeviceGray":
602
- new_image_data = new_image_data.reshape((height, width))
612
+ new_image_data = new_image_data.reshape(
613
+ (height, width))
603
614
  img = Image.fromarray(new_image_data, "L")
604
615
  img.save(image_path)
605
616
  content.append(
606
- f"![Image {image_count}]({image_path})\n"
617
+ f"![Image {local_image_count}]({image_path})\n"
607
618
  )
619
+ local_image_count += 1
608
620
  continue
609
621
  except Exception as e:
610
622
  print(
@@ -614,7 +626,8 @@ class PdfConverter(DocumentConverter):
614
626
  with open(image_path, "wb") as img_file:
615
627
  img_file.write(image_data)
616
628
 
617
- content.append(f"![Image {image_count}]({image_path})\n")
629
+ content.append(f"![Image {local_image_count}]({image_path})\n")
630
+ local_image_count += 1
618
631
 
619
632
  # Handle text
620
633
  if hasattr(lt_obj, "get_text"):
@@ -625,7 +638,8 @@ class PdfConverter(DocumentConverter):
625
638
  # Recursively process nested layouts
626
639
  elif hasattr(lt_obj, "_objs"):
627
640
  content.extend(
628
- self._process_layout(lt_obj._objs, image_output_dir, image_count)
641
+ self._process_layout(
642
+ lt_obj._objs, image_output_dir, image_count)
629
643
  )
630
644
 
631
645
  return content
@@ -635,7 +649,7 @@ class DocxConverter(HtmlConverter):
635
649
  """
636
650
  Converts DOCX files to Markdown. Style information (e.g.m headings) and tables are preserved where possible.
637
651
  """
638
-
652
+
639
653
  def __init__(self):
640
654
  self._image_counter = 0
641
655
  super().__init__()
@@ -644,18 +658,19 @@ class DocxConverter(HtmlConverter):
644
658
  """
645
659
  保存图片并返回相对路径,使用递增的计数器来命名文件
646
660
  """
647
- # 获取图片内容和格式
648
- image_format = image.content_type.split('/')[-1] if image.content_type else 'png'
649
-
661
+ # 获取图片内容和格式
662
+ image_format = image.content_type.split(
663
+ '/')[-1] if image.content_type else 'png'
664
+
650
665
  # 增加计数器并生成文件名
651
666
  self._image_counter += 1
652
667
  image_filename = f"image_{self._image_counter}.{image_format}"
653
-
668
+
654
669
  # 保存图片
655
670
  image_path = os.path.join(output_dir, image_filename)
656
671
  with image.open() as image_content, open(image_path, 'wb') as f:
657
672
  f.write(image_content.read())
658
-
673
+
659
674
  return image_path
660
675
 
661
676
  def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
@@ -671,7 +686,7 @@ class DocxConverter(HtmlConverter):
671
686
  else:
672
687
  # Create output directory for images if it doesn't exist
673
688
  image_output_dir = os.path.join(os.path.dirname(
674
- local_path), "_images", os.path.basename(local_path))
689
+ local_path), "_images", os.path.basename(local_path).replace(" ", "_"))
675
690
  os.makedirs(image_output_dir, exist_ok=True)
676
691
 
677
692
  result = None
@@ -682,7 +697,7 @@ class DocxConverter(HtmlConverter):
682
697
  "src": self._save_image(image, image_output_dir),
683
698
  "alt": image.alt_text if image.alt_text else f"Image {self._image_counter}"
684
699
  }
685
-
700
+
686
701
  # 进行转换
687
702
  result = mammoth.convert_to_html(
688
703
  docx_file,
@@ -691,7 +706,7 @@ class DocxConverter(HtmlConverter):
691
706
  html_content = result.value
692
707
  result = self._convert(html_content)
693
708
 
694
- return result
709
+ return result
695
710
 
696
711
 
697
712
  class XlsxConverter(HtmlConverter):
@@ -710,7 +725,8 @@ class XlsxConverter(HtmlConverter):
710
725
  for s in sheets:
711
726
  md_content += f"## {s}\n"
712
727
  html_content = sheets[s].to_html(index=False)
713
- md_content += self._convert(html_content).text_content.strip() + "\n\n"
728
+ md_content += self._convert(
729
+ html_content).text_content.strip() + "\n\n"
714
730
 
715
731
  return DocumentConverterResult(
716
732
  title=None,
@@ -745,7 +761,8 @@ class PptxConverter(HtmlConverter):
745
761
  # https://github.com/scanny/python-pptx/pull/512#issuecomment-1713100069
746
762
  alt_text = ""
747
763
  try:
748
- alt_text = shape._element._nvXxPr.cNvPr.attrib.get("descr", "")
764
+ alt_text = shape._element._nvXxPr.cNvPr.attrib.get(
765
+ "descr", "")
749
766
  except Exception:
750
767
  pass
751
768
 
@@ -767,14 +784,17 @@ class PptxConverter(HtmlConverter):
767
784
  html_table += "<tr>"
768
785
  for cell in row.cells:
769
786
  if first_row:
770
- html_table += "<th>" + html.escape(cell.text) + "</th>"
787
+ html_table += "<th>" + \
788
+ html.escape(cell.text) + "</th>"
771
789
  else:
772
- html_table += "<td>" + html.escape(cell.text) + "</td>"
790
+ html_table += "<td>" + \
791
+ html.escape(cell.text) + "</td>"
773
792
  html_table += "</tr>"
774
793
  first_row = False
775
794
  html_table += "</table></body></html>"
776
795
  md_content += (
777
- "\n" + self._convert(html_table).text_content.strip() + "\n"
796
+ "\n" +
797
+ self._convert(html_table).text_content.strip() + "\n"
778
798
  )
779
799
 
780
800
  # Text areas
@@ -1028,7 +1048,8 @@ class ImageConverter(MediaConverter):
1028
1048
  }
1029
1049
  ]
1030
1050
 
1031
- response = client.chat.completions.create(model=model, messages=messages)
1051
+ response = client.chat.completions.create(
1052
+ model=model, messages=messages)
1032
1053
  return response.choices[0].message.content
1033
1054
 
1034
1055
 
@@ -1242,9 +1263,11 @@ class MarkItDown:
1242
1263
  if res is not None:
1243
1264
  # Normalize the content
1244
1265
  res.text_content = "\n".join(
1245
- [line.rstrip() for line in re.split(r"\r?\n", res.text_content)]
1266
+ [line.rstrip()
1267
+ for line in re.split(r"\r?\n", res.text_content)]
1246
1268
  )
1247
- res.text_content = re.sub(r"\n{3,}", "\n\n", res.text_content)
1269
+ res.text_content = re.sub(
1270
+ r"\n{3,}", "\n\n", res.text_content)
1248
1271
 
1249
1272
  # Todo
1250
1273
  return res
autocoder/version.py CHANGED
@@ -1 +1 @@
1
- __version__ = "0.1.201"
1
+ __version__ = "0.1.202"