PyPI - auto-coder - Versions diffs - 0.1.201__py3-none-any.whl → 0.1.202__py3-none-any.whl - Mend

auto-coder 0.1.201__py3-none-any.whl → 0.1.202__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of auto-coder might be problematic. Click here for more details.

Files changed (9) hide show

{auto_coder-0.1.201.dist-info → auto_coder-0.1.202.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: auto-coder
-Version: 0.1.201
+Version: 0.1.202
 Summary: AutoCoder: AutoCoder
 Author: allwefantasy
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence

{auto_coder-0.1.201.dist-info → auto_coder-0.1.202.dist-info}/RECORD RENAMED Viewed

@@ -7,7 +7,7 @@ autocoder/chat_auto_coder.py,sha256=kgcD4HKKmvDd2CI048TqZwdx2hrQRbZKYPO10qyrbA8,
 autocoder/chat_auto_coder_lang.py,sha256=QYtu5gWEQmWKVovR_qUZ8plySZarNFX_Onk-1vN9IiA,8524
 autocoder/command_args.py,sha256=MBsVjZpADu5u7CjY_1F8fGUW-9dVGyNDpzWyrDIMxz8,29890
 autocoder/lang.py,sha256=Ajng6m7towmx-cvQfEHPFp43iEfddPvr8ju5GH4H8qA,13819
-autocoder/version.py,sha256=sVvR48V5CyWVyLriw3601idt1dQMOAnWtcO_tdLfmow,24
+autocoder/version.py,sha256=KbKAfOQ1mRxioaKsb1VhjDtdk7RG8UIteq2BswiztKI,24
 autocoder/agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 autocoder/agent/auto_tool.py,sha256=DBzip-P_T6ZtT2eHexPcusmKYD0h7ufzp7TLwXAY10E,11554
 autocoder/agent/coder.py,sha256=x6bdJwDuETGg9ebQnYlUWCxCtQcDGg73LtI6McpWslQ,72034
@@ -63,7 +63,7 @@ autocoder/rag/api_server.py,sha256=dRbhAZVRAOlZ64Cnxf4_rKb4iJwHnrWS9Zr67IVORw0,7
 autocoder/rag/doc_filter.py,sha256=Ha0Yae_G_hF72YzvrO7NoDZcG18K4hRcqGAEqfrIwAs,9330
 autocoder/rag/document_retriever.py,sha256=5oThtxukGuRFF96o3pHKsk306a8diXbhgSrbqyU2BvM,8894
 autocoder/rag/llm_wrapper.py,sha256=sbDxCANiZyWb_ocqNgqu2oy3c2t8orPNRGleEs-Uwl8,2649
-autocoder/rag/long_context_rag.py,sha256=kB8A8WuJJ7xYeSKtWI7pSmrYZk5Yo5HpFOm6sx7A2zI,23923
+autocoder/rag/long_context_rag.py,sha256=Y0bpO0mNOBGUtQ8WBOSbrjFLKxRUcZnD-Dax1zU7q-I,24436
 autocoder/rag/rag_config.py,sha256=8LwFcTd8OJWWwi1_WY4IzjqgtT6RyE2j4PjxS5cCTDE,802
 autocoder/rag/rag_entry.py,sha256=V1RJ8RGqM30DNPmzymv64rZjNRGWn6kfc8sRy_LECg0,2451
 autocoder/rag/raw_rag.py,sha256=yS2Ur6kG0IRjhCj2_VonwxjY_xls_E62jO5Gz5j2nqE,2952
@@ -94,7 +94,7 @@ autocoder/regexproject/__init__.py,sha256=lHTpHfYkguCMtczXoH4bMr-IMNZQtXIjmtSvjt
 autocoder/suffixproject/__init__.py,sha256=bEPW9AyGSQ8kNzrgKEXyRikrUCUEuJ6b6vCLzO8Ja6g,11017
 autocoder/tsproject/__init__.py,sha256=avSnMA3uSdZmv5MxTilDPFLRFmtfzFr1NPhHaRMFhX4,11625
 autocoder/utils/__init__.py,sha256=O3n6cpsgkIbbMuwmBHSQ1dls_IBD7_7YKFFaeKNo_tc,1193
-autocoder/utils/_markitdown.py,sha256=ZITzuo8D7dH3s9K9rmriYRciqTYpHIqRinS9FZ10pOc,46691
+autocoder/utils/_markitdown.py,sha256=RU88qn4eZfYIy0GDrPxlI8oYXIypbi63VRJjdlnE0VU,47431
 autocoder/utils/coder.py,sha256=rK8e0svQBe0NOP26dIGToUXgha_hUDgxlWoC_p_r7oc,5698
 autocoder/utils/conversation_store.py,sha256=sz-hhY7sttPAUOAQU6Pze-5zJc3j0_Emj22dM_0l5ro,1161
 autocoder/utils/llm_client_interceptors.py,sha256=FEHNXoFZlCjAHQcjPRyX8FOMjo6rPXpO2AJ2zn2KTTo,901
@@ -106,9 +106,9 @@ autocoder/utils/request_event_queue.py,sha256=r3lo5qGsB1dIjzVQ05dnr0z_9Z3zOkBdP1
 autocoder/utils/request_queue.py,sha256=nwp6PMtgTCiuwJI24p8OLNZjUiprC-TsefQrhMI-yPE,3889
 autocoder/utils/rest.py,sha256=HawagAap3wMIDROGhY1730zSZrJR_EycODAA5qOj83c,8807
 autocoder/utils/tests.py,sha256=BqphrwyycGAvs-5mhH8pKtMZdObwhFtJ5MC_ZAOiLq8,1340
-auto_coder-0.1.201.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
-auto_coder-0.1.201.dist-info/METADATA,sha256=2iejkjbZEUMhfqT1A6k7MTJmTdpc0urt1dGGXtQ3KHA,2575
-auto_coder-0.1.201.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
-auto_coder-0.1.201.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
-auto_coder-0.1.201.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
-auto_coder-0.1.201.dist-info/RECORD,,
+auto_coder-0.1.202.dist-info/LICENSE,sha256=HrhfyXIkWY2tGFK11kg7vPCqhgh5DcxleloqdhrpyMY,11558
+auto_coder-0.1.202.dist-info/METADATA,sha256=JU3TFfBNRhy5U-nqB2vFy2gLKLQs-E89lUX3B0MrNj8,2575
+auto_coder-0.1.202.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
+auto_coder-0.1.202.dist-info/entry_points.txt,sha256=0nzHtHH4pNcM7xq4EBA2toS28Qelrvcbrr59GqD_0Ak,350
+auto_coder-0.1.202.dist-info/top_level.txt,sha256=Jqc0_uJSw2GwoFQAa9iJxYns-2mWla-9ok_Y3Gcznjk,10
+auto_coder-0.1.202.dist-info/RECORD,,

autocoder/rag/long_context_rag.py CHANGED Viewed

@@ -200,20 +200,27 @@ class LongContextRAG:
     def _answer_question(
         self, query: str, relevant_docs: List[str]
     ) -> Generator[str, None, None]:
-        """
-        使用以下文档来回答问题。如果文档中没有相关信息，请说"我没有足够的信息来回答这个问题"。
+        """
         文档：
         {% for doc in relevant_docs %}
         {{ doc }}
         {% endfor %}
-        问题：{{ query }}
+        使用以上文档来回答用户的问题。回答要求：
+        1. 严格基于文档内容回答
+        - 如果文档提供的信息无法回答问题,请明确回复:"抱歉,文档中没有足够的信息来回答这个问题。"
+        - 不要添加、推测或扩展文档未提及的信息
-        要求：
-        1. 注意相应的markdown图片如果存在也要输出,尽可能图文并茂
-        回答：
+        2. 格式如 ![image](./path.png) 的 Markdown 图片处理
+        - 根据Markdown 图片前后文本内容推测改图片与问题的相关性，有相关性则在回答中输出该Markdown图片路径
+        - 根据相关图片在文档中的位置，自然融入答复内容,保持上下文连贯
+        - 完整保留原始图片路径,不省略任何部分
+        3. 回答格式要求
+        - 使用markdown格式提升可读性
+        问题：{{ query }}
         """
     def _get_document_retriever_class(self):

autocoder/utils/_markitdown.py CHANGED Viewed

@@ -68,7 +68,8 @@ class _CustomMarkdownify(markdownify.MarkdownConverter):
     """
     def __init__(self, **options: Any):
-        options["heading_style"] = options.get("heading_style", markdownify.ATX)
+        options["heading_style"] = options.get(
+            "heading_style", markdownify.ATX)
         # Explicitly cast options to the expected type if necessary
         super().__init__(**options)
@@ -318,7 +319,7 @@ class YouTubeConverter(DocumentConverter):
                     obj_start = lines[0].find("{")
                     obj_end = lines[0].rfind("}")
                     if obj_start >= 0 and obj_end >= 0:
-                        data = json.loads(lines[0][obj_start : obj_end + 1])
+                        data = json.loads(lines[0][obj_start: obj_end + 1])
                         attrdesc = self._findKey(
                             data, "attributedDescriptionBodyText"
                         )  # type: ignore
@@ -331,7 +332,8 @@ class YouTubeConverter(DocumentConverter):
         # Start preparing the page
         webpage_text = "# YouTube\n"
-        title = self._get(metadata, ["title", "og:title", "name"])  # type: ignore
+        title = self._get(
+            metadata, ["title", "og:title", "name"])  # type: ignore
         assert isinstance(title, str)
         if title:
@@ -468,7 +470,8 @@ class BingSerpConverter(DocumentConverter):
                     try:
                         # RFC 4648 / Base64URL" variant, which uses "-" and "_"
-                        a["href"] = base64.b64decode(u, altchars="-_").decode("utf-8")
+                        a["href"] = base64.b64decode(
+                            u, altchars="-_").decode("utf-8")
                     except UnicodeDecodeError:
                         pass
                     except binascii.Error:
@@ -477,7 +480,8 @@ class BingSerpConverter(DocumentConverter):
             # Convert to markdown
             md_result = _markdownify.convert_soup(result).strip()
             lines = [line.strip() for line in re.split(r"\n+", md_result)]
-            results.append("\n".join([line for line in lines if len(line) > 0]))
+            results.append(
+                "\n".join([line for line in lines if len(line) > 0]))
         webpage_text = (
             f"## A Bing search for '{query}' found the following results:\n\n"
@@ -507,7 +511,8 @@ class PdfConverter(DocumentConverter):
         else:
             # Create output directory for images if it doesn't exist
             image_output_dir = os.path.join(
-                os.path.dirname(local_path), "_images", os.path.basename(local_path)
+                os.path.dirname(local_path), "_images", os.path.basename(
+                    local_path).replace(" ", "_")
             )
         os.makedirs(image_output_dir, exist_ok=True)
@@ -545,18 +550,17 @@ class PdfConverter(DocumentConverter):
         self, layout, image_output_dir: str, image_count: int
     ) -> List[str]:
         """Process the layout of a PDF page, extracting both text and images."""
-        content = []
-        iw = ImageWriter(image_output_dir)
+        content = []
+        local_image_count = image_count
         for lt_obj in layout:
             # Handle images
             if isinstance(lt_obj, LTImage) or (
                 isinstance(lt_obj, LTFigure) and lt_obj.name.startswith("Im")
-            ):
-                image_count += 1
+            ):
                 image_data = None
                 image_meta = {}
-                image_path = os.path.join(image_output_dir, f"image_{image_count}.png")
+                image_path = os.path.join(
+                    image_output_dir, f"image_{local_image_count}.png")
                 if hasattr(lt_obj, "stream"):
                     image_data = lt_obj.stream.get_data()
@@ -566,12 +570,15 @@ class PdfConverter(DocumentConverter):
                 if image_data:
                     if isinstance(lt_obj, LTImage):
+                        iw = ImageWriter(image_output_dir)
                         name = iw.export_image(lt_obj)
-                        suffix = os.path.splitext(name)[1]
+                        suffix = os.path.splitext(name)[1]
                         temp_path = os.path.join(image_output_dir, name)
-                        image_path = os.path.join(image_output_dir, f"image_{image_count}{suffix}")
+                        image_path = os.path.join(
+                            image_output_dir, f"image_{local_image_count}{suffix}")
                         os.rename(temp_path, image_path)
-                        content.append(f"![Image {image_count}]({image_path})")
+                        content.append(f"![Image {local_image_count}]({image_path})")
+                        local_image_count += 1
                         continue
                     try:
                         # Try to handle raw pixel data
@@ -580,12 +587,14 @@ class PdfConverter(DocumentConverter):
                             height = image_meta["Height"]
                             bits = image_meta["BitsPerComponent"]
                             colorspace = image_meta["ColorSpace"].name
-                            new_image_data = np.frombuffer(image_data, dtype=np.uint8)
+                            new_image_data = np.frombuffer(
+                                image_data, dtype=np.uint8)
                             # Normalize to 8-bit if necessary
                             if bits != 8:
                                 max_val = (1 << bits) - 1
                                 new_image_data = (
-                                    new_image_data.astype("float32") * 255 / max_val
+                                    new_image_data.astype(
+                                        "float32") * 255 / max_val
                                 ).astype("uint8")
                             if colorspace == "DeviceRGB":
@@ -595,16 +604,19 @@ class PdfConverter(DocumentConverter):
                                 img = Image.fromarray(new_image_data, "RGB")
                                 img.save(image_path)
                                 content.append(
-                                    f"![Image {image_count}]({image_path})\n"
+                                    f"![Image {local_image_count}]({image_path})\n"
                                 )
+                                local_image_count += 1
                                 continue
                             elif colorspace == "DeviceGray":
-                                new_image_data = new_image_data.reshape((height, width))
+                                new_image_data = new_image_data.reshape(
+                                    (height, width))
                                 img = Image.fromarray(new_image_data, "L")
                                 img.save(image_path)
                                 content.append(
-                                    f"![Image {image_count}]({image_path})\n"
+                                    f"![Image {local_image_count}]({image_path})\n"
                                 )
+                                local_image_count += 1
                                 continue
                     except Exception as e:
                         print(
@@ -614,7 +626,8 @@ class PdfConverter(DocumentConverter):
                     with open(image_path, "wb") as img_file:
                         img_file.write(image_data)
-                    content.append(f"![Image {image_count}]({image_path})\n")
+                    content.append(f"![Image {local_image_count}]({image_path})\n")
+                    local_image_count += 1
             # Handle text
             if hasattr(lt_obj, "get_text"):
@@ -625,7 +638,8 @@ class PdfConverter(DocumentConverter):
             # Recursively process nested layouts
             elif hasattr(lt_obj, "_objs"):
                 content.extend(
-                    self._process_layout(lt_obj._objs, image_output_dir, image_count)
+                    self._process_layout(
+                        lt_obj._objs, image_output_dir, image_count)
                 )
         return content
@@ -635,7 +649,7 @@ class DocxConverter(HtmlConverter):
     """
     Converts DOCX files to Markdown. Style information (e.g.m headings) and tables are preserved where possible.
     """
     def __init__(self):
         self._image_counter = 0
         super().__init__()
@@ -644,18 +658,19 @@ class DocxConverter(HtmlConverter):
         """
         保存图片并返回相对路径，使用递增的计数器来命名文件
         """
-        # 获取图片内容和格式
-        image_format = image.content_type.split('/')[-1] if image.content_type else 'png'
+        # 获取图片内容和格式
+        image_format = image.content_type.split(
+            '/')[-1] if image.content_type else 'png'
         # 增加计数器并生成文件名
         self._image_counter += 1
         image_filename = f"image_{self._image_counter}.{image_format}"
         # 保存图片
         image_path = os.path.join(output_dir, image_filename)
         with image.open() as image_content, open(image_path, 'wb') as f:
             f.write(image_content.read())
         return image_path
     def convert(self, local_path, **kwargs) -> Union[None, DocumentConverterResult]:
@@ -671,7 +686,7 @@ class DocxConverter(HtmlConverter):
         else:
             # Create output directory for images if it doesn't exist
             image_output_dir = os.path.join(os.path.dirname(
-                local_path), "_images", os.path.basename(local_path))
+                local_path), "_images", os.path.basename(local_path).replace(" ", "_"))
         os.makedirs(image_output_dir, exist_ok=True)
         result = None
@@ -682,7 +697,7 @@ class DocxConverter(HtmlConverter):
                     "src": self._save_image(image, image_output_dir),
                     "alt": image.alt_text if image.alt_text else f"Image {self._image_counter}"
                 }
             # 进行转换
             result = mammoth.convert_to_html(
                 docx_file,
@@ -691,7 +706,7 @@ class DocxConverter(HtmlConverter):
             html_content = result.value
             result = self._convert(html_content)
-        return result
+        return result
 class XlsxConverter(HtmlConverter):
@@ -710,7 +725,8 @@ class XlsxConverter(HtmlConverter):
         for s in sheets:
             md_content += f"## {s}\n"
             html_content = sheets[s].to_html(index=False)
-            md_content += self._convert(html_content).text_content.strip() + "\n\n"
+            md_content += self._convert(
+                html_content).text_content.strip() + "\n\n"
         return DocumentConverterResult(
             title=None,
@@ -745,7 +761,8 @@ class PptxConverter(HtmlConverter):
                     # https://github.com/scanny/python-pptx/pull/512#issuecomment-1713100069
                     alt_text = ""
                     try:
-                        alt_text = shape._element._nvXxPr.cNvPr.attrib.get("descr", "")
+                        alt_text = shape._element._nvXxPr.cNvPr.attrib.get(
+                            "descr", "")
                     except Exception:
                         pass
@@ -767,14 +784,17 @@ class PptxConverter(HtmlConverter):
                         html_table += "<tr>"
                         for cell in row.cells:
                             if first_row:
-                                html_table += "<th>" + html.escape(cell.text) + "</th>"
+                                html_table += "<th>" + \
+                                    html.escape(cell.text) + "</th>"
                             else:
-                                html_table += "<td>" + html.escape(cell.text) + "</td>"
+                                html_table += "<td>" + \
+                                    html.escape(cell.text) + "</td>"
                         html_table += "</tr>"
                         first_row = False
                     html_table += "</table></body></html>"
                     md_content += (
-                        "\n" + self._convert(html_table).text_content.strip() + "\n"
+                        "\n" +
+                        self._convert(html_table).text_content.strip() + "\n"
                     )
                 # Text areas
@@ -1028,7 +1048,8 @@ class ImageConverter(MediaConverter):
             }
         ]
-        response = client.chat.completions.create(model=model, messages=messages)
+        response = client.chat.completions.create(
+            model=model, messages=messages)
         return response.choices[0].message.content
@@ -1242,9 +1263,11 @@ class MarkItDown:
                 if res is not None:
                     # Normalize the content
                     res.text_content = "\n".join(
-                        [line.rstrip() for line in re.split(r"\r?\n", res.text_content)]
+                        [line.rstrip()
+                         for line in re.split(r"\r?\n", res.text_content)]
                     )
-                    res.text_content = re.sub(r"\n{3,}", "\n\n", res.text_content)
+                    res.text_content = re.sub(
+                        r"\n{3,}", "\n\n", res.text_content)
                     # Todo
                     return res

autocoder/version.py CHANGED Viewed

@@ -1 +1 @@
-__version__ = "0.1.201"
+__version__ = "0.1.202"

{auto_coder-0.1.201.dist-info → auto_coder-0.1.202.dist-info}/LICENSE RENAMED Viewed

File without changes

{auto_coder-0.1.201.dist-info → auto_coder-0.1.202.dist-info}/WHEEL RENAMED Viewed

File without changes

{auto_coder-0.1.201.dist-info → auto_coder-0.1.202.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{auto_coder-0.1.201.dist-info → auto_coder-0.1.202.dist-info}/top_level.txt RENAMED Viewed

File without changes

auto-coder 0.1.201__py3-none-any.whl → 0.1.202__py3-none-any.whl

Potentially problematic release.

auto-coder 0.1.201__py3-none-any.whl → 0.1.202__py3-none-any.whl