PyPI - data-science-document-ai - Versions diffs - 1.43.3__py3-none-any.whl → 1.43.4__py3-none-any.whl - Mend

data-science-document-ai 1.43.3__py3-none-any.whl → 1.43.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

{data_science_document_ai-1.43.3.dist-info → data_science_document_ai-1.43.4.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: data-science-document-ai
-Version: 1.43.3
+Version: 1.43.4
 Summary: "Document AI repo for data science"
 Author: Naomi Nguyen
 Author-email: naomi.nguyen@forto.com

{data_science_document_ai-1.43.3.dist-info → data_science_document_ai-1.43.4.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ src/constants.py,sha256=rpYIecVLIBLh98YrJ8e5gdvM0bqrXJZWIKgFkUSn69g,3513
 src/constants_sandbox.py,sha256=Iu6HdjCoNSmOX0AwoL9qUQkhq_ZnIN5U9e-Q2UfNuGc,547
 src/docai.py,sha256=dHuR0ehVjUi1CnoNvdp_yxJtpU_HFXqAZ61ywdz7BEo,5655
 src/docai_processor_config.yaml,sha256=81NUGs-u8UFJm6mc0ZOeeNQlhe9h0f35GhjTcwErvTA,1717
-src/excel_processing.py,sha256=gzP7QFCp4-n0FTevhWmXm-2UoDF0w0y5v39gsby0IV8,3135
+src/excel_processing.py,sha256=AppxrliVj7cLv1I_X7xC5bq4OPFAeiVNMNwcp-TZZDs,3466
 src/io.py,sha256=tOJpMyI-mP1AaXKG4UFudH47MHWzjWBgVahFJUcjGfs,4749
 src/llm.py,sha256=OE4IEIqcM-hYK9U7e0x1rAfcqdpeo4iXPHBp64L5Qz0,8199
 src/log_setup.py,sha256=RhHnpXqcl-ii4EJzRt47CF2R-Q3YPF68tepg_Kg7tkw,2895
@@ -54,6 +54,6 @@ src/prompts/prompt_library.py,sha256=jPxybNPPGH7mzonqtAOqmw5WcT-RtbGP0pvMqqP22hg
 src/setup.py,sha256=M-p5c8M9ejKcSZ9N86VtmtPc4TYLxe1_4_dxf6jpfVc,7262
 src/tms.py,sha256=UXbIo1QE--hIX6NZi5Qyp2R_CP338syrY9pCTPrfgnE,1741
 src/utils.py,sha256=cTF2A12jugKjXxGlNXEZQtfgcsIoaTtaU7zhVOOvXXA,16634
-data_science_document_ai-1.43.3.dist-info/METADATA,sha256=6WQCGhLAMXOWEdTyPax7z0teZpgl-poGtI3o3X_P164,2152
-data_science_document_ai-1.43.3.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
-data_science_document_ai-1.43.3.dist-info/RECORD,,
+data_science_document_ai-1.43.4.dist-info/METADATA,sha256=bcmTXEnl4r0z7IqelSFuCyfxNJjnPvEY2snX1WViH9s,2152
+data_science_document_ai-1.43.4.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+data_science_document_ai-1.43.4.dist-info/RECORD,,

src/excel_processing.py CHANGED Viewed

@@ -9,12 +9,12 @@ from src.postprocessing.common import llm_prediction_to_tuples
 logger = logging.getLogger(__name__)
 import asyncio
-import json
 import numpy as np
 import pandas as pd
 from src.llm import prompt_excel_extraction
+from src.prompts.prompt_library import prompt_library
 from src.utils import estimate_page_count, generate_schema_structure, get_excel_sheets
@@ -67,7 +67,18 @@ async def extract_data_from_excel(
     """
     # Generate the response structure
-    response_schema = generate_schema_structure(params, input_doc_type)
+    response_schema = (
+        prompt_library.library[input_doc_type]["other"]["placeholders"]
+        if input_doc_type
+        in [
+            "partnerInvoice",
+            "customsInvoice",
+            "bundeskasse",
+            "commercialInvoice",
+            "packingList",
+        ]
+        else generate_schema_structure(params, input_doc_type)
+    )
     # Load the Excel file and get ONLY the "visible" sheet names
     sheets, workbook = get_excel_sheets(file_content, mime_type)

{data_science_document_ai-1.43.3.dist-info → data_science_document_ai-1.43.4.dist-info}/WHEEL RENAMED Viewed

File without changes

data-science-document-ai 1.43.3__py3-none-any.whl → 1.43.4__py3-none-any.whl

data-science-document-ai 1.43.3__py3-none-any.whl → 1.43.4__py3-none-any.whl