PyPI - data-science-document-ai - Versions diffs - 1.40.3__tar.gz → 1.41.0__tar.gz - Mend

data-science-document-ai 1.40.3.tar.gz → 1.41.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (66)

{data_science_document_ai-1.40.3 → data_science_document_ai-1.41.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: data-science-document-ai
-Version: 1.40.3
+Version: 1.41.0
 Summary: "Document AI repo for data science"
 Author: Naomi Nguyen
 Author-email: naomi.nguyen@forto.com

{data_science_document_ai-1.40.3 → data_science_document_ai-1.41.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "data-science-document-ai"
-version = "1.40.3"
+version = "1.41.0"
 description = "\"Document AI repo for data science\""
 authors = ["Naomi Nguyen <naomi.nguyen@forto.com>", "Kumar Rajendrababu <kumar.rajendrababu@forto.com>", "Igor Tonko <igor.tonko@forto.com>", "Osman Demirel <osman.demirel@forto.com>"]
 packages = [

{data_science_document_ai-1.40.3 → data_science_document_ai-1.41.0}/src/excel_processing.py RENAMED Viewed

@@ -2,6 +2,8 @@
 # flake8: noqa: E402
 import logging
+from src.postprocessing.common import llm_prediction_to_tuples
 logger = logging.getLogger(__name__)
 import asyncio
@@ -73,6 +75,8 @@ async def extract_data_from_excel(
     ]
     extracted_data = {k: v for k, v in await asyncio.gather(*sheet_extract_tasks)}
+    # Convert LLM prediction dictionary to tuples of (value, page_number).
+    extracted_data = llm_prediction_to_tuples(extracted_data)
     stored_data = json.dumps(extracted_data)
     return extracted_data, stored_data, params["gemini_params"]["model_id"]

{data_science_document_ai-1.40.3 → data_science_document_ai-1.41.0}/src/pdf_processing.py RENAMED Viewed

@@ -14,7 +14,7 @@ from google.cloud.documentai_v1 import Document as docaiv1_document
 from src.docai import _batch_process_pdf_w_docai, _process_pdf_w_docai
 from src.excel_processing import extract_data_from_excel
-from src.postprocessing.common import format_all_entities, remove_none_values
+from src.postprocessing.common import format_all_entities, remove_none_values, llm_prediction_to_tuples
 from src.postprocessing.postprocess_booking_confirmation import (
     postprocess_booking_confirmation,
 )
@@ -31,6 +31,7 @@ from src.utils import (
     get_processor_name,
     run_background_tasks,
     validate_based_on_schema,
+    transform_schema_strings
 )
@@ -104,9 +105,16 @@ async def extract_data_from_pdf_w_docai(
     # Extract entities from the result
     for entity in result.entities:
         value = (
-            {child.type_: child.mention_text for child in entity.properties}
+            {child.type_: (child.mention_text,
+                           child.page_anchor.page_refs[0].page
+                           if hasattr(child.page_anchor.page_refs[0], "page")
+                           else 0)
+             for child in entity.properties}
             if entity.properties
-            else entity.mention_text
+            else (entity.mention_text,
+                  entity.page_anchor.page_refs[0].page
+                  if hasattr(entity.page_anchor.page_refs[0], "page")
+                  else 0)
         )
         aggregated_data[entity.type_].append(value)
@@ -220,6 +228,9 @@ async def process_file_w_llm(params, file_content, input_doc_type, llm_client):
         result = await llm_client.get_unified_json_genai(
             prompt=prompt, document=document, response_schema=response_schema
         )
+        result = llm_prediction_to_tuples(result)
         return result
     return {}

{data_science_document_ai-1.40.3 → data_science_document_ai-1.41.0}/src/postprocessing/common.py RENAMED Viewed

@@ -380,6 +380,11 @@ async def format_label(entity_k, entity_value, document_type_code, params):
             ]
         )
         return entity_k, [v for _, v in format_tasks]
+    if isinstance(entity_value, tuple):
+        page = entity_value[1]
+        entity_value = entity_value[0]
+    else:
+        page = -1
     entity_key = entity_k.lower()
     formatted_value = None
@@ -446,10 +451,18 @@ async def format_label(entity_k, entity_value, document_type_code, params):
     elif "reversechargesentence" in entity_key:
         formatted_value = clean_item_description(entity_value, remove_numbers=False)
+    elif "quantity" in entity_key:
+        if document_type_code in ["partnerInvoice", "customsInvoice", "bundeskasse"]:
+            # For partner invoice, quantity can be mentioned as whole number
+            formatted_value = decimal_convertor(
+                extract_number(entity_value), quantity=True
+            )
+        else:
+            formatted_value = extract_number(entity_value)
     elif any(
         numeric_indicator in entity_key
         for numeric_indicator in [
-            "quantity",
             "value",
             "amount",
             "price",
@@ -466,6 +479,7 @@ async def format_label(entity_k, entity_value, document_type_code, params):
     result = {
         "documentValue": entity_value,
         "formattedValue": formatted_value,
+        "page": page,
     }
     return entity_k, result
@@ -514,7 +528,7 @@ async def get_port_code_llm(port: str, llm_client):
             return None
-def decimal_convertor(value):
+def decimal_convertor(value, quantity=False):
     """Convert EU values to English values."""
     if value is None:
         return None
@@ -522,25 +536,39 @@ def decimal_convertor(value):
     # Remove spaces
     value = value.strip().replace(" ", "")
-    # Convert comma to dot for decimal point (e.g., 4.123,45 -> 4123.45)
-    if re.match(r"^\d{1,3}(\.\d{3})*,\d{1,2}$", value):
-        value = value.replace(".", "").replace(",", ".")
+    if not quantity:
+        # Convert comma to dot for decimal point (e.g., 4.123,45 -> 4123.45)
+        if re.match(r"^\d{1,3}(\.\d{3})*,\d{1,2}$", value):
+            value = value.replace(".", "").replace(",", ".")
+        # European style integer with thousand separator: 2.500
+        elif re.match(r"^\d{1,3}(\.\d{3})+$", value):
+            value = value.replace(".", "")
+        # Format english values as well for consistency (e.g., 4,123.45 -> 4123.45)
+        elif re.match(r"^\d{1,3}(,\d{3})*\.\d{1,2}$", value):
+            value = value.replace(",", "")
-    # European style integer with thousand separator: 2.500
-    elif re.match(r"^\d{1,3}(\.\d{3})+$", value):
-        value = value.replace(".", "")
+        # English style integer with thousand separator: 2,500
+        elif re.match(r"^\d{1,3}(,\d{3})+$", value):
+            value = value.replace(",", "")
-    # Format english values as well for consistency (e.g., 4,123.45 -> 4123.45)
-    elif re.match(r"^\d{1,3}(,\d{3})*\.\d{1,2}$", value):
-        value = value.replace(",", "")
+        # Just replace comma decimals with dot (e.g., 65,45 -> 65.45)
+        if re.match(r"^\d+,\d{1,2}$", value):
+            value = value.replace(",", ".")
-    # English style integer with thousand separator: 2,500
-    elif re.match(r"^\d{1,3}(,\d{3})+$", value):
-        value = value.replace(",", "")
+        # If there are more than 3 0s after decimal point, consider only 2 decimal points (e.g., 8.500000 -> 8.50)
+        elif re.match(r"^\d+\.\d{3,}$", value):
+            value = value[: value.index(".") + 3]
-    # Just replace comma decimals with dot (e.g., 65,45 -> 65.45)
-    elif re.match(r"^\d+,\d{1,2}$", value):
-        value = value.replace(",", ".")
+    else:  # quantity=True → only last two
+        # Just replace comma decimals with dot (e.g., 65,45 -> 65.45)
+        if re.match(r"^\d+,\d{1,2}$", value):
+            value = value.replace(",", ".")
+        # If there are more than 3 0s after decimal point, consider only 2 decimal points (e.g., 8.500000 -> 8.50)
+        elif re.match(r"^\d+\.\d{3,}$", value):
+            value = value[: value.index(".") + 3]
     return value
@@ -594,3 +622,24 @@ def remove_stop_words(lineitem: str):
         .upper()
         .strip()
     )
+def llm_prediction_to_tuples(llm_prediction):
+    """Convert LLM prediction dictionary to tuples of (value, page_number)."""
+    if isinstance(llm_prediction, dict):
+        if "page_number" in llm_prediction.keys() and "value" in llm_prediction.keys():
+            if llm_prediction["value"]:
+                try:
+                    page_number = int(llm_prediction["page_number"])
+                except:  # noqa: E722
+                    page_number = -1
+                return (llm_prediction["value"], page_number)
+            return None
+        for key, value in llm_prediction.items():
+            llm_prediction[key] = llm_prediction_to_tuples(
+                llm_prediction.get(key, value)
+            )
+    elif isinstance(llm_prediction, list):
+        for i, item in enumerate(llm_prediction):
+            llm_prediction[i] = llm_prediction_to_tuples(item)
+    return llm_prediction

data_science_document_ai-1.41.0/src/prompts/library/bookingConfirmation/evergreen/placeholders.json ADDED Viewed

@@ -0,0 +1,32 @@
+{
+  "type": "OBJECT",
+  "properties": {
+    "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
+    "bookingNumber": {"type": "STRING", "nullable": true},
+    "cyCutOff": {"type": "STRING", "nullable": true},
+    "gateInReference": {"type": "STRING", "nullable": true},
+    "gateInTerminal": {"type": "STRING", "nullable": true},
+    "mblNumber": {"type": "STRING", "nullable": true},
+    "pickUpReference": {"type": "STRING", "nullable": true},
+    "pickUpTerminal": {"type": "STRING", "nullable": true},
+    "siCutOff": {"type": "STRING", "nullable": true},
+    "vgmCutOff": {"type": "STRING", "nullable": true},
+    "transportLegs": {
+      "type": "ARRAY",
+      "items": {
+        "type": "OBJECT",
+        "properties": {
+            "eta": {"type": "STRING", "nullable": true},
+            "etd": {"type": "STRING", "nullable": true},
+            "imoNumber": {"type": "STRING", "nullable": true},
+            "portOfDischarge": {"type": "STRING", "nullable": true},
+            "portOfLoading": {"type": "STRING", "nullable": true},
+            "vesselName": {"type": "STRING", "nullable": true},
+            "voyage": {"type": "STRING", "nullable": true}
+          },
+        "required": []
+      }
+    }
+  },
+  "required": []
+}

{data_science_document_ai-1.40.3 → data_science_document_ai-1.41.0}/src/prompts/library/bookingConfirmation/evergreen/prompt.txt RENAMED Viewed

@@ -1,3 +1,4 @@
+your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
 ```json
 {
 "mblNumber": "Extract the value after the label 'BOOKING NO.'.",

data_science_document_ai-1.41.0/src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json ADDED Viewed

@@ -0,0 +1,32 @@
+{
+  "type": "OBJECT",
+  "properties": {
+    "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
+    "bookingNumber": {"type": "STRING", "nullable": true},
+    "cyCutOff": {"type": "STRING", "nullable": true},
+    "gateInReference": {"type": "STRING", "nullable": true},
+    "gateInTerminal": {"type": "STRING", "nullable": true},
+    "mblNumber": {"type": "STRING", "nullable": true},
+    "pickUpReference": {"type": "STRING", "nullable": true},
+    "pickUpTerminal": {"type": "STRING", "nullable": true},
+    "siCutOff": {"type": "STRING", "nullable": true},
+    "vgmCutOff": {"type": "STRING", "nullable": true},
+    "transportLegs": {
+      "type": "ARRAY",
+      "items": {
+        "type": "OBJECT",
+        "properties": {
+            "eta": {"type": "STRING", "nullable": true},
+            "etd": {"type": "STRING", "nullable": true},
+            "imoNumber": {"type": "STRING", "nullable": true},
+            "portOfDischarge": {"type": "STRING", "nullable": true},
+            "portOfLoading": {"type": "STRING", "nullable": true},
+            "vesselName": {"type": "STRING", "nullable": true},
+            "voyage": {"type": "STRING", "nullable": true}
+          },
+        "required": []
+      }
+    }
+  },
+  "required": []
+}

{data_science_document_ai-1.40.3 → data_science_document_ai-1.41.0}/src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt RENAMED Viewed

@@ -18,7 +18,7 @@ transportLegs:
     vesselName: The name of the vessel for a specific leg.
     voyage: The journey or route taken by the vessel for a specific leg.
-your task is to extract the text value of the following entities:
+your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
 SCHEMA_PLACEHOLDER
 Keywords for datapoints:

data_science_document_ai-1.41.0/src/prompts/library/bookingConfirmation/maersk/placeholders.json ADDED Viewed

@@ -0,0 +1,32 @@
+{
+  "type": "OBJECT",
+  "properties": {
+    "bookingNumber": {"type": "STRING", "nullable": true},
+    "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
+    "cyCutOff": {"type": "STRING", "nullable": true},
+    "gateInReference": {"type": "STRING", "nullable": true},
+    "gateInTerminal": {"type": "STRING", "nullable": true},
+    "mblNumber": {"type": "STRING", "nullable": true},
+    "pickUpReference": {"type": "STRING", "nullable": true},
+    "pickUpTerminal": {"type": "STRING", "nullable": true},
+    "siCutOff": {"type": "STRING", "nullable": true},
+    "vgmCutOff": {"type": "STRING", "nullable": true},
+    "transportLegs": {
+      "type": "ARRAY",
+      "items": {
+        "type": "OBJECT",
+        "properties": {
+            "eta": {"type": "STRING", "nullable": true},
+            "etd": {"type": "STRING", "nullable": true},
+            "imoNumber": {"type": "STRING", "nullable": true},
+            "portOfDischarge": {"type": "STRING", "nullable": true},
+            "portOfLoading": {"type": "STRING", "nullable": true},
+            "vesselName": {"type": "STRING", "nullable": true},
+            "voyage": {"type": "STRING", "nullable": true}
+          },
+        "required": []
+      }
+    }
+  },
+  "required": []
+}

{data_science_document_ai-1.40.3 → data_science_document_ai-1.41.0}/src/prompts/library/bookingConfirmation/maersk/prompt.txt RENAMED Viewed

@@ -18,7 +18,7 @@ transportLegs:
     vesselName: The name of the vessel for a specific leg.
     voyage: The journey or route taken by the vessel for a specific leg.
-your task is to extract the text value of the following entities:
+your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
 SCHEMA_PLACEHOLDER
 Keywords for datapoints:

data_science_document_ai-1.41.0/src/prompts/library/bookingConfirmation/msc/placeholders.json ADDED Viewed

@@ -0,0 +1,32 @@
+{
+  "type": "OBJECT",
+  "properties": {
+    "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
+    "bookingNumber": {"type": "STRING", "nullable": true},
+    "cyCutOff": {"type": "STRING", "nullable": true},
+    "gateInReference": {"type": "STRING", "nullable": true},
+    "gateInTerminal": {"type": "STRING", "nullable": true},
+    "mblNumber": {"type": "STRING", "nullable": true},
+    "pickUpReference": {"type": "STRING", "nullable": true},
+    "pickUpTerminal": {"type": "STRING", "nullable": true},
+    "siCutOff": {"type": "STRING", "nullable": true},
+    "vgmCutOff": {"type": "STRING", "nullable": true},
+    "transportLegs": {
+      "type": "ARRAY",
+      "items": {
+        "type": "OBJECT",
+        "properties": {
+            "eta": {"type": "STRING", "nullable": true},
+            "etd": {"type": "STRING", "nullable": true},
+            "imoNumber": {"type": "STRING", "nullable": true},
+            "portOfDischarge": {"type": "STRING", "nullable": true},
+            "portOfLoading": {"type": "STRING", "nullable": true},
+            "vesselName": {"type": "STRING", "nullable": true},
+            "voyage": {"type": "STRING", "nullable": true}
+          },
+        "required": []
+      }
+    }
+  },
+  "required": []
+}

{data_science_document_ai-1.40.3 → data_science_document_ai-1.41.0}/src/prompts/library/bookingConfirmation/msc/prompt.txt RENAMED Viewed

@@ -18,7 +18,7 @@ transportLegs:
     vesselName: The name of the vessel for a specific leg.
     voyage: The journey or route taken by the vessel for a specific leg.
-your task is to extract the text value of the following entities:
+your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
 SCHEMA_PLACEHOLDER
 Further explanation and Keywords for the transportLegs part as follows. The below 2 conditions is crucial. Take attention here:

data_science_document_ai-1.41.0/src/prompts/library/bookingConfirmation/oocl/placeholders.json ADDED Viewed

@@ -0,0 +1,32 @@
+{
+  "type": "OBJECT",
+  "properties": {
+    "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
+    "bookingNumber": {"type": "STRING", "nullable": true},
+    "cyCutOff": {"type": "STRING", "nullable": true},
+    "gateInReference": {"type": "STRING", "nullable": true},
+    "gateInTerminal": {"type": "STRING", "nullable": true},
+    "mblNumber": {"type": "STRING", "nullable": true},
+    "pickUpReference": {"type": "STRING", "nullable": true},
+    "pickUpTerminal": {"type": "STRING", "nullable": true},
+    "siCutOff": {"type": "STRING", "nullable": true},
+    "vgmCutOff": {"type": "STRING", "nullable": true},
+    "transportLegs": {
+      "type": "ARRAY",
+      "items": {
+        "type": "OBJECT",
+        "properties": {
+            "eta": {"type": "STRING", "nullable": true},
+            "etd": {"type": "STRING", "nullable": true},
+            "portOfDischarge": {"type": "STRING", "nullable": true},
+            "portOfLoading": {"type": "STRING", "nullable": true},
+            "vesselName": {"type": "STRING", "nullable": true},
+            "voyage": {"type": "STRING", "nullable": true},
+            "imoNumber": {"type": "STRING", "nullable": true}
+        },
+        "required": []
+      }
+    }
+  },
+  "required": []
+}

{data_science_document_ai-1.40.3 → data_science_document_ai-1.41.0}/src/prompts/library/bookingConfirmation/oocl/prompt.txt RENAMED Viewed

@@ -1,4 +1,6 @@
-bookingNumber: Extract the booking number. This information can be found near the labels "BOOKING ACKNOWLEDGEMENT" or "BOOKING NUMBER".
+your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
+bookingNumber: Extract the booking number. This information can be found near the labels "BOOKING ACKNOWLEDGEMENT" or "BOOKING NUMBER".
 gateInReference: This field should have the same value as the bookingNumber.
 cyCutOff: Look for the "INTENDED FCL CY CUT-OFF" label and extract the date and time value.
 vgmCutOff: Look for the "INTENDED VGM CUT-OFF" label and extract the date and time value.

data_science_document_ai-1.41.0/src/prompts/library/bookingConfirmation/other/placeholders.json ADDED Viewed

@@ -0,0 +1,32 @@
+{
+  "type": "OBJECT",
+  "properties": {
+    "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
+    "bookingNumber": {"type": "STRING", "nullable": true},
+    "cyCutOff": {"type": "STRING", "nullable": true},
+    "gateInReference": {"type": "STRING", "nullable": true},
+    "gateInTerminal": {"type": "STRING", "nullable": true},
+    "mblNumber": {"type": "STRING", "nullable": true},
+    "pickUpReference": {"type": "STRING", "nullable": true},
+    "pickUpTerminal": {"type": "STRING", "nullable": true},
+    "siCutOff": {"type": "STRING", "nullable": true},
+    "vgmCutOff": {"type": "STRING", "nullable": true},
+    "transportLegs": {
+      "type": "ARRAY",
+      "items": {
+        "type": "OBJECT",
+        "properties": {
+            "eta": {"type": "STRING", "nullable": true},
+            "etd": {"type": "STRING", "nullable": true},
+            "imoNumber": {"type": "STRING", "nullable": true},
+            "portOfDischarge": {"type": "STRING", "nullable": true},
+            "portOfLoading": {"type": "STRING", "nullable": true},
+            "vesselName": {"type": "STRING", "nullable": true},
+            "voyage": {"type": "STRING", "nullable": true}
+          },
+        "required": []
+      }
+    }
+  },
+  "required": []
+}

{data_science_document_ai-1.40.3 → data_science_document_ai-1.41.0}/src/prompts/library/bookingConfirmation/other/prompt.txt RENAMED Viewed

@@ -18,7 +18,7 @@ transportLegs:
     vesselName: The name of the vessel for a specific leg.
     voyage: The journey or route taken by the vessel for a specific leg.
-your task is to extract the text value of the following entities:
+your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
 SCHEMA_PLACEHOLDER
 Further explanation for the transportLegs part as follows:

data_science_document_ai-1.41.0/src/prompts/library/bookingConfirmation/yangming/placeholders.json ADDED Viewed

@@ -0,0 +1,32 @@
+{
+  "type": "OBJECT",
+  "properties": {
+    "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
+    "bookingNumber": {"type": "STRING", "nullable": true},
+    "cyCutOff": {"type": "STRING", "nullable": true},
+    "gateInReference": {"type": "STRING", "nullable": true},
+    "gateInTerminal": {"type": "STRING", "nullable": true},
+    "mblNumber": {"type": "STRING", "nullable": true},
+    "pickUpReference": {"type": "STRING", "nullable": true},
+    "pickUpTerminal": {"type": "STRING", "nullable": true},
+    "siCutOff": {"type": "STRING", "nullable": true},
+    "vgmCutOff": {"type": "STRING", "nullable": true},
+    "transportLegs": {
+      "type": "ARRAY",
+      "items": {
+        "type": "OBJECT",
+        "properties": {
+            "eta": {"type": "STRING", "nullable": true},
+            "etd": {"type": "STRING", "nullable": true},
+            "imoNumber": {"type": "STRING", "nullable": true},
+            "portOfDischarge": {"type": "STRING", "nullable": true},
+            "portOfLoading": {"type": "STRING", "nullable": true},
+            "vesselName": {"type": "STRING", "nullable": true},
+            "voyage": {"type": "STRING", "nullable": true}
+          },
+        "required": []
+      }
+    }
+  },
+  "required": []
+}

{data_science_document_ai-1.40.3 → data_science_document_ai-1.41.0}/src/prompts/library/bookingConfirmation/yangming/prompt.txt RENAMED Viewed

@@ -18,7 +18,7 @@ transportLegs:
     vesselName: The name of the vessel for a specific leg.
     voyage: The journey or route taken by the vessel for a specific leg.
-your task is to extract the text value of the following entities:
+your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
 SCHEMA_PLACEHOLDER
 Keywords for datapoints:

{data_science_document_ai-1.40.3 → data_science_document_ai-1.41.0}/src/prompts/library/bundeskasse/other/placeholders.json RENAMED Viewed

@@ -2,47 +2,47 @@
   "type": "OBJECT",
   "properties": {
       "currencyCode": {
-        "type": "string",
+        "type": "STRING",
         "nullable": true,
         "description": "The currency in which the invoice is issued."
       },
       "grandTotal": {
-        "type": "string",
+        "type": "STRING",
         "nullable": true,
         "description": "The overall total amount of the invoice."
       },
       "issueDate": {
-        "type": "string",
+        "type": "STRING",
         "nullable": true,
         "description": "The date the document was issued."
       },
       "recipientAddress": {
-        "type": "string",
+        "type": "STRING",
         "nullable": true,
         "description": "The address of the recipient."
       },
       "recipientName": {
-        "type": "string",
+        "type": "STRING",
         "nullable": true,
         "description": "The name of the recipient."
       },
       "serviceDate": {
-        "type": "string",
+        "type": "STRING",
         "nullable": true,
         "description": "The date of service or transaction."
       },
       "shipmentId": {
-        "type": "string",
+        "type": "STRING",
         "nullable": true,
         "description": "Starting with an \"S\" and followed by 6 or 7 digits. Example: S124321"
       },
       "vendorName": {
-        "type": "string",
+        "type": "STRING",
         "nullable": true,
         "description": "The name of the vendor."
       },
       "vendorAddress": {
-        "type": "string",
+        "type": "STRING",
         "nullable": true,
         "description": "The address of the vendor."
       },
@@ -52,37 +52,37 @@
               "type": "OBJECT",
               "properties": {
                   "deferredDutyPayer": {
-                    "type": "string",
+                    "type": "STRING",
                     "nullable": true,
                     "description": "It can be identified under \"Aufschubenhmer\" for each line item"
                   },
                   "name": {
-                    "type": "string",
+                    "type": "STRING",
                     "nullable": true,
                     "description": "The name or description of the line item A0000 and B0000"
                   },
                   "taxType": {
-                    "type": "string",
+                    "type": "STRING",
                     "nullable": true,
                     "description": "It's a line item mentioned in the invoice. For example; A0000 and B0000"
                   },
                   "totalAmount": {
-                    "type": "string",
+                    "type": "STRING",
                     "nullable": true,
                     "description": "The total amount for the line item."
                   },
                   "totalAmountCurrency": {
-                    "type": "string",
+                    "type": "STRING",
                     "nullable": true,
                     "description": "The currency of the total amount."
                   },
                   "vatId": {
-                    "type": "string",
+                    "type": "STRING",
                     "nullable": true,
                     "description": "The VAT identification number. This is named a Konto-Nummer for each line item."
                   },
                   "dueDate": {
-                    "type": "string",
+                    "type": "STRING",
                     "nullable": true,
                     "description": "It's a due date. Due date to pay the amount. It's usually mentioned either in a date or a number of days format"
                   }
@@ -91,20 +91,20 @@
           }
       },
       "invoiceNumber": {
-        "type": "string",
+        "type": "STRING",
         "nullable": true,
         "description": "Invoice Number is a unique identifier for the invoice, it starts with \"ATC\", \"AT-C\", or \"AT/C\" only (e.g., ATC40, AT-C-40-, AT/C/40/....). Do NOT extract \"NIZZA-Registrierkennzeichen number."
       },
       "containerNumber": {
         "type": "ARRAY",
         "items": {
-          "type": "string",
+          "type": "STRING",
           "nullable": true,
           "description": "The unique identifier for each container. It always starts with 4 capital letters and followed by 7 digits. Example: TEMU7972458."
       }
   },
       "creditNoteInvoiceNumber": {
-        "type": "string",
+        "type": "STRING",
         "nullable": true,
         "description": "The unique identifier for the associated Invoice. The number usually starts with ATS..."
       }

data-science-document-ai 1.40.3__tar.gz → 1.41.0__tar.gz

data-science-document-ai 1.40.3.tar.gz → 1.41.0.tar.gz