data-science-document-ai 1.54.0__tar.gz → 1.55.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/PKG-INFO +1 -1
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/pyproject.toml +1 -1
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/customsInvoice/other/prompt.txt +2 -1
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/partnerInvoice/other/prompt.txt +1 -1
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/constants.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/constants_sandbox.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/docai.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/docai_processor_config.yaml +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/excel_processing.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/io.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/llm.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/log_setup.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/pdf_processing.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/postprocessing/common.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/postprocessing/postprocess_booking_confirmation.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/postprocessing/postprocess_commercial_invoice.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/postprocessing/postprocess_partner_invoice.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/arrivalNotice/other/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/arrivalNotice/other/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/evergreen/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/evergreen/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/maersk/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/maersk/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/msc/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/msc/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/oocl/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/oocl/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/other/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/other/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/yangming/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bookingConfirmation/yangming/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bundeskasse/other/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/bundeskasse/other/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/commercialInvoice/other/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/commercialInvoice/other/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/customsAssessment/other/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/customsAssessment/other/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/customsInvoice/other/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/deliveryOrder/other/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/deliveryOrder/other/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/draftMbl/other/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/draftMbl/other/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/finalMbL/other/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/finalMbL/other/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/packingList/other/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/packingList/other/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/partnerInvoice/other/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/postprocessing/port_code/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/postprocessing/port_code/prompt_port_code.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/preprocessing/carrier/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/preprocessing/carrier/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/shippingInstruction/other/placeholders.json +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/library/shippingInstruction/other/prompt.txt +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/prompt_library.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/setup.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/tms.py +0 -0
- {data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "data-science-document-ai"
|
|
3
|
-
version = "1.54.0"
|
|
3
|
+
version = "1.55.0"
|
|
4
4
|
description = "\"Document AI repo for data science\""
|
|
5
5
|
authors = ["Naomi Nguyen <naomi.nguyen@forto.com>", "Kumar Rajendrababu <kumar.rajendrababu@forto.com>", "Igor Tonko <igor.tonko@forto.com>", "Osman Demirel <osman.demirel@forto.com>"]
|
|
6
6
|
packages = [
|
|
@@ -48,7 +48,7 @@ Your role is to accurately extract specific entities from these invoices to supp
|
|
|
48
48
|
- issueDate: The date the document was issued.
|
|
49
49
|
- dueDate: The date by which the payment should be made. Do Not calculate dueDate based on issueDate or any other date. Extract it directly from the invoice.
|
|
50
50
|
|
|
51
|
-
- lineItem: Details of each COGS and Customs line item on the invoice. Make sure to extract each amount and currency separately.
|
|
51
|
+
- lineItem: Details of each COGS and Customs line item on the invoice from each page. Make sure to extract each amount and currency separately.
|
|
52
52
|
- uniqueId: A unique id which associated with the lineItem as each line item can belong to a different shipment. Extract only if its available in the line item. Either a shipmentId starting with an S and followed by 6 or 8 numeric values or a mblNumber. If shipmentId or mblNumber does not exist, set it to containerNumber.
|
|
53
53
|
- lineItemDescription: The name or description of the item. Usually, it will be a one line sentence.
|
|
54
54
|
- unitPrice: Even if the quantity is not mentioned, you can still extract the unit price. Check the naming of the columns in a different languages, it can be "Unit Price", "Prezzo unitario", "Prix Unitaire", "Unitario", etc. Refer to "Prezzo unitario" field in the italian invoice example.
|
|
@@ -92,6 +92,7 @@ Your role is to accurately extract specific entities from these invoices to supp
|
|
|
92
92
|
|
|
93
93
|
IMPORTANT NOTE:
|
|
94
94
|
- Ensure all extracted values are directly from the document. Do not make assumptions or modifications.
|
|
95
|
+
- Extract line items from each page if the invoice spans multiple pages.
|
|
95
96
|
- Do not normalize or modify any entity values.
|
|
96
97
|
- Pay attention to the line item details and paymentInformation, as they may vary significantly across different invoices.
|
|
97
98
|
|
|
@@ -46,7 +46,7 @@ Your role is to accurately extract specific entities from these invoices to supp
|
|
|
46
46
|
|
|
47
47
|
- eta and etd: Few invoices contains same date for ARRIVED/DEPARTED or ETA/ETD. Extract it for both eta and etd.
|
|
48
48
|
|
|
49
|
-
- lineItem: Details of each COGS and Customs line item on the invoice. Make sure to extract each amount and currency separately.
|
|
49
|
+
- lineItem: Details of each COGS and Customs line item on the invoice from each page. Make sure to extract each amount and currency separately.
|
|
50
50
|
- uniqueId: A unique id which associated with the lineItem as each line item can belong to a different shipment. Extract only if its available in the line item. Either a shipmentId starting with an S and followed by 6 or 8 numeric values or a mblNumber. If shipmentId or mblNumber does not exist, set it to containerNumber.
|
|
51
51
|
- lineItemDescription: The name or description of the item. Usually, it will be a one line sentence.
|
|
52
52
|
- unitPrice: Even if the quantity is not mentioned, you can still extract the unit price. Check the naming of the columns in a different languages, it can be "Unit Price", "Prezzo unitario", "Prix Unitaire", "Unitario", etc. Refer to "Prezzo unitario" field in the italian invoice example.
|
|
File without changes
|
{data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/constants_sandbox.py
RENAMED
|
File without changes
|
|
File without changes
|
{data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/docai_processor_config.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/postprocessing/common.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_science_document_ai-1.54.0 → data_science_document_ai-1.55.0}/src/prompts/prompt_library.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|