data-science-document-ai 1.60.2__py3-none-any.whl → 1.60.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_science_document_ai-1.60.2.dist-info → data_science_document_ai-1.60.3.dist-info}/METADATA +1 -1
- {data_science_document_ai-1.60.2.dist-info → data_science_document_ai-1.60.3.dist-info}/RECORD +12 -12
- src/postprocessing/common.py +11 -0
- src/prompts/library/bundeskasse/other/placeholders.json +2 -2
- src/prompts/library/bundeskasse/other/prompt.txt +3 -2
- src/prompts/library/customsInvoice/other/placeholders.json +2 -1
- src/prompts/library/customsInvoice/other/prompt.txt +1 -1
- src/prompts/library/draftMbl/other/prompt.txt +1 -1
- src/prompts/library/finalMbL/other/prompt.txt +1 -1
- src/prompts/library/partnerInvoice/other/placeholders.json +2 -1
- src/prompts/library/partnerInvoice/other/prompt.txt +1 -1
- {data_science_document_ai-1.60.2.dist-info → data_science_document_ai-1.60.3.dist-info}/WHEEL +0 -0
{data_science_document_ai-1.60.2.dist-info → data_science_document_ai-1.60.3.dist-info}/RECORD
RENAMED
|
@@ -7,7 +7,7 @@ src/io.py,sha256=rYjXVLlriEacw1uNuPIYhg12bXNu48Qs9GYMY2YcVTE,5563
|
|
|
7
7
|
src/llm.py,sha256=a7UYA4ITUNjzct_2fHgM-bma_XWc28VC0FV71g9tnUI,7137
|
|
8
8
|
src/log_setup.py,sha256=RhHnpXqcl-ii4EJzRt47CF2R-Q3YPF68tepg_Kg7tkw,2895
|
|
9
9
|
src/pdf_processing.py,sha256=Fx-Glb9niEUU3WUCrBZ02ZYV-E2vWoUM0ifN7-0A1Q4,19961
|
|
10
|
-
src/postprocessing/common.py,sha256=
|
|
10
|
+
src/postprocessing/common.py,sha256=gcHZTuj_TRG4E0eTTpr7lUjTYKG9N4IiClvqiEoXGZ8,26591
|
|
11
11
|
src/postprocessing/postprocess_booking_confirmation.py,sha256=nK32eDiBNbauyQz0oCa9eraysku8aqzrcoRFoWVumDU,4827
|
|
12
12
|
src/postprocessing/postprocess_commercial_invoice.py,sha256=3I8ijluTZcOs_sMnFZxfkAPle0UFQ239EMuvZfDZVPg,1028
|
|
13
13
|
src/postprocessing/postprocess_partner_invoice.py,sha256=WuaTQK5D09dV_QNrh29ZoKX9IvQn2Ub-WnAMyRjCsvI,14240
|
|
@@ -27,24 +27,24 @@ src/prompts/library/bookingConfirmation/other/placeholders.json,sha256=zkpcKzOTC
|
|
|
27
27
|
src/prompts/library/bookingConfirmation/other/prompt.txt,sha256=EMmKLDX7ir96KcU7Ca12B7k5-crdioU-Sw3VebX33HY,7867
|
|
28
28
|
src/prompts/library/bookingConfirmation/yangming/placeholders.json,sha256=2fu7q7J4QxrYfmZJlpAfDs9wIHKTheyysIw7i7UmOR0,6254
|
|
29
29
|
src/prompts/library/bookingConfirmation/yangming/prompt.txt,sha256=c_qHnP7f2JTCumDRN0aQ9QpXmglLPRuaMEiDXq3wMJ0,4605
|
|
30
|
-
src/prompts/library/bundeskasse/other/placeholders.json,sha256=
|
|
31
|
-
src/prompts/library/bundeskasse/other/prompt.txt,sha256=
|
|
30
|
+
src/prompts/library/bundeskasse/other/placeholders.json,sha256=6VvXUluc_shZNg8jb40JiaDjovNhuoEHpNSfsTzoRs4,4456
|
|
31
|
+
src/prompts/library/bundeskasse/other/prompt.txt,sha256=prmk2BIGlSZ_1vhi5WcBUm91D5zGJJT3fbbLEQ3YeTk,3487
|
|
32
32
|
src/prompts/library/commercialInvoice/other/placeholders.json,sha256=zUK2mg9MnHiEQRYF6VgTiUiq68WGy5f7_4qL63CWyR0,4700
|
|
33
33
|
src/prompts/library/commercialInvoice/other/prompt.txt,sha256=CJapcVrmcvynJUanETDklkzU-0N9hHdhq5wL4MK7OIY,2683
|
|
34
34
|
src/prompts/library/customsAssessment/other/placeholders.json,sha256=scIV--C9HNWAQbU9zEz3GT_FoAvJqbfuY85YUtt7t-Q,3850
|
|
35
35
|
src/prompts/library/customsAssessment/other/prompt.txt,sha256=z3FuoHZ588Pz1WBJDW7ISAC3J6n7hPJCcS92CdHDTFw,2494
|
|
36
|
-
src/prompts/library/customsInvoice/other/placeholders.json,sha256=
|
|
37
|
-
src/prompts/library/customsInvoice/other/prompt.txt,sha256=
|
|
36
|
+
src/prompts/library/customsInvoice/other/placeholders.json,sha256=Y3nztiKvaqll7Oq2Vo1MjhjqNBG_HyiB_4lYtwCOB0g,12263
|
|
37
|
+
src/prompts/library/customsInvoice/other/prompt.txt,sha256=i1ox7ZuWuK69DOllzYIB8sKduFYmxyI-OAtSP_tf6IQ,10297
|
|
38
38
|
src/prompts/library/deliveryOrder/other/placeholders.json,sha256=j-9F4V3yDg4610PPsOwU3oOj_S9vAvAB9Ix155WGIwc,3827
|
|
39
39
|
src/prompts/library/deliveryOrder/other/prompt.txt,sha256=RD076vq0x0IjoEVQfh-G0u4nxITCpgKZGrwMlR9YAvk,2695
|
|
40
40
|
src/prompts/library/draftMbl/other/placeholders.json,sha256=Gn8kQ8cMmrzRGLSFH7_8wO1_j2jxhqHd4zeivZP2SjU,4304
|
|
41
|
-
src/prompts/library/draftMbl/other/prompt.txt,sha256=
|
|
41
|
+
src/prompts/library/draftMbl/other/prompt.txt,sha256=Z4uH7lL01ok0GajHWQ0feCKJqRIRj_lyFMmc3-wqfpU,2458
|
|
42
42
|
src/prompts/library/finalMbL/other/placeholders.json,sha256=Gn8kQ8cMmrzRGLSFH7_8wO1_j2jxhqHd4zeivZP2SjU,4304
|
|
43
|
-
src/prompts/library/finalMbL/other/prompt.txt,sha256=
|
|
43
|
+
src/prompts/library/finalMbL/other/prompt.txt,sha256=68IHh7DDAoxQAID2dsRmKPUVQmX9vxxmMcUqnDN8-j0,2458
|
|
44
44
|
src/prompts/library/packingList/other/placeholders.json,sha256=cGUUvEFoi4Lm0BAiyD29KbNFbUgzO1s7eit_qK3F0ig,4478
|
|
45
45
|
src/prompts/library/packingList/other/prompt.txt,sha256=6Q9d0KBG6YWmNtzFivvmtQmitaUE2jytfwwc5YwsUgQ,2872
|
|
46
|
-
src/prompts/library/partnerInvoice/other/placeholders.json,sha256=
|
|
47
|
-
src/prompts/library/partnerInvoice/other/prompt.txt,sha256=
|
|
46
|
+
src/prompts/library/partnerInvoice/other/placeholders.json,sha256=S5zWxVM6Ex23drBg6wGKLk_Vd2KJon6mdK1ag3E4RBs,10988
|
|
47
|
+
src/prompts/library/partnerInvoice/other/prompt.txt,sha256=Qw27Tj9Kd3exJi1Wd57Ne_sIaFa8j5qSPwI2y1xj55M,8483
|
|
48
48
|
src/prompts/library/postprocessing/port_code/placeholders.json,sha256=2TiXf3zSzrglOMPtDOlCntIa5RSvyZQAKG2-IgrCY5A,22
|
|
49
49
|
src/prompts/library/postprocessing/port_code/prompt_port_code.txt,sha256=--1wunSqEr2ox958lEhjO-0JFBfOLzA3qfKYIzG_Iok,884
|
|
50
50
|
src/prompts/library/preprocessing/carrier/placeholders.json,sha256=tQeVDtvembhVqvel9vGoy4qcKp1hOvg-bLCgZRdQj0g,192
|
|
@@ -55,6 +55,6 @@ src/prompts/prompt_library.py,sha256=VJWHeXN-s501C2GiidIIvQQuZdU6T1R27hE2dKBiI40
|
|
|
55
55
|
src/setup.py,sha256=8-vZWjC8Iwa3xxdk3iR4412VCjtNtgzVqkXcFon7UBE,7309
|
|
56
56
|
src/tms.py,sha256=UXbIo1QE--hIX6NZi5Qyp2R_CP338syrY9pCTPrfgnE,1741
|
|
57
57
|
src/utils.py,sha256=Ow5_Jals88o8mbZ1BoHfZpHZoCfig_UQb5aalH-mpWE,17278
|
|
58
|
-
data_science_document_ai-1.60.
|
|
59
|
-
data_science_document_ai-1.60.
|
|
60
|
-
data_science_document_ai-1.60.
|
|
58
|
+
data_science_document_ai-1.60.3.dist-info/METADATA,sha256=FyamkknqoADm8IVibk-09-6grFI8qfofMblJTHabohE,2152
|
|
59
|
+
data_science_document_ai-1.60.3.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
60
|
+
data_science_document_ai-1.60.3.dist-info/RECORD,,
|
src/postprocessing/common.py
CHANGED
|
@@ -723,6 +723,17 @@ async def format_all_entities(result, document_type_code, params, mime_type):
|
|
|
723
723
|
if document_type_code in ["partnerInvoice", "bundeskasse"]:
|
|
724
724
|
await process_partner_invoice(params, aggregated_data, document_type_code)
|
|
725
725
|
|
|
726
|
+
# TODO: This is a temporary change until the terminal codes are updated
|
|
727
|
+
if document_type_code == "bookingConfirmation":
|
|
728
|
+
if "gateInTerminalCode" in aggregated_data:
|
|
729
|
+
aggregated_data["gateInTerminal"] = aggregated_data.pop(
|
|
730
|
+
"gateInTerminalCode"
|
|
731
|
+
)
|
|
732
|
+
if "pickUpTerminalCode" in aggregated_data:
|
|
733
|
+
aggregated_data["pickUpTerminal"] = aggregated_data.pop(
|
|
734
|
+
"pickUpTerminalCode"
|
|
735
|
+
)
|
|
736
|
+
|
|
726
737
|
logger.info("Data Extraction completed successfully")
|
|
727
738
|
return aggregated_data
|
|
728
739
|
|
|
@@ -93,14 +93,14 @@
|
|
|
93
93
|
"invoiceNumber": {
|
|
94
94
|
"type": "STRING",
|
|
95
95
|
"nullable": true,
|
|
96
|
-
"description": "Invoice Number is a unique identifier for the invoice, it starts with
|
|
96
|
+
"description": "Invoice Number is a unique identifier for the invoice, it starts with ATC, AT-C, or AT/C only (e.g., ATC40, AT-C-40-, AT/C/40/....) It can be found just below the title of the invoice or in the top section of the invoice. Do NOT extract NIZZA-Registrierkennzeichen number (e.g. ATC0040M00...)."
|
|
97
97
|
},
|
|
98
98
|
"containerNumber": {
|
|
99
99
|
"type": "ARRAY",
|
|
100
100
|
"items": {
|
|
101
101
|
"type": "STRING",
|
|
102
102
|
"nullable": true,
|
|
103
|
-
"description": "The unique identifier for each container. It always starts with 4 capital letters and followed by 7 digits. Example: TEMU7972458."
|
|
103
|
+
"description": "The unique identifier for each container. It always starts with 4 capital letters and followed by 7 digits. Example: TEMU7972458. Do not get confused between 0 vs O in the 7 digits of container number."
|
|
104
104
|
}
|
|
105
105
|
},
|
|
106
106
|
"creditNoteInvoiceNumber": {
|
|
@@ -16,7 +16,7 @@ Your role is to accurately extract specific entities from these Customs invoices
|
|
|
16
16
|
- The amount and the currency is always in EUR both for grandTotal and line items.
|
|
17
17
|
|
|
18
18
|
- containerNumber:
|
|
19
|
-
- Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU7222892).
|
|
19
|
+
- Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU7222892). Do not confuse the digit 0 (zero) with the letter O in the 7 digits of the container number.
|
|
20
20
|
- A few invoices contain multiple container numbers; in that case, all container numbers should be captured.
|
|
21
21
|
|
|
22
22
|
- shipmentID:
|
|
@@ -26,7 +26,8 @@ Your role is to accurately extract specific entities from these Customs invoices
|
|
|
26
26
|
|
|
27
27
|
- invoiceNumber:
|
|
28
28
|
- Invoice Number is a unique identifier for the invoice, it starts with "ATC", "AT-C", or "AT/C" only (e.g., ATC40..., AT-C-40-..., AT/C/40/....).
|
|
29
|
-
-
|
|
29
|
+
- It can be found just below the title of the invoice or in the top section of the invoice.
|
|
30
|
+
- Do NOT extract if the text is about vehicle registrations, license plates, or location identifiers (e.g., "NIZZA-Registrierkennzeichen: ATC0040M00....")
|
|
30
31
|
|
|
31
32
|
- creditNoteInvoiceNumber:
|
|
32
33
|
- Credit Note Invoice Number is a unique identifier for the credit note, it starts with "ATS" only (e.g., ATS.....).
|
|
@@ -93,7 +93,8 @@
|
|
|
93
93
|
"description": "The percentage rate of VAT applied to the totalAmount of the line item. This is used to calculate the vatAmount."
|
|
94
94
|
},
|
|
95
95
|
"containerNumber": {"type": "STRING", "nullable": true,
|
|
96
|
-
"description": "The container number associated with the line item. containerNumber MUST start with 4 letters followed by 7 digits (e.g., CMAU1234567)"
|
|
96
|
+
"description": "The container number associated with the line item. containerNumber MUST start with 4 letters followed by 7 digits (e.g., CMAU1234567). Do not get confused between 0 vs O in the 7 digits of container number."
|
|
97
|
+
},
|
|
97
98
|
"containerSize": {"type": "STRING", "nullable": true,
|
|
98
99
|
"description": "The size of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."}
|
|
99
100
|
}
|
|
@@ -59,7 +59,7 @@ Your role is to accurately extract specific entities from these invoices to supp
|
|
|
59
59
|
- totalAmount: The total amount for the item. It can be in different currencies, so ensure to capture the currency as well for the totalAmountCurrency.
|
|
60
60
|
- totalAmountEuro: A few line items contain a total amount in Euro. You can find it by looking for the term "Total EUR" or "Amount in Euro" in the line item, but it is always in the EURO / € currency. Sometimes it can be the same as totalAmount if the line item is already in Euro.
|
|
61
61
|
- quantity: The quantity of the item or service provided in the line item. Pay attention to patterns such as 2 x 40HC or 2x40HC: the quantity is 2 and 40HC is the containerSize; the quantity is not 240.
|
|
62
|
-
- containerNumber: Container Number always starts with 4 letters and is followed by 7 digits (e.g., ABCD1234567).
|
|
62
|
+
- containerNumber: Container Number always starts with 4 letters and is followed by 7 digits (e.g., ABCD1234567). Do not confuse the digit 0 (zero) with the letter O in the 7 digits of the container number.
|
|
63
63
|
|
|
64
64
|
- hblNumber and mblNumber:
|
|
65
65
|
- The Master Bill of Lading number. Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", or "HBL No.".
|
|
@@ -28,7 +28,7 @@ Your role is to accurately extract specific entities from these draftMBLs to sup
|
|
|
28
28
|
- Vessel Name is the name of the ship carrying the cargo. It can be referred to as "Vessel", "Ship Name", "Schiff", "Schiffsname", "Nave", or "Vessel/Flight No.".
|
|
29
29
|
|
|
30
30
|
- containers: Details of each container on the draftMBL. Make sure to extract each container information separately.
|
|
31
|
-
- containerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892).
|
|
31
|
+
- containerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892). Do not confuse the digit 0 (zero) with the letter O in the 7 digits of the container number.
|
|
32
32
|
- sealNumber: Seal numbers are unique identifiers for shipping seals. They are usually mentioned as seal numbers in the document but they are definitely not container numbers.
|
|
33
33
|
|
|
34
34
|
<INSTRUCTIONS>
|
|
@@ -28,7 +28,7 @@ Your role is to accurately extract specific entities from these finalMBLs to sup
|
|
|
28
28
|
- Vessel Name is the name of the ship carrying the cargo. It can be referred to as "Vessel", "Ship Name", "Schiff", "Schiffsname", "Nave", or "Vessel/Flight No.".
|
|
29
29
|
|
|
30
30
|
- containers: Details of each container on the finalMBL. Make sure to extract each container information separately.
|
|
31
|
-
- containerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892).
|
|
31
|
+
- containerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892). Do not confuse the digit 0 (zero) with the letter O in the 7 digits of the container number.
|
|
32
32
|
- sealNumber: Seal numbers are unique identifiers for shipping seals. They are usually mentioned as seal numbers in the document but they are definitely not container numbers.
|
|
33
33
|
|
|
34
34
|
<INSTRUCTIONS>
|
|
@@ -91,7 +91,8 @@
|
|
|
91
91
|
"description": "The percentage rate of VAT applied to the totalAmount of the line item. This is used to calculate the vatAmount."
|
|
92
92
|
},
|
|
93
93
|
"containerNumber": {"type": "STRING", "nullable": true,
|
|
94
|
-
"description": "The container number associated with the line item. containerNumber MUST start with 4 letters followed by 7 digits (e.g., CMAU1234567)"
|
|
94
|
+
"description": "The container number associated with the line item. containerNumber MUST start with 4 letters followed by 7 digits (e.g., CMAU1234567). Do not get confused between 0 vs O in the 7 digits of container number."
|
|
95
|
+
},
|
|
95
96
|
"containerSize": {"type": "STRING", "nullable": true,
|
|
96
97
|
"description": "The size of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."}
|
|
97
98
|
}
|
|
@@ -57,7 +57,7 @@ Your role is to accurately extract specific entities from these invoices to supp
|
|
|
57
57
|
- totalAmount: The total amount for the item. It can be in different currencies, so ensure to capture the currency as well for the totalAmountCurrency.
|
|
58
58
|
- totalAmountEuro: A few line items contain a total amount in Euro. You can find it by looking for the term "Total EUR" or "Amount in Euro" in the line item, but it is always in the EURO / € currency. Sometimes it can be the same as totalAmount if the line item is already in Euro.
|
|
59
59
|
- quantity: The quantity of the item or service provided in the line item. Pay attention to patterns such as 2 x 40HC or 2x40HC: the quantity is 2 and 40HC is the containerSize; the quantity is not 240.
|
|
60
|
-
- containerNumber: Container Number always starts with 4 letters and is followed by 7 digits (e.g., ABCD1234567, XALU 8593678).
|
|
60
|
+
- containerNumber: Container Number always starts with 4 letters and is followed by 7 digits (e.g., ABCD1234567, XALU 8593678). Do not confuse the digit 0 (zero) with the letter O in the 7 digits of the container number.
|
|
61
61
|
|
|
62
62
|
- hblNumber and mblNumber:
|
|
63
63
|
- The Master Bill of Lading number. Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", or "HBL No.".
|
{data_science_document_ai-1.60.2.dist-info → data_science_document_ai-1.60.3.dist-info}/WHEEL
RENAMED
|
File without changes
|