data-science-document-ai 1.40.4__py3-none-any.whl → 1.42.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_science_document_ai-1.40.4.dist-info → data_science_document_ai-1.42.0.dist-info}/METADATA +1 -1
- data_science_document_ai-1.42.0.dist-info/RECORD +57 -0
- src/docai.py +14 -5
- src/excel_processing.py +14 -4
- src/io.py +26 -1
- src/llm.py +10 -3
- src/pdf_processing.py +46 -10
- src/postprocessing/common.py +34 -5
- src/prompts/library/bookingConfirmation/evergreen/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/evergreen/prompt.txt +1 -0
- src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +18 -18
- src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +1 -1
- src/prompts/library/bookingConfirmation/maersk/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/maersk/prompt.txt +1 -1
- src/prompts/library/bookingConfirmation/msc/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/msc/prompt.txt +1 -1
- src/prompts/library/bookingConfirmation/oocl/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/oocl/prompt.txt +3 -1
- src/prompts/library/bookingConfirmation/other/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/other/prompt.txt +1 -1
- src/prompts/library/bookingConfirmation/yangming/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/yangming/prompt.txt +1 -1
- src/prompts/library/bundeskasse/other/placeholders.json +19 -19
- src/prompts/library/bundeskasse/other/prompt.txt +1 -1
- src/prompts/library/commercialInvoice/other/prompt.txt +2 -1
- src/prompts/library/customsAssessment/other/prompt.txt +1 -1
- src/prompts/library/customsInvoice/other/placeholders.json +19 -19
- src/prompts/library/customsInvoice/other/prompt.txt +1 -1
- src/prompts/library/deliveryOrder/other/placeholders.json +15 -17
- src/prompts/library/deliveryOrder/other/prompt.txt +1 -1
- src/prompts/library/draftMbl/hapag-lloyd/prompt.txt +2 -1
- src/prompts/library/draftMbl/maersk/prompt.txt +2 -0
- src/prompts/library/draftMbl/other/prompt.txt +1 -1
- src/prompts/library/finalMbL/hapag-lloyd/prompt.txt +1 -1
- src/prompts/library/finalMbL/maersk/prompt.txt +2 -0
- src/prompts/library/finalMbL/other/prompt.txt +1 -1
- src/prompts/library/packingList/other/prompt.txt +1 -1
- src/prompts/library/partnerInvoice/other/placeholders.json +12 -60
- src/prompts/library/partnerInvoice/other/prompt.txt +1 -1
- src/prompts/library/shippingInstruction/other/prompt.txt +1 -0
- src/prompts/prompt_library.py +4 -0
- src/setup.py +5 -1
- src/utils.py +64 -4
- data_science_document_ai-1.40.4.dist-info/RECORD +0 -59
- src/prompts/library/customsAssessment/other/placeholders.json +0 -19
- src/prompts/library/finalMbL/other/placeholders.json +0 -80
- {data_science_document_ai-1.40.4.dist-info → data_science_document_ai-1.42.0.dist-info}/WHEEL +0 -0
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"bookingNumber": {"type": "
|
|
5
|
-
"cfsCutOff": {"type": "
|
|
6
|
-
"cyCutOff": {"type": "
|
|
7
|
-
"gateInReference": {"type": "
|
|
8
|
-
"gateInTerminal": {"type": "
|
|
9
|
-
"mblNumber": {"type": "
|
|
10
|
-
"pickUpReference": {"type": "
|
|
11
|
-
"pickUpTerminal": {"type": "
|
|
12
|
-
"siCutOff": {"type": "
|
|
13
|
-
"vgmCutOff": {"type": "
|
|
4
|
+
"bookingNumber": {"type": "STRING", "nullable": true},
|
|
5
|
+
"cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
|
|
6
|
+
"cyCutOff": {"type": "STRING", "nullable": true},
|
|
7
|
+
"gateInReference": {"type": "STRING", "nullable": true},
|
|
8
|
+
"gateInTerminal": {"type": "STRING", "nullable": true},
|
|
9
|
+
"mblNumber": {"type": "STRING", "nullable": true},
|
|
10
|
+
"pickUpReference": {"type": "STRING", "nullable": true},
|
|
11
|
+
"pickUpTerminal": {"type": "STRING", "nullable": true},
|
|
12
|
+
"siCutOff": {"type": "STRING", "nullable": true},
|
|
13
|
+
"vgmCutOff": {"type": "STRING", "nullable": true},
|
|
14
14
|
"transportLegs": {
|
|
15
15
|
"type": "ARRAY",
|
|
16
16
|
"items": {
|
|
17
17
|
"type": "OBJECT",
|
|
18
18
|
"properties": {
|
|
19
|
-
"eta": {"type": "
|
|
20
|
-
"etd": {"type": "
|
|
21
|
-
"imoNumber": {"type": "
|
|
22
|
-
"portOfDischarge": {"type": "
|
|
23
|
-
"portOfLoading": {"type": "
|
|
24
|
-
"vesselName": {"type": "
|
|
25
|
-
"voyage": {"type": "
|
|
19
|
+
"eta": {"type": "STRING", "nullable": true},
|
|
20
|
+
"etd": {"type": "STRING", "nullable": true},
|
|
21
|
+
"imoNumber": {"type": "STRING", "nullable": true},
|
|
22
|
+
"portOfDischarge": {"type": "STRING", "nullable": true},
|
|
23
|
+
"portOfLoading": {"type": "STRING", "nullable": true},
|
|
24
|
+
"vesselName": {"type": "STRING", "nullable": true},
|
|
25
|
+
"voyage": {"type": "STRING", "nullable": true}
|
|
26
26
|
},
|
|
27
27
|
"required": []
|
|
28
28
|
}
|
|
@@ -18,7 +18,7 @@ transportLegs:
|
|
|
18
18
|
vesselName: The name of the vessel for a specific leg.
|
|
19
19
|
voyage: The journey or route taken by the vessel for a specific leg.
|
|
20
20
|
|
|
21
|
-
your task is to extract the text value of the following entities:
|
|
21
|
+
your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
22
22
|
SCHEMA_PLACEHOLDER
|
|
23
23
|
|
|
24
24
|
Keywords for datapoints:
|
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"cfsCutOff": {"type": "
|
|
5
|
-
"bookingNumber": {"type": "
|
|
6
|
-
"cyCutOff": {"type": "
|
|
7
|
-
"gateInReference": {"type": "
|
|
8
|
-
"gateInTerminal": {"type": "
|
|
9
|
-
"mblNumber": {"type": "
|
|
10
|
-
"pickUpReference": {"type": "
|
|
11
|
-
"pickUpTerminal": {"type": "
|
|
12
|
-
"siCutOff": {"type": "
|
|
13
|
-
"vgmCutOff": {"type": "
|
|
4
|
+
"cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
|
|
5
|
+
"bookingNumber": {"type": "STRING", "nullable": true},
|
|
6
|
+
"cyCutOff": {"type": "STRING", "nullable": true},
|
|
7
|
+
"gateInReference": {"type": "STRING", "nullable": true},
|
|
8
|
+
"gateInTerminal": {"type": "STRING", "nullable": true},
|
|
9
|
+
"mblNumber": {"type": "STRING", "nullable": true},
|
|
10
|
+
"pickUpReference": {"type": "STRING", "nullable": true},
|
|
11
|
+
"pickUpTerminal": {"type": "STRING", "nullable": true},
|
|
12
|
+
"siCutOff": {"type": "STRING", "nullable": true},
|
|
13
|
+
"vgmCutOff": {"type": "STRING", "nullable": true},
|
|
14
14
|
"transportLegs": {
|
|
15
15
|
"type": "ARRAY",
|
|
16
16
|
"items": {
|
|
17
17
|
"type": "OBJECT",
|
|
18
18
|
"properties": {
|
|
19
|
-
"eta": {"type": "
|
|
20
|
-
"etd": {"type": "
|
|
21
|
-
"imoNumber": {"type": "
|
|
22
|
-
"portOfDischarge": {"type": "
|
|
23
|
-
"portOfLoading": {"type": "
|
|
24
|
-
"vesselName": {"type": "
|
|
25
|
-
"voyage": {"type": "
|
|
19
|
+
"eta": {"type": "STRING", "nullable": true},
|
|
20
|
+
"etd": {"type": "STRING", "nullable": true},
|
|
21
|
+
"imoNumber": {"type": "STRING", "nullable": true},
|
|
22
|
+
"portOfDischarge": {"type": "STRING", "nullable": true},
|
|
23
|
+
"portOfLoading": {"type": "STRING", "nullable": true},
|
|
24
|
+
"vesselName": {"type": "STRING", "nullable": true},
|
|
25
|
+
"voyage": {"type": "STRING", "nullable": true}
|
|
26
26
|
},
|
|
27
27
|
"required": []
|
|
28
28
|
}
|
|
@@ -18,7 +18,7 @@ transportLegs:
|
|
|
18
18
|
vesselName: The name of the vessel for a specific leg.
|
|
19
19
|
voyage: The journey or route taken by the vessel for a specific leg.
|
|
20
20
|
|
|
21
|
-
your task is to extract the text value of the following entities:
|
|
21
|
+
your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
22
22
|
SCHEMA_PLACEHOLDER
|
|
23
23
|
|
|
24
24
|
Further explanation and keywords for the transportLegs part are as follows. The 2 conditions below are crucial. Pay attention here:
|
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"cfsCutOff": {"type": "
|
|
5
|
-
"bookingNumber": {"type": "
|
|
6
|
-
"cyCutOff": {"type": "
|
|
7
|
-
"gateInReference": {"type": "
|
|
8
|
-
"gateInTerminal": {"type": "
|
|
9
|
-
"mblNumber": {"type": "
|
|
10
|
-
"pickUpReference": {"type": "
|
|
11
|
-
"pickUpTerminal": {"type": "
|
|
12
|
-
"siCutOff": {"type": "
|
|
13
|
-
"vgmCutOff": {"type": "
|
|
4
|
+
"cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
|
|
5
|
+
"bookingNumber": {"type": "STRING", "nullable": true},
|
|
6
|
+
"cyCutOff": {"type": "STRING", "nullable": true},
|
|
7
|
+
"gateInReference": {"type": "STRING", "nullable": true},
|
|
8
|
+
"gateInTerminal": {"type": "STRING", "nullable": true},
|
|
9
|
+
"mblNumber": {"type": "STRING", "nullable": true},
|
|
10
|
+
"pickUpReference": {"type": "STRING", "nullable": true},
|
|
11
|
+
"pickUpTerminal": {"type": "STRING", "nullable": true},
|
|
12
|
+
"siCutOff": {"type": "STRING", "nullable": true},
|
|
13
|
+
"vgmCutOff": {"type": "STRING", "nullable": true},
|
|
14
14
|
"transportLegs": {
|
|
15
15
|
"type": "ARRAY",
|
|
16
16
|
"items": {
|
|
17
17
|
"type": "OBJECT",
|
|
18
18
|
"properties": {
|
|
19
|
-
"eta": {"type": "
|
|
20
|
-
"etd": {"type": "
|
|
21
|
-
"portOfDischarge": {"type": "
|
|
22
|
-
"portOfLoading": {"type": "
|
|
23
|
-
"vesselName": {"type": "
|
|
24
|
-
"voyage": {"type": "
|
|
25
|
-
"imoNumber": {"type": "
|
|
19
|
+
"eta": {"type": "STRING", "nullable": true},
|
|
20
|
+
"etd": {"type": "STRING", "nullable": true},
|
|
21
|
+
"portOfDischarge": {"type": "STRING", "nullable": true},
|
|
22
|
+
"portOfLoading": {"type": "STRING", "nullable": true},
|
|
23
|
+
"vesselName": {"type": "STRING", "nullable": true},
|
|
24
|
+
"voyage": {"type": "STRING", "nullable": true},
|
|
25
|
+
"imoNumber": {"type": "STRING", "nullable": true}
|
|
26
26
|
},
|
|
27
27
|
"required": []
|
|
28
28
|
}
|
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
|
|
1
|
+
your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
2
|
+
|
|
3
|
+
bookingNumber: Extract the booking number. This information can be found near the labels "BOOKING ACKNOWLEDGEMENT" or "BOOKING NUMBER".
|
|
2
4
|
gateInReference: This field should have the same value as the bookingNumber.
|
|
3
5
|
cyCutOff: Look for the "INTENDED FCL CY CUT-OFF" label and extract the date and time value.
|
|
4
6
|
vgmCutOff: Look for the "INTENDED VGM CUT-OFF" label and extract the date and time value.
|
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"cfsCutOff": {"type": "
|
|
5
|
-
"bookingNumber": {"type": "
|
|
6
|
-
"cyCutOff": {"type": "
|
|
7
|
-
"gateInReference": {"type": "
|
|
8
|
-
"gateInTerminal": {"type": "
|
|
9
|
-
"mblNumber": {"type": "
|
|
10
|
-
"pickUpReference": {"type": "
|
|
11
|
-
"pickUpTerminal": {"type": "
|
|
12
|
-
"siCutOff": {"type": "
|
|
13
|
-
"vgmCutOff": {"type": "
|
|
4
|
+
"cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
|
|
5
|
+
"bookingNumber": {"type": "STRING", "nullable": true},
|
|
6
|
+
"cyCutOff": {"type": "STRING", "nullable": true},
|
|
7
|
+
"gateInReference": {"type": "STRING", "nullable": true},
|
|
8
|
+
"gateInTerminal": {"type": "STRING", "nullable": true},
|
|
9
|
+
"mblNumber": {"type": "STRING", "nullable": true},
|
|
10
|
+
"pickUpReference": {"type": "STRING", "nullable": true},
|
|
11
|
+
"pickUpTerminal": {"type": "STRING", "nullable": true},
|
|
12
|
+
"siCutOff": {"type": "STRING", "nullable": true},
|
|
13
|
+
"vgmCutOff": {"type": "STRING", "nullable": true},
|
|
14
14
|
"transportLegs": {
|
|
15
15
|
"type": "ARRAY",
|
|
16
16
|
"items": {
|
|
17
17
|
"type": "OBJECT",
|
|
18
18
|
"properties": {
|
|
19
|
-
"eta": {"type": "
|
|
20
|
-
"etd": {"type": "
|
|
21
|
-
"imoNumber": {"type": "
|
|
22
|
-
"portOfDischarge": {"type": "
|
|
23
|
-
"portOfLoading": {"type": "
|
|
24
|
-
"vesselName": {"type": "
|
|
25
|
-
"voyage": {"type": "
|
|
19
|
+
"eta": {"type": "STRING", "nullable": true},
|
|
20
|
+
"etd": {"type": "STRING", "nullable": true},
|
|
21
|
+
"imoNumber": {"type": "STRING", "nullable": true},
|
|
22
|
+
"portOfDischarge": {"type": "STRING", "nullable": true},
|
|
23
|
+
"portOfLoading": {"type": "STRING", "nullable": true},
|
|
24
|
+
"vesselName": {"type": "STRING", "nullable": true},
|
|
25
|
+
"voyage": {"type": "STRING", "nullable": true}
|
|
26
26
|
},
|
|
27
27
|
"required": []
|
|
28
28
|
}
|
|
@@ -18,7 +18,7 @@ transportLegs:
|
|
|
18
18
|
vesselName: The name of the vessel for a specific leg.
|
|
19
19
|
voyage: The journey or route taken by the vessel for a specific leg.
|
|
20
20
|
|
|
21
|
-
your task is to extract the text value of the following entities:
|
|
21
|
+
your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
22
22
|
SCHEMA_PLACEHOLDER
|
|
23
23
|
|
|
24
24
|
Further explanation for the transportLegs part as follows:
|
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"cfsCutOff": {"type": "
|
|
5
|
-
"bookingNumber": {"type": "
|
|
6
|
-
"cyCutOff": {"type": "
|
|
7
|
-
"gateInReference": {"type": "
|
|
8
|
-
"gateInTerminal": {"type": "
|
|
9
|
-
"mblNumber": {"type": "
|
|
10
|
-
"pickUpReference": {"type": "
|
|
11
|
-
"pickUpTerminal": {"type": "
|
|
12
|
-
"siCutOff": {"type": "
|
|
13
|
-
"vgmCutOff": {"type": "
|
|
4
|
+
"cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
|
|
5
|
+
"bookingNumber": {"type": "STRING", "nullable": true},
|
|
6
|
+
"cyCutOff": {"type": "STRING", "nullable": true},
|
|
7
|
+
"gateInReference": {"type": "STRING", "nullable": true},
|
|
8
|
+
"gateInTerminal": {"type": "STRING", "nullable": true},
|
|
9
|
+
"mblNumber": {"type": "STRING", "nullable": true},
|
|
10
|
+
"pickUpReference": {"type": "STRING", "nullable": true},
|
|
11
|
+
"pickUpTerminal": {"type": "STRING", "nullable": true},
|
|
12
|
+
"siCutOff": {"type": "STRING", "nullable": true},
|
|
13
|
+
"vgmCutOff": {"type": "STRING", "nullable": true},
|
|
14
14
|
"transportLegs": {
|
|
15
15
|
"type": "ARRAY",
|
|
16
16
|
"items": {
|
|
17
17
|
"type": "OBJECT",
|
|
18
18
|
"properties": {
|
|
19
|
-
"eta": {"type": "
|
|
20
|
-
"etd": {"type": "
|
|
21
|
-
"imoNumber": {"type": "
|
|
22
|
-
"portOfDischarge": {"type": "
|
|
23
|
-
"portOfLoading": {"type": "
|
|
24
|
-
"vesselName": {"type": "
|
|
25
|
-
"voyage": {"type": "
|
|
19
|
+
"eta": {"type": "STRING", "nullable": true},
|
|
20
|
+
"etd": {"type": "STRING", "nullable": true},
|
|
21
|
+
"imoNumber": {"type": "STRING", "nullable": true},
|
|
22
|
+
"portOfDischarge": {"type": "STRING", "nullable": true},
|
|
23
|
+
"portOfLoading": {"type": "STRING", "nullable": true},
|
|
24
|
+
"vesselName": {"type": "STRING", "nullable": true},
|
|
25
|
+
"voyage": {"type": "STRING", "nullable": true}
|
|
26
26
|
},
|
|
27
27
|
"required": []
|
|
28
28
|
}
|
|
@@ -18,7 +18,7 @@ transportLegs:
|
|
|
18
18
|
vesselName: The name of the vessel for a specific leg.
|
|
19
19
|
voyage: The journey or route taken by the vessel for a specific leg.
|
|
20
20
|
|
|
21
|
-
your task is to extract the text value of the following entities:
|
|
21
|
+
your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
22
22
|
SCHEMA_PLACEHOLDER
|
|
23
23
|
|
|
24
24
|
Keywords for datapoints:
|
|
@@ -2,47 +2,47 @@
|
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
4
|
"currencyCode": {
|
|
5
|
-
"type": "
|
|
5
|
+
"type": "STRING",
|
|
6
6
|
"nullable": true,
|
|
7
7
|
"description": "The currency in which the invoice is issued."
|
|
8
8
|
},
|
|
9
9
|
"grandTotal": {
|
|
10
|
-
"type": "
|
|
10
|
+
"type": "STRING",
|
|
11
11
|
"nullable": true,
|
|
12
12
|
"description": "The overall total amount of the invoice."
|
|
13
13
|
},
|
|
14
14
|
"issueDate": {
|
|
15
|
-
"type": "
|
|
15
|
+
"type": "STRING",
|
|
16
16
|
"nullable": true,
|
|
17
17
|
"description": "The date the document was issued."
|
|
18
18
|
},
|
|
19
19
|
"recipientAddress": {
|
|
20
|
-
"type": "
|
|
20
|
+
"type": "STRING",
|
|
21
21
|
"nullable": true,
|
|
22
22
|
"description": "The address of the recipient."
|
|
23
23
|
},
|
|
24
24
|
"recipientName": {
|
|
25
|
-
"type": "
|
|
25
|
+
"type": "STRING",
|
|
26
26
|
"nullable": true,
|
|
27
27
|
"description": "The name of the recipient."
|
|
28
28
|
},
|
|
29
29
|
"serviceDate": {
|
|
30
|
-
"type": "
|
|
30
|
+
"type": "STRING",
|
|
31
31
|
"nullable": true,
|
|
32
32
|
"description": "The date of service or transaction."
|
|
33
33
|
},
|
|
34
34
|
"shipmentId": {
|
|
35
|
-
"type": "
|
|
35
|
+
"type": "STRING",
|
|
36
36
|
"nullable": true,
|
|
37
37
|
"description": "Starting with an \"S\" and followed by 6 or 7 digits. Example: S124321"
|
|
38
38
|
},
|
|
39
39
|
"vendorName": {
|
|
40
|
-
"type": "
|
|
40
|
+
"type": "STRING",
|
|
41
41
|
"nullable": true,
|
|
42
42
|
"description": "The name of the vendor."
|
|
43
43
|
},
|
|
44
44
|
"vendorAddress": {
|
|
45
|
-
"type": "
|
|
45
|
+
"type": "STRING",
|
|
46
46
|
"nullable": true,
|
|
47
47
|
"description": "The address of the vendor."
|
|
48
48
|
},
|
|
@@ -52,37 +52,37 @@
|
|
|
52
52
|
"type": "OBJECT",
|
|
53
53
|
"properties": {
|
|
54
54
|
"deferredDutyPayer": {
|
|
55
|
-
"type": "
|
|
55
|
+
"type": "STRING",
|
|
56
56
|
"nullable": true,
|
|
57
57
|
"description": "It can be identified under \"Aufschubnehmer\" for each line item"
|
|
58
58
|
},
|
|
59
59
|
"name": {
|
|
60
|
-
"type": "
|
|
60
|
+
"type": "STRING",
|
|
61
61
|
"nullable": true,
|
|
62
62
|
"description": "The name or description of the line item A0000 and B0000"
|
|
63
63
|
},
|
|
64
64
|
"taxType": {
|
|
65
|
-
"type": "
|
|
65
|
+
"type": "STRING",
|
|
66
66
|
"nullable": true,
|
|
67
67
|
"description": "It's a line item mentioned in the invoice. For example; A0000 and B0000"
|
|
68
68
|
},
|
|
69
69
|
"totalAmount": {
|
|
70
|
-
"type": "
|
|
70
|
+
"type": "STRING",
|
|
71
71
|
"nullable": true,
|
|
72
72
|
"description": "The total amount for the line item."
|
|
73
73
|
},
|
|
74
74
|
"totalAmountCurrency": {
|
|
75
|
-
"type": "
|
|
75
|
+
"type": "STRING",
|
|
76
76
|
"nullable": true,
|
|
77
77
|
"description": "The currency of the total amount."
|
|
78
78
|
},
|
|
79
79
|
"vatId": {
|
|
80
|
-
"type": "
|
|
80
|
+
"type": "STRING",
|
|
81
81
|
"nullable": true,
|
|
82
82
|
"description": "The VAT identification number. This is named a Konto-Nummer for each line item."
|
|
83
83
|
},
|
|
84
84
|
"dueDate": {
|
|
85
|
-
"type": "
|
|
85
|
+
"type": "STRING",
|
|
86
86
|
"nullable": true,
|
|
87
87
|
"description": "It's a due date. Due date to pay the amount. It's usually mentioned either in a date or a number of days format"
|
|
88
88
|
}
|
|
@@ -91,20 +91,20 @@
|
|
|
91
91
|
}
|
|
92
92
|
},
|
|
93
93
|
"invoiceNumber": {
|
|
94
|
-
"type": "
|
|
94
|
+
"type": "STRING",
|
|
95
95
|
"nullable": true,
|
|
96
96
|
"description": "Invoice Number is a unique identifier for the invoice, it starts with \"ATC\", \"AT-C\", or \"AT/C\" only (e.g., ATC40, AT-C-40-, AT/C/40/....). Do NOT extract the \"NIZZA-Registrierkennzeichen\" number."
|
|
97
97
|
},
|
|
98
98
|
"containerNumber": {
|
|
99
99
|
"type": "ARRAY",
|
|
100
100
|
"items": {
|
|
101
|
-
"type": "
|
|
101
|
+
"type": "STRING",
|
|
102
102
|
"nullable": true,
|
|
103
103
|
"description": "The unique identifier for each container. It always starts with 4 capital letters and followed by 7 digits. Example: TEMU7972458."
|
|
104
104
|
}
|
|
105
105
|
},
|
|
106
106
|
"creditNoteInvoiceNumber": {
|
|
107
|
-
"type": "
|
|
107
|
+
"type": "STRING",
|
|
108
108
|
"nullable": true,
|
|
109
109
|
"description": "The unique identifier for the associated Invoice. The number usually starts with ATS..."
|
|
110
110
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
|
|
2
2
|
|
|
3
|
-
<TASK>Your task is to extract data from customs invoice documents as per the given response schema structure.<TASK>
|
|
3
|
+
<TASK>Your task is to extract data and page numbers starting from 0 from customs invoice documents as per the given response schema structure.<TASK>
|
|
4
4
|
|
|
5
5
|
<CONTEXT>
|
|
6
6
|
The Freight Forwarding company receives Customs invoices from Customs Brokers called Bundeskasse.
|
|
@@ -2,7 +2,8 @@ Task: You are a document entity extraction specialist. Given a document, your ta
|
|
|
2
2
|
|
|
3
3
|
Extract all the data points from the given document.
|
|
4
4
|
Each data point is part of a master field called skus. There may be multiple skus entries in a document.
|
|
5
|
-
Your
|
|
5
|
+
Your task is to extract the text value of the entities and page numbers starting from 0 where the value was found in the document.
|
|
6
|
+
|
|
6
7
|
|
|
7
8
|
Instructions:
|
|
8
9
|
- Populate fields as defined in the response schema.
|
|
@@ -15,7 +15,7 @@ containers:
|
|
|
15
15
|
goodsDescription: Goods description.
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
Your task is to extract the text value of the following entities:
|
|
18
|
+
Your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
19
19
|
SCHEMA_PLACEHOLDER
|
|
20
20
|
|
|
21
21
|
Keywords for datapoints:
|
|
@@ -4,63 +4,63 @@
|
|
|
4
4
|
"bankAccount": {
|
|
5
5
|
"type": "ARRAY",
|
|
6
6
|
"items": {
|
|
7
|
-
"type":
|
|
7
|
+
"type": "STRING",
|
|
8
8
|
"nullable": true,
|
|
9
9
|
"description": "The bank account(s) number(s) of the vendor. This is the account to which the payment should be made. Extract all the relevant bank account numbers mentioned in the invoice."
|
|
10
10
|
}
|
|
11
11
|
},
|
|
12
|
-
"contractNumber": {"type":
|
|
12
|
+
"contractNumber": {"type": "STRING",
|
|
13
13
|
"nullable": true,
|
|
14
14
|
"description": "It's a contract number between the carrier and Forto Logistics SE & Co KG."
|
|
15
15
|
},
|
|
16
16
|
"currencyExchange": {
|
|
17
17
|
"type": "OBJECT",
|
|
18
18
|
"properties": {
|
|
19
|
-
"from": {"type":
|
|
19
|
+
"from": {"type": "STRING",
|
|
20
20
|
"nullable": true,
|
|
21
21
|
"description": "The currency code from which the exchange rate is applied."
|
|
22
22
|
},
|
|
23
|
-
"fxRate": {"type":
|
|
23
|
+
"fxRate": {"type": "STRING",
|
|
24
24
|
"nullable": true,
|
|
25
25
|
"description": "The exchange rate applied to convert the amount from the 'from' currency to the 'to' currency."
|
|
26
26
|
},
|
|
27
|
-
"to": {"type":
|
|
27
|
+
"to": {"type": "STRING",
|
|
28
28
|
"nullable": true,
|
|
29
29
|
"description": "The currency code to which the exchange rate is applied."}
|
|
30
30
|
}
|
|
31
31
|
},
|
|
32
|
-
"documentType": {"type":
|
|
33
|
-
"dueDate": {"type":
|
|
32
|
+
"documentType": {"type": "STRING", "nullable": true},
|
|
33
|
+
"dueDate": {"type": "STRING", "nullable": true,
|
|
34
34
|
"description": "The date by which the payment should be made by Forto Logistics SE & Co KG. Do Not calculate dueDate based on issueDate or any other date. Extract it directly from the invoice."},
|
|
35
|
-
"eta": {"type":
|
|
35
|
+
"eta": {"type": "STRING", "nullable": true,
|
|
36
36
|
"description": "Estimated Time of Arrival (ETA) is the expected date when the shipment will arrive at its destination."},
|
|
37
|
-
"etd": {"type":
|
|
37
|
+
"etd": {"type": "STRING", "nullable": true,
|
|
38
38
|
"description": "Estimated Time of Departure (ETD) is the expected date when the shipment will leave the origin port."},
|
|
39
|
-
"fortoEntity": {"type":
|
|
39
|
+
"fortoEntity": {"type": "STRING", "nullable": true,
|
|
40
40
|
"description": "The entity of 'Forto Logistics SE & Co KG' that is responsible for the invoice. The Forto organization or branch managing the shipment."
|
|
41
41
|
},
|
|
42
|
-
"hblNumber": {"type":
|
|
42
|
+
"hblNumber": {"type": "STRING", "nullable": true,
|
|
43
43
|
"description": "House Bill of Lading number, a document issued by a freight forwarder."
|
|
44
44
|
},
|
|
45
|
-
"currencyCode": {"type":
|
|
45
|
+
"currencyCode": {"type": "STRING", "nullable": true,
|
|
46
46
|
"description": "The currency code in which the invoice is issued, such as EUR, USD, etc."
|
|
47
47
|
},
|
|
48
|
-
"grandTotal": {"type":
|
|
48
|
+
"grandTotal": {"type": "STRING", "nullable": true,
|
|
49
49
|
"description": "The total amount of the invoice, including all line items and taxes."
|
|
50
50
|
},
|
|
51
|
-
"vatAmount": {"type":
|
|
51
|
+
"vatAmount": {"type": "STRING", "nullable": true,
|
|
52
52
|
"description": "The total VAT amount applied to the invoice. This is the tax charged on the vatApplicableAmount of the invoice. Bitte Zahlen is not the vatAmount."
|
|
53
53
|
},
|
|
54
|
-
"vatApplicableAmount": {"type":
|
|
54
|
+
"vatApplicableAmount": {"type": "STRING", "nullable": true,
|
|
55
55
|
"description": "The amount on which VAT is applicable. This is the net amount before VAT is applied (without VAT)."
|
|
56
56
|
},
|
|
57
|
-
"vatPercentage": {"type":
|
|
57
|
+
"vatPercentage": {"type": "STRING", "nullable": true,
|
|
58
58
|
"description": "The percentage rate of VAT applied to the vatApplicableAmount. This is used to calculate the vatAmount."
|
|
59
59
|
},
|
|
60
|
-
"invoiceNumber": {"type":
|
|
60
|
+
"invoiceNumber": {"type": "STRING", "nullable": true,
|
|
61
61
|
"description": "The unique identifier for the invoice. This is used to track and reference the invoice in financial records."
|
|
62
62
|
},
|
|
63
|
-
"issueDate": {"type":
|
|
63
|
+
"issueDate": {"type": "STRING", "nullable": true,
|
|
64
64
|
"description": "The date when the invoice was issued."
|
|
65
65
|
},
|
|
66
66
|
"lineItem": {
|
|
@@ -69,7 +69,7 @@
|
|
|
69
69
|
"type": "OBJECT",
|
|
70
70
|
"properties": {
|
|
71
71
|
"uniqueId": {
|
|
72
|
-
"type":
|
|
72
|
+
"type": "STRING",
|
|
73
73
|
"nullable": true,
|
|
74
74
|
"description": "A line item can belong to different shipments. Hence, the unique IDs of a line item need to be extracted that you see only on the line item level. UniqueIds are containerNumber, shipmentId, or sealNumber."
|
|
75
75
|
},
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
|
|
2
2
|
|
|
3
|
-
<TASK>Your task is to extract data from invoice documents as per the given response schema structure.<TASK>
|
|
3
|
+
<TASK>Your task is to extract data and their page numbers starting from 0 from invoice documents as per the given response schema structure.<TASK>
|
|
4
4
|
|
|
5
5
|
<CONTEXT>
|
|
6
6
|
The Freight Forwarding company receives invoices from Carrier (Shipping Lines) partners and Customs Brokers. These include Partner Invoices (COGS Invoices) and COGS Customs Invoices.
|
|
@@ -1,31 +1,29 @@
|
|
|
1
1
|
{
|
|
2
|
-
"SCHEMA_PLACEHOLDER": {
|
|
3
2
|
"type": "OBJECT",
|
|
4
3
|
"properties": {
|
|
5
|
-
"EmptyContainerDepot": {"type": "
|
|
4
|
+
"EmptyContainerDepot": {"type": "STRING", "nullable": true},
|
|
6
5
|
"Equipment": {"type": "ARRAY",
|
|
7
6
|
"items": {
|
|
8
7
|
"type": "OBJECT", "properties": {
|
|
9
|
-
"CargoGrossWeight": {"type": "
|
|
10
|
-
"ContainerNumber": {"type": "
|
|
11
|
-
"ContainerType": {"type": "
|
|
12
|
-
"EmptyReturnReference": {"type": "
|
|
13
|
-
"Pin": {"type": "
|
|
14
|
-
"TareWeight": {"type": "
|
|
8
|
+
"CargoGrossWeight": {"type": "STRING", "nullable": true},
|
|
9
|
+
"ContainerNumber": {"type": "STRING", "nullable": true},
|
|
10
|
+
"ContainerType": {"type": "STRING", "nullable": true},
|
|
11
|
+
"EmptyReturnReference": {"type": "STRING", "nullable": true},
|
|
12
|
+
"Pin": {"type": "STRING", "nullable": true},
|
|
13
|
+
"TareWeight": {"type": "STRING", "nullable": true}
|
|
15
14
|
}, "required": []}
|
|
16
15
|
},
|
|
17
|
-
"pickUpTerminal": {"type": "
|
|
16
|
+
"pickUpTerminal": {"type": "STRING", "nullable": true},
|
|
18
17
|
"TransportLeg": {"type": "ARRAY",
|
|
19
18
|
"items": {
|
|
20
19
|
"type": "OBJECT", "properties": {
|
|
21
|
-
"eta": {"type": "
|
|
22
|
-
"etd": {"type": "
|
|
23
|
-
"portOfDischarge": {"type": "
|
|
24
|
-
"portOfLoading": {"type": "
|
|
25
|
-
"vesselName": {"type": "
|
|
26
|
-
"voyage": {"type": "
|
|
20
|
+
"eta": {"type": "STRING", "nullable": true},
|
|
21
|
+
"etd": {"type": "STRING", "nullable": true},
|
|
22
|
+
"portOfDischarge": {"type": "STRING", "nullable": true},
|
|
23
|
+
"portOfLoading": {"type": "STRING", "nullable": true},
|
|
24
|
+
"vesselName": {"type": "STRING", "nullable": true},
|
|
25
|
+
"voyage": {"type": "STRING", "nullable": true}
|
|
27
26
|
}, "required": []}
|
|
28
|
-
}
|
|
29
|
-
},
|
|
27
|
+
},
|
|
30
28
|
"required": []}
|
|
31
29
|
}
|
|
@@ -18,7 +18,7 @@ TransportLeg:
|
|
|
18
18
|
vesselName: The name of the vessel.
|
|
19
19
|
voyage: The journey or route code taken by the vessel.
|
|
20
20
|
|
|
21
|
-
Your task is to extract the text value of the following entities:
|
|
21
|
+
Your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
22
22
|
SCHEMA_PLACEHOLDER
|
|
23
23
|
|
|
24
24
|
Keywords for datapoints:
|