data-science-document-ai 1.42.5__py3-none-any.whl → 1.57.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_science_document_ai-1.42.5.dist-info → data_science_document_ai-1.57.0.dist-info}/METADATA +2 -2
- data_science_document_ai-1.57.0.dist-info/RECORD +60 -0
- src/constants.py +13 -34
- src/docai_processor_config.yaml +0 -69
- src/excel_processing.py +24 -14
- src/io.py +23 -0
- src/llm.py +0 -29
- src/pdf_processing.py +183 -76
- src/postprocessing/common.py +172 -28
- src/postprocessing/postprocess_partner_invoice.py +194 -59
- src/prompts/library/arrivalNotice/other/placeholders.json +70 -0
- src/prompts/library/arrivalNotice/other/prompt.txt +40 -0
- src/prompts/library/bookingConfirmation/evergreen/placeholders.json +135 -21
- src/prompts/library/bookingConfirmation/evergreen/prompt.txt +21 -17
- src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +136 -22
- src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +52 -58
- src/prompts/library/bookingConfirmation/maersk/placeholders.json +135 -21
- src/prompts/library/bookingConfirmation/maersk/prompt.txt +10 -1
- src/prompts/library/bookingConfirmation/msc/placeholders.json +135 -21
- src/prompts/library/bookingConfirmation/msc/prompt.txt +10 -1
- src/prompts/library/bookingConfirmation/oocl/placeholders.json +149 -21
- src/prompts/library/bookingConfirmation/oocl/prompt.txt +11 -3
- src/prompts/library/bookingConfirmation/other/placeholders.json +149 -21
- src/prompts/library/bookingConfirmation/other/prompt.txt +56 -57
- src/prompts/library/bookingConfirmation/yangming/placeholders.json +149 -21
- src/prompts/library/bookingConfirmation/yangming/prompt.txt +11 -1
- src/prompts/library/bundeskasse/other/placeholders.json +5 -5
- src/prompts/library/bundeskasse/other/prompt.txt +7 -5
- src/prompts/library/commercialInvoice/other/placeholders.json +125 -0
- src/prompts/library/commercialInvoice/other/prompt.txt +1 -1
- src/prompts/library/customsAssessment/other/placeholders.json +70 -0
- src/prompts/library/customsAssessment/other/prompt.txt +24 -37
- src/prompts/library/customsInvoice/other/prompt.txt +4 -3
- src/prompts/library/deliveryOrder/other/placeholders.json +80 -27
- src/prompts/library/deliveryOrder/other/prompt.txt +26 -40
- src/prompts/library/draftMbl/other/placeholders.json +33 -33
- src/prompts/library/draftMbl/other/prompt.txt +34 -44
- src/prompts/library/finalMbL/other/placeholders.json +80 -0
- src/prompts/library/finalMbL/other/prompt.txt +34 -44
- src/prompts/library/packingList/other/placeholders.json +98 -0
- src/prompts/library/partnerInvoice/other/prompt.txt +8 -7
- src/prompts/library/preprocessing/carrier/placeholders.json +0 -16
- src/prompts/library/shippingInstruction/other/placeholders.json +115 -0
- src/prompts/library/shippingInstruction/other/prompt.txt +26 -14
- src/prompts/prompt_library.py +0 -4
- src/setup.py +25 -24
- src/utils.py +120 -68
- data_science_document_ai-1.42.5.dist-info/RECORD +0 -57
- src/prompts/library/draftMbl/hapag-lloyd/prompt.txt +0 -45
- src/prompts/library/draftMbl/maersk/prompt.txt +0 -19
- src/prompts/library/finalMbL/hapag-lloyd/prompt.txt +0 -44
- src/prompts/library/finalMbL/maersk/prompt.txt +0 -19
- {data_science_document_ai-1.42.5.dist-info → data_science_document_ai-1.57.0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "OBJECT",
|
|
3
|
+
"properties": {
|
|
4
|
+
"bookingNumber": {
|
|
5
|
+
"type": "STRING",
|
|
6
|
+
"nullable": true,
|
|
7
|
+
"description": "The booking number associated with the Arrival Notice document. They are often referred to as 'Booking Number', 'Booking No.', 'Booking Ref.', 'Booking Reference', 'Booking ID', 'carrier's reference' or 'Order Ref'."
|
|
8
|
+
},
|
|
9
|
+
"destinationTerminal": {
|
|
10
|
+
"type": "STRING",
|
|
11
|
+
"nullable": true,
|
|
12
|
+
"description": "The terminal at the destination port where the container will be delivered."
|
|
13
|
+
},
|
|
14
|
+
"eta": {
|
|
15
|
+
"type": "STRING",
|
|
16
|
+
"nullable": true,
|
|
17
|
+
"description": "Estimated Time of Arrival (ETA) is the expected date when the shipment will arrive at its destination."
|
|
18
|
+
},
|
|
19
|
+
"mblNumber": {
|
|
20
|
+
"type": "STRING",
|
|
21
|
+
"nullable": true,
|
|
22
|
+
"description": "Bill of Lading number (B/L NO.), a document issued by the carrier."
|
|
23
|
+
},
|
|
24
|
+
"portOfDischarge": {
|
|
25
|
+
"type": "STRING",
|
|
26
|
+
"nullable": true,
|
|
27
|
+
"description": "The port where the goods are discharged from the vessel. This is the destination port for the shipment."
|
|
28
|
+
},
|
|
29
|
+
"vesselName": {
|
|
30
|
+
"type": "STRING",
|
|
31
|
+
"nullable": true,
|
|
32
|
+
"description": "The name of the vessel carrying the shipment."
|
|
33
|
+
},
|
|
34
|
+
"containers": {
|
|
35
|
+
"type": "ARRAY",
|
|
36
|
+
"items": {
|
|
37
|
+
"type": "OBJECT",
|
|
38
|
+
"properties": {
|
|
39
|
+
"containerNumber": {
|
|
40
|
+
"type": "STRING",
|
|
41
|
+
"nullable": true,
|
|
42
|
+
"description": "The unique identifier for each container. It always starts with 4 capital letters and followed by 7 digits. Example: TEMU7972458."
|
|
43
|
+
},
|
|
44
|
+
"containerType": {
|
|
45
|
+
"type": "STRING",
|
|
46
|
+
"nullable": true,
|
|
47
|
+
"description": "The size of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."
|
|
48
|
+
},
|
|
49
|
+
"grossWeight": {
|
|
50
|
+
"type": "STRING",
|
|
51
|
+
"nullable": true,
|
|
52
|
+
"description": "The gross weight of the container. Usually mentioned as G.W or GW or Gross Weight, etc.."
|
|
53
|
+
},
|
|
54
|
+
"measurements": {
|
|
55
|
+
"type": "STRING",
|
|
56
|
+
"nullable": true,
|
|
57
|
+
"description": "The volume of the container. Usually, it is measured in 'Cubic Meter (cbm)' or dimensions. But volume in 'cbm' is preferred."
|
|
58
|
+
},
|
|
59
|
+
"sealNumber": {
|
|
60
|
+
"type": "STRING",
|
|
61
|
+
"nullable": true,
|
|
62
|
+
"description": "The seal number associated with the container Number. But it is not same as the container number."
|
|
63
|
+
}
|
|
64
|
+
},
|
|
65
|
+
"required": ["containerNumber", "containerType", "grossWeight"]
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
},
|
|
69
|
+
"required": ["bookingNumber", "destinationTerminal", "eta", "portOfDischarge", "vesselName", "containers"]
|
|
70
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
<PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
|
|
2
|
+
|
|
3
|
+
<TASK> Your task is to extract data from Arrival Notice documents as per the given response schema structure. <TASK>
|
|
4
|
+
|
|
5
|
+
<CONTEXT>
|
|
6
|
+
The Freight Forwarding company receives Arrival Notice from shipping lines.
|
|
7
|
+
These documents contain various details related to arrival of a shipment to the port of destination such as container numbers, estimated time of arrival, vessel details and containers information.
|
|
8
|
+
They may be written in different languages such as English, German, Italian and can appear in a variety of formats and layouts.
|
|
9
|
+
Your role is to accurately extract specific entities from these Arrival Notices to support efficient processing and accurate record-keeping.
|
|
10
|
+
<CONTEXT>
|
|
11
|
+
|
|
12
|
+
<INSTRUCTIONS>
|
|
13
|
+
- Populate fields as defined in the response schema.
|
|
14
|
+
- Multiple Containers entries may exist, capture all instances under "containers".
|
|
15
|
+
- Use the data field description to understand the context of the data.
|
|
16
|
+
|
|
17
|
+
- bookingNumber:
|
|
18
|
+
- Booking numbers are unique identifiers for shipments. They are often referred to as "Booking Number", "Booking No.", "Booking Ref.", "Booking Reference", "Booking ID", "SACO-Pos.", "Order Ref", "Unsere Referenz", or "Unsere Position"
|
|
19
|
+
- If there is a unique_id that starts with "S" followed by 6 or 8 digits, it is a shipmentID, not a bookingNumber.
|
|
20
|
+
|
|
21
|
+
- destinationTerminal:
|
|
22
|
+
- Destination Terminal can also be referred to as "Destination Terminal", "Pickup Location", "Delivery Location", "Delivery Terminal", "Empfangsort", "Entladeort", or "Abladestelle".
|
|
23
|
+
|
|
24
|
+
- mblNumber:
|
|
25
|
+
- Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", "HBL No.", or "M-AWB Nummer".
|
|
26
|
+
- Bill of Lading Number is known as mblNumber. Not a shipmentID even if it starts with "S".
|
|
27
|
+
- mblNumber from Hapag-Lloyd always starts with HLC.... (e.g., "HLCUTS12303AWNT3") and is named as "SEA WAYBILL" or "SWB-NR.".
|
|
28
|
+
|
|
29
|
+
- eta:
|
|
30
|
+
- Estimated Time of Arrival (ETA) is the expected date and time when the shipment will arrive at the destination port.
|
|
31
|
+
- It can be referred to as "ETA", "Estimated Arrival", "Voraussichtliche Ankunft", "Ankunftszeit", "Arrivo", "Due to arrive at Terminal"
|
|
32
|
+
|
|
33
|
+
- vesselName:
|
|
34
|
+
- Vessel Name is the name of the ship carrying the cargo. It can be referred to as "Vessel", "Ship Name", "Schiff", "Schiffsname", "Nave", or "Vessel/Flight No.".
|
|
35
|
+
|
|
36
|
+
- containers: Details of each container on the arrival notice. Make sure to extract each container information separately.
|
|
37
|
+
- containerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892).
|
|
38
|
+
- sealNumber: Seal numbers are unique identifiers for shipping seals. They are usually mentioned as seal numbers in the document but they are definitely not container numbers.
|
|
39
|
+
|
|
40
|
+
<INSTRUCTIONS>
|
|
@@ -1,32 +1,146 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
"
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
4
|
+
"bookingNumber": {
|
|
5
|
+
"type": "STRING",
|
|
6
|
+
"nullable": true,
|
|
7
|
+
"description": "A unique identifier assigned to the shipment booking, used for tracking and reference. They are often referred to as 'Booking No.', 'Booking Reference', 'Our Reference', or 'Order Ref'."
|
|
8
|
+
},
|
|
9
|
+
"contractNumber": {
|
|
10
|
+
"type": "STRING",
|
|
11
|
+
"nullable": true,
|
|
12
|
+
"description": "It's a contract number between the carrier and Forto Logistics SE & Co KG."
|
|
13
|
+
},
|
|
14
|
+
"pickUpTerminalCode": {
|
|
15
|
+
"type": "STRING",
|
|
16
|
+
"nullable": true,
|
|
17
|
+
"description": "The specific terminal for cargo pickup during the import shipment."
|
|
18
|
+
},
|
|
19
|
+
"gateInTerminalCode": {
|
|
20
|
+
"type": "STRING",
|
|
21
|
+
"nullable": true,
|
|
22
|
+
"description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., Export terminal delivery address, Export terminal location, or Export terminal name."
|
|
23
|
+
},
|
|
24
|
+
"performaDate": {
|
|
25
|
+
"type": "STRING",
|
|
26
|
+
"nullable": true,
|
|
27
|
+
"description": "The date considered to apply the rates and charges specified in the booking confirmation"
|
|
28
|
+
},
|
|
29
|
+
"cyCutOff": {
|
|
30
|
+
"type": "STRING",
|
|
31
|
+
"nullable": true,
|
|
32
|
+
"description": "The datetime by which the cargo must be delivered to the Container Yard. It can be found with keys FCL delivery cut-off, FCL DG delivery cut-off, CY CUT OFF, CY Closing."
|
|
33
|
+
},
|
|
34
|
+
"gateInReference": {
|
|
35
|
+
"type": "STRING",
|
|
36
|
+
"nullable": true,
|
|
37
|
+
"description": "A reference code for cargo entering the terminal to drop the loaded cargo for Export. Sometimes it can be 'Our Reference'."
|
|
38
|
+
},
|
|
39
|
+
"mblNumber": {
|
|
40
|
+
"type": "STRING",
|
|
41
|
+
"nullable": true,
|
|
42
|
+
"description": "Bill of Lading number (B/L NO.), a document issued by the carrier."
|
|
43
|
+
},
|
|
44
|
+
"pickUpReference": {
|
|
45
|
+
"type": "STRING",
|
|
46
|
+
"nullable": true,
|
|
47
|
+
"description": "A reference code for cargo pickup during the import shipment. Sometimes it can be 'Our Reference'."
|
|
48
|
+
},
|
|
49
|
+
"siCutOff": {
|
|
50
|
+
"type": "STRING",
|
|
51
|
+
"nullable": true,
|
|
52
|
+
"description": "The deadline datetime for submitting the Shipping Instructions (SI) to the carrier. It can be found with keys Shipping Instruction Closing."
|
|
53
|
+
},
|
|
54
|
+
"vgmCutOff": {
|
|
55
|
+
"type": "STRING",
|
|
56
|
+
"nullable": true,
|
|
57
|
+
"description": "The deadline datetime for submitting the Verified Gross Mass (VGM) to the carrier. It can be found with keys VGM DEADLINE, VGM DUE, VGM CUT OFF."
|
|
58
|
+
},
|
|
59
|
+
"containers": {
|
|
60
|
+
"type": "ARRAY",
|
|
61
|
+
"items": {
|
|
62
|
+
"type": "OBJECT",
|
|
63
|
+
"properties": {
|
|
64
|
+
"containerType": {
|
|
65
|
+
"type": "STRING",
|
|
66
|
+
"nullable": true,
|
|
67
|
+
"description": "The size / type of the container, such as 20ft, 40ft, 40HC, 20DC etc under Type/Size column."
|
|
68
|
+
},
|
|
69
|
+
"pickUpDepotCode": {
|
|
70
|
+
"type": "STRING",
|
|
71
|
+
"nullable": true,
|
|
72
|
+
"description": "The depot code where the empty container will be picked up. It is identified as Empty Pick Up Depot or Export Empty Pick Up Depot(s)."
|
|
73
|
+
},
|
|
74
|
+
"dropOffDepotCode": {
|
|
75
|
+
"type": "STRING",
|
|
76
|
+
"nullable": true,
|
|
77
|
+
"description": "The depot code where the empty container will be dropped off."
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
},
|
|
81
|
+
"required": ["containerType", "pickUpDepotCode", "dropOffDepotCode"]
|
|
82
|
+
},
|
|
14
83
|
"transportLegs": {
|
|
15
84
|
"type": "ARRAY",
|
|
16
85
|
"items": {
|
|
17
86
|
"type": "OBJECT",
|
|
18
87
|
"properties": {
|
|
19
|
-
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
"
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
88
|
+
"eta": {
|
|
89
|
+
"type": "STRING",
|
|
90
|
+
"nullable": true,
|
|
91
|
+
"description": "Estimated Time of Arrival (ETA) is the expected date when the shipment will arrive at its destination."
|
|
92
|
+
},
|
|
93
|
+
"etd": {
|
|
94
|
+
"type": "STRING",
|
|
95
|
+
"nullable": true,
|
|
96
|
+
"description": "Estimated Time of Departure (ETD) is the expected date when the shipment will leave the origin port."
|
|
97
|
+
},
|
|
98
|
+
"imoNumber": {
|
|
99
|
+
"type": "STRING",
|
|
100
|
+
"nullable": true,
|
|
101
|
+
"description": "The International Maritime Organization number for a specific leg. It can be found as IMO No, IMO number."
|
|
102
|
+
},
|
|
103
|
+
"portOfDischarge": {
|
|
104
|
+
"type": "STRING",
|
|
105
|
+
"nullable": true,
|
|
106
|
+
"description": "The port where the goods are discharged from the vessel. This is the destination port for the shipment. It can be found at POD, Port of Discharge, To, Discharge Port"
|
|
107
|
+
},
|
|
108
|
+
"portOfLoading": {
|
|
109
|
+
"type": "STRING",
|
|
110
|
+
"nullable": true,
|
|
111
|
+
"description": "The port where the goods are loaded onto the vessel. This is the origin port for the shipment. It can be found at POL, Port of Loading, From, Load Port"
|
|
112
|
+
},
|
|
113
|
+
"vesselName": {
|
|
114
|
+
"type": "STRING",
|
|
115
|
+
"nullable": true,
|
|
116
|
+
"description": "The name of the vessel carrying the shipment. It can be found at vessel, INTENDED VESSEL/VOYAGE"
|
|
117
|
+
},
|
|
118
|
+
"voyage": {
|
|
119
|
+
"type": "STRING",
|
|
120
|
+
"nullable": true,
|
|
121
|
+
"description": "The journey or route taken by the vessel for a specific leg. It can be found at Voy. no, INTENDED VESSEL/VOYAGE"
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
},
|
|
125
|
+
"required": [
|
|
126
|
+
"eta",
|
|
127
|
+
"etd",
|
|
128
|
+
"portOfDischarge",
|
|
129
|
+
"portOfLoading",
|
|
130
|
+
"vesselName",
|
|
131
|
+
"voyage"
|
|
132
|
+
]
|
|
133
|
+
},
|
|
134
|
+
"carrierAddress": {
|
|
135
|
+
"type": "STRING",
|
|
136
|
+
"nullable": true,
|
|
137
|
+
"description": "The address of the carrier who provides service and issued the document."
|
|
138
|
+
},
|
|
139
|
+
"carrierName": {
|
|
140
|
+
"type": "STRING",
|
|
141
|
+
"nullable": true,
|
|
142
|
+
"description": "The name of the carrier who issued the document, e.g., Hapag-Lloyd."
|
|
29
143
|
}
|
|
30
144
|
},
|
|
31
|
-
"required": []
|
|
145
|
+
"required": ["bookingNumber", "transportLegs", "containers", "cyCutOff", "vgmCutOff", "siCutOff"]
|
|
32
146
|
}
|
|
@@ -1,6 +1,14 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
<PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
|
|
2
|
+
|
|
3
|
+
<TASK> Your task is to extract data from Booking Confirmation documents as per the given response schema structure. <TASK>
|
|
4
|
+
|
|
5
|
+
<CONTEXT>
|
|
6
|
+
The Freight Forwarding company receives Booking Confirmation from EverGreen Carrier (Shipping Lines) partner.
|
|
7
|
+
These Booking Confirmations contain various details related to booking, container pick up and drop off depot details, vessel details, as well as other transport Legs data.
|
|
8
|
+
They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
|
|
9
|
+
Your role is to accurately extract specific entities from these Booking Confirmations to support efficient processing and accurate record-keeping.
|
|
10
|
+
<CONTEXT>
|
|
11
|
+
|
|
4
12
|
"mblNumber": "Extract the value after the label 'BOOKING NO.'.",
|
|
5
13
|
"gateInReference": "Extract the value after the label 'BOOKING NO.'.",
|
|
6
14
|
"pickUpReference": "Extract the value after the label 'BOOKING NO.'.",
|
|
@@ -14,23 +22,19 @@ your task is to extract the text value of the following entities and page number
|
|
|
14
22
|
"portOfDischarge": "Extract the text after the label 'PORT OF DISCHARGING:' and before 'FINAL DESTINATION'.",
|
|
15
23
|
"pickUpTerminal": "Extract the text after the label 'EMPTY PICK UP AT:' removing any extra spaces or line breaks.",
|
|
16
24
|
"gateInTerminal": "Extract the text after the label 'FULL RETURN TO:' removing any extra spaces or line breaks.",
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
"portOfLoading": "For the first leg, use the extracted 'portOfLoading'.",
|
|
20
|
-
"portOfDischarge": "Extract the text after the label 'T/S PORT OF LOADING:'.",
|
|
21
|
-
"vesselName": "For the first leg, use the extracted 'vesselName'.",
|
|
22
|
-
"voyage": "Voyage is a code of numbers and letters sometimes separated by '-'. For the first leg, use the extracted 'voyage'.",
|
|
23
|
-
"eta": "Extract the date after the label 'ETA DATE' that appears within the section starting with 'FINAL DESTINATION:' and ending with 'T/S PORT OF LOADING:'.",
|
|
24
|
-
"etd": "Extract the date after the label 'ETD DATE' that appears within the section starting with 'PORT OF LOADING:' and ending with 'FINAL DESTINATION:'.",
|
|
25
|
-
|
|
26
|
-
|
|
25
|
+
|
|
26
|
+
"transportLegs":
|
|
27
|
+
"portOfLoading": "For the first leg, use the extracted 'portOfLoading'.",
|
|
28
|
+
"portOfDischarge": "Extract the text after the label 'T/S PORT OF LOADING:'.",
|
|
29
|
+
"vesselName": "For the first leg, use the extracted 'vesselName'.",
|
|
30
|
+
"voyage": "Voyage is a code of numbers and letters sometimes separated by '-'. For the first leg, use the extracted 'voyage'.",
|
|
31
|
+
"eta": "Extract the date after the label 'ETA DATE' that appears within the section starting with 'FINAL DESTINATION:' and ending with 'T/S PORT OF LOADING:'.",
|
|
32
|
+
"etd": "Extract the date after the label 'ETD DATE' that appears within the section starting with 'PORT OF LOADING:' and ending with 'FINAL DESTINATION:'.",
|
|
33
|
+
|
|
34
|
+
|
|
27
35
|
"portOfLoading": "For the second leg, use the 'portOfDischarge' from the previous leg.",
|
|
28
36
|
"portOfDischarge": "For the second leg, use the extracted 'portOfDischarge' from the main extraction.",
|
|
29
37
|
"vesselName": "Extract the text after the label 'EST. CONNECT VSL/VOY:' and before the hyphen and numbers.",
|
|
30
38
|
"voyage": "Voyage is a code of numbers and letters sometimes separated by '-'. Extract the code after the label 'EST. CONNECT VSL/VOY:' and after the vessel name.",
|
|
31
39
|
"eta": "Extract the date after the label 'ETA DATE' that is after the line that contains 'T/S PORT OF LOADING'",
|
|
32
40
|
"etd": "Extract the date after the label 'ETD DATE' that is related to the 'EST. CONNECT VSL/VOY:'. "
|
|
33
|
-
}
|
|
34
|
-
]
|
|
35
|
-
}
|
|
36
|
-
```
|
|
@@ -1,32 +1,146 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
"
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
4
|
+
"bookingNumber": {
|
|
5
|
+
"type": "STRING",
|
|
6
|
+
"nullable": true,
|
|
7
|
+
"description": "A unique identifier assigned to the shipment booking, used for tracking and reference. They are often referred to as 'Booking No.', 'Booking Reference', 'Our Reference', or 'Order Ref'."
|
|
8
|
+
},
|
|
9
|
+
"contractNumber": {
|
|
10
|
+
"type": "STRING",
|
|
11
|
+
"nullable": true,
|
|
12
|
+
"description": "It's a contract number between the carrier and Forto Logistics SE & Co KG."
|
|
13
|
+
},
|
|
14
|
+
"pickUpTerminalCode": {
|
|
15
|
+
"type": "STRING",
|
|
16
|
+
"nullable": true,
|
|
17
|
+
"description": "The specific terminal for cargo pickup during the import shipment."
|
|
18
|
+
},
|
|
19
|
+
"gateInTerminalCode": {
|
|
20
|
+
"type": "STRING",
|
|
21
|
+
"nullable": true,
|
|
22
|
+
"description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., Export terminal delivery address, Export terminal location, or Export terminal name."
|
|
23
|
+
},
|
|
24
|
+
"performaDate": {
|
|
25
|
+
"type": "STRING",
|
|
26
|
+
"nullable": true,
|
|
27
|
+
"description": "The date considered to apply the rates and charges specified in the booking confirmation"
|
|
28
|
+
},
|
|
29
|
+
"cyCutOff": {
|
|
30
|
+
"type": "STRING",
|
|
31
|
+
"nullable": true,
|
|
32
|
+
"description": "The datetime by which the cargo must be delivered to the Container Yard. It can be found with keys FCL delivery cut-off, FCL DG delivery cut-off, CY CUT OFF, CY Closing."
|
|
33
|
+
},
|
|
34
|
+
"gateInReference": {
|
|
35
|
+
"type": "STRING",
|
|
36
|
+
"nullable": true,
|
|
37
|
+
"description": "A reference code for cargo entering the terminal to drop the loaded cargo for Export. Sometimes it can be 'Our Reference'."
|
|
38
|
+
},
|
|
39
|
+
"mblNumber": {
|
|
40
|
+
"type": "STRING",
|
|
41
|
+
"nullable": true,
|
|
42
|
+
"description": "Bill of Lading number (B/L NO.), a document issued by the carrier."
|
|
43
|
+
},
|
|
44
|
+
"pickUpReference": {
|
|
45
|
+
"type": "STRING",
|
|
46
|
+
"nullable": true,
|
|
47
|
+
"description": "A reference code for cargo pickup during the import shipment. Sometimes it can be 'Our Reference'."
|
|
48
|
+
},
|
|
49
|
+
"siCutOff": {
|
|
50
|
+
"type": "STRING",
|
|
51
|
+
"nullable": true,
|
|
52
|
+
"description": "The deadline datetime for submitting the Shipping Instructions (SI) to the carrier. It can be found with keys Shipping Instruction Closing."
|
|
53
|
+
},
|
|
54
|
+
"vgmCutOff": {
|
|
55
|
+
"type": "STRING",
|
|
56
|
+
"nullable": true,
|
|
57
|
+
"description": "The deadline datetime for submitting the Verified Gross Mass (VGM) to the carrier. It can be found with keys VGM DEADLINE, VGM DUE, VGM CUT OFF."
|
|
58
|
+
},
|
|
59
|
+
"containers": {
|
|
60
|
+
"type": "ARRAY",
|
|
61
|
+
"items": {
|
|
62
|
+
"type": "OBJECT",
|
|
63
|
+
"properties": {
|
|
64
|
+
"containerType": {
|
|
65
|
+
"type": "STRING",
|
|
66
|
+
"nullable": true,
|
|
67
|
+
"description": "The size / type of the container, such as 20ft, 40ft, 40HC, 20DC etc under Type/Size column."
|
|
68
|
+
},
|
|
69
|
+
"pickUpDepotCode": {
|
|
70
|
+
"type": "STRING",
|
|
71
|
+
"nullable": true,
|
|
72
|
+
"description": "The depot code where the empty container will be picked up. It is identified as Empty Pick Up Depot or Export Empty Pick Up Depot(s)."
|
|
73
|
+
},
|
|
74
|
+
"dropOffDepotCode": {
|
|
75
|
+
"type": "STRING",
|
|
76
|
+
"nullable": true,
|
|
77
|
+
"description": "The depot code where the empty container will be dropped off."
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
},
|
|
81
|
+
"required": ["containerType", "pickUpDepotCode", "dropOffDepotCode"]
|
|
82
|
+
},
|
|
14
83
|
"transportLegs": {
|
|
15
84
|
"type": "ARRAY",
|
|
16
85
|
"items": {
|
|
17
86
|
"type": "OBJECT",
|
|
18
87
|
"properties": {
|
|
19
|
-
|
|
20
|
-
"
|
|
21
|
-
"
|
|
22
|
-
"
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
"
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
88
|
+
"eta": {
|
|
89
|
+
"type": "STRING",
|
|
90
|
+
"nullable": true,
|
|
91
|
+
"description": "Estimated Time of Arrival (ETA) is the expected date when the shipment will arrive at its destination."
|
|
92
|
+
},
|
|
93
|
+
"etd": {
|
|
94
|
+
"type": "STRING",
|
|
95
|
+
"nullable": true,
|
|
96
|
+
"description": "Estimated Time of Departure (ETD) is the expected date when the shipment will leave the origin port."
|
|
97
|
+
},
|
|
98
|
+
"imoNumber": {
|
|
99
|
+
"type": "STRING",
|
|
100
|
+
"nullable": true,
|
|
101
|
+
"description": "The International Maritime Organization number for a specific leg. It can be found as IMO No, IMO number."
|
|
102
|
+
},
|
|
103
|
+
"portOfDischarge": {
|
|
104
|
+
"type": "STRING",
|
|
105
|
+
"nullable": true,
|
|
106
|
+
"description": "The port where the goods are discharged from the vessel. This is the destination port for the shipment. It can be found at POD, Port of Discharge, To, Discharge Port"
|
|
107
|
+
},
|
|
108
|
+
"portOfLoading": {
|
|
109
|
+
"type": "STRING",
|
|
110
|
+
"nullable": true,
|
|
111
|
+
"description": "The port where the goods are loaded onto the vessel. This is the origin port for the shipment. It can be found at POL, Port of Loading, From, Load Port"
|
|
112
|
+
},
|
|
113
|
+
"vesselName": {
|
|
114
|
+
"type": "STRING",
|
|
115
|
+
"nullable": true,
|
|
116
|
+
"description": "The name of the vessel carrying the shipment. It can be found at vessel, INTENDED VESSEL/VOYAGE"
|
|
117
|
+
},
|
|
118
|
+
"voyage": {
|
|
119
|
+
"type": "STRING",
|
|
120
|
+
"nullable": true,
|
|
121
|
+
"description": "The journey or route taken by the vessel for a specific leg. It can be found at Voy. no, INTENDED VESSEL/VOYAGE"
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
},
|
|
125
|
+
"required": [
|
|
126
|
+
"eta",
|
|
127
|
+
"etd",
|
|
128
|
+
"portOfDischarge",
|
|
129
|
+
"portOfLoading",
|
|
130
|
+
"vesselName",
|
|
131
|
+
"voyage"
|
|
132
|
+
]
|
|
133
|
+
},
|
|
134
|
+
"carrierAddress": {
|
|
135
|
+
"type": "STRING",
|
|
136
|
+
"nullable": true,
|
|
137
|
+
"description": "The address of the carrier who provides service and issued the document."
|
|
138
|
+
},
|
|
139
|
+
"carrierName": {
|
|
140
|
+
"type": "STRING",
|
|
141
|
+
"nullable": true,
|
|
142
|
+
"description": "The name of the carrier who issued the document, e.g., Hapag-Lloyd."
|
|
29
143
|
}
|
|
30
144
|
},
|
|
31
|
-
"required": []
|
|
32
|
-
}
|
|
145
|
+
"required": ["bookingNumber", "transportLegs", "containers", "cyCutOff", "vgmCutOff", "siCutOff"]
|
|
146
|
+
}
|
|
@@ -1,65 +1,59 @@
|
|
|
1
|
-
You are
|
|
1
|
+
<PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
cyCutOff: The deadline for cargo to be delivered to the Container Yard.
|
|
5
|
-
gateInReference: A reference code for cargo entering the terminal.
|
|
6
|
-
gateInTerminal: The specific terminal where cargo is gated in.
|
|
7
|
-
mblNumber: The Master Bill of Lading number.
|
|
8
|
-
pickUpReference: A reference code for cargo pickup.
|
|
9
|
-
pickUpTerminal: The specific terminal for cargo pickup.
|
|
10
|
-
siCutOff: The deadline for submitting shipping instructions.
|
|
11
|
-
vgmCutOff: The deadline for submitting the Verified Gross Mass of the cargo.
|
|
12
|
-
transportLegs:
|
|
13
|
-
eta: The estimated time of arrival for a specific leg.
|
|
14
|
-
etd: The estimated time of departure for a specific leg.
|
|
15
|
-
imoNumber: The International Maritime Organization number for a specific leg.
|
|
16
|
-
portOfDischarge: The port where cargo is unloaded for a specific leg.
|
|
17
|
-
portOfLoading: The port where cargo is loaded for a specific leg.
|
|
18
|
-
vesselName: The name of the vessel for a specific leg.
|
|
19
|
-
voyage: The journey or route taken by the vessel for a specific leg.
|
|
3
|
+
<TASK> Your task is to extract data from Booking Confirmation documents as per the given response schema structure. <TASK>
|
|
20
4
|
|
|
21
|
-
|
|
22
|
-
|
|
5
|
+
<CONTEXT>
|
|
6
|
+
The Freight Forwarding company receives Booking Confirmation from Hapag-Lloyd Carrier (Shipping Lines) partners.
|
|
7
|
+
These Booking Confirmations contain various details related to booking, container pick up and drop off depot details, vessel details, as well as other transport Legs data.
|
|
8
|
+
They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
|
|
9
|
+
Your role is to accurately extract specific entities from these Booking Confirmations to support efficient processing and accurate record-keeping.
|
|
23
10
|
|
|
24
|
-
Keywords for datapoints:
|
|
25
|
-
- bookingNumber: Our Reference
|
|
26
|
-
- cyCutOff: FCL delivery cut-off
|
|
27
|
-
- gateInReference: Our Reference
|
|
28
|
-
- gateInTerminal: Export terminal delivery address
|
|
29
|
-
- mblNumber: BL/SWB No(s).
|
|
30
|
-
- pickUpReference: Export door positioning address(es)
|
|
31
|
-
- siCutOff: shipping instruction closing
|
|
32
|
-
- vgmCutOff: VGM cut-off
|
|
33
|
-
- eta: eta, ETA
|
|
34
|
-
- etd: etd, ETD
|
|
35
|
-
- imoNumber: IMO No, IMO number
|
|
36
|
-
- portOfDischarge: to
|
|
37
|
-
- portOfLoading: from
|
|
38
|
-
- vesselName: vessel
|
|
39
|
-
- voyage: Voy. no
|
|
40
11
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
12
|
+
To provide context on the journey of a container for both Export and Import shipments:
|
|
13
|
+
For Export shipment: An empty container is picked up from a depot (pickUpDepotCode) using a pickUpReference, and goods are loaded into it at a warehouse. Then the loaded container / cargo is transported back to a Container Yard or gateInTerminal before the cyCutOff date for further shipping processes.
|
|
14
|
+
For Import Shipment: The loaded container / cargo arrives at a port of discharge and is then picked up at pickUpTerminal using pickUpReference. After delivery, the empty container is returned to a depot (dropOffDepotCode).
|
|
15
|
+
<CONTEXT>
|
|
44
16
|
|
|
45
|
-
|
|
46
|
-
-
|
|
47
|
-
-
|
|
48
|
-
|
|
49
|
-
-
|
|
50
|
-
-
|
|
51
|
-
-
|
|
52
|
-
-
|
|
53
|
-
|
|
17
|
+
<INSTRUCTIONS>
|
|
18
|
+
- Populate fields as defined in the response schema.
|
|
19
|
+
- Use the data field description to understand the context of the data.
|
|
20
|
+
|
|
21
|
+
- gateInTerminal: The specific terminal where cargo is gated in. It can be found as Export terminal delivery address, PORT OF LOADING (after the slash '/').
|
|
22
|
+
- gateInReference: A reference code for cargo entering the terminal. If not mentioned explicitly and gateInTerminal is extracted, then use bookingNumber as gateInReference.
|
|
23
|
+
- pickUpTerminal: The specific terminal for cargo pickup. It can be found as Import pick up address(es), PORT OF DISCHARGE (after the slash '/').
|
|
24
|
+
- pickUpReference: A reference code for cargo pickup. If not mentioned explicitly and pickUpTerminal is extracted, then use bookingNumber as pickUpReference.
|
|
25
|
+
|
|
26
|
+
- cyCutOff: The deadline for cargo to be delivered to the Container Yard. It can be referred to as FCL delivery cut-off, CY CUT OFF, CY Closing - Latest Return Container Date, Cargo Cut-off deadline
|
|
27
|
+
- siCutOff: The deadline for submitting shipping instructions. It can be referred to as Shipping Instruction closing, SI Cut Off, Shipping Instruction deadline, INTENDED SI CUT-OFF
|
|
28
|
+
- vgmCutOff: The deadline for submitting the Verified Gross Mass of the cargo. It can be referred to as VGM cut-off, VGM Submission Deadline, Verified Gross Mass deadline
|
|
29
|
+
|
|
30
|
+
- carrierName and carrierAddress:
|
|
31
|
+
- Extract the name and address of the carrier who is the main parent company in the document.
|
|
32
|
+
- Example:
|
|
33
|
+
- "Hapag-Lloyd AG" or "Hapag-Lloyd Aktiengesellschaft" for carrierName.
|
|
54
34
|
|
|
55
|
-
|
|
56
|
-
-
|
|
57
|
-
-
|
|
58
|
-
-
|
|
35
|
+
- transportLegs: Multiple Transport Legs entries may exist, capture all instances under "transportLegs". Note that the order of the legs is important.
|
|
36
|
+
- eta: The estimated time of arrival for a specific leg.
|
|
37
|
+
- etd: The estimated time of departure for a specific leg.
|
|
38
|
+
- imoNumber: The International Maritime Organization number for a specific leg.
|
|
39
|
+
- portOfDischarge: The port where cargo is unloaded for a specific leg.
|
|
40
|
+
- portOfLoading: The port where cargo is loaded for a specific leg.
|
|
41
|
+
- vesselName: The name of the vessel for a specific leg.
|
|
42
|
+
- voyage: The journey or route taken by the vessel for a specific leg.
|
|
43
|
+
|
|
44
|
+
- containers: Need to extract Depot details per Container Type. Multiple Containers entries may exist, capture all instances under "containers".
|
|
45
|
+
- containerType: The type of container (e.g., 20FT, 40FT, 20ft, 40ft, 40HC, 20DC, etc...).
|
|
46
|
+
- pickUpDepotCode: The code of the depot where the empty container is picked up.
|
|
47
|
+
- dropOffDepotCode: The code of the depot where the empty container is dropped off.
|
|
48
|
+
|
|
49
|
+
IMPORTANT explanation for the transportLegs part as follows:
|
|
50
|
+
- There is at least one leg in each document.
|
|
51
|
+
- 'eta' must be equal or later than 'etd'!
|
|
52
|
+
- Multiple legs are possible. When there are multiple legs,
|
|
53
|
+
- Sequential Sorting: You must manually re-order legs based on etd then eta, regardless of their order in the source text.
|
|
54
|
+
- The Connectivity Rule: For any sequence of legs, the Destination (Port of Discharge) of the previous leg must match the Origin (Port of Loading) of the following leg.
|
|
55
|
+
- Transhipment Handling: Treat any mentioned "Transhipment Port" as the bridge between two legs (Discharge for Leg A / Loading for Leg B).
|
|
56
|
+
- Timeline Integrity: Ensure a "No Time Travel" policy: The eta of a previous leg must be earlier than or equal to the etd of the following leg.
|
|
57
|
+
- Naming Convention: Look for Port Names followed by abbreviations in parentheses, e.g., "Port Name (ABCDE)".
|
|
59
58
|
|
|
60
|
-
|
|
61
|
-
- The JSON schema must be followed during the extraction.
|
|
62
|
-
- The values must only include text found in the document
|
|
63
|
-
- Do not normalize any entity value.
|
|
64
|
-
- If an entity is not found in the document, keep it empty or np.Nan.
|
|
65
|
-
- Validate the JSON make sure its a valid JSON ! No extra text, no missing comma!
|
|
59
|
+
<INSTRUCTIONS>
|