data-science-document-ai 1.45.2__tar.gz → 1.47.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/PKG-INFO +1 -1
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/pyproject.toml +1 -1
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/constants.py +1 -3
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/docai_processor_config.yaml +0 -21
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/pdf_processing.py +4 -1
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/postprocessing/postprocess_partner_invoice.py +5 -1
- data_science_document_ai-1.47.0/src/prompts/library/arrivalNotice/other/placeholders.json +70 -0
- data_science_document_ai-1.47.0/src/prompts/library/arrivalNotice/other/prompt.txt +40 -0
- data_science_document_ai-1.47.0/src/prompts/library/draftMbl/other/placeholders.json +80 -0
- data_science_document_ai-1.47.0/src/prompts/library/draftMbl/other/prompt.txt +34 -0
- data_science_document_ai-1.47.0/src/prompts/library/finalMbL/other/placeholders.json +80 -0
- data_science_document_ai-1.47.0/src/prompts/library/finalMbL/other/prompt.txt +34 -0
- data_science_document_ai-1.45.2/src/prompts/library/draftMbl/hapag-lloyd/prompt.txt +0 -45
- data_science_document_ai-1.45.2/src/prompts/library/draftMbl/maersk/prompt.txt +0 -19
- data_science_document_ai-1.45.2/src/prompts/library/draftMbl/other/placeholders.json +0 -80
- data_science_document_ai-1.45.2/src/prompts/library/draftMbl/other/prompt.txt +0 -44
- data_science_document_ai-1.45.2/src/prompts/library/finalMbL/hapag-lloyd/prompt.txt +0 -44
- data_science_document_ai-1.45.2/src/prompts/library/finalMbL/maersk/prompt.txt +0 -19
- data_science_document_ai-1.45.2/src/prompts/library/finalMbL/other/prompt.txt +0 -44
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/constants_sandbox.py +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/docai.py +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/excel_processing.py +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/io.py +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/llm.py +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/log_setup.py +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/postprocessing/common.py +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/postprocessing/postprocess_booking_confirmation.py +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/postprocessing/postprocess_commercial_invoice.py +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/evergreen/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/evergreen/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/maersk/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/maersk/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/msc/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/msc/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/oocl/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/oocl/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/other/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/other/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/yangming/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bookingConfirmation/yangming/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bundeskasse/other/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/bundeskasse/other/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/commercialInvoice/other/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/commercialInvoice/other/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/customsAssessment/other/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/customsInvoice/other/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/customsInvoice/other/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/deliveryOrder/other/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/deliveryOrder/other/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/packingList/other/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/packingList/other/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/partnerInvoice/other/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/partnerInvoice/other/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/postprocessing/port_code/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/postprocessing/port_code/prompt_port_code.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/preprocessing/carrier/placeholders.json +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/preprocessing/carrier/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/library/shippingInstruction/other/prompt.txt +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/prompt_library.py +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/setup.py +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/tms.py +0 -0
- {data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "data-science-document-ai"
|
|
3
|
-
version = "1.45.2"
|
|
3
|
+
version = "1.47.0"
|
|
4
4
|
description = "\"Document AI repo for data science\""
|
|
5
5
|
authors = ["Naomi Nguyen <naomi.nguyen@forto.com>", "Kumar Rajendrababu <kumar.rajendrababu@forto.com>", "Igor Tonko <igor.tonko@forto.com>", "Osman Demirel <osman.demirel@forto.com>"]
|
|
6
6
|
packages = [
|
|
@@ -53,9 +53,6 @@ project_parameters = {
|
|
|
53
53
|
"model_selector": {
|
|
54
54
|
"stable": {
|
|
55
55
|
"bookingConfirmation": 1,
|
|
56
|
-
"finalMbL": 0,
|
|
57
|
-
"draftMbl": 0,
|
|
58
|
-
"arrivalNotice": 0,
|
|
59
56
|
"shippingInstruction": 0,
|
|
60
57
|
"customsAssessment": 0,
|
|
61
58
|
"deliveryOrder": 0,
|
|
@@ -87,6 +84,7 @@ project_parameters = {
|
|
|
87
84
|
# Key to combine the LLM results with the Doc Ai results
|
|
88
85
|
"key_to_combine": {
|
|
89
86
|
"bookingConfirmation": ["transportLegs"],
|
|
87
|
+
"arrivalNotice": ["containers"],
|
|
90
88
|
"finalMbL": ["containers"],
|
|
91
89
|
"draftMbl": ["containers"],
|
|
92
90
|
"customsAssessment": ["containers"],
|
{data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/docai_processor_config.yaml
RENAMED
|
@@ -13,20 +13,6 @@ model_config:
|
|
|
13
13
|
author: "igor.tonko@forto.com"
|
|
14
14
|
created_date: ""
|
|
15
15
|
|
|
16
|
-
finalMbL:
|
|
17
|
-
- id: "1eda2f22d64b1b89"
|
|
18
|
-
details:
|
|
19
|
-
display_name: "doc_cap_finalMbL"
|
|
20
|
-
author: "igor.tonko@forto.com"
|
|
21
|
-
created_date: ""
|
|
22
|
-
|
|
23
|
-
draftMbl:
|
|
24
|
-
- id: "1eda2f22d64b1b89"
|
|
25
|
-
details:
|
|
26
|
-
display_name: "doc_cap_finalMbL"
|
|
27
|
-
author: "igor.tonko@forto.com"
|
|
28
|
-
created_date: ""
|
|
29
|
-
|
|
30
16
|
shippingInstruction:
|
|
31
17
|
- id: "c77a0a515d99a8ba"
|
|
32
18
|
details:
|
|
@@ -34,13 +20,6 @@ model_config:
|
|
|
34
20
|
author: "kumar.rajendrababu@forto.com"
|
|
35
21
|
created_date: ""
|
|
36
22
|
|
|
37
|
-
arrivalNotice:
|
|
38
|
-
- id: "748b2e2b9161dcf3"
|
|
39
|
-
details:
|
|
40
|
-
display_name: "doc_cap_arrivalNotice"
|
|
41
|
-
author: "osman.demirel@forto.com"
|
|
42
|
-
created_date: ""
|
|
43
|
-
|
|
44
23
|
customsAssessment:
|
|
45
24
|
- id: "c464a18d82fad9be"
|
|
46
25
|
details:
|
|
@@ -213,7 +213,10 @@ async def process_file_w_llm(params, file_content, input_doc_type, llm_client):
|
|
|
213
213
|
"commercialInvoice",
|
|
214
214
|
"packingList",
|
|
215
215
|
"bookingConfirmation",
|
|
216
|
-
|
|
216
|
+
"arrivalNotice",
|
|
217
|
+
"finalMbL",
|
|
218
|
+
"draftMbl",
|
|
219
|
+
] # Move this to constants or remove after complete migration to LLM
|
|
217
220
|
else generate_schema_structure(params, input_doc_type)
|
|
218
221
|
)
|
|
219
222
|
|
|
@@ -229,8 +229,12 @@ async def process_line_items_batch(
|
|
|
229
229
|
[
|
|
230
230
|
item.update({"reverseChargeSentence": reverse_charge})
|
|
231
231
|
for item in line_items
|
|
232
|
-
if
|
|
232
|
+
if (
|
|
233
|
+
(item.get("itemCode") and item["itemCode"]["formattedValue"] != "CDU")
|
|
234
|
+
or not item.get("itemCode")
|
|
235
|
+
)
|
|
233
236
|
]
|
|
237
|
+
|
|
234
238
|
return line_items
|
|
235
239
|
|
|
236
240
|
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "OBJECT",
|
|
3
|
+
"properties": {
|
|
4
|
+
"bookingNumber": {
|
|
5
|
+
"type": "STRING",
|
|
6
|
+
"nullable": true,
|
|
7
|
+
"description": "The booking number associated with the Arrival Notice document. They are often referred to as 'Booking Number', 'Booking No.', 'Booking Ref.', 'Booking Reference', 'Booking ID', 'carrier's reference' or 'Order Ref'."
|
|
8
|
+
},
|
|
9
|
+
"destinationTerminal": {
|
|
10
|
+
"type": "STRING",
|
|
11
|
+
"nullable": true,
|
|
12
|
+
"description": "The terminal at the destination port where the container will be delivered."
|
|
13
|
+
},
|
|
14
|
+
"eta": {
|
|
15
|
+
"type": "STRING",
|
|
16
|
+
"nullable": true,
|
|
17
|
+
"description": "Estimated Time of Arrival (ETA) is the expected date when the shipment will arrive at its destination."
|
|
18
|
+
},
|
|
19
|
+
"mblNumber": {
|
|
20
|
+
"type": "STRING",
|
|
21
|
+
"nullable": true,
|
|
22
|
+
"description": "Bill of Lading number (B/L NO.), a document issued by the carrier."
|
|
23
|
+
},
|
|
24
|
+
"portOfDischarge": {
|
|
25
|
+
"type": "STRING",
|
|
26
|
+
"nullable": true,
|
|
27
|
+
"description": "The port where the goods are discharged from the vessel. This is the destination port for the shipment."
|
|
28
|
+
},
|
|
29
|
+
"vesselName": {
|
|
30
|
+
"type": "STRING",
|
|
31
|
+
"nullable": true,
|
|
32
|
+
"description": "The name of the vessel carrying the shipment."
|
|
33
|
+
},
|
|
34
|
+
"containers": {
|
|
35
|
+
"type": "ARRAY",
|
|
36
|
+
"items": {
|
|
37
|
+
"type": "OBJECT",
|
|
38
|
+
"properties": {
|
|
39
|
+
"containerNumber": {
|
|
40
|
+
"type": "STRING",
|
|
41
|
+
"nullable": true,
|
|
42
|
+
"description": "The unique identifier for each container. It always starts with 4 capital letters and followed by 7 digits. Example: TEMU7972458."
|
|
43
|
+
},
|
|
44
|
+
"containerType": {
|
|
45
|
+
"type": "STRING",
|
|
46
|
+
"nullable": true,
|
|
47
|
+
"description": "The size of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."
|
|
48
|
+
},
|
|
49
|
+
"grossWeight": {
|
|
50
|
+
"type": "STRING",
|
|
51
|
+
"nullable": true,
|
|
52
|
+
"description": "The gross weight of the container. Usually mentioned as G.W or GW or Gross Weight, etc.."
|
|
53
|
+
},
|
|
54
|
+
"measurements": {
|
|
55
|
+
"type": "STRING",
|
|
56
|
+
"nullable": true,
|
|
57
|
+
"description": "The volume of the container. Usually, it is measured in 'Cubic Meter (cbm)' or dimensions. But volume in 'cbm' is preferred."
|
|
58
|
+
},
|
|
59
|
+
"sealNumber": {
|
|
60
|
+
"type": "STRING",
|
|
61
|
+
"nullable": true,
|
|
62
|
+
"description": "The seal number associated with the container Number. But it is not same as the container number."
|
|
63
|
+
}
|
|
64
|
+
},
|
|
65
|
+
"required": ["containerNumber", "containerType", "grossWeight"]
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
},
|
|
69
|
+
"required": ["bookingNumber", "destinationTerminal", "eta", "portOfDischarge", "vesselName", "containers"]
|
|
70
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
<PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
|
|
2
|
+
|
|
3
|
+
<TASK> Your task is to extract data from Arrival Notice documents as per the given response schema structure. <TASK>
|
|
4
|
+
|
|
5
|
+
<CONTEXT>
|
|
6
|
+
The Freight Forwarding company receives Arrival Notice from shipping lines.
|
|
7
|
+
These documents contain various details related to arrival of a shipment to the port of destination such as container numbers, estimated time of arrival, vessel details and containers information.
|
|
8
|
+
They may be written in different languages such as English, German, Italian and can appear in a variety of formats and layouts.
|
|
9
|
+
Your role is to accurately extract specific entities from these Arrival Notices to support efficient processing and accurate record-keeping.
|
|
10
|
+
<CONTEXT>
|
|
11
|
+
|
|
12
|
+
<INSTRUCTIONS>
|
|
13
|
+
- Populate fields as defined in the response schema.
|
|
14
|
+
- Multiple Containers entries may exist, capture all instances under "containers".
|
|
15
|
+
- Use the data field description to understand the context of the data.
|
|
16
|
+
|
|
17
|
+
- bookingNumber:
|
|
18
|
+
- Booking numbers are unique identifiers for shipments. They are often referred to as "Booking Number", "Booking No.", "Booking Ref.", "Booking Reference", "Booking ID", "SACO-Pos.", "Order Ref", "Unsere Referenz", or "Unsere Position"
|
|
19
|
+
- If there is a unique_id that starts with "S" followed by 6 or 8 digits, it is a shipmentID, not a bookingNumber.
|
|
20
|
+
|
|
21
|
+
- destinationTerminal:
|
|
22
|
+
- Destination Terminal can also be referred to as "Destination Terminal", "Pickup Location", "Delivery Location", "Delivery Terminal", "Empfangsort", "Entladeort", or "Abladestelle".
|
|
23
|
+
|
|
24
|
+
- mblNumber:
|
|
25
|
+
- Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", "HBL No.", or "M-AWB Nummer".
|
|
26
|
+
- Bill of Lading Number is known as mblNumber. Not a shipmentID even if it starts with "S".
|
|
27
|
+
- mblNumber from Hapag-Lloyd always starts with "HLC" (e.g., "HLCUTS12303AWNT3") and is labelled as "SEA WAYBILL" or "SWB-NR.".
|
|
28
|
+
|
|
29
|
+
- eta:
|
|
30
|
+
- Estimated Time of Arrival (ETA) is the expected date and time when the shipment will arrive at the destination port.
|
|
31
|
+
- It can be referred to as "ETA", "Estimated Arrival", "Voraussichtliche Ankunft", "Ankunftszeit", "Arrivo", "Due to arrive at Terminal"
|
|
32
|
+
|
|
33
|
+
- vesselName:
|
|
34
|
+
- Vessel Name is the name of the ship carrying the cargo. It can be referred to as "Vessel", "Ship Name", "Schiff", "Schiffsname", "Nave", or "Vessel/Flight No.".
|
|
35
|
+
|
|
36
|
+
- containers: Details of each container on the arrival notice. Make sure to extract each container information separately.
|
|
37
|
+
- containerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892).
|
|
38
|
+
- sealNumber: Seal numbers are unique identifiers for shipping seals. They are usually mentioned as seal numbers in the document but they are definitely not container numbers.
|
|
39
|
+
|
|
40
|
+
<INSTRUCTIONS>
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "OBJECT",
|
|
3
|
+
"properties": {
|
|
4
|
+
"blNumber": {
|
|
5
|
+
"type": "string",
|
|
6
|
+
"nullable": true,
|
|
7
|
+
"description": "The Bill of Lading number associated with the document. Commonly known as 'Bill of Lading Number', 'BILL OF LADING NO.', 'BL Number', 'BL No.', 'B/L No.', 'BL-Nr.', 'B/L', 'HBL No.', or 'M-AWB Nummer' in the document."
|
|
8
|
+
},
|
|
9
|
+
"bookingNumber": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"nullable": true,
|
|
12
|
+
"description": " Booking numbers are unique identifiers for shipments. They are often referred to as 'Booking Number', 'Booking No.', 'Booking Ref.', 'Booking Reference', 'Booking ID', 'SACO-Pos.' or 'Order Ref'"
|
|
13
|
+
},
|
|
14
|
+
"containers": {
|
|
15
|
+
"type": "ARRAY",
|
|
16
|
+
"items": {
|
|
17
|
+
"type": "OBJECT",
|
|
18
|
+
"properties": {
|
|
19
|
+
"containerNumber": {
|
|
20
|
+
"type": "string",
|
|
21
|
+
"nullable": true,
|
|
22
|
+
"description": "The container number associated with the document. They MUST consist of 4 letters followed by 7 digits (e.g., 'CMAU1234567', 'BMOU 575538/3', 'XLXU 1277652'). It can be found in the document as 'Container No.', 'Container Number', 'Cont. No.', 'Cont Nr.', 'Seefrachtcontainer-Nr.', or 'Containernummer'."
|
|
23
|
+
},
|
|
24
|
+
"containerType": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"nullable": true,
|
|
27
|
+
"description": "The size or Type of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."
|
|
28
|
+
},
|
|
29
|
+
"grossWeight": {
|
|
30
|
+
"type": "string",
|
|
31
|
+
"nullable": true,
|
|
32
|
+
"description": "The gross weight of the container. Usually mentioned as G.W or GW or Gross Weight, etc.."
|
|
33
|
+
},
|
|
34
|
+
"measurements": {
|
|
35
|
+
"type": "string",
|
|
36
|
+
"nullable": true,
|
|
37
|
+
"description": "The volume of the Container. Usually, it is measured in 'Cubic Meter (cbm)' or dimensions. But volume in 'cbm' is preferred."
|
|
38
|
+
},
|
|
39
|
+
"packageQuantity": {
|
|
40
|
+
"type": "string",
|
|
41
|
+
"nullable": true,
|
|
42
|
+
"description": "The quantity of the goods in the container. Usually quantity is in pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets (PLT) >> Cartons (CTNS) >> Pieces (PCS). Extract the Larger packaging types that will have a lower count."
|
|
43
|
+
},
|
|
44
|
+
"packageType": {
|
|
45
|
+
"type": "string",
|
|
46
|
+
"nullable": true,
|
|
47
|
+
"description": "The packaging type is the unit of packageQuantity. Example; pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc. Sometimes, the packaging type is available in the column name of the quantityShipped."
|
|
48
|
+
},
|
|
49
|
+
"sealNumber": {
|
|
50
|
+
"type": "string",
|
|
51
|
+
"nullable": true,
|
|
52
|
+
"description": "The seal number associated with the container Number and you can find like seal number, seal nos., shipper seal, seal.. But it is not same as the container number."
|
|
53
|
+
}
|
|
54
|
+
},
|
|
55
|
+
"required": ["containerNumber", "containerType", "grossWeight", "measurements", "packageQuantity", "packageType", "sealNumber"]
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
"portOfDischarge": {
|
|
59
|
+
"type": "string",
|
|
60
|
+
"nullable": true,
|
|
61
|
+
"description": "The port where the containers are discharged from the vessel. This is the destination port for the shipment. Find information like port of discharge, pod, delivery, to."
|
|
62
|
+
},
|
|
63
|
+
"portOfLoading": {
|
|
64
|
+
"type": "string",
|
|
65
|
+
"nullable": true,
|
|
66
|
+
"description": "The origin port where the containers are loaded onto the vessel. Find information like 'Ladehafen', 'Port of Loading', 'pol', or 'from.' in the document."
|
|
67
|
+
},
|
|
68
|
+
"voyage": {
|
|
69
|
+
"type": "string",
|
|
70
|
+
"nullable": true,
|
|
71
|
+
"description": "The unique voyage number or identifier assigned to a vessel’s specific journey. This typically corresponds to the scheduled sailing associated with the shipment and can often be found near vessel information on shipping documents, labelled with terms such as voyage, voy. no, voyage-no."
|
|
72
|
+
},
|
|
73
|
+
"vessel": {
|
|
74
|
+
"type": "string",
|
|
75
|
+
"nullable": true,
|
|
76
|
+
"description": "The name of the vessel carrying the container or shipment"
|
|
77
|
+
}
|
|
78
|
+
},
|
|
79
|
+
"required": ["blNumber", "bookingNumber", "containers", "portOfDischarge", "portOfLoading", "voyage", "vessel"]
|
|
80
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
<PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
|
|
2
|
+
|
|
3
|
+
<TASK> Your task is to extract data from draftMBL documents as per the given response schema structure. <TASK>
|
|
4
|
+
|
|
5
|
+
<CONTEXT>
|
|
6
|
+
The Freight Forwarding company receives draftMBL from Carrier (Shipping Lines) partners.
|
|
7
|
+
These documents contain various details related to shipments, booking details, vessel details, POL, POD and containers data.
|
|
8
|
+
They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
|
|
9
|
+
Your role is to accurately extract specific entities from these draftMBLs to support efficient processing and accurate record-keeping.
|
|
10
|
+
<CONTEXT>
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
<INSTRUCTIONS>
|
|
14
|
+
- Populate fields as defined in the response schema.
|
|
15
|
+
- Multiple Containers entries may exist, capture all instances under "containers".
|
|
16
|
+
- Use the data field description to understand the context of the data.
|
|
17
|
+
|
|
18
|
+
- bookingNumber:
|
|
19
|
+
- Booking numbers are unique identifiers for shipments. They are often referred to as "Booking Number", "Booking No.", "Booking Ref.", "Booking Reference", "Booking ID", "SACO-Pos.", "Order Ref", "Unsere Referenz", or "Unsere Position"
|
|
20
|
+
- If there is a unique_id that starts with "S" followed by 6 or 8 digits, it is a shipmentID, not a bookingNumber.
|
|
21
|
+
|
|
22
|
+
- blNumber:
|
|
23
|
+
- Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", "HBL No.", or "M-AWB Nummer".
|
|
24
|
+
- Bill of Lading Number is known as blNumber. Not a shipmentID even if it starts with "S".
|
|
25
|
+
- blNumber from Hapag-Lloyd always starts with "HLC" (e.g., "HLCUTS12303AWNT3") and is labelled as "SEA WAYBILL" or "SWB-NR.".
|
|
26
|
+
|
|
27
|
+
- vessel:
|
|
28
|
+
- Vessel Name is the name of the ship carrying the cargo. It can be referred to as "Vessel", "Ship Name", "Schiff", "Schiffsname", "Nave", or "Vessel/Flight No.".
|
|
29
|
+
|
|
30
|
+
- containers: Details of each container on the draftMBL. Make sure to extract each container information separately.
|
|
31
|
+
- containerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892).
|
|
32
|
+
- sealNumber: Seal numbers are unique identifiers for shipping seals. They are usually mentioned as seal numbers in the document but they are definitely not container numbers.
|
|
33
|
+
|
|
34
|
+
<INSTRUCTIONS>
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "OBJECT",
|
|
3
|
+
"properties": {
|
|
4
|
+
"blNumber": {
|
|
5
|
+
"type": "string",
|
|
6
|
+
"nullable": true,
|
|
7
|
+
"description": "The Bill of Lading number associated with the document. Commonly known as 'Bill of Lading Number', 'BILL OF LADING NO.', 'BL Number', 'BL No.', 'B/L No.', 'BL-Nr.', 'B/L', 'HBL No.', or 'M-AWB Nummer' in the document."
|
|
8
|
+
},
|
|
9
|
+
"bookingNumber": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"nullable": true,
|
|
12
|
+
"description": " Booking numbers are unique identifiers for shipments. They are often referred to as 'Booking Number', 'Booking No.', 'Booking Ref.', 'Booking Reference', 'Booking ID', 'SACO-Pos.' or 'Order Ref'"
|
|
13
|
+
},
|
|
14
|
+
"containers": {
|
|
15
|
+
"type": "ARRAY",
|
|
16
|
+
"items": {
|
|
17
|
+
"type": "OBJECT",
|
|
18
|
+
"properties": {
|
|
19
|
+
"containerNumber": {
|
|
20
|
+
"type": "string",
|
|
21
|
+
"nullable": true,
|
|
22
|
+
"description": "The container number associated with the document. They MUST consist of 4 letters followed by 7 digits (e.g., 'CMAU1234567', 'BMOU 575538/3', 'XLXU 1277652'). It can be found in the document as 'Container No.', 'Container Number', 'Cont. No.', 'Cont Nr.', 'Seefrachtcontainer-Nr.', or 'Containernummer'."
|
|
23
|
+
},
|
|
24
|
+
"containerType": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"nullable": true,
|
|
27
|
+
"description": "The size or Type of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."
|
|
28
|
+
},
|
|
29
|
+
"grossWeight": {
|
|
30
|
+
"type": "string",
|
|
31
|
+
"nullable": true,
|
|
32
|
+
"description": "The gross weight of the container. Usually mentioned as G.W or GW or Gross Weight, etc.."
|
|
33
|
+
},
|
|
34
|
+
"measurements": {
|
|
35
|
+
"type": "string",
|
|
36
|
+
"nullable": true,
|
|
37
|
+
"description": "The volume of the Container. Usually, it is measured in 'Cubic Meter (cbm)' or dimensions. But volume in 'cbm' is preferred."
|
|
38
|
+
},
|
|
39
|
+
"packageQuantity": {
|
|
40
|
+
"type": "string",
|
|
41
|
+
"nullable": true,
|
|
42
|
+
"description": "The quantity of the goods in the container. Usually quantity is in pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets (PLT) >> Cartons (CTNS) >> Pieces (PCS). Extract the Larger packaging types that will have a lower count."
|
|
43
|
+
},
|
|
44
|
+
"packageType": {
|
|
45
|
+
"type": "string",
|
|
46
|
+
"nullable": true,
|
|
47
|
+
"description": "The packaging type is the unit of packageQuantity. Example; pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc. Sometimes, the packaging type is available in the column name of the quantityShipped."
|
|
48
|
+
},
|
|
49
|
+
"sealNumber": {
|
|
50
|
+
"type": "string",
|
|
51
|
+
"nullable": true,
|
|
52
|
+
"description": "The seal number associated with the container Number and you can find like seal number, seal nos., shipper seal, seal.. But it is not same as the container number."
|
|
53
|
+
}
|
|
54
|
+
},
|
|
55
|
+
"required": ["containerNumber", "containerType", "grossWeight", "measurements", "packageQuantity", "packageType", "sealNumber"]
|
|
56
|
+
}
|
|
57
|
+
},
|
|
58
|
+
"portOfDischarge": {
|
|
59
|
+
"type": "string",
|
|
60
|
+
"nullable": true,
|
|
61
|
+
"description": "The port where the containers are discharged from the vessel. This is the destination port for the shipment. Find information like port of discharge, pod, delivery, to."
|
|
62
|
+
},
|
|
63
|
+
"portOfLoading": {
|
|
64
|
+
"type": "string",
|
|
65
|
+
"nullable": true,
|
|
66
|
+
"description": "The origin port where the containers are loaded onto the vessel. Find information like 'Ladehafen', 'Port of Loading', 'pol', or 'from.' in the document."
|
|
67
|
+
},
|
|
68
|
+
"voyage": {
|
|
69
|
+
"type": "string",
|
|
70
|
+
"nullable": true,
|
|
71
|
+
"description": "The unique voyage number or identifier assigned to a vessel’s specific journey. This typically corresponds to the scheduled sailing associated with the shipment and can often be found near vessel information on shipping documents, labelled with terms such as voyage, voy. no, voyage-no."
|
|
72
|
+
},
|
|
73
|
+
"vessel": {
|
|
74
|
+
"type": "string",
|
|
75
|
+
"nullable": true,
|
|
76
|
+
"description": "The name of the vessel carrying the container or shipment"
|
|
77
|
+
}
|
|
78
|
+
},
|
|
79
|
+
"required": ["blNumber", "bookingNumber", "containers", "portOfDischarge", "portOfLoading", "voyage", "vessel"]
|
|
80
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
<PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
|
|
2
|
+
|
|
3
|
+
<TASK> Your task is to extract data from finalMBL documents as per the given response schema structure. <TASK>
|
|
4
|
+
|
|
5
|
+
<CONTEXT>
|
|
6
|
+
The Freight Forwarding company receives finalMBL from Carrier (Shipping Lines) partners.
|
|
7
|
+
These documents contain various details related to shipments, booking details, vessel details, POL, POD and containers data.
|
|
8
|
+
They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
|
|
9
|
+
Your role is to accurately extract specific entities from these finalMBLs to support efficient processing and accurate record-keeping.
|
|
10
|
+
<CONTEXT>
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
<INSTRUCTIONS>
|
|
14
|
+
- Populate fields as defined in the response schema.
|
|
15
|
+
- Multiple Containers entries may exist, capture all instances under "containers".
|
|
16
|
+
- Use the data field description to understand the context of the data.
|
|
17
|
+
|
|
18
|
+
- bookingNumber:
|
|
19
|
+
- Booking numbers are unique identifiers for shipments. They are often referred to as "Booking Number", "Booking No.", "Booking Ref.", "Booking Reference", "Booking ID", "SACO-Pos.", "Order Ref", "Unsere Referenz", or "Unsere Position"
|
|
20
|
+
- If there is a unique_id that starts with "S" followed by 6 or 8 digits, it is a shipmentID, not a bookingNumber.
|
|
21
|
+
|
|
22
|
+
- blNumber:
|
|
23
|
+
- Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", "HBL No.", or "M-AWB Nummer".
|
|
24
|
+
- Bill of Lading Number is known as blNumber. Not a shipmentID even if it starts with "S".
|
|
25
|
+
- blNumber from Hapag-Lloyd always starts with "HLC" (e.g., "HLCUTS12303AWNT3") and is labelled as "SEA WAYBILL" or "SWB-NR.".
|
|
26
|
+
|
|
27
|
+
- vessel:
|
|
28
|
+
- Vessel Name is the name of the ship carrying the cargo. It can be referred to as "Vessel", "Ship Name", "Schiff", "Schiffsname", "Nave", or "Vessel/Flight No.".
|
|
29
|
+
|
|
30
|
+
- containers: Details of each container on the finalMBL. Make sure to extract each container information separately.
|
|
31
|
+
- containerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892).
|
|
32
|
+
- sealNumber: Seal numbers are unique identifiers for shipping seals. They are usually mentioned as seal numbers in the document but they are definitely not container numbers.
|
|
33
|
+
|
|
34
|
+
<INSTRUCTIONS>
|
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
You are a document entity extraction specialist. Given a document, extract the datapoints explained below.
|
|
2
|
-
|
|
3
|
-
blNumber: Bill of Lading number.
|
|
4
|
-
voyage: The journey or route code taken by the vessel.
|
|
5
|
-
portOfLoading: The port where cargo is loaded.
|
|
6
|
-
portOfDischarge: The port where cargo is unloaded.
|
|
7
|
-
bookingNumber: A unique identifier for the booking.
|
|
8
|
-
containers:
|
|
9
|
-
containerType: Type of the shipping container, usually related to its size.
|
|
10
|
-
grossWeight: Total weight of the cargo, including the tare weight of the container.
|
|
11
|
-
measurements: Dimensions of the cargo (length, width, height) for freight calculations.
|
|
12
|
-
packageQuantity: package quantity.
|
|
13
|
-
packageType: Type of packaging used (e.g., cartons, pallets, barrels).
|
|
14
|
-
containerNumber: Unique ID for tracking the shipping container.
|
|
15
|
-
sealNumber: Number of the container's seal.
|
|
16
|
-
vessel: The name of the vessel.
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
Your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
Keywords for datapoints:
|
|
23
|
-
- blNumber: Bill of Lading number, bill of landing no., swb-no., b/l no.
|
|
24
|
-
- voyage: voyage, voy. no, voyage-no.
|
|
25
|
-
- portOfLoading: port of loading, pol, from
|
|
26
|
-
- portOfDischarge: port of discharge, pod, delivery, to
|
|
27
|
-
- bookingNumber: Our reference, booking no., carrier reference
|
|
28
|
-
- containers:
|
|
29
|
-
- containerType: x 40' container
|
|
30
|
-
- grossWeight: gross weight
|
|
31
|
-
- measurements: Dimensions of the cargo (length, width, height) for freight calculations
|
|
32
|
-
- packageQuantity: package quantity, number and kind of packages
|
|
33
|
-
- packageType: Type of packaging used (e.g., cartons, pallets, barrels), number and kind of packages, description of goods
|
|
34
|
-
- containerNumber: container number, cntr. nos., it is a combination of 4 letters and 7 digits separated by space right above 'SEAL'
|
|
35
|
-
- sealNumber: seal number, seal nos., shipper seal, seal.
|
|
36
|
-
- vessel: vessel
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
You must apply the following rules:
|
|
40
|
-
- The JSON schema must be followed during the extraction.
|
|
41
|
-
- The values must only include text found in the document
|
|
42
|
-
- Do not normalize any entity value.
|
|
43
|
-
- If 'sealNumber' is not found don't add it to the result.
|
|
44
|
-
- Validate the JSON: make sure it is valid JSON! No extra text, no missing commas!
|
|
45
|
-
- Add an escape character (backslash) in front of all quotes in values
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
Extract the following information from the sea waybill document.
|
|
2
|
-
Your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
**blNumber:** Find the value labeled as "B/L No.".
|
|
6
|
-
**voyage:** Get the "Voyage No." value.
|
|
7
|
-
**portOfLoading:** Find the value in the "Port of Loading" field.
|
|
8
|
-
**portOfDischarge:** Extract the text from the "Port of Discharge" field.
|
|
9
|
-
**bookingNumber:** Look for the value associated with "Booking No.".
|
|
10
|
-
**containers:**
|
|
11
|
-
The document may contain multiple containers listed within the section "PARTICULARS FURNISHED BY SHIPPER" under the line starting with "Kind of Packages; Description of goods; Marks and Numbers; Container No./Seal No.". Look for container information that starts with a line that includes "Container Said to Contain" and continues until the next instance of "Container Said to Contain" or the end of the section. For each container, extract the following:
|
|
12
|
-
* **containerType:** Extract the container type information. It is usually a combination of numbers, the word "DRY", and may include additional characters. It is found on the same line as the container number.
|
|
13
|
-
* **grossWeight:** Find the value corresponding to the "gross weight" of the container. It is usually represented in KGS and is found on the same line as the container number.
|
|
14
|
-
* **measurements:** Find the value corresponding to the "measurement" of the container. It is usually represented in CBM and is found on the same line as the container number.
|
|
15
|
-
* **packageQuantity:** Extract the "package quantity" information. It is usually a whole number and precedes the text "PACKAGE". All container information will be on the same line as the "package quantity".
|
|
16
|
-
* **packageType:** Extract the value from the "Kind of Packages" field.
|
|
17
|
-
* **containerNumber:** Find the container number. It starts with "MRKU" and is followed by a sequence of digits. It is found on the same line as the text "Container Said to Contain".
|
|
18
|
-
* **sealNumber:** Get the "Shipper Seal" value. It follows after the text "Shipper Seal :".
|
|
19
|
-
**vessel:** Extract the text from the field "Vessel".
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"type": "OBJECT",
|
|
3
|
-
"properties": {
|
|
4
|
-
"blNumber": {
|
|
5
|
-
"type": "string",
|
|
6
|
-
"nullable": true,
|
|
7
|
-
"description": ""
|
|
8
|
-
},
|
|
9
|
-
"voyage": {
|
|
10
|
-
"type": "string",
|
|
11
|
-
"nullable": true,
|
|
12
|
-
"description": ""
|
|
13
|
-
},
|
|
14
|
-
"portOfLoading": {
|
|
15
|
-
"type": "string",
|
|
16
|
-
"nullable": true,
|
|
17
|
-
"description": ""
|
|
18
|
-
},
|
|
19
|
-
"portOfDischarge": {
|
|
20
|
-
"type": "string",
|
|
21
|
-
"nullable": true,
|
|
22
|
-
"description": ""
|
|
23
|
-
},
|
|
24
|
-
"bookingNumber": {
|
|
25
|
-
"type": "string",
|
|
26
|
-
"nullable": true,
|
|
27
|
-
"description": ""
|
|
28
|
-
},
|
|
29
|
-
"containers": {
|
|
30
|
-
"type": "ARRAY",
|
|
31
|
-
"items": {
|
|
32
|
-
"type": "OBJECT",
|
|
33
|
-
"properties": {
|
|
34
|
-
"containerType": {
|
|
35
|
-
"type": "string",
|
|
36
|
-
"nullable": true,
|
|
37
|
-
"description": ""
|
|
38
|
-
},
|
|
39
|
-
"grossWeight": {
|
|
40
|
-
"type": "string",
|
|
41
|
-
"nullable": true,
|
|
42
|
-
"description": ""
|
|
43
|
-
},
|
|
44
|
-
"measurements": {
|
|
45
|
-
"type": "string",
|
|
46
|
-
"nullable": true,
|
|
47
|
-
"description": ""
|
|
48
|
-
},
|
|
49
|
-
"packageQuantity": {
|
|
50
|
-
"type": "string",
|
|
51
|
-
"nullable": true,
|
|
52
|
-
"description": ""
|
|
53
|
-
},
|
|
54
|
-
"packageType": {
|
|
55
|
-
"type": "string",
|
|
56
|
-
"nullable": true,
|
|
57
|
-
"description": ""
|
|
58
|
-
},
|
|
59
|
-
"containerNumber": {
|
|
60
|
-
"type": "string",
|
|
61
|
-
"nullable": true,
|
|
62
|
-
"description": ""
|
|
63
|
-
},
|
|
64
|
-
"sealNumber": {
|
|
65
|
-
"type": "string",
|
|
66
|
-
"nullable": true,
|
|
67
|
-
"description": ""
|
|
68
|
-
}
|
|
69
|
-
},
|
|
70
|
-
"required": []
|
|
71
|
-
}
|
|
72
|
-
},
|
|
73
|
-
"vessel": {
|
|
74
|
-
"type": "string",
|
|
75
|
-
"nullable": true,
|
|
76
|
-
"description": ""
|
|
77
|
-
}
|
|
78
|
-
},
|
|
79
|
-
"required": []
|
|
80
|
-
}
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
You are a document entity extraction specialist. Given a document, extract the datapoints explained below.
|
|
2
|
-
|
|
3
|
-
blNumber: Bill of Lading number.
|
|
4
|
-
voyage: The journey or route code taken by the vessel.
|
|
5
|
-
portOfLoading: The port where cargo is loaded.
|
|
6
|
-
portOfDischarge: The port where cargo is unloaded.
|
|
7
|
-
bookingNumber: A unique identifier for the booking.
|
|
8
|
-
containers:
|
|
9
|
-
containerType: Type of the shipping container, usually related to its size.
|
|
10
|
-
grossWeight: Total weight of the cargo, including the tare weight of the container.
|
|
11
|
-
measurements: Dimensions of the cargo (length, width, height) for freight calculations.
|
|
12
|
-
packageQuantity: package quantity.
|
|
13
|
-
packageType: Type of packaging used (e.g., cartons, pallets, barrels).
|
|
14
|
-
containerNumber: Unique ID for tracking the shipping container.
|
|
15
|
-
sealNumber: Number of the container's seal.
|
|
16
|
-
vessel: The name of the vessel.
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
Your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
20
|
-
|
|
21
|
-
Keywords for datapoints:
|
|
22
|
-
- blNumber: Bill of Lading number, bill of landing no., swb-no., b/l no.
|
|
23
|
-
- voyage: voyage, voy. no, voyage-no.
|
|
24
|
-
- portOfLoading: port of loading, pol, from
|
|
25
|
-
- portOfDischarge: port of discharge, pod, delivery, to
|
|
26
|
-
- bookingNumber: Our reference, booking no., carrier reference
|
|
27
|
-
- containers:
|
|
28
|
-
- containerType: x 40' container
|
|
29
|
-
- grossWeight: gross weight
|
|
30
|
-
- measurements: Dimensions of the cargo (length, width, height) for freight calculations
|
|
31
|
-
- packageQuantity: package quantity, number and kind of packages
|
|
32
|
-
- packageType: Type of packaging used (e.g., cartons, pallets, barrels), number and kind of packages, description of goods
|
|
33
|
-
- containerNumber: container number, cntr. nos.
|
|
34
|
-
- sealNumber: seal number, seal nos., shipper seal, seal.
|
|
35
|
-
- vessel: vessel
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
You must apply the following rules:
|
|
39
|
-
- The JSON schema must be followed during the extraction.
|
|
40
|
-
- The values must only include text found in the document
|
|
41
|
-
- Do not normalize any entity value.
|
|
42
|
-
- If 'sealNumber' is not found don't add it to the result.
|
|
43
|
-
- Validate the JSON: make sure it is valid JSON! No extra text, no missing commas!
|
|
44
|
-
- Add an escape character (backslash) in front of all quotes in values
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
You are a document entity extraction specialist. Given a document, extract the datapoints explained below.
|
|
2
|
-
|
|
3
|
-
blNumber: Bill of Lading number.
|
|
4
|
-
voyage: The journey or route code taken by the vessel.
|
|
5
|
-
portOfLoading: The port where cargo is loaded.
|
|
6
|
-
portOfDischarge: The port where cargo is unloaded.
|
|
7
|
-
bookingNumber: A unique identifier for the booking.
|
|
8
|
-
containers:
|
|
9
|
-
containerType: Type of the shipping container, usually related to its size.
|
|
10
|
-
grossWeight: Total weight of the cargo, including the tare weight of the container.
|
|
11
|
-
measurements: Dimensions of the cargo (length, width, height) for freight calculations.
|
|
12
|
-
packageQuantity: package quantity.
|
|
13
|
-
packageType: Type of packaging used (e.g., cartons, pallets, barrels).
|
|
14
|
-
containerNumber: Unique ID for tracking the shipping container.
|
|
15
|
-
sealNumber: Number of the container's seal.
|
|
16
|
-
vessel: The name of the vessel.
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
Your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
20
|
-
|
|
21
|
-
Keywords for datapoints:
|
|
22
|
-
- blNumber: Bill of Lading number, bill of landing no., swb-no., b/l no.
|
|
23
|
-
- voyage: voyage, voy. no, voyage-no.
|
|
24
|
-
- portOfLoading: port of loading, pol, from
|
|
25
|
-
- portOfDischarge: port of discharge, pod, delivery, to
|
|
26
|
-
- bookingNumber: Our reference, booking no., carrier reference
|
|
27
|
-
- containers:
|
|
28
|
-
- containerType: x 40' container
|
|
29
|
-
- grossWeight: gross weight
|
|
30
|
-
- measurements: Dimensions of the cargo (length, width, height) for freight calculations
|
|
31
|
-
- packageQuantity: package quantity, number and kind of packages
|
|
32
|
-
- packageType: Type of packaging used (e.g., cartons, pallets, barrels), number and kind of packages, description of goods
|
|
33
|
-
- containerNumber: container number, cntr. nos., it is a combination of 4 letters and 7 digits separated by space right above 'SEAL'
|
|
34
|
-
- sealNumber: seal number, seal nos., shipper seal, seal.
|
|
35
|
-
- vessel: vessel
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
You must apply the following rules:
|
|
39
|
-
- The JSON schema must be followed during the extraction.
|
|
40
|
-
- The values must only include text found in the document
|
|
41
|
-
- Do not normalize any entity value.
|
|
42
|
-
- If 'sealNumber' is not found don't add it to the result.
|
|
43
|
-
- Validate the JSON: make sure it is valid JSON! No extra text, no missing commas!
|
|
44
|
-
- Add an escape character (backslash) in front of all quotes in values
|
|
@@ -1,19 +0,0 @@
|
|
|
1
|
-
Extract the following information from the sea waybill document.
|
|
2
|
-
Your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
**blNumber:** Find the value labeled as "B/L No.".
|
|
6
|
-
**voyage:** Get the "Voyage No." value.
|
|
7
|
-
**portOfLoading:** Find the value in the "Port of Loading" field.
|
|
8
|
-
**portOfDischarge:** Extract the text from the "Port of Discharge" field.
|
|
9
|
-
**bookingNumber:** Look for the value associated with "Booking No.".
|
|
10
|
-
**containers:**
|
|
11
|
-
The document may contain multiple containers listed within the section "PARTICULARS FURNISHED BY SHIPPER" under the line starting with "Kind of Packages; Description of goods; Marks and Numbers; Container No./Seal No.". Look for container information that starts with a line that includes "Container Said to Contain" and continues until the next instance of "Container Said to Contain" or the end of the section. For each container, extract the following:
|
|
12
|
-
* **containerType:** Extract the container type information. It is usually a combination of numbers, the word "DRY", and may include additional characters. It is found on the same line as the container number.
|
|
13
|
-
* **grossWeight:** Find the value corresponding to the "gross weight" of the container. It is usually represented in KGS and is found on the same line as the container number.
|
|
14
|
-
* **measurements:** Find the value corresponding to the "measurement" of the container. It is usually represented in CBM and is found on the same line as the container number.
|
|
15
|
-
* **packageQuantity:** Extract the "package quantity" information. It is usually a whole number and precedes the text "PACKAGE". All container information will be on the same line as the "package quantity".
|
|
16
|
-
* **packageType:** Extract the value from the "Kind of Packages" field.
|
|
17
|
-
* **containerNumber:** Find the container number. It starts with "MRKU" and is followed by a sequence of digits. It is found on the same line as the text "Container Said to Contain".
|
|
18
|
-
* **sealNumber:** Get the "Shipper Seal" value. It follows after the text "Shipper Seal :".
|
|
19
|
-
**vessel:** Extract the text from the field "Vessel".
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
You are a document entity extraction specialist. Given a document, extract the datapoints explained below.
|
|
2
|
-
|
|
3
|
-
blNumber: Bill of Lading number.
|
|
4
|
-
voyage: The journey or route code taken by the vessel.
|
|
5
|
-
portOfLoading: The port where cargo is loaded.
|
|
6
|
-
portOfDischarge: The port where cargo is unloaded.
|
|
7
|
-
bookingNumber: A unique identifier for the booking.
|
|
8
|
-
containers:
|
|
9
|
-
containerType: Type of the shipping container, usually related to its size.
|
|
10
|
-
grossWeight: Total weight of the cargo, including the tare weight of the container.
|
|
11
|
-
measurements: Dimensions of the cargo (length, width, height) for freight calculations.
|
|
12
|
-
packageQuantity: package quantity.
|
|
13
|
-
packageType: Type of packaging used (e.g., cartons, pallets, barrels).
|
|
14
|
-
containerNumber: Unique ID for tracking the shipping container.
|
|
15
|
-
sealNumber: Number of the container's seal.
|
|
16
|
-
vessel: The name of the vessel.
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
Your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
20
|
-
|
|
21
|
-
Keywords for datapoints:
|
|
22
|
-
- blNumber: Bill of Lading number, bill of landing no., swb-no., b/l no.
|
|
23
|
-
- voyage: voyage, voy. no, voyage-no.
|
|
24
|
-
- portOfLoading: port of loading, pol, from
|
|
25
|
-
- portOfDischarge: port of discharge, pod, delivery, to
|
|
26
|
-
- bookingNumber: Our reference, booking no., carrier reference
|
|
27
|
-
- containers:
|
|
28
|
-
- containerType: x 40' container
|
|
29
|
-
- grossWeight: gross weight
|
|
30
|
-
- measurements: Dimensions of the cargo (length, width, height) for freight calculations
|
|
31
|
-
- packageQuantity: package quantity, number and kind of packages
|
|
32
|
-
- packageType: Type of packaging used (e.g., cartons, pallets, barrels), number and kind of packages, description of goods
|
|
33
|
-
- containerNumber: container number, cntr. nos.
|
|
34
|
-
- sealNumber: seal number, seal nos., shipper seal, seal.
|
|
35
|
-
- vessel: vessel
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
You must apply the following rules:
|
|
39
|
-
- The JSON schema must be followed during the extraction.
|
|
40
|
-
- The values must only include text found in the document
|
|
41
|
-
- Do not normalize any entity value.
|
|
42
|
-
- If 'sealNumber' is not found don't add it to the result.
|
|
43
|
-
- Validate the JSON: make sure it is valid JSON! No extra text, no missing commas!
|
|
44
|
-
- Add an escape character (backslash) in front of all quotes in values
|
{data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/constants_sandbox.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/postprocessing/common.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_science_document_ai-1.45.2 → data_science_document_ai-1.47.0}/src/prompts/prompt_library.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|