data-science-document-ai 1.13.0__py3-none-any.whl → 1.56.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {data_science_document_ai-1.13.0.dist-info → data_science_document_ai-1.56.1.dist-info}/METADATA +7 -2
  2. data_science_document_ai-1.56.1.dist-info/RECORD +60 -0
  3. {data_science_document_ai-1.13.0.dist-info → data_science_document_ai-1.56.1.dist-info}/WHEEL +1 -1
  4. src/constants.py +42 -12
  5. src/constants_sandbox.py +2 -22
  6. src/docai.py +18 -7
  7. src/docai_processor_config.yaml +0 -64
  8. src/excel_processing.py +34 -15
  9. src/io.py +74 -6
  10. src/llm.py +12 -34
  11. src/pdf_processing.py +228 -78
  12. src/postprocessing/common.py +495 -618
  13. src/postprocessing/postprocess_partner_invoice.py +383 -27
  14. src/prompts/library/arrivalNotice/other/placeholders.json +70 -0
  15. src/prompts/library/arrivalNotice/other/prompt.txt +40 -0
  16. src/prompts/library/bookingConfirmation/evergreen/placeholders.json +17 -17
  17. src/prompts/library/bookingConfirmation/evergreen/prompt.txt +1 -0
  18. src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +18 -18
  19. src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +1 -1
  20. src/prompts/library/bookingConfirmation/maersk/placeholders.json +17 -17
  21. src/prompts/library/bookingConfirmation/maersk/prompt.txt +1 -1
  22. src/prompts/library/bookingConfirmation/msc/placeholders.json +17 -17
  23. src/prompts/library/bookingConfirmation/msc/prompt.txt +1 -1
  24. src/prompts/library/bookingConfirmation/oocl/placeholders.json +17 -17
  25. src/prompts/library/bookingConfirmation/oocl/prompt.txt +3 -1
  26. src/prompts/library/bookingConfirmation/other/placeholders.json +17 -17
  27. src/prompts/library/bookingConfirmation/other/prompt.txt +1 -1
  28. src/prompts/library/bookingConfirmation/yangming/placeholders.json +17 -17
  29. src/prompts/library/bookingConfirmation/yangming/prompt.txt +1 -1
  30. src/prompts/library/bundeskasse/other/placeholders.json +113 -0
  31. src/prompts/library/bundeskasse/other/prompt.txt +48 -0
  32. src/prompts/library/commercialInvoice/other/placeholders.json +125 -0
  33. src/prompts/library/commercialInvoice/other/prompt.txt +2 -1
  34. src/prompts/library/customsAssessment/other/placeholders.json +67 -16
  35. src/prompts/library/customsAssessment/other/prompt.txt +24 -37
  36. src/prompts/library/customsInvoice/other/placeholders.json +205 -0
  37. src/prompts/library/customsInvoice/other/prompt.txt +105 -0
  38. src/prompts/library/deliveryOrder/other/placeholders.json +79 -28
  39. src/prompts/library/deliveryOrder/other/prompt.txt +26 -40
  40. src/prompts/library/draftMbl/other/placeholders.json +33 -33
  41. src/prompts/library/draftMbl/other/prompt.txt +34 -44
  42. src/prompts/library/finalMbL/other/placeholders.json +34 -34
  43. src/prompts/library/finalMbL/other/prompt.txt +34 -44
  44. src/prompts/library/packingList/other/placeholders.json +98 -0
  45. src/prompts/library/packingList/other/prompt.txt +1 -1
  46. src/prompts/library/partnerInvoice/other/placeholders.json +165 -45
  47. src/prompts/library/partnerInvoice/other/prompt.txt +82 -44
  48. src/prompts/library/preprocessing/carrier/placeholders.json +0 -16
  49. src/prompts/library/shippingInstruction/other/placeholders.json +115 -0
  50. src/prompts/library/shippingInstruction/other/prompt.txt +28 -15
  51. src/setup.py +73 -63
  52. src/utils.py +207 -30
  53. data_science_document_ai-1.13.0.dist-info/RECORD +0 -55
  54. src/prompts/library/draftMbl/hapag-lloyd/prompt.txt +0 -44
  55. src/prompts/library/draftMbl/maersk/prompt.txt +0 -17
  56. src/prompts/library/finalMbL/hapag-lloyd/prompt.txt +0 -44
  57. src/prompts/library/finalMbL/maersk/prompt.txt +0 -17
@@ -1,50 +1,36 @@
1
- You are a document entity extraction specialist. Given a document, the explained datapoint need to extract.
1
+ <PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. </PERSONA>
2
2
 
3
+ <TASK> Your task is to extract data from Delivery Order documents as per the given response schema structure. </TASK>
4
+
5
+ <CONTEXT>
6
+ The Freight Forwarding company receives Delivery Order from Carrier (Shipping Lines) partners.
7
+ These documents contain various details related to shipments, equipment details, transport leg details, delivery / pickup details, vessel details, pick up terminal data.
8
+ They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
9
+ Your role is to accurately extract specific entities from these Delivery Orders to support efficient processing and accurate record-keeping.
10
+ </CONTEXT>
11
+
12
+ <INSTRUCTIONS>
13
+ - Populate fields as defined in the response schema.
14
+ - Multiple Equipment and TransportLeg entries may exist — capture all instances under "Equipment" and "TransportLeg".
15
+ - Use the data field description to understand the context of the data.
3
16
 
4
17
  EmptyContainerDepot: Empty container depot address.
5
- Equipment:
6
- CargoGrossWeight: Total weight of the cargo, including the tare weight of the container.
7
- ContainerNumber: Unique ID for tracking the shipping container.
18
+ Equipment: Details of each Equipment on the Delivery Order. Make sure to extract each Equipment information separately.
19
+ CargoGrossWeight: Total weight of the cargo, including the tare weight of the container. Weight(incl. tare), Cargo Weight, Weight (KG)
20
+ ContainerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892).
8
21
  ContainerType: Type of the shipping container, usually related to its size.
9
- EmptyReturnReference: A reference code for empty container return.
10
- Pin: Container release reference.
11
- TareWeight: Tare weight.
12
- pickUpTerminal: The specific terminal for cargo pickup.
13
- TransportLeg:
22
+ EmptyReturnReference: A reference code for empty container return. Find it as Drop off reference, turn-in reference in the document.
23
+ Pin: Container release reference or PIN code to pick up the container. Can be found as Release reference, pin
24
+ TareWeight: Weight of the empty container without cargo. It can be found as Tare weight, tare.
25
+
26
+ pickUpTerminal: The specific terminal for cargo pickup. It can also be found as pick up at depot, empty container depot, pickup depot, empty pickup location in the doc.
27
+
28
+ TransportLeg: Details of each TransportLeg on the Delivery Order. Make sure to extract each TransportLeg information separately.
14
29
  eta: The estimated time of arrival for a specific leg.
15
30
  etd: The estimated time of departure for a specific leg.
16
31
  portOfDischarge: The port where cargo is unloaded.
17
32
  portOfLoading: The port where cargo is loaded.
18
33
  vesselName: The name of the vessel.
19
- voyage: The journey or route code taken by the vessel.
20
-
21
- Your task is to extract the text value of the following entities:
22
- SCHEMA_PLACEHOLDER
23
-
24
- Keywords for datapoints:
25
- - EmptyContainerDepot: Empty Container Depot
26
- - Equipment:
27
- - CargoGrossWeight: Weight(incl. tare), Cargo Weight, Weight (KG)
28
- - ContainerNumber: Container, Container Number, Container No.
29
- - ContainerType: Type, Size/type
30
- - EmptyReturnReference: Drop off reference, turn-in reference
31
- - Pin: Release reference, pin
32
- - TareWeight: Tare weight, tare
33
- - pickUpTerminal: pick up at depot, empty container depot, pickup depot, empty pickup location
34
- - TransportLeg:
35
- - eta: eta, ETA
36
- - etd: etd, ETD
37
- - portOfDischarge: to, PORT OF DISCHARGE
38
- - portOfLoading: from, PORT OF LOADING
39
- - vesselName: vessel
40
- - voyage: voyage, voy. no, voyage-no.
41
-
34
+ voyage: The journey or route code taken by the vessel. It is often identified as voyage, voy. no, voyage-no in the document.
42
35
 
43
- You must apply the following rules:
44
- - The JSON schema must be followed during the extraction.
45
- - The values must only include text found in the document
46
- - Do not normalize any entity value.
47
- - portOfLoading and portOfDischarge are name of the Ports. You can rely on the port names from all over the world.
48
- - portOfLoading and portOfDischarge distinctly denotes the name of the ports. If you find abbreviation of the port use it, if not you can use the full name of the port
49
- - Validate the JSON make sure it is a valid JSON ! No extra text, no missing comma!
50
- - Add an escape character (backwards slash) in from of all quotes in values
36
+ </INSTRUCTIONS>
@@ -4,77 +4,77 @@
4
4
  "blNumber": {
5
5
  "type": "string",
6
6
  "nullable": true,
7
- "description": ""
8
- },
9
- "voyage": {
10
- "type": "string",
11
- "nullable": true,
12
- "description": ""
13
- },
14
- "portOfLoading": {
15
- "type": "string",
16
- "nullable": true,
17
- "description": ""
18
- },
19
- "portOfDischarge": {
20
- "type": "string",
21
- "nullable": true,
22
- "description": ""
7
+ "description": "The Bill of Lading number associated with the document. Commonly known as 'Bill of Lading Number', 'BILL OF LADING NO.', 'BL Number', 'BL No.', 'B/L No.', 'BL-Nr.', 'B/L', 'HBL No.', or 'M-AWB Nummer' in the document."
23
8
  },
24
9
  "bookingNumber": {
25
10
  "type": "string",
26
11
  "nullable": true,
27
- "description": ""
12
+ "description": "Booking numbers are unique identifiers for shipments. They are often referred to as 'Booking Number', 'Booking No.', 'Booking Ref.', 'Booking Reference', 'Booking ID', 'SACO-Pos.' or 'Order Ref'."
28
13
  },
29
14
  "containers": {
30
15
  "type": "ARRAY",
31
16
  "items": {
32
17
  "type": "OBJECT",
33
18
  "properties": {
19
+ "containerNumber": {
20
+ "type": "string",
21
+ "nullable": true,
22
+ "description": "The container number associated with the document. It MUST consist of 4 letters followed by 7 digits (e.g., 'CMAU1234567', 'BMOU 575538/3', 'XLXU 1277652'). It can be found in the document as 'Container No.', 'Container Number', 'Cont. No.', 'Cont Nr.', 'Seefrachtcontainer-Nr.', or 'Containernummer'."
23
+ },
34
24
  "containerType": {
35
25
  "type": "string",
36
26
  "nullable": true,
37
- "description": ""
27
+ "description": "The size or Type of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."
38
28
  },
39
29
  "grossWeight": {
40
30
  "type": "string",
41
31
  "nullable": true,
42
- "description": ""
32
+ "description": "The gross weight of the container. Usually mentioned as G.W or GW or Gross Weight, etc."
43
33
  },
44
34
  "measurements": {
45
35
  "type": "string",
46
36
  "nullable": true,
47
- "description": ""
48
- },
37
+ "description": "The volume of the Container. Usually, it is measured in 'Cubic Meter (cbm)' or dimensions. But volume in 'cbm' is preferred."
38
+ },
49
39
  "packageQuantity": {
50
40
  "type": "string",
51
41
  "nullable": true,
52
- "description": ""
42
+ "description": "The quantity of the goods in the container. Usually quantity is in pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets (PLT) >> Cartons (CTNS) >> Pieces (PCS). Extract the Larger packaging types that will have a lower count."
53
43
  },
54
44
  "packageType": {
55
45
  "type": "string",
56
46
  "nullable": true,
57
- "description": ""
58
- },
59
- "containerNumber": {
60
- "type": "string",
61
- "nullable": true,
62
- "description": ""
63
- },
47
+ "description": "The packaging type is the unit of packageQuantity. Example; pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc. Sometimes, the packaging type is available in the column name of the quantityShipped."
48
+ },
64
49
  "sealNumber": {
65
50
  "type": "string",
66
51
  "nullable": true,
67
- "description": ""
52
+ "description": "The seal number associated with the container number. It can be found in the document as 'seal number', 'seal nos.', 'shipper seal', or 'seal'. It is not the same as the container number."
68
53
  }
69
54
  },
70
- "required": []
55
+ "required": ["containerNumber", "containerType", "grossWeight", "measurements", "packageQuantity", "packageType", "sealNumber"]
71
56
  }
72
57
  },
58
+ "portOfDischarge": {
59
+ "type": "string",
60
+ "nullable": true,
61
+ "description": "The port where the containers are discharged from the vessel. This is the destination port for the shipment. Find information like port of discharge, pod, delivery, to."
62
+ },
63
+ "portOfLoading": {
64
+ "type": "string",
65
+ "nullable": true,
66
+ "description": "The origin port where the containers are loaded onto the vessel. Find information like 'Ladehafen', 'Port of Loading', 'pol', or 'from.' in the document."
67
+ },
68
+ "voyage": {
69
+ "type": "string",
70
+ "nullable": true,
71
+ "description": "The unique voyage number or identifier assigned to a vessel’s specific journey. This typically corresponds to the scheduled sailing associated with the shipment and can often be found near vessel information on shipping documents. It is often labeled 'voyage', 'voy. no', or 'voyage-no' in the document."
72
+ },
73
73
  "vessel": {
74
74
  "type": "string",
75
75
  "nullable": true,
76
- "description": ""
76
+ "description": "The name of the vessel carrying the container or shipment"
77
77
  }
78
78
  },
79
- "required": []
79
+ "required": ["blNumber", "bookingNumber", "containers", "portOfDischarge", "portOfLoading", "voyage", "vessel"]
80
80
  }
@@ -1,44 +1,34 @@
1
- You are a document entity extraction specialist. Given a document, the explained datapoint need to extract.
2
-
3
- blNumber: Bill of Lading number.
4
- voyage: The journey or route code taken by the vessel.
5
- portOfLoading: The port where cargo is loaded.
6
- portOfDischarge: The port where cargo is unloaded.
7
- bookingNumber: A unique identifier for the booking.
8
- containers:
9
- containerType: Type of the shipping container, usually related to it's size.
10
- grossWeight: Total weight of the cargo, including the tare weight of the container.
11
- measurements: Dimensions of the cargo (length, width, height) for freight calculations.
12
- packageQuantity: package quantity.
13
- packageType: Type of packaging used (e.g., cartons, pallets, barrels).
14
- containerNumber: Unique ID for tracking the shipping container.
15
- sealNumber: Number of the container's seal.
16
- vessel: The name of the vessel.
17
-
18
-
19
- Your task is to extract the text value of the following entities:
20
-
21
- Keywords for datapoints:
22
- - blNumber: Bill of Lading number, bill of landing no., swb-no., b/l no.
23
- - voyage: voyage, voy. no, voyage-no.
24
- - portOfLoading: port of loading, pol, from.]
25
- - portOfDischarge: port of discharge, pod, delivery, to
26
- - bookingNumber: Our reference, booking no., carrier reference
27
- - containers:
28
- - containerType: x 40' container
29
- - grossWeight: gross weight
30
- - measurements: Dimensions of the cargo (length, width, height) for freight calculations
31
- - packageQuantity: package quantity, number and kind of packages
32
- - packageType: Type of packaging used (e.g., cartons, pallets, barrels), number and kind of packages, description of goods
33
- - containerNumber: container number, cntr. nos.
34
- - sealNumber: seal number, seal nos., shipper seal, seal.
35
- - vessel: vessel
36
-
37
-
38
- You must apply the following rules:
39
- - The JSON schema must be followed during the extraction.
40
- - The values must only include text found in the document
41
- - Do not normalize any entity value.
42
- - If 'sealNumber' is not found don't add it to the result.
43
- - Validate the JSON make sure it is a valid JSON ! No extra text, no missing comma!
44
- - Add an escape character (backwards slash) in from of all quotes in values
1
+ <PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. </PERSONA>
2
+
3
+ <TASK> Your task is to extract data from draftMBL documents as per the given response schema structure. </TASK>
4
+
5
+ <CONTEXT>
6
+ The Freight Forwarding company receives draftMBL from Carrier (Shipping Lines) partners.
7
+ These documents contain various details related to shipments, booking details, vessel details, POL, POD and containers data.
8
+ They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
9
+ Your role is to accurately extract specific entities from these draftMBLs to support efficient processing and accurate record-keeping.
10
+ </CONTEXT>
11
+
12
+
13
+ <INSTRUCTIONS>
14
+ - Populate fields as defined in the response schema.
15
+ - Multiple Containers entries may exist, capture all instances under "containers".
16
+ - Use the data field description to understand the context of the data.
17
+
18
+ - bookingNumber:
19
+ - Booking numbers are unique identifiers for shipments. They are often referred to as "Booking Number", "Booking No.", "Booking Ref.", "Booking Reference", "Booking ID", "SACO-Pos.", "Order Ref", "Unsere Referenz", or "Unsere Position"
20
+ - If there is a unique_id that starts with "S" followed by 6 or 8 digits, it is a shipmentID, not a bookingNumber.
21
+
22
+ - blNumber:
23
+ - Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", "HBL No.", or "M-AWB Nummer".
24
+ - Bill of Lading Number is known as mblNumber. Not a shipmentID even if it starts with "S".
25
+ - blNumber from Hapag-Lloyd always starts with "HLC" (e.g., "HLCUTS12303AWNT3") and is labeled "SEA WAYBILL" or "SWB-NR." in the document.
26
+
27
+ - vessel:
28
+ - Vessel Name is the name of the ship carrying the cargo. It can be referred to as "Vessel", "Ship Name", "Schiff", "Schiffsname", "Nave", or "Vessel/Flight No.".
29
+
30
+ - containers: Details of each container on the draftMBL. Make sure to extract each container information separately.
31
+ - containerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892).
32
+ - sealNumber: Seal numbers are unique identifiers for shipping seals. They are usually mentioned as seal numbers in the document but they are definitely not container numbers.
33
+
34
+ </INSTRUCTIONS>
@@ -4,77 +4,77 @@
4
4
  "blNumber": {
5
5
  "type": "string",
6
6
  "nullable": true,
7
- "description": ""
8
- },
9
- "voyage": {
10
- "type": "string",
11
- "nullable": true,
12
- "description": ""
13
- },
14
- "portOfLoading": {
15
- "type": "string",
16
- "nullable": true,
17
- "description": ""
18
- },
19
- "portOfDischarge": {
20
- "type": "string",
21
- "nullable": true,
22
- "description": ""
7
+ "description": "The Bill of Lading number associated with the document. Commonly known as 'Bill of Lading Number', 'BILL OF LADING NO.', 'BL Number', 'BL No.', 'B/L No.', 'BL-Nr.', 'B/L', 'HBL No.', or 'M-AWB Nummer' in the document."
23
8
  },
24
9
  "bookingNumber": {
25
10
  "type": "string",
26
11
  "nullable": true,
27
- "description": ""
12
+ "description": "Booking numbers are unique identifiers for shipments. They are often referred to as 'Booking Number', 'Booking No.', 'Booking Ref.', 'Booking Reference', 'Booking ID', 'SACO-Pos.' or 'Order Ref'."
28
13
  },
29
14
  "containers": {
30
15
  "type": "ARRAY",
31
16
  "items": {
32
17
  "type": "OBJECT",
33
18
  "properties": {
19
+ "containerNumber": {
20
+ "type": "string",
21
+ "nullable": true,
22
+ "description": "The container number associated with the document. It MUST consist of 4 letters followed by 7 digits (e.g., 'CMAU1234567', 'BMOU 575538/3', 'XLXU 1277652'). It can be found in the document as 'Container No.', 'Container Number', 'Cont. No.', 'Cont Nr.', 'Seefrachtcontainer-Nr.', or 'Containernummer'."
23
+ },
34
24
  "containerType": {
35
25
  "type": "string",
36
26
  "nullable": true,
37
- "description": ""
27
+ "description": "The size or Type of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."
38
28
  },
39
29
  "grossWeight": {
40
30
  "type": "string",
41
31
  "nullable": true,
42
- "description": ""
32
+ "description": "The gross weight of the container. Usually mentioned as G.W or GW or Gross Weight, etc."
43
33
  },
44
34
  "measurements": {
45
35
  "type": "string",
46
36
  "nullable": true,
47
- "description": ""
48
- },
37
+ "description": "The volume of the Container. Usually, it is measured in 'Cubic Meter (cbm)' or dimensions. But volume in 'cbm' is preferred."
38
+ },
49
39
  "packageQuantity": {
50
40
  "type": "string",
51
41
  "nullable": true,
52
- "description": ""
42
+ "description": "The quantity of the goods in the container. Usually quantity is in pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets (PLT) >> Cartons (CTNS) >> Pieces (PCS). Extract the Larger packaging types that will have a lower count."
53
43
  },
54
44
  "packageType": {
55
45
  "type": "string",
56
46
  "nullable": true,
57
- "description": ""
58
- },
59
- "containerNumber": {
60
- "type": "string",
61
- "nullable": true,
62
- "description": ""
63
- },
47
+ "description": "The packaging type is the unit of packageQuantity. Example; pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc. Sometimes, the packaging type is available in the column name of the quantityShipped."
48
+ },
64
49
  "sealNumber": {
65
50
  "type": "string",
66
51
  "nullable": true,
67
- "description": ""
52
+ "description": "The seal number associated with the container number. It can be found in the document as 'seal number', 'seal nos.', 'shipper seal', or 'seal'. It is not the same as the container number."
68
53
  }
69
54
  },
70
- "required": []
55
+ "required": ["containerNumber", "containerType", "grossWeight", "measurements", "packageQuantity", "packageType", "sealNumber"]
71
56
  }
72
57
  },
58
+ "portOfDischarge": {
59
+ "type": "string",
60
+ "nullable": true,
61
+ "description": "The port where the containers are discharged from the vessel. This is the destination port for the shipment. Find information like port of discharge, pod, delivery, to."
62
+ },
63
+ "portOfLoading": {
64
+ "type": "string",
65
+ "nullable": true,
66
+ "description": "The origin port where the containers are loaded onto the vessel. Find information like 'Ladehafen', 'Port of Loading', 'pol', or 'from.' in the document."
67
+ },
68
+ "voyage": {
69
+ "type": "string",
70
+ "nullable": true,
71
+ "description": "The unique voyage number or identifier assigned to a vessel’s specific journey. This typically corresponds to the scheduled sailing associated with the shipment and can often be found near vessel information on shipping documents. It is often labeled 'voyage', 'voy. no', or 'voyage-no' in the document."
72
+ },
73
73
  "vessel": {
74
74
  "type": "string",
75
75
  "nullable": true,
76
- "description": ""
76
+ "description": "The name of the vessel carrying the container or shipment"
77
77
  }
78
78
  },
79
- "required": []
80
- }
79
+ "required": ["blNumber", "bookingNumber", "containers", "portOfDischarge", "portOfLoading", "voyage", "vessel"]
80
+ }
@@ -1,44 +1,34 @@
1
- You are a document entity extraction specialist. Given a document, the explained datapoint need to extract.
2
-
3
- blNumber: Bill of Lading number.
4
- voyage: The journey or route code taken by the vessel.
5
- portOfLoading: The port where cargo is loaded.
6
- portOfDischarge: The port where cargo is unloaded.
7
- bookingNumber: A unique identifier for the booking.
8
- containers:
9
- containerType: Type of the shipping container, usually related to it's size.
10
- grossWeight: Total weight of the cargo, including the tare weight of the container.
11
- measurements: Dimensions of the cargo (length, width, height) for freight calculations.
12
- packageQuantity: package quantity.
13
- packageType: Type of packaging used (e.g., cartons, pallets, barrels).
14
- containerNumber: Unique ID for tracking the shipping container.
15
- sealNumber: Number of the container's seal.
16
- vessel: The name of the vessel.
17
-
18
-
19
- Your task is to extract the text value of the following entities:
20
-
21
- Keywords for datapoints:
22
- - blNumber: Bill of Lading number, bill of landing no., swb-no., b/l no.
23
- - voyage: voyage, voy. no, voyage-no.
24
- - portOfLoading: port of loading, pol, from.]
25
- - portOfDischarge: port of discharge, pod, delivery, to
26
- - bookingNumber: Our reference, booking no., carrier reference
27
- - containers:
28
- - containerType: x 40' container
29
- - grossWeight: gross weight
30
- - measurements: Dimensions of the cargo (length, width, height) for freight calculations
31
- - packageQuantity: package quantity, number and kind of packages
32
- - packageType: Type of packaging used (e.g., cartons, pallets, barrels), number and kind of packages, description of goods
33
- - containerNumber: container number, cntr. nos.
34
- - sealNumber: seal number, seal nos., shipper seal, seal.
35
- - vessel: vessel
36
-
37
-
38
- You must apply the following rules:
39
- - The JSON schema must be followed during the extraction.
40
- - The values must only include text found in the document
41
- - Do not normalize any entity value.
42
- - If 'sealNumber' is not found don't add it to the result.
43
- - Validate the JSON make sure it is a valid JSON ! No extra text, no missing comma!
44
- - Add an escape character (backwards slash) in from of all quotes in values
1
+ <PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. </PERSONA>
2
+
3
+ <TASK> Your task is to extract data from finalMBL documents as per the given response schema structure. </TASK>
4
+
5
+ <CONTEXT>
6
+ The Freight Forwarding company receives finalMBL from Carrier (Shipping Lines) partners.
7
+ These documents contain various details related to shipments, booking details, vessel details, POL, POD and containers data.
8
+ They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
9
+ Your role is to accurately extract specific entities from these finalMBLs to support efficient processing and accurate record-keeping.
10
+ </CONTEXT>
11
+
12
+
13
+ <INSTRUCTIONS>
14
+ - Populate fields as defined in the response schema.
15
+ - Multiple Containers entries may exist, capture all instances under "containers".
16
+ - Use the data field description to understand the context of the data.
17
+
18
+ - bookingNumber:
19
+ - Booking numbers are unique identifiers for shipments. They are often referred to as "Booking Number", "Booking No.", "Booking Ref.", "Booking Reference", "Booking ID", "SACO-Pos.", "Order Ref", "Unsere Referenz", or "Unsere Position"
20
+ - If there is a unique_id that starts with "S" followed by 6 or 8 digits, it is a shipmentID, not a bookingNumber.
21
+
22
+ - blNumber:
23
+ - Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", "HBL No.", or "M-AWB Nummer".
24
+ - Bill of Lading Number is known as mblNumber. Not a shipmentID even if it starts with "S".
25
+ - blNumber from Hapag-Lloyd always starts with "HLC" (e.g., "HLCUTS12303AWNT3") and is labeled "SEA WAYBILL" or "SWB-NR." in the document.
26
+
27
+ - vessel:
28
+ - Vessel Name is the name of the ship carrying the cargo. It can be referred to as "Vessel", "Ship Name", "Schiff", "Schiffsname", "Nave", or "Vessel/Flight No.".
29
+
30
+ - containers: Details of each container on the finalMBL. Make sure to extract each container information separately.
31
+ - containerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892).
32
+ - sealNumber: Seal numbers are unique identifiers for shipping seals. They are usually mentioned as seal numbers in the document but they are definitely not container numbers.
33
+
34
+ </INSTRUCTIONS>
@@ -0,0 +1,98 @@
1
+ {
2
+ "type": "OBJECT",
3
+ "properties": {
4
+ "buyer": {
5
+ "type": "string",
6
+ "nullable": true,
7
+ "description": "The receiver or buyer of the goods."},
8
+ "grossWeight": {
9
+ "type": "string",
10
+ "nullable": true,
11
+ "description": "The total gross weight of all the goods. Usually mentioned as G.W or GW or Gross Weight, etc.."},
12
+ "invoiceNumber": {
13
+ "type": "string",
14
+ "nullable": true,
15
+ "description": "The invoice number"},
16
+ "netWeight": {
17
+ "type": "string",
18
+ "nullable": true,
19
+ "description": "The total net weight of all the goods. Usually mentioned as N.W or NW or Net Weight, etc.."},
20
+ "seller": {
21
+ "type": "string",
22
+ "nullable": true,
23
+ "description": "The seller or shipper of the goods."},
24
+ "skuData": {
25
+ "type": "ARRAY",
26
+ "items": {
27
+ "type": "OBJECT",
28
+ "properties": {
29
+ "containerNumber": {
30
+ "type": "string",
31
+ "nullable": true,
32
+ "description": "Container Number consists of 4 capital letters followed by 7 digits. Example: TEMU7972458. Usually mentioned as Container Number, CONTAINER NO. or Containers"},
33
+ "grossWeight": {
34
+ "type": "string",
35
+ "nullable": true,
36
+ "description": "The gross weight of the goods. Usually mentioned as G.W or GW or Gross Weight, etc.."},
37
+ "hsCode": {
38
+ "type": "string",
39
+ "nullable": true,
40
+ "description": "The Harmonized System (HS) code of the goods."},
41
+ "measurements": {
42
+ "type": "string",
43
+ "nullable": true,
44
+ "description": "The volume of the goods. Usually, it is measured in 'Cubic Meter (cbm)' or dimensions. But volume in 'cbm' is preferred."},
45
+ "netWeight": {
46
+ "type": "string",
47
+ "nullable": true,
48
+ "description": "The net weight of the goods. Usually mentioned as N.W or NW or Net Weight, etc.."},
49
+ "packagingType": {
50
+ "type": "string",
51
+ "nullable": true,
52
+ "description": "The packaging type is the unit of quantityShipped. Example; pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc. Sometimes, the packaging type is available in the column name of the quantityShipped."},
53
+ "poNumber": {
54
+ "type": "string",
55
+ "nullable": true,
56
+ "description": "Purchase order of the goods."},
57
+ "poPosition": {
58
+ "type": "string",
59
+ "nullable": true,
60
+ "description": "PO position refers to the specific item or line associated with a Purchase Order (PO). It represents the position or line number in the PO that corresponds to the items being shipped."},
61
+ "quantityShipped": {
62
+ "type": "string",
63
+ "nullable": true,
64
+ "description": "The quantity of the goods. Usually quantity is in pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets (PLT) >> Cartons (CTNS) >> Pieces (PCS). Extract the Larger packaging types that will have a lower count."},
65
+ "sealNumber": {
66
+ "type": "string",
67
+ "nullable": true,
68
+ "description": "A unique number associated with the container number"},
69
+ "skuDescription": {
70
+ "type": "string",
71
+ "nullable": true,
72
+ "description": "Description of the goods."},
73
+ "skuNumbers": {
74
+ "type": "string",
75
+ "nullable": true,
76
+ "description": "SKU number of the goods."}
77
+ },
78
+ "required": [
79
+ "skuNumbers",
80
+ "quantityShipped",
81
+ "skuDescription",
82
+ "grossWeight",
83
+ "netWeight",
84
+ "packagingType"
85
+ ]
86
+ }
87
+ },
88
+ "totalPackagingType": {
89
+ "type": "string",
90
+ "nullable": true,
91
+ "description": "The packaging type of all the goods associated with the totalQuantityShipped. It is the unit of totalQuantityShipped. Usually pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc."},
92
+ "totalQuantityShipped": {
93
+ "type": "string",
94
+ "nullable": true,
95
+ "description": "The total quantity of the goods. Usually quantity is in pallets, cartons, pieces, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets >> Cartons >> Pieces. Larger packaging types will have a lower count."}
96
+ },
97
+ "required": []
98
+ }
@@ -2,7 +2,7 @@ Task: You are a document entity extraction specialist. Given a document, your ta
2
2
 
3
3
  Extract all the data points from the given document.
4
4
  Each data point is part of a master field called "skuData". There may be multiple sku entries in a document.
5
- Your goal is to extract all instances.
5
+ Your task is to extract the text value of each of the following entities, together with the page number (starting from 0) on which the value was found in the document:
6
6
 
7
7
  Instructions:
8
8
  - Populate fields as defined in the response schema.