data-science-document-ai 1.37.0__py3-none-any.whl → 1.51.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (56) hide show
  1. {data_science_document_ai-1.37.0.dist-info → data_science_document_ai-1.51.0.dist-info}/METADATA +3 -3
  2. data_science_document_ai-1.51.0.dist-info/RECORD +60 -0
  3. {data_science_document_ai-1.37.0.dist-info → data_science_document_ai-1.51.0.dist-info}/WHEEL +1 -1
  4. src/constants.py +6 -10
  5. src/docai.py +14 -5
  6. src/docai_processor_config.yaml +0 -56
  7. src/excel_processing.py +34 -13
  8. src/io.py +69 -1
  9. src/llm.py +10 -32
  10. src/pdf_processing.py +192 -57
  11. src/postprocessing/common.py +252 -590
  12. src/postprocessing/postprocess_partner_invoice.py +139 -89
  13. src/prompts/library/arrivalNotice/other/placeholders.json +70 -0
  14. src/prompts/library/arrivalNotice/other/prompt.txt +40 -0
  15. src/prompts/library/bookingConfirmation/evergreen/placeholders.json +17 -17
  16. src/prompts/library/bookingConfirmation/evergreen/prompt.txt +1 -0
  17. src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +18 -18
  18. src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +1 -1
  19. src/prompts/library/bookingConfirmation/maersk/placeholders.json +17 -17
  20. src/prompts/library/bookingConfirmation/maersk/prompt.txt +1 -1
  21. src/prompts/library/bookingConfirmation/msc/placeholders.json +17 -17
  22. src/prompts/library/bookingConfirmation/msc/prompt.txt +1 -1
  23. src/prompts/library/bookingConfirmation/oocl/placeholders.json +17 -17
  24. src/prompts/library/bookingConfirmation/oocl/prompt.txt +3 -1
  25. src/prompts/library/bookingConfirmation/other/placeholders.json +17 -17
  26. src/prompts/library/bookingConfirmation/other/prompt.txt +1 -1
  27. src/prompts/library/bookingConfirmation/yangming/placeholders.json +17 -17
  28. src/prompts/library/bookingConfirmation/yangming/prompt.txt +1 -1
  29. src/prompts/library/bundeskasse/other/placeholders.json +25 -25
  30. src/prompts/library/bundeskasse/other/prompt.txt +8 -6
  31. src/prompts/library/commercialInvoice/other/placeholders.json +125 -0
  32. src/prompts/library/commercialInvoice/other/prompt.txt +2 -1
  33. src/prompts/library/customsAssessment/other/placeholders.json +67 -16
  34. src/prompts/library/customsAssessment/other/prompt.txt +24 -37
  35. src/prompts/library/customsInvoice/other/placeholders.json +29 -20
  36. src/prompts/library/customsInvoice/other/prompt.txt +9 -4
  37. src/prompts/library/deliveryOrder/other/placeholders.json +79 -28
  38. src/prompts/library/deliveryOrder/other/prompt.txt +26 -40
  39. src/prompts/library/draftMbl/other/placeholders.json +33 -33
  40. src/prompts/library/draftMbl/other/prompt.txt +34 -44
  41. src/prompts/library/finalMbL/other/placeholders.json +34 -34
  42. src/prompts/library/finalMbL/other/prompt.txt +34 -44
  43. src/prompts/library/packingList/other/placeholders.json +98 -0
  44. src/prompts/library/packingList/other/prompt.txt +1 -1
  45. src/prompts/library/partnerInvoice/other/placeholders.json +2 -23
  46. src/prompts/library/partnerInvoice/other/prompt.txt +7 -18
  47. src/prompts/library/preprocessing/carrier/placeholders.json +0 -16
  48. src/prompts/library/shippingInstruction/other/placeholders.json +115 -0
  49. src/prompts/library/shippingInstruction/other/prompt.txt +28 -15
  50. src/setup.py +13 -61
  51. src/utils.py +189 -29
  52. data_science_document_ai-1.37.0.dist-info/RECORD +0 -59
  53. src/prompts/library/draftMbl/hapag-lloyd/prompt.txt +0 -44
  54. src/prompts/library/draftMbl/maersk/prompt.txt +0 -17
  55. src/prompts/library/finalMbL/hapag-lloyd/prompt.txt +0 -44
  56. src/prompts/library/finalMbL/maersk/prompt.txt +0 -17
@@ -4,77 +4,77 @@
4
4
  "blNumber": {
5
5
  "type": "string",
6
6
  "nullable": true,
7
- "description": ""
8
- },
9
- "voyage": {
10
- "type": "string",
11
- "nullable": true,
12
- "description": ""
13
- },
14
- "portOfLoading": {
15
- "type": "string",
16
- "nullable": true,
17
- "description": ""
18
- },
19
- "portOfDischarge": {
20
- "type": "string",
21
- "nullable": true,
22
- "description": ""
7
+ "description": "The Bill of Lading number associated with the document. Commonly known as 'Bill of Lading Number', 'BILL OF LADING NO.', 'BL Number', 'BL No.', 'B/L No.', 'BL-Nr.', 'B/L', 'HBL No.', or 'M-AWB Nummer' in the document."
23
8
  },
24
9
  "bookingNumber": {
25
10
  "type": "string",
26
11
  "nullable": true,
27
- "description": ""
12
+ "description": "Booking numbers are unique identifiers for shipments. They are often referred to as 'Booking Number', 'Booking No.', 'Booking Ref.', 'Booking Reference', 'Booking ID', 'SACO-Pos.' or 'Order Ref'."
28
13
  },
29
14
  "containers": {
30
15
  "type": "ARRAY",
31
16
  "items": {
32
17
  "type": "OBJECT",
33
18
  "properties": {
19
+ "containerNumber": {
20
+ "type": "string",
21
+ "nullable": true,
22
+ "description": "The container number associated with the document. They MUST consist of 4 letters followed by 7 digits (e.g., 'CMAU1234567', 'BMOU 575538/3', 'XLXU 1277652'). It can be found in the document as 'Container No.', 'Container Number', 'Cont. No.', 'Cont Nr.', 'Seefrachtcontainer-Nr.', or 'Containernummer'."
23
+ },
34
24
  "containerType": {
35
25
  "type": "string",
36
26
  "nullable": true,
37
- "description": ""
27
+ "description": "The size or Type of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."
38
28
  },
39
29
  "grossWeight": {
40
30
  "type": "string",
41
31
  "nullable": true,
42
- "description": ""
32
+ "description": "The gross weight of the container. Usually mentioned as G.W or GW or Gross Weight, etc.."
43
33
  },
44
34
  "measurements": {
45
35
  "type": "string",
46
36
  "nullable": true,
47
- "description": ""
48
- },
37
+ "description": "The volume of the Container. Usually, it is measured in 'Cubic Meter (cbm)' or dimensions. But volume in 'cbm' is preferred."
38
+ },
49
39
  "packageQuantity": {
50
40
  "type": "string",
51
41
  "nullable": true,
52
- "description": ""
42
+ "description": "The quantity of the goods in the container. Usually quantity is in pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets (PLT) >> Cartons (CTNS) >> Pieces (PCS). Extract the Larger packaging types that will have a lower count."
53
43
  },
54
44
  "packageType": {
55
45
  "type": "string",
56
46
  "nullable": true,
57
- "description": ""
58
- },
59
- "containerNumber": {
60
- "type": "string",
61
- "nullable": true,
62
- "description": ""
63
- },
47
+ "description": "The packaging type is the unit of packageQuantity. Example; pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc. Sometimes, the packaging type is available in the column name of the quantityShipped."
48
+ },
64
49
  "sealNumber": {
65
50
  "type": "string",
66
51
  "nullable": true,
67
- "description": ""
52
+ "description": "The seal number associated with the container number. It can be found under labels such as 'seal number', 'seal nos.', 'shipper seal', or 'seal'. It is not the same as the container number."
68
53
  }
69
54
  },
70
- "required": []
55
+ "required": ["containerNumber", "containerType", "grossWeight", "measurements", "packageQuantity", "packageType", "sealNumber"]
71
56
  }
72
57
  },
58
+ "portOfDischarge": {
59
+ "type": "string",
60
+ "nullable": true,
61
+ "description": "The port where the containers are discharged from the vessel. This is the destination port for the shipment. Find information like port of discharge, pod, delivery, to."
62
+ },
63
+ "portOfLoading": {
64
+ "type": "string",
65
+ "nullable": true,
66
+ "description": "The origin port where the containers are loaded onto the vessel. Find information like 'Ladehafen', 'Port of Loading', 'pol', or 'from.' in the document."
67
+ },
68
+ "voyage": {
69
+ "type": "string",
70
+ "nullable": true,
71
+ "description": "The unique voyage number or identifier assigned to a vessel’s specific journey. This typically corresponds to the scheduled sailing associated with the shipment and can often be found near vessel information on shipping documents, labelled as 'voyage', 'voy. no', or 'voyage-no.'."
72
+ },
73
73
  "vessel": {
74
74
  "type": "string",
75
75
  "nullable": true,
76
- "description": ""
76
+ "description": "The name of the vessel carrying the container or shipment"
77
77
  }
78
78
  },
79
- "required": []
80
- }
79
+ "required": ["blNumber", "bookingNumber", "containers", "portOfDischarge", "portOfLoading", "voyage", "vessel"]
80
+ }
@@ -1,44 +1,34 @@
1
- You are a document entity extraction specialist. Given a document, the explained datapoint need to extract.
2
-
3
- blNumber: Bill of Lading number.
4
- voyage: The journey or route code taken by the vessel.
5
- portOfLoading: The port where cargo is loaded.
6
- portOfDischarge: The port where cargo is unloaded.
7
- bookingNumber: A unique identifier for the booking.
8
- containers:
9
- containerType: Type of the shipping container, usually related to it's size.
10
- grossWeight: Total weight of the cargo, including the tare weight of the container.
11
- measurements: Dimensions of the cargo (length, width, height) for freight calculations.
12
- packageQuantity: package quantity.
13
- packageType: Type of packaging used (e.g., cartons, pallets, barrels).
14
- containerNumber: Unique ID for tracking the shipping container.
15
- sealNumber: Number of the container's seal.
16
- vessel: The name of the vessel.
17
-
18
-
19
- Your task is to extract the text value of the following entities:
20
-
21
- Keywords for datapoints:
22
- - blNumber: Bill of Lading number, bill of landing no., swb-no., b/l no.
23
- - voyage: voyage, voy. no, voyage-no.
24
- - portOfLoading: port of loading, pol, from.]
25
- - portOfDischarge: port of discharge, pod, delivery, to
26
- - bookingNumber: Our reference, booking no., carrier reference
27
- - containers:
28
- - containerType: x 40' container
29
- - grossWeight: gross weight
30
- - measurements: Dimensions of the cargo (length, width, height) for freight calculations
31
- - packageQuantity: package quantity, number and kind of packages
32
- - packageType: Type of packaging used (e.g., cartons, pallets, barrels), number and kind of packages, description of goods
33
- - containerNumber: container number, cntr. nos.
34
- - sealNumber: seal number, seal nos., shipper seal, seal.
35
- - vessel: vessel
36
-
37
-
38
- You must apply the following rules:
39
- - The JSON schema must be followed during the extraction.
40
- - The values must only include text found in the document
41
- - Do not normalize any entity value.
42
- - If 'sealNumber' is not found don't add it to the result.
43
- - Validate the JSON make sure it is a valid JSON ! No extra text, no missing comma!
44
- - Add an escape character (backwards slash) in from of all quotes in values
1
+ <PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
2
+
3
+ <TASK> Your task is to extract data from finalMBL documents as per the given response schema structure. <TASK>
4
+
5
+ <CONTEXT>
6
+ The Freight Forwarding company receives finalMBL from Carrier (Shipping Lines) partners.
7
+ These documents contain various details related to shipments, booking details, vessel details, POL, POD and containers data.
8
+ They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
9
+ Your role is to accurately extract specific entities from these finalMBLs to support efficient processing and accurate record-keeping.
10
+ <CONTEXT>
11
+
12
+
13
+ <INSTRUCTIONS>
14
+ - Populate fields as defined in the response schema.
15
+ - Multiple Containers entries may exist, capture all instances under "containers".
16
+ - Use the data field description to understand the context of the data.
17
+
18
+ - bookingNumber:
19
+ - Booking numbers are unique identifiers for shipments. They are often referred to as "Booking Number", "Booking No.", "Booking Ref.", "Booking Reference", "Booking ID", "SACO-Pos.", "Order Ref", "Unsere Referenz", or "Unsere Position"
20
+ - If there is a unique_id that starts with "S" followed by 6 or 8 digits, it is a shipmentID, not a bookingNumber.
21
+
22
+ - blNumber:
23
+ - Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", "HBL No.", or "M-AWB Nummer".
24
+ - Bill of Lading Number is known as mblNumber. Not a shipmentID even if it starts with "S".
25
+ - blNumber from Hapag-Lloyd always starts with "HLC" (e.g., "HLCUTS12303AWNT3") and is labelled as "SEA WAYBILL" or "SWB-NR.".
26
+
27
+ - vessel:
28
+ - Vessel Name is the name of the ship carrying the cargo. It can be referred to as "Vessel", "Ship Name", "Schiff", "Schiffsname", "Nave", or "Vessel/Flight No.".
29
+
30
+ - containers: Details of each container on the finalMBL. Make sure to extract each container information separately.
31
+ - containerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892).
32
+ - sealNumber: Seal numbers are unique identifiers for shipping seals. They are usually mentioned as seal numbers in the document but they are definitely not container numbers.
33
+
34
+ <INSTRUCTIONS>
@@ -0,0 +1,98 @@
1
+ {
2
+ "type": "OBJECT",
3
+ "properties": {
4
+ "buyer": {
5
+ "type": "string",
6
+ "nullable": true,
7
+ "description": "The receiver or buyer of the goods."},
8
+ "grossWeight": {
9
+ "type": "string",
10
+ "nullable": true,
11
+ "description": "The total gross weight of all the goods. Usually mentioned as G.W or GW or Gross Weight, etc.."},
12
+ "invoiceNumber": {
13
+ "type": "string",
14
+ "nullable": true,
15
+ "description": "The invoice number"},
16
+ "netWeight": {
17
+ "type": "string",
18
+ "nullable": true,
19
+ "description": "The total net weight of all the goods. Usually mentioned as N.W or NW or Net Weight, etc.."},
20
+ "seller": {
21
+ "type": "string",
22
+ "nullable": true,
23
+ "description": "The seller or shipper of the goods."},
24
+ "skuData": {
25
+ "type": "ARRAY",
26
+ "items": {
27
+ "type": "OBJECT",
28
+ "properties": {
29
+ "containerNumber": {
30
+ "type": "string",
31
+ "nullable": true,
32
+ "description": "Container Number consists of 4 capital letters followed by 7 digits. Example: TEMU7972458. Usually mentioned as Container Number, CONTAINER NO. or Containers"},
33
+ "grossWeight": {
34
+ "type": "string",
35
+ "nullable": true,
36
+ "description": "The gross weight of the goods. Usually mentioned as G.W or GW or Gross Weight, etc.."},
37
+ "hsCode": {
38
+ "type": "string",
39
+ "nullable": true,
40
+ "description": "The harmonized system code of a goods."},
41
+ "measurements": {
42
+ "type": "string",
43
+ "nullable": true,
44
+ "description": "The volume of the goods. Usually, it is measured in 'Cubic Meter (cbm)' or dimensions. But volume in 'cbm' is preferred."},
45
+ "netWeight": {
46
+ "type": "string",
47
+ "nullable": true,
48
+ "description": "The net weight of the goods. Usually mentioned as N.W or NW or Net Weight, etc.."},
49
+ "packagingType": {
50
+ "type": "string",
51
+ "nullable": true,
52
+ "description": "The packaging type is the unit of quantityShipped. Example; pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc. Sometimes, the packaging type is available in the column name of the quantityShipped."},
53
+ "poNumber": {
54
+ "type": "string",
55
+ "nullable": true,
56
+ "description": "Purchase order of the goods."},
57
+ "poPosition": {
58
+ "type": "string",
59
+ "nullable": true,
60
+ "description": "PO position refers to the specific item or line associated with a Purchase Order (PO). It represents the position or line number in the PO that corresponds to the items being shipped."},
61
+ "quantityShipped": {
62
+ "type": "string",
63
+ "nullable": true,
64
+ "description": "The quantity of the goods. Usually quantity is in pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets (PLT) >> Cartons (CTNS) >> Pieces (PCS). Extract the Larger packaging types that will have a lower count."},
65
+ "sealNumber": {
66
+ "type": "string",
67
+ "nullable": true,
68
+ "description": "A unique number associated with the container number"},
69
+ "skuDescription": {
70
+ "type": "string",
71
+ "nullable": true,
72
+ "description": "Description of the goods."},
73
+ "skuNumbers": {
74
+ "type": "string",
75
+ "nullable": true,
76
+ "description": "SKU number of the goods."}
77
+ },
78
+ "required": [
79
+ "skuNumbers",
80
+ "quantityShipped",
81
+ "skuDescription",
82
+ "grossWeight",
83
+ "netWeight",
84
+ "packagingType"
85
+ ]
86
+ }
87
+ },
88
+ "totalPackagingType": {
89
+ "type": "string",
90
+ "nullable": true,
91
+ "description": "The packaging type of all the goods associated with the totalQuantityShipped. It is the unit of totalQuantityShipped. Usually pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc."},
92
+ "totalQuantityShipped": {
93
+ "type": "string",
94
+ "nullable": true,
95
+ "description": "The total quantity of the goods. Usually quantity is in pallets, cartons, pieces, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets >> Cartons >> Pieces. Larger packaging types will have a lower count."}
96
+ },
97
+ "required": []
98
+ }
@@ -2,7 +2,7 @@ Task: You are a document entity extraction specialist. Given a document, your ta
2
2
 
3
3
  Extract all the data points from the given document.
4
4
  Each data point is part of a master field called "skuData". There may be multiple sku entries in a document.
5
- Your goal is to extract all instances.
5
+ Your task is to extract the text value of the following entities and their page numbers starting from 0 where the value was found in the document:
6
6
 
7
7
  Instructions:
8
8
  - Populate fields as defined in the response schema.
@@ -114,7 +114,7 @@
114
114
  "description": "Bill of Lading number (B/L NO.), a document issued by the carrier."
115
115
  },
116
116
  "partnerReference": {"type": "STRING", "nullable": true,
117
- "description": "A partnerReference can be a shipment ID. It starts with 'S' followed by 6 or 7 digits (e.g., 'S1234567')."
117
+ "description": "A partnerReference can be a shipment ID. It starts with 'S' followed by 6 or 8 digits (e.g., 'S1234567')."
118
118
  },
119
119
  "paymentTerm": {"type": "STRING", "nullable": true,
120
120
  "description": "The payment term indicates the conditions under which the payment should be made. E.g., 'In 10 TAGEN', '14 TAGEN', '14 days', etc."},
@@ -142,34 +142,13 @@
142
142
  "type": "STRING",
143
143
  "nullable": true,
144
144
  "description": "A keyword or phrase that indicates the presence of an agent or intermediary in the transaction, such as 'As Agent For', 'Acting Agent', 'Issuing agent', 'Contact Agent', or similar words."},
145
- "paymentInformation": {
146
- "type": "OBJECT",
147
- "properties": {
148
- "paidAmount": {
149
- "type": "STRING",
150
- "nullable": true,
151
- "description": "The amount that has been paid so far. You can identify this in the invoice by looking for terms like 'Vorschuss'."
152
- },
153
- "remainingAmountToPay": {
154
- "type": "STRING",
155
- "nullable": true,
156
- "description": "The amount that is still due for payment (e.g., 'Bitte zahlen', 'Zu zahlen' only). This can be negative & ensure the negative sign is captured if applicable."
157
- },
158
- "currency": {
159
- "type": "STRING",
160
- "nullable": true,
161
- "description": "Currency code associated with the paidAmount and remainngAmountToPay"
162
- },
163
- "sentence": {"type": "STRING", "nullable": true,
164
- "description": "A sentence that indicates the payment status, such as 'Vorschuss', 'Vorauszahlung', 'Paid', 'Partially Paid', or 'Unpaid'. This is used to summarize the payment status of the invoice."}
165
- }
166
145
 
167
- },
168
146
  "reverseChargeSentence": {
169
147
  "type": "STRING",
170
148
  "nullable": true,
171
149
  "description": "A sentence which indicate that the reverse charge applies. Mostly found as VAT/Tax Clause."
172
150
  }
151
+
173
152
  },
174
153
  "required": [
175
154
  "bankAccount",
@@ -1,6 +1,6 @@
1
1
  <PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
2
2
 
3
- <TASK>Your task is to extract data from invoice documents as per the given response schema structure.<TASK>
3
+ <TASK> Your task is to extract data from invoice documents as per the given response schema structure. <TASK>
4
4
 
5
5
  <CONTEXT>
6
6
  The Freight Forwarding company receives invoices from Carrier (Shipping Lines) partners and Customs Brokers. These include Partner Invoices (COGS Invoices) and COGS Customs Invoices.
@@ -47,19 +47,19 @@ Your role is to accurately extract specific entities from these invoices to supp
47
47
  - eta and etd: A few invoices contain the same date for ARRIVED/DEPARTED or ETA/ETD. In that case, extract it for both eta and etd.
48
48
 
49
49
  - lineItem: Details of each COGS and Customs line item on the invoice. Make sure to extract each amount and currency separately.
50
- - uniqueId: A unique id which associated with the lineItem as each line item can belong to a different shipment. Extract only if its available in the line item. Either a shipmentId starting with an S and followed by 6 or 7 numeric values or a mblNumber. If shipmentId or mblNumber does not exist, set it to containerNumber.
50
+ - uniqueId: A unique id associated with the lineItem, as each line item can belong to a different shipment. Extract it only if it is available in the line item. It is either a shipmentId starting with an S and followed by 6 or 8 numeric values, or an mblNumber. If shipmentId or mblNumber does not exist, set it to containerNumber.
51
51
  - lineItemDescription: The name or description of the item. Usually, it will be a one line sentence.
52
52
  - unitPrice: Even if the quantity is not mentioned, you can still extract the unit price. Check the naming of the columns in a different languages, it can be "Unit Price", "Prezzo unitario", "Prix Unitaire", "Unitario", etc. Refer to "Prezzo unitario" field in the italian invoice example.
53
53
  - totalAmount: The total amount for the item. It can be in different currencies, so ensure to capture the currency as well for the totalAmountCurrency.
54
54
  - totalAmountEuro: Few line items contains a total amount in Euro. You can find it by looking for the term "Total EUR" or "Amount in Euro" in the line item but it's always in the EURO / € currency. Sometimes, it can be same as totalAmount if the line item is already in Euro.
55
- - quantity: The quantity of the item or service provided in the line item.
56
- - containerNumber: Container Number always starts with 4 letters and is followed by 7 digits (e.g., ABCD1234567).
55
+ - quantity: The quantity of the item or service provided in the line item. Pay attention to patterns like "2 x 40HC" or "2x40HC": the quantity is 2 and 40HC is the containerSize; the quantity is not 240.
56
+ - containerNumber: Container Number always starts with 4 letters and is followed by 7 digits (e.g., ABCD1234567, XALU 8593678).
57
57
 
58
58
  - hblNumber and mblNumber:
59
59
  - The Master Bill of Lading number. Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", or "HBL No.".
60
60
  - Do not confuse with the containerNumber that always starts with 4 letters and is followed by 7 digits (e.g., SEGU3090389). This is not the mblNumber or hblNumber.
61
61
  - partnerReference:
62
- - Shipment_ID can be a reference number for the partner. Shipment_ID always starts with "S" followed by 6 or 7 digits (e.g., S2654361).
62
+ - Shipment_ID can be a reference number for the partner. Shipment_ID always starts with "S" followed by 6 or 8 digits (e.g., S2654361).
63
63
  - If Shipment_ID is not available, extract any Booking Number as partnerReference.
64
64
 
65
65
  - vendorName and vendorAddress:
@@ -68,6 +68,7 @@ Your role is to accurately extract specific entities from these invoices to supp
68
68
  - Example:
69
69
  - "COSCO SHIPPING Lines Italy, Poland, or France S.R.L. – Genova Office – As Agent For COSCO SHIPPING Lines Co.,Ltd."
70
70
  - vendorName: COSCO SHIPPING Lines Co.,Ltd.
71
+ - From Hapag-Lloyd invoices, look for "Ballindamm 25" address to extract the vendorAddress.
71
72
 
72
73
  - agentName: Name of the agent. Agencies are offices authorized to act on behalf of a company. This details usually available including the branch name of the parent company name in the invoice.
73
74
  - agentKeyWord:
@@ -79,25 +80,13 @@ Your role is to accurately extract specific entities from these invoices to supp
79
80
  - serviceDate: The date of service provided. If the serviceDate is not specifically mentioned in the invoice, you can use the ETA of the shipment as a serviceDate.
80
81
  - reverseChargeSentence: A sentence which indicates that the reverse charge applies. Mostly found as Tax Clause.
81
82
 
82
- - paymentInformation:
83
- - Some partners receive prepayment before providing the service. They later send a final invoice that includes both the amount already paid and the remaining amount due.
84
- - This applies when the invoice contains prepayment-related terms such as Vorschuss, BEREITS BEZAHLT, or similar at the invoice total section.
85
- - do not get confused with the paidAmount and remainingAmountToPay. Few invoices may not have a paidAmount or remainingAmountToPay in such cases pay attention to the sentence field alignment.
86
- - Extract the following fields, if applicable:
87
- - paidAmount: The amount that has already been paid. You can identify this in the invoice by looking for terms like "Vorschuss", "BEREITS BEZAHLT".
88
- - remainingAmountToPay: The amount still due. This can be negative if the paid amount exceeds the total invoice amount. Ensure the negative sign is captured if applicable. You can identify this by looking for terms like "Bitte Zahlen", "Zu zahlen", "Remaining Amount", "To Pay", "Due", or "Unpaid".
89
- - currency: The currency of both the paid and remaining amounts.
90
- - sentence: A sentence from the invoice indicating the payment status (e.g., "Vorschuss", "Prepayment", "Paid", "Partially Paid", "Unpaid"). This helps summarize the overall payment status of the invoice.
91
-
92
83
  IMPORTANT NOTE:
93
84
  - Ensure all extracted values are directly from the document. Do not make assumptions, modifications or calculations.
85
+ - Do not split the quantity into different line items. e.g., if quantity is 2 or 2 CTR or 2 BIL, do not create 2 separate line items with quantity 1 each.
94
86
  - Do not normalize or modify any entity values.
95
87
  - Pay attention to the line item details, as they may vary significantly across different invoices.
96
88
 
97
89
  PAY ATTENTION TO THE SGS MACO CUSTOMS SERVICE INVOICES:
98
90
  - invoices from SGS maco customs service,
99
- - Extract only "Vorschuss" as a paidAmount but not "Vorauszahlung".
100
- - Extract "Zu zahlen" or "Bitte Zahlen" as a remainingAmountToPay.
101
- - do not get confused with the paidAmount and remainingAmountToPay. Few invoices may not have a paidAmount or remainingAmountToPay. In such cases, pay attention to the sentence field alignment.
102
91
  - "Total Kosten excl. MwSt." is not the vatApplicableAmount
103
92
  <INSTRUCTIONS>
@@ -10,21 +10,5 @@
10
10
  "OOCL",
11
11
  "Other"
12
12
  ]
13
- },
14
- "finalMbL": {
15
- "type": "string",
16
- "enum": [
17
- "Hapag-Lloyd",
18
- "Maersk",
19
- "Other"
20
- ]
21
- },
22
- "draftMbl": {
23
- "type": "string",
24
- "enum": [
25
- "Hapag-Lloyd",
26
- "Maersk",
27
- "Other"
28
- ]
29
13
  }
30
14
  }
@@ -0,0 +1,115 @@
1
+ {
2
+ "type": "OBJECT",
3
+ "properties": {
4
+ "consignee": {
5
+ "type": "STRING",
6
+ "nullable": true,
7
+ "description": "The receiver or buyer of the goods. It can be found with keywords like Importeur, Anmelder, Empfanger, Consignee, Buyer, Receiver, etc."
8
+ },
9
+ "finalDestination": {
10
+ "type": "STRING",
11
+ "nullable": true,
12
+ "description": "The ultimate location where the goods are to be delivered, marking the end point of the shipment's journey."
13
+ },
14
+ "freight": {
15
+ "type": "STRING",
16
+ "nullable": true,
17
+ "description": "The cost type associated with transporting goods. Can be classified as 'prepaid' or 'collect'."
18
+ },
19
+ "hblType": {
20
+ "type": "STRING",
21
+ "nullable": true,
22
+ "description": "The type of House Bill of Lading such as Telex Released, ORIGINAL B/L, EXPRESS, Sur Bill, Sea WayBill, etc., indicating the document issued by a freight forwarder that outlines the terms and details of the shipment."
23
+ },
24
+ "notify": {
25
+ "type": "STRING",
26
+ "nullable": true,
27
+ "description": "The party to be informed upon the arrival of the shipment at the destination, often responsible for coordinating the delivery. Extract the notify details including the address."
28
+ },
29
+ "placeOfReceipt": {
30
+ "type": "STRING",
31
+ "nullable": true,
32
+ "description": "The location where the goods are initially handed over to the freight forwarder or carrier for transportation"
33
+ },
34
+ "portOfDischarge": {
35
+ "type": "STRING",
36
+ "nullable": true,
37
+ "description": "The port where the goods are discharged from the vessel. This is the destination port for the shipment."
38
+ },
39
+ "portOfLoading": {
40
+ "type": "STRING",
41
+ "nullable": true,
42
+ "description": "The origin port where the goods are loaded onto the vessel. Find information like 'Ladehafen' or 'Port of Loading' in the document."
43
+ },
44
+ "shipper": {
45
+ "type": "STRING",
46
+ "nullable": true,
47
+ "description": "The sender or exporter of the goods. It can be found with keywords like Absender, Versender, Shipper, Exporter, Supplier, Seller, etc."
48
+ },
49
+ "containers": {
50
+ "type": "ARRAY",
51
+ "items": {
52
+ "type": "OBJECT",
53
+ "properties": {
54
+ "cargoDescription": {
55
+ "type": "STRING",
56
+ "nullable": true,
57
+ "description": "A brief description of the goods contained within the container. It can be found with goods description, Bezeichnung, goederenomschrijving."
58
+ },
59
+ "marksAndNumbers": {
60
+ "type": "STRING",
61
+ "nullable": true,
62
+ "description": "Identification details printed or attached to packages for easy recognition during handling and customs procedures, ensuring accurate delivery. Extract the details including the numbers."
63
+ },
64
+ "hsCode": {
65
+ "type": "STRING",
66
+ "nullable": true,
67
+ "description": "A numerical code from the Harmonized System used for classifying traded products. It helps in determining tariffs and regulations for the goods being shipped. Extract the full HS code including all digits."
68
+ },
69
+ "containerNumber": {
70
+ "type": "STRING",
71
+ "nullable": true,
72
+ "description": "The unique identifier for each container. It always starts with 4 capital letters and followed by 7 digits. Example: TEMU7972458."
73
+ },
74
+ "containerType": {
75
+ "type": "STRING",
76
+ "nullable": true,
77
+ "description": "The size of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."
78
+ },
79
+ "grossWeight": {
80
+ "type": "STRING",
81
+ "nullable": true,
82
+ "description": "The gross weight of the container. Usually mentioned as G.W or GW, Bruto, or Gross Weight, etc.."
83
+ },
84
+ "nettWeight": {
85
+ "type": "STRING",
86
+ "nullable": true,
87
+ "description": "The net weight of the container. Usually mentioned as N.W or NW, Net Weight, or Netto, Eigenmasse, etc.."
88
+ },
89
+ "measurements": {
90
+ "type": "STRING",
91
+ "nullable": true,
92
+ "description": "The volume of the goods. Usually, it is measured in 'Cubic Meter (cbm)' or dimensions. But volume in 'Cubic Meter (cbm)' is preferred if it’s available in the skus"
93
+ },
94
+ "packageQuantity": {
95
+ "type": "STRING",
96
+ "nullable": true,
97
+ "description": "The quantity of the goods. Usually, the quantity is in pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets (PLT) >> Cartons (CTNS) >> Pieces (PCS). Extract the Larger packaging types that will have a lower count."
98
+ },
99
+ "packagingType": {
100
+ "type": "STRING",
101
+ "nullable": true,
102
+ "description": "The packaging type is the unit of packageQuantity. Example; pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc. Sometimes, the packaging type is available in the column name of the packageQuantity."
103
+ },
104
+ "sealNumber": {
105
+ "type": "STRING",
106
+ "nullable": true,
107
+ "description": "A unique number associated with the container number. But it is not a container number. Usually mentioned as Seal No., Seal Number, Siegelnummer, etc.."
108
+ }
109
+ },
110
+ "required": ["cargoDescription", "containerNumber", "hsCode", "grossWeight", "nettWeight", "packageQuantity", "packagingType"]
111
+ }
112
+ }
113
+ },
114
+ "required": ["shipper", "consignee", "portOfLoading", "portOfDischarge", "placeOfReceipt", "finalDestination", "freight", "hblType", "notify", "containers"]
115
+ }
@@ -1,15 +1,28 @@
1
- Task: Extract data from the provided shipping instruction PDF document and populate the following dictionary based on the given schema.
2
-
3
- ### Instructions:
4
- 1. Extract all data points from the shipping instruction document.
5
- 2. Each extracted data point must be part of a master field called "containers". There may be multiple "containers" entries in the document. Ensure you extract details for all instances.
6
- 3. "Containers" Data Fields:
7
- - Fill in the data fields as per the response schema provided.
8
- - Always search for the Quantity mentioned as pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc...
9
- - If a field such as `containerNumber`, `sealNumber`, 'hsCode' or any other fields are not found within the "containers" section, search for these fields elsewhere in the document. Once located, populate the respective fields in all relevant "containers" entities.
10
- - If the document contains only one container, use the total values for attributes like `grossWeight`, `netWeight`, `measurements`, and `packageQuantity` to populate the single container entry.
11
- - Avoid creating separate entries for these shared attributes; instead, merge the data into the existing "containers" entries.
12
-
13
- 4. Output:
14
- - Return the extracted data in JSON format.
15
- - Exclude all other information from the response.
1
+ <PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. </PERSONA>
2
+
3
+ <TASK> Your task is to extract data from Shipping Instruction documents as per the given response schema structure. </TASK>
4
+
5
+ <CONTEXT>
6
+ The Freight Forwarding company receives Shipping Instructions from customers or shippers.
7
+ These Shipping Instructions contain various details related to shipping information, as well as container data such as goods, HS code, container details, and gross and net weight.
8
+ They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
9
+ Your role is to accurately extract specific entities from these Shipping Instructions to support efficient processing and accurate record-keeping.
10
+ </CONTEXT>
11
+
12
+ <INSTRUCTIONS>
13
+ - Populate fields as defined in the response schema.
14
+ - Multiple container entries may exist; capture all instances under "containers".
15
+ - Use the data field description to understand the context of the data.
16
+
17
+ - "containers" Data Fields: Details of each container on the Shipping Instruction. Make sure to extract each container's information separately.
18
+ - containerNumber: Container Number always starts with 4 letters and is followed by 7 digits (e.g., ABCD1234567, XALU 8593678).
19
+ - cargoDescription: Extract only the description of the goods for the "cargoDescription" but not other information like packing, marks, etc.
20
+ - packageQuantity:
21
+ - Prioritize the "Pallets/PLTS/Cartons/CTNS/Package" over "PCS" count to extract the data for the "packageQuantity".
22
+ - example: If the table has "17CTNS", "9PLTS", "850", "850PCS", prioritize "9PLTS"
23
+ - Do not extract pack-quantity values such as "50PCS/CTN" or "5PC/Box" (these represent the quantity per carton, not the total shipped quantity).
24
+ - packagingType:
25
+ - Extract the unit associated with the "packageQuantity" in the table as the "packagingType".
26
+ - Sometimes the "packagingType" can be found in the column name of the "packageQuantity" column in the table.
27
+
28
+ </INSTRUCTIONS>