data-science-document-ai 1.13.0__py3-none-any.whl → 1.56.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {data_science_document_ai-1.13.0.dist-info → data_science_document_ai-1.56.1.dist-info}/METADATA +7 -2
  2. data_science_document_ai-1.56.1.dist-info/RECORD +60 -0
  3. {data_science_document_ai-1.13.0.dist-info → data_science_document_ai-1.56.1.dist-info}/WHEEL +1 -1
  4. src/constants.py +42 -12
  5. src/constants_sandbox.py +2 -22
  6. src/docai.py +18 -7
  7. src/docai_processor_config.yaml +0 -64
  8. src/excel_processing.py +34 -15
  9. src/io.py +74 -6
  10. src/llm.py +12 -34
  11. src/pdf_processing.py +228 -78
  12. src/postprocessing/common.py +495 -618
  13. src/postprocessing/postprocess_partner_invoice.py +383 -27
  14. src/prompts/library/arrivalNotice/other/placeholders.json +70 -0
  15. src/prompts/library/arrivalNotice/other/prompt.txt +40 -0
  16. src/prompts/library/bookingConfirmation/evergreen/placeholders.json +17 -17
  17. src/prompts/library/bookingConfirmation/evergreen/prompt.txt +1 -0
  18. src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +18 -18
  19. src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +1 -1
  20. src/prompts/library/bookingConfirmation/maersk/placeholders.json +17 -17
  21. src/prompts/library/bookingConfirmation/maersk/prompt.txt +1 -1
  22. src/prompts/library/bookingConfirmation/msc/placeholders.json +17 -17
  23. src/prompts/library/bookingConfirmation/msc/prompt.txt +1 -1
  24. src/prompts/library/bookingConfirmation/oocl/placeholders.json +17 -17
  25. src/prompts/library/bookingConfirmation/oocl/prompt.txt +3 -1
  26. src/prompts/library/bookingConfirmation/other/placeholders.json +17 -17
  27. src/prompts/library/bookingConfirmation/other/prompt.txt +1 -1
  28. src/prompts/library/bookingConfirmation/yangming/placeholders.json +17 -17
  29. src/prompts/library/bookingConfirmation/yangming/prompt.txt +1 -1
  30. src/prompts/library/bundeskasse/other/placeholders.json +113 -0
  31. src/prompts/library/bundeskasse/other/prompt.txt +48 -0
  32. src/prompts/library/commercialInvoice/other/placeholders.json +125 -0
  33. src/prompts/library/commercialInvoice/other/prompt.txt +2 -1
  34. src/prompts/library/customsAssessment/other/placeholders.json +67 -16
  35. src/prompts/library/customsAssessment/other/prompt.txt +24 -37
  36. src/prompts/library/customsInvoice/other/placeholders.json +205 -0
  37. src/prompts/library/customsInvoice/other/prompt.txt +105 -0
  38. src/prompts/library/deliveryOrder/other/placeholders.json +79 -28
  39. src/prompts/library/deliveryOrder/other/prompt.txt +26 -40
  40. src/prompts/library/draftMbl/other/placeholders.json +33 -33
  41. src/prompts/library/draftMbl/other/prompt.txt +34 -44
  42. src/prompts/library/finalMbL/other/placeholders.json +34 -34
  43. src/prompts/library/finalMbL/other/prompt.txt +34 -44
  44. src/prompts/library/packingList/other/placeholders.json +98 -0
  45. src/prompts/library/packingList/other/prompt.txt +1 -1
  46. src/prompts/library/partnerInvoice/other/placeholders.json +165 -45
  47. src/prompts/library/partnerInvoice/other/prompt.txt +82 -44
  48. src/prompts/library/preprocessing/carrier/placeholders.json +0 -16
  49. src/prompts/library/shippingInstruction/other/placeholders.json +115 -0
  50. src/prompts/library/shippingInstruction/other/prompt.txt +28 -15
  51. src/setup.py +73 -63
  52. src/utils.py +207 -30
  53. data_science_document_ai-1.13.0.dist-info/RECORD +0 -55
  54. src/prompts/library/draftMbl/hapag-lloyd/prompt.txt +0 -44
  55. src/prompts/library/draftMbl/maersk/prompt.txt +0 -17
  56. src/prompts/library/finalMbL/hapag-lloyd/prompt.txt +0 -44
  57. src/prompts/library/finalMbL/maersk/prompt.txt +0 -17
@@ -1,61 +1,181 @@
1
1
  {
2
2
  "type": "OBJECT",
3
3
  "properties": {
4
- "bankAccount": {"type": "STRING"},
5
- "contractNumber": {"type": "STRING"},
4
+ "bankAccount": {
5
+ "type": "ARRAY",
6
+ "items": {
7
+ "type": "STRING",
8
+ "nullable": true,
9
+ "description": "The bank account(s) number(s) of the vendor. This is the account to which the payment should be made. Extract all the relevant bank account numbers mentioned in the invoice."
10
+ }
11
+ },
12
+ "contractNumber": {"type": "STRING", "nullable": true,
13
+ "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG."},
6
14
  "currencyExchange": {
7
15
  "type": "OBJECT",
8
16
  "properties": {
9
- "from": {"type": "STRING"},
10
- "fxRate": {"type": "STRING"},
11
- "to": {"type": "STRING"}
17
+ "from": {"type": "STRING",
18
+ "nullable": true,
19
+ "description": "The currency code from which the exchange rate is applied."
20
+ },
21
+ "fxRate": {"type": "STRING",
22
+ "nullable": true,
23
+ "description": "The exchange rate applied to convert the amount from the 'from' currency to the 'to' currency."
24
+ },
25
+ "to": {"type": "STRING",
26
+ "nullable": true,
27
+ "description": "The currency code to which the exchange rate is applied."}
12
28
  }
13
29
  },
14
- "documentType": {"type": "STRING"},
15
- "dueDate": {"type": "STRING"},
16
- "eta": {"type": "STRING"},
17
- "etd": {"type": "STRING"},
18
- "fortoEntity": {"type": "STRING"},
19
- "hblNumber": {"type": "STRING"},
20
- "currencyCode": {"type": "STRING"},
21
- "grandTotal": {"type": "STRING"},
22
- "vatAmount": {"type": "STRING"},
23
- "vatApplicableAmount": {"type": "STRING"},
24
- "vatPercentage": {"type": "STRING"},
25
- "invoiceNumber": {"type": "STRING"},
26
- "issueDate": {"type": "STRING"},
30
+ "documentType": {"type": "STRING", "nullable": true},
31
+ "dueDate": {"type": "STRING", "nullable": true,
32
+ "description": "The date by which the payment should be made by Forto Logistics SE & Co KG. Do Not calculate dueDate based on issueDate or any other date. Extract it directly from the invoice."},
33
+ "eta": {"type": "STRING", "nullable": true,
34
+ "description": "Estimated Time of Arrival (ETA) is the expected date when the shipment will arrive at its destination."},
35
+ "etd": {"type": "STRING", "nullable": true,
36
+ "description": "Estimated Time of Departure (ETD) is the expected date when the shipment will leave the origin port."},
37
+ "fortoEntity": {"type": "STRING", "nullable": true,
38
+ "description": "The entity of 'Forto Logistics SE & Co KG' that is responsible for the invoice. The Forto organization or branch managing the shipment."
39
+ },
40
+ "hblNumber": {"type": "STRING", "nullable": true,
41
+ "description": "House Bill of Lading number, a document issued by a freight forwarder."
42
+ },
43
+ "currencyCode": {"type": "STRING", "nullable": true,
44
+ "description": "The currency code in which the invoice is issued, such as EUR, USD, etc."
45
+ },
46
+ "grandTotal": {"type": "STRING", "nullable": true,
47
+ "description": "The total amount of the invoice, including all line items and taxes."
48
+ },
49
+ "vatAmount": {"type": "STRING", "nullable": true,
50
+ "description": "The total VAT amount applied to the invoice. This is the tax charged on the vatApplicableAmount of the invoice. Bitte Zahlen is not the vatAmount."
51
+ },
52
+ "vatApplicableAmount": {"type": "STRING", "nullable": true,
53
+ "description": "The amount on which VAT is applicable. This is the net amount before VAT is applied (without VAT)."
54
+ },
55
+ "vatPercentage": {"type": "STRING", "nullable": true,
56
+ "description": "The percentage rate of VAT applied to the vatApplicableAmount. This is used to calculate the vatAmount."
57
+ },
58
+ "invoiceNumber": {"type": "STRING", "nullable": true,
59
+ "description": "The unique identifier for the invoice. This is used to track and reference the invoice in financial records."
60
+ },
61
+ "issueDate": {"type": "STRING", "nullable": true,
62
+ "description": "The date when the invoice was issued."
63
+ },
27
64
  "lineItem": {
28
65
  "type": "ARRAY",
29
66
  "items": {
30
67
  "type": "OBJECT",
31
68
  "properties": {
32
- "uniqueId": {"type": "STRING"},
33
- "lineItemDescription": {"type": "STRING"},
34
- "totalAmount": {"type": "STRING"},
35
- "totalAmountCurrency": {"type": "STRING"},
36
- "totalAmountEuro": {"type": "STRING"},
37
- "quantity": {"type": "STRING"},
38
- "unitPrice": {"type": "STRING"},
39
- "unitPriceCurrency": {"type": "STRING"},
40
- "vatAmount": {"type": "STRING"},
41
- "vatPercentage": {"type": "STRING"},
42
- "containerNumber": {"type": "STRING"},
43
- "containerSize": {"type": "STRING"}
69
+ "uniqueId": {
70
+ "type": "STRING",
71
+ "nullable": true,
72
+ "description": "A line item can belong to a different shipment. Hence, extract the unique IDs that appear only at the line item level. UniqueIds are containerNumber, shipmentId, or sealNumber."
73
+ },
74
+ "lineItemDescription": {"type": "STRING", "nullable": true,
75
+ "description": "A description of the line item (COGS or Customs line items), which can include details about the service provided."},
76
+ "totalAmount": {"type": "STRING", "nullable": true,
77
+ "description": "The total amount for the line item, which may include the cost of services, and applicable taxes."},
78
+ "totalAmountCurrency": {"type": "STRING", "nullable": true,
79
+ "description": "The currency code for the total amount, such as EUR, USD, etc."},
80
+ "totalAmountEuro": {"type": "STRING", "nullable": true,
81
+ "description": "The total amount converted to Euro, if applicable. You can find it by looking for the term 'Total EUR' or 'Amount in Euro' in the line item."},
82
+ "quantity": {"type": "STRING", "nullable": true,
83
+ "description": "The quantity of the item or service provided in the line item."},
84
+ "unitPrice": {"type": "STRING", "nullable": true,
85
+ "description": "The price per unit of the item or service in the line item. Check the naming in different languages, such as 'Einzelpreis', 'Unit Price', 'Prezzo unitario', 'Preis pro Einheit', etc. Refer to the 'Prezzo unitario' field in the Italian invoice example."},
86
+ "unitPriceCurrency": {"type": "STRING", "nullable": true,
87
+ "description": "The currency code for the unit price, such as EUR, USD, etc."},
88
+ "vatAmount": {"type": "STRING", "nullable": true,
89
+ "description": "The VAT amount applied to the line item. This is the tax charged on the totalAmount of the line item."},
90
+ "vatPercentage": {"type": "STRING", "nullable": true,
91
+ "description": "The percentage rate of VAT applied to the totalAmount of the line item. This is used to calculate the vatAmount."
92
+ },
93
+ "containerNumber": {"type": "STRING", "nullable": true,
94
+ "description": "The container number associated with the line item. containerNumber MUST start with 4 letters followed by 7 digits (e.g., CMAU1234567)"},
95
+ "containerSize": {"type": "STRING", "nullable": true,
96
+ "description": "The size of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."}
44
97
  }
45
- }
98
+ },
99
+ "required": [
100
+ "uniqueId",
101
+ "lineItemDescription",
102
+ "totalAmount",
103
+ "totalAmountCurrency",
104
+ "totalAmountEuro",
105
+ "unitPrice",
106
+ "unitPriceCurrency",
107
+ "vatAmount",
108
+ "vatPercentage",
109
+ "containerNumber",
110
+ "containerSize"
111
+ ]
112
+ },
113
+ "mblNumber": {"type": "STRING", "nullable": true,
114
+ "description": "Bill of Lading number (B/L NO.), a document issued by the carrier."
115
+ },
116
+ "partnerReference": {"type": "STRING", "nullable": true,
117
+ "description": "A partnerReference can be a shipment ID. It starts with 'S' followed by 6 or 8 digits (e.g., 'S1234567')."
46
118
  },
47
- "mblNumber": {"type": "STRING"},
48
- "partnerReference": {"type": "STRING"},
49
- "paymentTerm": {"type": "STRING"},
50
- "portOfDischarge": {"type": "STRING"},
51
- "portOfLoading": {"type": "STRING"},
52
- "recipientAddress": {"type": "STRING"},
53
- "recipientName": {"type": "STRING"},
54
- "serviceDate": {"type": "STRING"},
55
- "vatId": {"type": "STRING"},
56
- "vendorAddress": {"type": "STRING"},
57
- "vendorName": {"type": "STRING"},
58
- "reverseChargeSentence": {"type": "STRING"}
119
+ "paymentTerm": {"type": "STRING", "nullable": true,
120
+ "description": "The payment term indicates the conditions under which the payment should be made. E.g., 'In 10 TAGEN', '14 TAGEN', '14 days', etc."},
121
+ "portOfDischarge": {"type": "STRING", "nullable": true,
122
+ "description": "The port where the goods are discharged from the vessel. This is the destination port for the shipment."},
123
+ "portOfLoading": {"type": "STRING", "nullable": true,
124
+ "description": "The origin port where the goods are loaded onto the vessel. Find information like 'Ladehafen' or 'Port of Loading' in the invoice."},
125
+ "recipientAddress": {"type": "STRING", "nullable": true,
126
+ "description": "Most of the time, it is the address of 'Forto Logistics SE & Co KG'. The address depends on the entity."},
127
+ "recipientName": {"type": "STRING", "nullable": true,
128
+ "description": "The name of the recipient who is responsible for making the payment. This is often the 'Forto Logistics SE & Co KG' entity or partner."},
129
+ "serviceDate": {"type": "STRING", "nullable": true,
130
+ "description": "The date when the service was provided. If Service date is not available in the invoice, Estimated Time of Arrival (ETA) can be used."},
131
+ "vatId": {"type": "STRING", "nullable": true,
132
+ "description": "The VAT ID of the vendor. This is used for tax purposes and to identify the vendor in financial transactions."},
133
+ "vendorAddress": {"type": "STRING", "nullable": true,
134
+ "description": "The address of the vendor to whom the payment should be made."},
135
+ "vendorName": {"type": "STRING", "nullable": true,
136
+ "description": "The name of the vendor to whom the payment should be made. Extract the main vendor details in case the invoice contains 'As Agent For'."},
137
+ "agentName": {
138
+ "type": "STRING",
139
+ "nullable": true,
140
+ "description": "The name of the agent or intermediary involved in the transaction, if applicable."},
141
+ "agentKeyWord": {
142
+ "type": "STRING",
143
+ "nullable": true,
144
+ "description": "A keyword or phrase that indicates the presence of an agent or intermediary in the transaction, such as 'As Agent For', 'Acting Agent', 'Issuing agent', 'Contact Agent', or similar words."},
145
+
146
+ "reverseChargeSentence": {
147
+ "type": "STRING",
148
+ "nullable": true,
149
+ "description": "A sentence which indicates that the reverse charge applies. Mostly found as VAT/Tax Clause."
150
+ }
151
+
59
152
  },
60
- "required": []
61
- }
153
+ "required": [
154
+ "bankAccount",
155
+ "contractNumber",
156
+ "currencyExchange",
157
+ "documentType",
158
+ "eta",
159
+ "etd",
160
+ "fortoEntity",
161
+ "currencyCode",
162
+ "grandTotal",
163
+ "vatAmount",
164
+ "vatApplicableAmount",
165
+ "vatPercentage",
166
+ "invoiceNumber",
167
+ "issueDate",
168
+ "lineItem",
169
+ "mblNumber",
170
+ "partnerReference",
171
+ "paymentTerm",
172
+ "portOfDischarge",
173
+ "portOfLoading",
174
+ "recipientAddress",
175
+ "recipientName",
176
+ "serviceDate",
177
+ "vatId",
178
+ "vendorAddress",
179
+ "vendorName"
180
+ ]
181
+ }
@@ -1,53 +1,91 @@
1
- You are a document entity extraction specialist. Your task is to extract data from shipment and invoice documents.
2
- These documents contain critical information for invoices and cost items.
1
+ <PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. </PERSONA>
2
+
3
+ <TASK> Your task is to extract data from invoice documents as per the given response schema structure. </TASK>
4
+
5
+ <CONTEXT>
6
+ The Freight Forwarding company receives invoices from Carrier (Shipping Lines) partners and Customs Brokers. These include Partner Invoices (COGS Invoices) and COGS Customs Invoices.
7
+ These invoices contain various details related to shipments, cost line items, vendor and recipient information, as well as other financial data.
8
+ They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
9
+ Your role is to accurately extract specific entities from these invoices to support efficient processing and accurate record-keeping.
10
+ </CONTEXT>
11
+
12
+
13
+ <INSTRUCTIONS>
14
+ - Populate fields as defined in the response schema.
15
+ - Multiple line item entries may exist, capture all instances under "lineItem".
16
+ - Use the data field description to understand the context of the data.
3
17
 
4
- Entities to Extract:
5
- - bankAccount: The bank account number associated with the transaction.
6
18
  - contractNumber: The reference number for the contract related to the shipment.
7
19
  - currencyExchange: Details about currency conversion involved
8
20
  - from: The original currency code.
9
- - fxRate: The exchange rate applied.
21
+ - fxRate: The exchange rate applied. In most cases, the value is between 0.0 and 1.5 and rarely exceeds 1.5.
10
22
  - to: The target currency code.
11
- - documentType: The classification of the document.
12
- - dueDate: The date by which payment or action is due.
13
- - eta: The estimated time of arrival for the shipment.
14
- - etd: The estimated time of departure for the shipment.
15
- - fortoEntity: The entity responsible for the shipment.
16
- - hblNumber: The House Bill of Lading number.
17
- - currencyCode: The currency in which the invoice is issued.
18
- - grandTotal: The overall total amount.
19
- - vatAmount: The value-added tax amount.
20
- - vatApplicableAmount: The amount to which VAT is applicable.
21
- - vatPercentage: The percentage rate of VAT applied.
22
- - invoiceNumber: The unique identifier for the invoice.
23
+
24
+ - fortoEntity: The entity responsible for the shipment. It could be Forto branch from a different country. If branch is not available, it could be "Forto Logistics SE & Co KG".
25
+ - hblNumber: The House Bill of Lading number. Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", or "HBL No.".
26
+
27
+ - grandTotal:
28
+ - The overall total Gross amount. Including all line items, taxes, and fees.
29
+
30
+ - vatApplicableAmount:
31
+ - Do not get confused with the prepaid and due amount. zu zahlen or Bitte Zahlen is not the vatApplicableAmount.
32
+ - vatApplicableAmount is the Net Amount of the invoice. Basically the total amount before VAT is applied. You can find it in the invoice total section and sometimes as "Netto"
33
+ - If vatAmount is not applied, then vatApplicableAmount can be same as grandTotal.
34
+ - For invoices from SGS maco customs service, "Total Kosten excl. MwSt." is not the vatApplicableAmount.
35
+
36
+ - vatAmount:
37
+ - The total VAT amount applied to the invoice. This is the tax charged on the vatApplicableAmount of the invoice.
38
+ - Do not get confused with the prepaid and due amount.
39
+ - Do not extract vatAmount from the line items, it should be extracted from the invoice total section.
40
+
41
+ - vatId: Extract Vendor's VAT ID. "DE348812473" is Forto's VAT ID in Germany. Do not confuse it with the vendor's VAT ID.
42
+ - Remove "TVA" characters from VAT ID in MSC invoices. For example, CHE-111.954.803 TVA should be extracted as CHE-111.954.803
43
+
23
44
  - issueDate: The date the document was issued.
24
- - lineItem: Details of each item on the invoice
25
- - uniqueId: A unique which associated with the lineItem. Either a shipmentId starting with an S and followed by 6 or 7 numeric values or a mblNumber. If shipmentId or mblNumber does not exist, set it to containerNumber.
26
- - lineItemDescription: The name or description of the item.
27
- - totalAmount: The total amount for the item.
28
- - totalAmountCurrency: The currency of the total amount.
29
- - totalAmountEuro: The total amount converted to Euros.
30
- - quantity: The unit of measurement for the item.
31
- - unitPrice: The price per unit.
32
- - unitPriceCurrency: The currency of the unit price.
33
- - vatAmount: The VAT amount for the item.
34
- - vatPercentage: The VAT percentage rate for the item.
35
- - containerNumber: The unique identifier for each container.
36
- - containerSize: The size classification of the container.
37
- - mblNumber: The Master Bill of Lading number.
38
- - partnerReference: The reference number for the partner involved.
39
- - paymentTerm: The terms of payment agreed upon.
40
- - pod: The port of discharge.
41
- - pol: The port of loading.
42
- - recipientAddress: The address of the recipient.
43
- - recipientName: The name of the recipient.
44
- - serviceDate: The date of service or transaction.
45
- - vatId: The VAT identification number.
46
- - vendorAddress: The address of the vendor.
47
- - vendorName: The name of the vendor.
45
+ - dueDate: The date by which the payment should be made. Do Not calculate dueDate based on issueDate or any other date. Extract it directly from the invoice.
46
+
47
+ - eta and etd: A few invoices contain the same date for ARRIVED/DEPARTED or ETA/ETD. Extract it for both eta and etd.
48
+
49
+ - lineItem: Details of each COGS and Customs line item on the invoice from each page. Make sure to extract each amount and currency separately.
50
+ - uniqueId: A unique id which is associated with the lineItem, as each line item can belong to a different shipment. Extract only if it's available in the line item. Either a shipmentId starting with an S and followed by 6 or 8 numeric values or a mblNumber. If shipmentId or mblNumber does not exist, set it to containerNumber.
51
+ - lineItemDescription: The name or description of the item. Usually, it will be a one line sentence.
52
+ - unitPrice: Even if the quantity is not mentioned, you can still extract the unit price. Check the naming of the columns in different languages, it can be "Unit Price", "Prezzo unitario", "Prix Unitaire", "Unitario", etc. Refer to the "Prezzo unitario" field in the Italian invoice example.
53
+ - totalAmount: The total amount for the item. It can be in different currencies, so ensure to capture the currency as well for the totalAmountCurrency.
54
+ - totalAmountEuro: A few line items contain a total amount in Euro. You can find it by looking for the term "Total EUR" or "Amount in Euro" in the line item, but it's always in the EURO / € currency. Sometimes, it can be the same as totalAmount if the line item is already in Euro.
55
+ - quantity: The quantity of the item or service provided in the line item. Pay attention to 2 x 40HC or 2x40HC. It means, quantity is 2 and 40HC is containerSize but not 240.
56
+ - containerNumber: Container Number always starts with 4 letters and is followed by 7 digits (e.g., ABCD1234567, XALU 8593678).
57
+
58
+ - hblNumber and mblNumber:
59
+ - The Master Bill of Lading number. Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", or "HBL No.".
60
+ - Do not confuse with the containerNumber that always starts with 4 letters and is followed by 7 digits (e.g., SEGU3090389). This is not the mblNumber or hblNumber.
61
+ - partnerReference:
62
+ - Shipment_ID can be a reference number for the partner. Shipment_ID always starts with "S" followed by 6 or 8 digits (e.g., S2654361).
63
+ - If Shipment_ID is not available, extract any Booking Number as partnerReference.
64
+
65
+ - vendorName and vendorAddress:
66
+ - Extract the name and address of the vendor who is the main parent company in the invoice. Do not extract the agent name and address as the vendorName or vendorAddress.
67
+ - If the invoice contains phrases such as "As Agent For" or "As Agent Of" in the vendor name or address, extract the main vendor’s details (the entity after / in front of the phrase) and ignore the local agent’s details.
68
+ - Example:
69
+ - "COSCO SHIPPING Lines Italy, Poland, or France S.R.L. – Genova Office – As Agent For COSCO SHIPPING Lines Co.,Ltd."
70
+ - vendorName: COSCO SHIPPING Lines Co.,Ltd.
71
+ - From Hapag-Lloyd invoices, look for "Ballindamm 25" address to extract the vendorAddress.
72
+
73
+ - agentName: Name of the agent. Agencies are offices authorized to act on behalf of a company. These details are usually available in the invoice, including the branch name of the parent company.
74
+ - agentKeyWord:
75
+ - A keyword or phrase that indicates the presence of an agent or intermediary in the transaction, such as 'As Agent For', 'Acting Agent', 'Acting on behalf of', 'Issuing agent', 'Contact Agent', 'Service rendered by', 'C/O as agent for', 'Invoice issued in the name of', 'Diese Rechnung wird von uns als Agent im Namen und für Rechnung der XX ausgestellt', or similar words.
76
+ - Extract this field only if the agentName is present in the invoice. Do not assume from the references given above.
77
+
78
+ - recipientName and recipientAddress: This is often the 'Forto Logistics SE & Co KG' entity or partner, and its address. The address depends on the Forto entity responsible for the shipment.
79
+
80
+ - serviceDate: The date of service provided. If the serviceDate is not specifically mentioned in the invoice, you can use the ETA of the shipment as a serviceDate.
48
81
  - reverseChargeSentence: A sentence which indicates that the reverse charge applies. Mostly found as Tax Clause.
49
82
 
50
- Important Note:
51
- - Ensure all extracted values are directly from the document.
83
+ IMPORTANT NOTE:
84
+ - Ensure all extracted values are directly from the document. Do not make assumptions, modifications or calculations.
85
+ - CustomSized invoices contain line items in a table format in the attached page. Table with headings Shipment ID, Partner Line Item Description, Quantity, Amount, and VAT. Extract all the line items from such tables from each page.
86
+ - Do not split the quantity into different line items. e.g., if quantity is 2 or 2 CTR or 2 BIL, do not create 2 separate line items with quantity 1 each.
52
87
  - Do not normalize or modify any entity values.
53
- - If an entity is not found in the document, set the entity value to null.
88
+ - Pay attention to the line item details and paymentInformation, as they may vary significantly across different invoices.
89
+
90
+
91
+ </INSTRUCTIONS>
@@ -10,21 +10,5 @@
10
10
  "OOCL",
11
11
  "Other"
12
12
  ]
13
- },
14
- "finalMbL": {
15
- "type": "string",
16
- "enum": [
17
- "Hapag-Lloyd",
18
- "Maersk",
19
- "Other"
20
- ]
21
- },
22
- "draftMbl": {
23
- "type": "string",
24
- "enum": [
25
- "Hapag-Lloyd",
26
- "Maersk",
27
- "Other"
28
- ]
29
13
  }
30
14
  }
@@ -0,0 +1,115 @@
1
+ {
2
+ "type": "OBJECT",
3
+ "properties": {
4
+ "consignee": {
5
+ "type": "STRING",
6
+ "nullable": true,
7
+ "description": "The receiver or buyer of the goods. It can be found with keywords like Importeur, Anmelder, Empfanger, Consignee, Buyer, Receiver, etc."
8
+ },
9
+ "finalDestination": {
10
+ "type": "STRING",
11
+ "nullable": true,
12
+ "description": "The ultimate location where the goods are to be delivered, marking the end point of the shipment's journey."
13
+ },
14
+ "freight": {
15
+ "type": "STRING",
16
+ "nullable": true,
17
+ "description": "The cost type associated with transporting goods. Can be classified as 'prepaid' or 'collect'."
18
+ },
19
+ "hblType": {
20
+ "type": "STRING",
21
+ "nullable": true,
22
+ "description": "The type of House Bill of Lading such as Telex Released, ORIGINAL B/L, EXPRESS, Sur Bill, Sea WayBill, etc., indicating the document issued by a freight forwarder that outlines the terms and details of the shipment."
23
+ },
24
+ "notify": {
25
+ "type": "STRING",
26
+ "nullable": true,
27
+ "description": "The party to be informed upon the arrival of the shipment at the destination, often responsible for coordinating the delivery. Extract the notify details including the address."
28
+ },
29
+ "placeOfReceipt": {
30
+ "type": "STRING",
31
+ "nullable": true,
32
+ "description": "The location where the goods are initially handed over to the freight forwarder or carrier for transportation"
33
+ },
34
+ "portOfDischarge": {
35
+ "type": "STRING",
36
+ "nullable": true,
37
+ "description": "The port where the goods are discharged from the vessel. This is the destination port for the shipment."
38
+ },
39
+ "portOfLoading": {
40
+ "type": "STRING",
41
+ "nullable": true,
42
+ "description": "The origin port where the goods are loaded onto the vessel. Find information like 'Ladehafen' or 'Port of Loading' in the invoice."
43
+ },
44
+ "shipper": {
45
+ "type": "STRING",
46
+ "nullable": true,
47
+ "description": "The sender or exporter of the goods. It can be found with keywords like Absender, Versender, Shipper, Exporter, Supplier, Seller, etc."
48
+ },
49
+ "containers": {
50
+ "type": "ARRAY",
51
+ "items": {
52
+ "type": "OBJECT",
53
+ "properties": {
54
+ "cargoDescription": {
55
+ "type": "STRING",
56
+ "nullable": true,
57
+ "description": "A brief description of the goods contained within the container. It can be found with goods description, Bezeichnung, goederenomschrijving."
58
+ },
59
+ "marksAndNumbers": {
60
+ "type": "STRING",
61
+ "nullable": true,
62
+ "description": "Identification details printed or attached to packages for easy recognition during handling and customs procedures, ensuring accurate delivery. Extract the details including the numbers."
63
+ },
64
+ "hsCode": {
65
+ "type": "STRING",
66
+ "nullable": true,
67
+ "description": "A numerical code from the Harmonized System used for classifying traded products. It helps in determining tariffs and regulations for the goods being shipped. Extract the full HS code including all digits."
68
+ },
69
+ "containerNumber": {
70
+ "type": "STRING",
71
+ "nullable": true,
72
+ "description": "The unique identifier for each container. It always starts with 4 capital letters and followed by 7 digits. Example: TEMU7972458."
73
+ },
74
+ "containerType": {
75
+ "type": "STRING",
76
+ "nullable": true,
77
+ "description": "The size of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."
78
+ },
79
+ "grossWeight": {
80
+ "type": "STRING",
81
+ "nullable": true,
82
+ "description": "The gross weight of the container. Usually mentioned as G.W or GW, Bruto, or Gross Weight, etc.."
83
+ },
84
+ "nettWeight": {
85
+ "type": "STRING",
86
+ "nullable": true,
87
+ "description": "The net weight of the container. Usually mentioned as N.W or NW, Net Weight, or Netto, Eigenmasse, etc.."
88
+ },
89
+ "measurements": {
90
+ "type": "STRING",
91
+ "nullable": true,
92
+ "description": "The volume of the goods. Usually, it is measured in 'Cubic Meter (cbm)' or dimensions. But volume in 'Cubic Meter (cbm)' is preferred if it’s available in the skus"
93
+ },
94
+ "packageQuantity": {
95
+ "type": "STRING",
96
+ "nullable": true,
97
+ "description": "The quantity of the goods. Usually, the quantity is in pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets (PLT) >> Cartons (CTNS) >> Pieces (PCS). Extract the Larger packaging types that will have a lower count."
98
+ },
99
+ "packagingType": {
100
+ "type": "STRING",
101
+ "nullable": true,
102
+ "description": "The packaging type is the unit of packageQuantity. Example; pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc. Sometimes, the packaging type is available in the column name of the packageQuantity."
103
+ },
104
+ "sealNumber": {
105
+ "type": "STRING",
106
+ "nullable": true,
107
+ "description": "A unique number associated with the container number. But it is not a container number. Usually mentioned as Seal No., Seal Number, Siegelnummer, etc.."
108
+ }
109
+ },
110
+ "required": ["cargoDescription", "containerNumber", "hsCode", "grossWeight", "nettWeight", "packageQuantity", "packagingType"]
111
+ }
112
+ }
113
+ },
114
+ "required": ["shipper", "consignee", "portOfLoading", "portOfDischarge", "placeOfReceipt", "finalDestination", "freight", "hblType", "notify", "containers"]
115
+ }
@@ -1,15 +1,28 @@
1
- Task: Extract data from the provided shipping instruction PDF document and populate the following dictionary based on the given schema.
2
-
3
- ### Instructions:
4
- 1. Extract all data points from the shipping instruction document.
5
- 2. Each extracted data point must be part of a master field called "containers". There may be multiple "containers" entries in the document. Ensure you extract details for all instances.
6
- 3. "Containers" Data Fields:
7
- - Fill in the data fields as per the response schema provided.
8
- - Always search for the Quantity mentioned as pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc...
9
- - If a field such as `containerNumber`, `sealNumber`, 'hsCode' or any other fields are not found within the "containers" section, search for these fields elsewhere in the document. Once located, populate the respective fields in all relevant "containers" entities.
10
- - If the document contains only one container, use the total values for attributes like `grossWeight`, `netWeight`, `measurements`, and `packageQuantity` to populate the single container entry.
11
- - Avoid creating separate entries for these shared attributes; instead, merge the data into the existing "containers" entries.
12
-
13
- 4. Output:
14
- - Return the extracted data in JSON format.
15
- - Exclude all other information from the response.
1
+ <PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. </PERSONA>
2
+
3
+ <TASK> Your task is to extract data from Shipping Instruction documents as per the given response schema structure. </TASK>
4
+
5
+ <CONTEXT>
6
+ The Freight Forwarding company receives Shipping Instructions from customers or shippers.
7
+ These Shipping Instructions contain various details related to shipping information, as well as container data such as goods, HS code, container details, and gross and net weight.
8
+ They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
9
+ Your role is to accurately extract specific entities from these Shipping Instructions to support efficient processing and accurate record-keeping.
10
+ </CONTEXT>
11
+
12
+ <INSTRUCTIONS>
13
+ - Populate fields as defined in the response schema.
14
+ - Multiple Container entries may exist, capture all instances under "containers".
15
+ - Use the data field description to understand the context of the data.
16
+
17
+ - "containers" Data Fields: Details of each container on the Shipping Instruction. Make sure to extract each container information separately.
18
+ - containerNumber: Container Number always starts with 4 letters and is followed by 7 digits (e.g., ABCD1234567, XALU 8593678).
19
+ - cargoDescription: Extract only the description of the goods for the "cargoDescription" but not other information like packing, marks, etc.
20
+ - packageQuantity:
21
+ - Prioritize the "Pallets/PLTS/Cartons/CTNS/Package" over "PCS" count to extract the data for the "packageQuantity".
22
+ - example: If the table has "17CTNS", "9PLTS", "850", "850PCS", prioritize "9PLTS"
23
+ - Do not extract the pack Quantity field such as "50PCS/CTN", "5PC/Box" (these represent quantity per carton, not total shipped quantity).
24
+ - packagingType:
25
+ - Extract the unit associated with the "packageQuantity" in the table as the "packagingType".
26
+ - Sometimes the "packagingType" can be found in the column name of the "packageQuantity" in the table.
27
+
28
+ </INSTRUCTIONS>