data-science-document-ai 1.13.0__py3-none-any.whl → 1.56.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {data_science_document_ai-1.13.0.dist-info → data_science_document_ai-1.56.1.dist-info}/METADATA +7 -2
  2. data_science_document_ai-1.56.1.dist-info/RECORD +60 -0
  3. {data_science_document_ai-1.13.0.dist-info → data_science_document_ai-1.56.1.dist-info}/WHEEL +1 -1
  4. src/constants.py +42 -12
  5. src/constants_sandbox.py +2 -22
  6. src/docai.py +18 -7
  7. src/docai_processor_config.yaml +0 -64
  8. src/excel_processing.py +34 -15
  9. src/io.py +74 -6
  10. src/llm.py +12 -34
  11. src/pdf_processing.py +228 -78
  12. src/postprocessing/common.py +495 -618
  13. src/postprocessing/postprocess_partner_invoice.py +383 -27
  14. src/prompts/library/arrivalNotice/other/placeholders.json +70 -0
  15. src/prompts/library/arrivalNotice/other/prompt.txt +40 -0
  16. src/prompts/library/bookingConfirmation/evergreen/placeholders.json +17 -17
  17. src/prompts/library/bookingConfirmation/evergreen/prompt.txt +1 -0
  18. src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +18 -18
  19. src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +1 -1
  20. src/prompts/library/bookingConfirmation/maersk/placeholders.json +17 -17
  21. src/prompts/library/bookingConfirmation/maersk/prompt.txt +1 -1
  22. src/prompts/library/bookingConfirmation/msc/placeholders.json +17 -17
  23. src/prompts/library/bookingConfirmation/msc/prompt.txt +1 -1
  24. src/prompts/library/bookingConfirmation/oocl/placeholders.json +17 -17
  25. src/prompts/library/bookingConfirmation/oocl/prompt.txt +3 -1
  26. src/prompts/library/bookingConfirmation/other/placeholders.json +17 -17
  27. src/prompts/library/bookingConfirmation/other/prompt.txt +1 -1
  28. src/prompts/library/bookingConfirmation/yangming/placeholders.json +17 -17
  29. src/prompts/library/bookingConfirmation/yangming/prompt.txt +1 -1
  30. src/prompts/library/bundeskasse/other/placeholders.json +113 -0
  31. src/prompts/library/bundeskasse/other/prompt.txt +48 -0
  32. src/prompts/library/commercialInvoice/other/placeholders.json +125 -0
  33. src/prompts/library/commercialInvoice/other/prompt.txt +2 -1
  34. src/prompts/library/customsAssessment/other/placeholders.json +67 -16
  35. src/prompts/library/customsAssessment/other/prompt.txt +24 -37
  36. src/prompts/library/customsInvoice/other/placeholders.json +205 -0
  37. src/prompts/library/customsInvoice/other/prompt.txt +105 -0
  38. src/prompts/library/deliveryOrder/other/placeholders.json +79 -28
  39. src/prompts/library/deliveryOrder/other/prompt.txt +26 -40
  40. src/prompts/library/draftMbl/other/placeholders.json +33 -33
  41. src/prompts/library/draftMbl/other/prompt.txt +34 -44
  42. src/prompts/library/finalMbL/other/placeholders.json +34 -34
  43. src/prompts/library/finalMbL/other/prompt.txt +34 -44
  44. src/prompts/library/packingList/other/placeholders.json +98 -0
  45. src/prompts/library/packingList/other/prompt.txt +1 -1
  46. src/prompts/library/partnerInvoice/other/placeholders.json +165 -45
  47. src/prompts/library/partnerInvoice/other/prompt.txt +82 -44
  48. src/prompts/library/preprocessing/carrier/placeholders.json +0 -16
  49. src/prompts/library/shippingInstruction/other/placeholders.json +115 -0
  50. src/prompts/library/shippingInstruction/other/prompt.txt +28 -15
  51. src/setup.py +73 -63
  52. src/utils.py +207 -30
  53. data_science_document_ai-1.13.0.dist-info/RECORD +0 -55
  54. src/prompts/library/draftMbl/hapag-lloyd/prompt.txt +0 -44
  55. src/prompts/library/draftMbl/maersk/prompt.txt +0 -17
  56. src/prompts/library/finalMbL/hapag-lloyd/prompt.txt +0 -44
  57. src/prompts/library/finalMbL/maersk/prompt.txt +0 -17
@@ -1,19 +1,70 @@
1
1
  {
2
- "SCHEMA_PLACEHOLDER": {
3
- "consignee": "",
4
- "shipper":"",
5
- "countryOfOrigin": "",
6
- "MRN": "",
7
- "totalValueOfGoods": "",
8
- "containers": [
9
- {
10
- "containerNumber": "",
11
- "grossWeight": "",
12
- "goodsDescription": "",
13
- "nettWeight": "",
14
- "packagingNumber": "",
15
- "packagingType": ""
2
+ "type": "OBJECT",
3
+ "properties": {
4
+ "consignee": {
5
+ "type": "STRING",
6
+ "nullable": true,
7
+ "description": "The receiver or buyer of the goods. It can be found with keywords like Importeur, Anmelder, Empfanger, Consignee, Buyer, Receiver, etc.."
8
+ },
9
+ "countryOfOrigin": {
10
+ "type": "STRING",
11
+ "nullable": true,
12
+ "description": "The country where the goods were manufactured or produced. It can be identified as Land van oorsprong, Ursprungsland in the document."
13
+ },
14
+ "MRN": {
15
+ "type": "STRING",
16
+ "nullable": true,
17
+ "description": "Movement Reference Number (MRN) is a unique identifier assigned to each customs declaration for goods being imported or exported within the European Union (EU). It is used to track and monitor the movement of goods across EU member states. It can be found with MRN, Reg. Nr., Reg. Kennzeigechen, etc.."
18
+ },
19
+ "shipper": {
20
+ "type": "STRING",
21
+ "nullable": true,
22
+ "description": "The seller or shipper of the goods. It is often indicated by the term Shipper, Speditore, Esportatore, Exporteur, Versender."
23
+ },
24
+ "totalValueOfGoods": {
25
+ "type": "STRING",
26
+ "nullable": true,
27
+ "description": "The total monetary value of the goods being shipped, usually declared for customs purposes. It can be found with Waarde, Warenwert, Factuurwaarde, Invoice Value, etc.."
28
+ },
29
+ "containers": {
30
+ "type": "ARRAY",
31
+ "items": {
32
+ "type": "OBJECT",
33
+ "properties": {
34
+ "containerNumber": {
35
+ "type": "STRING",
36
+ "nullable": true,
37
+ "description": "The unique identifier for each container. It always starts with 4 capital letters and followed by 7 digits. Example: TEMU7972458."
38
+ },
39
+ "goodsDescription": {
40
+ "type": "STRING",
41
+ "nullable": true,
42
+ "description": "A brief description of the goods contained within the container. It can be found with goods description, Bezeichnung, goederenomschrijving."
43
+ },
44
+ "grossWeight": {
45
+ "type": "STRING",
46
+ "nullable": true,
47
+ "description": "The gross weight of the container. Usually mentioned as G.W or GW, Bruto, or Gross Weight, etc.."
48
+ },
49
+ "nettWeight": {
50
+ "type": "STRING",
51
+ "nullable": true,
52
+ "description": "The net weight of the goods inside the container. Usually mentioned as N.W or NW, Net Weight, or Netto, Eigenmasse, etc.."
53
+ },
54
+ "packagingNumber": {
55
+ "type": "STRING",
56
+ "nullable": true,
57
+ "description": "The quantity of the goods. Usually, the quantity is in pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets (PLT) >> Cartons (CTNS) >> Pieces (PCS). Extract the Larger packaging types that will have a lower count."
58
+ },
59
+ "packagingType": {
60
+ "type": "STRING",
61
+ "nullable": true,
62
+ "description": "The packaging type is the unit of packagingNumber. Example; pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc. Sometimes, the packaging type is available in the column name of the packagingNumber."
63
+ }
64
+ },
65
+ "required": ["containerNumber", "goodsDescription", "grossWeight", "nettWeight", "packagingNumber", "packagingType"]
66
+ }
16
67
  }
17
- ]
18
- }
68
+ },
69
+ "required": ["countryOfOrigin", "MRN", "totalValueOfGoods", "containers"]
19
70
  }
@@ -1,42 +1,29 @@
1
- You are a document entity extraction specialist. Your task is to extract data from a customs assessment document.
2
- Customs assessment contain necessary information about imported / exported goods and containers.
1
+ <PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
3
2
 
4
- consignee: Legal Entity that is responsible for importing goods, name and address.
5
- shipper: Legal Entity that is responsible for sending goods, name and address.
6
- countryOfOrigin: Country of origin of goods.
7
- MRN: MRN code.
8
- totalValueOfGoods: Total value of goods.
9
- containers:
10
- containerNumber: Unique ID for tracking the shipping container.
11
- grossWeight: Total weight of the cargo, including the tare weight of the container.
12
- packagingNumber: Packaging number.
13
- nettWeight: Weight of the goods excluding packaging and containers.
14
- packagingType: Type of packaging used (e.g., cartons, pallets, barrels).
15
- goodsDescription: Goods description.
3
+ <TASK> Your task is to extract data from customs assessment documents as per the given response schema structure. <TASK>
16
4
 
5
+ <CONTEXT>
6
+ The Freight Forwarding company receives Customs Assessment from customs partners.
7
+ These documents contain various details related to shipper, buyer, MRN, and container data such as container number, goods details at container level.
8
+ They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
9
+ Your role is to accurately extract specific entities from these Customs Assessment to support efficient processing and accurate record-keeping.
10
+ <CONTEXT>
17
11
 
18
- Your task is to extract the text value of the following entities:
19
- SCHEMA_PLACEHOLDER
12
+ <INSTRUCTIONS>
13
+ - Populate fields as defined in the response schema.
14
+ - Multiple containers entries may exist — capture all instances under "containers".
15
+ - Use the data field description to understand the context of the data.
20
16
 
21
- Keywords for datapoints:
22
- - consignee: Importeur, Anmelder, Empfanger.
23
- - shipper: Speditore, Esportatore, Exporteur, Versender.
24
- - countryOfOrigin: Land van oorsprong, Ursprungsland.
25
- - MRN: MRN, Reg. Nr., Reg. Kennzeigechen.
26
- - totalValueOfGoods: Waarde, Warenwert, Factuurwaarde.
27
- - containers:
28
- - containerNumber: container number, cntr. nos., containernummern, cont. nr.
29
- - grossWeight: gross weight, Bruto.
30
- - nettWeight: Weight of the goods excluding packaging and containers, Netto, Eigenmasse.
31
- - packagingNumber: package number, Anzahl.
32
- - packagingType: Type of packaging used (e.g., cartons, pallets, barrels), number and kind of packages, description of goods.
33
- - goodsDescription: goods description, Bezeichnung, goederenomschrijving.
17
+ - MRN: Movement Reference Number (MRN) is a unique identifier assigned to each customs declaration for goods being imported or exported within the European Union (EU). It is used to track and monitor the movement of goods across EU member states. It can be found with MRN, Reg. Nr., Reg. Kennzeigechen, etc..
34
18
 
35
-
36
- You must apply the following rules:
37
- - The JSON schema must be followed during the extraction.
38
- - The values must only include text found in the document
39
- - Do not normalize any entity value.
40
- - nettWeight can't be equal to grossWeight.
41
- - Validate the JSON make sure it is a valid JSON ! No extra text, no missing comma!
42
- - Add an escape character (backwards slash) in from of all quotes in values
19
+ - containers: Details of each container on the Customs Assessment. Make sure to extract each container information separately.
20
+ - containerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892). It can be identified as container number, cntr. nos., containernummern, cont. nr.
21
+ - goodsDescription: Extract only the description of the goods for the "goodsDescription" but not other information like packing, marks, etc.
22
+ - packagingNumber:
23
+ - Prioritize the "Pallets/PLTS/Cartons/CTNS/Package" over "PCS" count to extract the data for the "packagingNumber".
24
+ - example: If the table has "17CTNS", "9PLTS", "850", "850PCS", prioritize "9PLTS"
25
+ - Do not extract the pack Quantity field such as "50PCS/CTN", "5PC/Box" (these represent quantity per carton, not total shipped quantity).
26
+ - packagingType:
27
+ - Extract the unit associated with the "packagingNumber" in the table to extract the "packagingType"
28
+ - Sometimes it can be found on the column name of the "packagingNumber" in the table to extract the "packagingType"
29
+ <INSTRUCTIONS>
@@ -0,0 +1,205 @@
1
+ {
2
+ "type": "OBJECT",
3
+ "properties": {
4
+ "bankAccount": {
5
+ "type": "ARRAY",
6
+ "items": {
7
+ "type": "STRING",
8
+ "nullable": true,
9
+ "description": "The bank account(s) number(s) of the vendor. This is the account to which the payment should be made. Extract all the relevant bank account numbers mentioned in the invoice."
10
+ }
11
+ },
12
+ "contractNumber": {"type": "STRING",
13
+ "nullable": true,
14
+ "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG."
15
+ },
16
+ "currencyExchange": {
17
+ "type": "OBJECT",
18
+ "properties": {
19
+ "from": {"type": "STRING",
20
+ "nullable": true,
21
+ "description": "The currency code from which the exchange rate is applied."
22
+ },
23
+ "fxRate": {"type": "STRING",
24
+ "nullable": true,
25
+ "description": "The exchange rate applied to convert the amount from the 'from' currency to the 'to' currency."
26
+ },
27
+ "to": {"type": "STRING",
28
+ "nullable": true,
29
+ "description": "The currency code to which the exchange rate is applied."}
30
+ }
31
+ },
32
+ "documentType": {"type": "STRING", "nullable": true},
33
+ "dueDate": {"type": "STRING", "nullable": true,
34
+ "description": "The date by which the payment should be made by Forto Logistics SE & Co KG. Do Not calculate dueDate based on issueDate or any other date. Extract it directly from the invoice."},
35
+ "eta": {"type": "STRING", "nullable": true,
36
+ "description": "Estimated Time of Arrival (ETA) is the expected date when the shipment will arrive at its destination."},
37
+ "etd": {"type": "STRING", "nullable": true,
38
+ "description": "Estimated Time of Departure (ETD) is the expected date when the shipment will leave the origin port."},
39
+ "fortoEntity": {"type": "STRING", "nullable": true,
40
+ "description": "The entity of 'Forto Logistics SE & Co KG' that is responsible for the invoice. The Forto organization or branch managing the shipment."
41
+ },
42
+ "hblNumber": {"type": "STRING", "nullable": true,
43
+ "description": "House Bill of Lading number, a document issued by a freight forwarder."
44
+ },
45
+ "currencyCode": {"type": "STRING", "nullable": true,
46
+ "description": "The currency code in which the invoice is issued, such as EUR, USD, etc."
47
+ },
48
+ "grandTotal": {"type": "STRING", "nullable": true,
49
+ "description": "The total amount of the invoice, including all line items and taxes."
50
+ },
51
+ "vatAmount": {"type": "STRING", "nullable": true,
52
+ "description": "The total VAT amount applied to the invoice. This is the tax charged on the vatApplicableAmount of the invoice. Bitte Zahlen is not the vatAmount."
53
+ },
54
+ "vatApplicableAmount": {"type": "STRING", "nullable": true,
55
+ "description": "The amount on which VAT is applicable. This is the net amount before VAT is applied (without VAT)."
56
+ },
57
+ "vatPercentage": {"type": "STRING", "nullable": true,
58
+ "description": "The percentage rate of VAT applied to the vatApplicableAmount. This is used to calculate the vatAmount."
59
+ },
60
+ "invoiceNumber": {"type": "STRING", "nullable": true,
61
+ "description": "The unique identifier for the invoice. This is used to track and reference the invoice in financial records."
62
+ },
63
+ "issueDate": {"type": "STRING", "nullable": true,
64
+ "description": "The date when the invoice was issued."
65
+ },
66
+ "lineItem": {
67
+ "type": "ARRAY",
68
+ "items": {
69
+ "type": "OBJECT",
70
+ "properties": {
71
+ "uniqueId": {
72
+ "type": "STRING",
73
+ "nullable": true,
74
+ "description": "A line item can belong to different shipments. Hence, the unique IDs that you see only on the line item level need to be extracted. UniqueIds are containerNumber, shipmentId, or sealNumber."
75
+ },
76
+ "lineItemDescription": {"type": "STRING", "nullable": true,
77
+ "description": "A description of the line item (COGS or Customs line items), which can include details about the service provided."},
78
+ "totalAmount": {"type": "STRING", "nullable": true,
79
+ "description": "The total amount for the line item, which may include the cost of services, and applicable taxes."},
80
+ "totalAmountCurrency": {"type": "STRING", "nullable": true,
81
+ "description": "The currency code for the total amount, such as EUR, USD, etc."},
82
+ "totalAmountEuro": {"type": "STRING", "nullable": true,
83
+ "description": "The total amount converted to Euro, if applicable. You can find it by looking for the term 'Total EUR' or 'Amount in Euro' in the line item."},
84
+ "quantity": {"type": "STRING", "nullable": true,
85
+ "description": "The quantity of the item or service provided in the line item."},
86
+ "unitPrice": {"type": "STRING", "nullable": true,
87
+ "description": "The price per unit of the item or service in the line item. Check the naming in different languages, such as 'Einzelpreis', 'Unit Price', 'Prezzo unitario', 'Preis pro Einheit', etc. Refer to the 'Prezzo unitario' field in the Italian invoice example."},
88
+ "unitPriceCurrency": {"type": "STRING", "nullable": true,
89
+ "description": "The currency code for the unit price, such as EUR, USD, etc."},
90
+ "vatAmount": {"type": "STRING", "nullable": true,
91
+ "description": "The VAT amount applied to the line item. This is the tax charged on the totalAmount of the line item."},
92
+ "vatPercentage": {"type": "STRING", "nullable": true,
93
+ "description": "The percentage rate of VAT applied to the totalAmount of the line item. This is used to calculate the vatAmount."
94
+ },
95
+ "containerNumber": {"type": "STRING", "nullable": true,
96
+ "description": "The container number associated with the line item. containerNumber MUST start with 4 letters followed by 7 digits (e.g., CMAU1234567)"},
97
+ "containerSize": {"type": "STRING", "nullable": true,
98
+ "description": "The size of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."}
99
+ }
100
+ },
101
+ "required": [
102
+ "uniqueId",
103
+ "lineItemDescription",
104
+ "totalAmount",
105
+ "totalAmountCurrency",
106
+ "totalAmountEuro",
107
+ "unitPrice",
108
+ "unitPriceCurrency",
109
+ "vatAmount",
110
+ "vatPercentage",
111
+ "containerNumber",
112
+ "containerSize"
113
+ ]
114
+ },
115
+ "mblNumber": {"type": "STRING", "nullable": true,
116
+ "description": "Bill of Lading number (B/L NO.), a document issued by the carrier."
117
+ },
118
+ "partnerReference": {"type": "STRING", "nullable": true,
119
+ "description": "A partnerReference can be a shipment ID. It starts with 'S' followed by 6 or 8 digits (e.g., 'S1234567')."
120
+ },
121
+ "paymentTerm": {"type": "STRING", "nullable": true,
122
+ "description": "The payment term indicates the conditions under which the payment should be made. E.g., 'In 10 TAGEN', '14 TAGEN', '14 days', etc."},
123
+ "portOfDischarge": {"type": "STRING", "nullable": true,
124
+ "description": "The port where the goods are discharged from the vessel. This is the destination port for the shipment."},
125
+ "portOfLoading": {"type": "STRING", "nullable": true,
126
+ "description": "The origin port where the goods are loaded onto the vessel. Find information like 'Ladehafen' or 'Port of Loading' in the invoice."},
127
+ "recipientAddress": {"type": "STRING", "nullable": true,
128
+ "description": "Most of the time, it is the address of 'Forto Logistics SE & Co KG'. The address depends on the entity."},
129
+ "recipientName": {"type": "STRING", "nullable": true,
130
+ "description": "The name of the recipient who is responsible for making the payment. This is often the 'Forto Logistics SE & Co KG' entity or partner."},
131
+ "serviceDate": {"type": "STRING", "nullable": true,
132
+ "description": "The date when the service was provided. If Service date is not available in the invoice, Estimated Time of Arrival (ETA) can be used."},
133
+ "vatId": {"type": "STRING", "nullable": true,
134
+ "description": "The VAT ID of the vendor. This is used for tax purposes and to identify the vendor in financial transactions."},
135
+ "vendorAddress": {"type": "STRING", "nullable": true,
136
+ "description": "The address of the vendor to whom the payment should be made."},
137
+ "vendorName": {"type": "STRING", "nullable": true,
138
+ "description": "The name of the vendor to whom the payment should be made. Extract the main vendor details incase the invoice contains 'As Agent For'."},
139
+ "agentName": {
140
+ "type": "STRING",
141
+ "nullable": true,
142
+ "description": "The name of the agent or intermediary involved in the transaction, if applicable."},
143
+ "agentKeyWord": {
144
+ "type": "STRING",
145
+ "nullable": true,
146
+ "description": "A keyword or phrase that indicates the presence of an agent or intermediary in the transaction, such as 'As Agent For', 'Acting Agent', 'Issuing agent', 'Contact Agent', or similar words."},
147
+
148
+ "paymentInformation": {
149
+ "type": "OBJECT",
150
+ "properties": {
151
+ "paidAmount": {
152
+ "type": "STRING",
153
+ "nullable": true,
154
+ "description": "The amount that has been paid so far. You can identify this in the invoice by looking for terms like 'Vorschuss'."
155
+ },
156
+ "remainingAmountToPay": {
157
+ "type": "STRING",
158
+ "nullable": true,
159
+ "description": "The amount that is still due for payment (e.g., 'Bitte zahlen', 'Zu zahlen' only). This can be negative & ensure the negative sign is captured if applicable."
160
+ },
161
+ "currency": {
162
+ "type": "STRING",
163
+ "nullable": true,
164
+ "description": "Currency code associated with the paidAmount and remainingAmountToPay"
165
+ },
166
+ "sentence": {"type": "STRING", "nullable": true,
167
+ "description": "A sentence that indicates the payment status, such as 'Vorschuss', 'Vorauszahlung', 'Paid', 'Partially Paid', or 'Unpaid'. This is used to summarize the payment status of the invoice."}
168
+ }
169
+
170
+ },
171
+ "reverseChargeSentence": {
172
+ "type": "STRING",
173
+ "nullable": true,
174
+ "description": "A sentence which indicates that the reverse charge applies. Mostly found as VAT/Tax Clause."
175
+ }
176
+ },
177
+ "required": [
178
+ "bankAccount",
179
+ "contractNumber",
180
+ "currencyExchange",
181
+ "documentType",
182
+ "eta",
183
+ "etd",
184
+ "fortoEntity",
185
+ "currencyCode",
186
+ "grandTotal",
187
+ "vatAmount",
188
+ "vatApplicableAmount",
189
+ "vatPercentage",
190
+ "invoiceNumber",
191
+ "issueDate",
192
+ "lineItem",
193
+ "mblNumber",
194
+ "partnerReference",
195
+ "paymentTerm",
196
+ "portOfDischarge",
197
+ "portOfLoading",
198
+ "recipientAddress",
199
+ "recipientName",
200
+ "serviceDate",
201
+ "vatId",
202
+ "vendorAddress",
203
+ "vendorName"
204
+ ]
205
+ }
@@ -0,0 +1,105 @@
1
+ <PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
2
+
3
+ <TASK> Your task is to extract data from invoice documents as per the given response schema structure. <TASK>
4
+
5
+ <CONTEXT>
6
+ The Freight Forwarding company receives invoices from Carrier (Shipping Lines) partners and Customs Brokers. These include Partner Invoices (COGS Invoices) and COGS Customs Invoices.
7
+ These invoices contain various details related to shipments, cost line items, vendor and recipient information, as well as other financial data.
8
+ They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
9
+ Your role is to accurately extract specific entities from these invoices to support efficient processing and accurate record-keeping.
10
+ <CONTEXT>
11
+
12
+ <INSTRUCTIONS>
13
+ - Populate fields as defined in the response schema.
14
+ - Multiple line item entries may exist, capture all instances under "lineItem".
15
+ - Use the data field description to understand the context of the data.
16
+
17
+ - contractNumber: The reference number for the contract related to the shipment.
18
+ - currencyExchange: Details about currency conversion involved
19
+ - from: The original currency code.
20
+ - fxRate: The exchange rate applied. In most cases, the value is between 0.0 and 1.5 and rarely exceeds 1.5.
21
+ - to: The target currency code.
22
+
23
+ - fortoEntity: The entity responsible for the shipment. It could be Forto branch from a different country.
24
+ - hblNumber: The House Bill of Lading number. Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", or "HBL No.".
25
+
26
+ - grandTotal:
27
+ - The overall total Gross amount. Including all line items, taxes, and fees.
28
+
29
+ - vatApplicableAmount:
30
+ - Do not get confused with the prepaid and due amount. zu zahlen or Bitte Zahlen is not the vatApplicableAmount.
31
+ - vatApplicableAmount is the Net Amount of the invoice. Basically the total amount before VAT is applied. You can find it in the invoice total section and sometimes as "Netto"
32
+ - If vatAmount is not applied, then vatApplicableAmount can be same as grandTotal.
33
+ - Invoices from SGS maco customs service, "Total Kosten excl. MwSt." is not the vatApplicableAmount
34
+
35
+ - IMPORTANT NOTE:
36
+ - CUSTOMS INVOICES and VAT/DUTY INVOICES do not have a vatAmount and vatPercentage.
37
+ - CUSTOMS INVOICES are "SGS maco customs service", "Woodland Global", and "CUSTOMS SUPPORT".
38
+ - Do not confuse with the prepaid and due amount from SGS maco customs service. The prepaid and due amount is not the vatApplicableAmount.
39
+
40
+ - vatAmount:
41
+ - The total VAT amount applied to the invoice. This is the tax charged on the vatApplicableAmount of the invoice.
42
+ - Do not get confused with the prepaid and due amount.
43
+ - Do not extract vatAmount from the line items, it should be extracted from the invoice total section.
44
+ - IMPORTANT:
45
+ - Invoices from "SGS maco customs service", values under 'MwSt Betrag' section is not a vatAmount. E.g., "Bitte Zahlen" or "Zu zahlen" is not the vatAmount.
46
+ - CUSTOMS SUPPORT invoices do not have a vatAmount and vatPercentage.
47
+
48
+ - issueDate: The date the document was issued.
49
+ - dueDate: The date by which the payment should be made. Do Not calculate dueDate based on issueDate or any other date. Extract it directly from the invoice.
50
+
51
+ - lineItem: Details of each COGS and Customs line item on the invoice from each page. Make sure to extract each amount and currency separately.
52
+ - uniqueId: A unique id which is associated with the lineItem, as each line item can belong to a different shipment. Extract only if it's available in the line item. Either a shipmentId starting with an S and followed by 6 or 8 numeric values or a mblNumber. If shipmentId or mblNumber does not exist, set it to containerNumber.
53
+ - lineItemDescription: The name or description of the item. Usually, it will be a one line sentence.
54
+ - unitPrice: Even if the quantity is not mentioned, you can still extract the unit price. Check the naming of the columns in different languages; it can be "Unit Price", "Prezzo unitario", "Prix Unitaire", "Unitario", etc. Refer to the "Prezzo unitario" field in the Italian invoice example.
55
+ - totalAmount: The total amount for the item. It can be in different currencies, so ensure to capture the currency as well for the totalAmountCurrency.
56
+ - totalAmountEuro: A few line items contain a total amount in Euro. You can find it by looking for the term "Total EUR" or "Amount in Euro" in the line item, but it's always in the EURO / € currency. Sometimes, it can be the same as totalAmount if the line item is already in Euro.
57
+ - quantity: The quantity of the item or service provided in the line item. Pay attention to 2 x 40HC or 2x40HC. It means, quantity is 2 and 40HC is containerSize but not 240.
58
+ - containerNumber: Container Number always starts with 4 letters and is followed by 7 digits (e.g., ABCD1234567).
59
+
60
+ - hblNumber and mblNumber:
61
+ - The Master Bill of Lading number. Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", or "HBL No.".
62
+ - Do not confuse with the containerNumber that always starts with 4 letters and is followed by 7 digits (e.g., SEGU3090389). This is not the mblNumber or hblNumber.
63
+ - partnerReference: Shipment_ID can be a reference number for the partner. Shipment_ID always starts with "S" followed by 6 or 8 digits (e.g., S2654361).
64
+
65
+ - vendorName and vendorAddress:
66
+ - The name and address of the vendor providing the service and to whom the payment should be made.
67
+ - In some cases, invoices may be issued by agents—such as a sister company, subsidiary, or other related entity of the actual vendor. In such scenarios, extract the name and address of the parent or principal company as the vendorName and vendorAddress.
68
+ - If the invoice contains phrases such as "As Agent For" or "As Agent Of" in the vendor name or address, extract the main vendor’s details (the entity after the phrase) and ignore the local agent’s details.
69
+ - Example:
70
+ - "COSCO SHIPPING Lines Italy, Poland, or France S.R.L. – Genova Office – As Agent For COSCO SHIPPING Lines Co.,Ltd."
71
+ - vendorName: COSCO SHIPPING Lines Co.,Ltd.
72
+
73
+ - agentName: Name of the agent. Agencies are offices authorized to act on behalf of a company. These details are usually available in the invoice, including the branch name of the parent company.
74
+ - agentKeyWord:
75
+ - A keyword or phrase that indicates the presence of an agent or intermediary in the transaction, such as 'As Agent For', 'Acting Agent', 'Acting on behalf of', 'Issuing agent', 'Contact Agent', 'Service rendered by', 'C/O as agent for', 'Invoice issued in the name of', 'Diese Rechnung wird von uns als Agent im Namen und für Rechnung der XX ausgestellt', or similar words.
76
+ - Extract this field only if the agentName is present in the invoice. Do not assume from the references given above.
77
+
78
+ - recipientName and recipientAddress: This is often the 'Forto Logistics SE & Co KG' entity or partner, and its address. The address depends on the Forto entity responsible for the shipment.
79
+
80
+ - serviceDate: The date of service provided. If the serviceDate is not specifically mentioned in the invoice, you can use the ETA of the shipment as a serviceDate.
81
+ - reverseChargeSentence: A sentence which indicates that the reverse charge applies. Mostly found as Tax Clause.
82
+
83
+ - paymentInformation:
84
+ - Some partners receive prepayment before providing the service. They later send a final invoice that includes both the amount already paid and the remaining amount due.
85
+ - This applies when the invoice contains prepayment-related terms such as Vorschuss, BEREITS BEZAHLT, or similar at the invoice total section.
86
+ - do not get confused with the paidAmount and remainingAmountToPay. Few invoices may not have a paidAmount or remainingAmountToPay in such cases pay attention to the sentence field alignment.
87
+ - Extract the following fields, if applicable:
88
+ - paidAmount: The amount that has already been paid. You can identify this in the invoice by looking for terms like "Vorschuss", "BEREITS BEZAHLT".
89
+ - remainingAmountToPay: The amount still due. This can be negative if the paid amount exceeds the total invoice amount. Ensure the negative sign is captured if applicable. You can identify this by looking for terms like "Bitte Zahlen", "Zu zahlen", "Remaining Amount", "To Pay", "Due", or "Unpaid".
90
+ - currency: The currency of both the paid and remaining amounts.
91
+ - sentence: A sentence from the invoice indicating the payment status (e.g., "Vorschuss", "Prepayment", "Paid", "Partially Paid", "Unpaid"). This helps summarize the overall payment status of the invoice.
92
+
93
+ IMPORTANT NOTE:
94
+ - Ensure all extracted values are directly from the document. Do not make assumptions or modifications.
95
+ - CustomSized invoices contain line items in a table format in an attached page. The table has the headings Shipment ID, Partner Line Item Description, Quantity, Amount, and VAT. Extract all the line items from each table on each page.
96
+ - Do not normalize or modify any entity values.
97
+ - Pay attention to the line item details and paymentInformation, as they may vary significantly across different invoices.
98
+
99
+ PAY ATTENTION TO THE SGS MACO CUSTOMS SERVICE INVOICES:
100
+ - invoices from SGS maco customs service,
101
+ - Extract only "Vorschuss" as a paidAmount but not "Vorauszahlung".
102
+ - Extract "Zu zahlen" or "Bitte Zahlen" as a remainingAmountToPay.
103
+ - do not get confused with the paidAmount and remainingAmountToPay. Few invoices may not have a paidAmount or remainingAmountToPay. In such cases, pay attention to the sentence field alignment.
104
+ - "Total Kosten excl. MwSt." is not the vatApplicableAmount
105
+ <INSTRUCTIONS>
@@ -1,31 +1,82 @@
1
1
  {
2
- "SCHEMA_PLACEHOLDER": {
3
- "type": "OBJECT",
4
- "properties": {
5
- "EmptyContainerDepot": {"type": "string", "nullable": true},
6
- "Equipment": {"type": "ARRAY",
7
- "items": {
8
- "type": "OBJECT", "properties": {
9
- "CargoGrossWeight": {"type": "string", "nullable": true},
10
- "ContainerNumber": {"type": "string", "nullable": true},
11
- "ContainerType": {"type": "string", "nullable": true},
12
- "EmptyReturnReference": {"type": "string", "nullable": true},
13
- "Pin": {"type": "string", "nullable": true},
14
- "TareWeight": {"type": "string", "nullable": true}
15
- }, "required": []}
16
- },
17
- "pickUpTerminal": {"type": "string", "nullable": true},
18
- "TransportLeg": {"type": "ARRAY",
19
- "items": {
20
- "type": "OBJECT", "properties": {
21
- "eta": {"type": "string", "nullable": true},
22
- "etd": {"type": "string", "nullable": true},
23
- "portOfDischarge": {"type": "string", "nullable": true},
24
- "portOfLoading": {"type": "string", "nullable": true},
25
- "vesselName": {"type": "string", "nullable": true},
26
- "voyage": {"type": "string", "nullable": true}
27
- }, "required": []}
2
+ "type": "OBJECT",
3
+ "properties": {
4
+ "EmptyContainerDepot": {
5
+ "type": "STRING",
6
+ "nullable": true,
7
+ "description": "The depot where the empty container is returned."
8
+ },
9
+ "Equipment": {
10
+ "type": "ARRAY",
11
+ "items": {
12
+ "type": "OBJECT",
13
+ "properties": {
14
+ "CargoGrossWeight": {
15
+ "type": "STRING",
16
+ "nullable": true,
17
+ "description": "The gross weight of the Cargo. Usually mentioned as G.W or GW or Gross Weight, etc."},
18
+ "ContainerNumber": {
19
+ "type": "STRING",
20
+ "nullable": true,
21
+ "description": "The container number associated with the document. It MUST consist of 4 letters followed by 7 digits (e.g., 'CMAU1234567', 'BMOU 575538/3', 'XLXU 1277652'). It can be found in the document as 'Container No.', 'Container Number', 'Cont. No.', 'Cont Nr.', 'Seefrachtcontainer-Nr.', or 'Containernummer'."},
22
+ "ContainerType": {
23
+ "type": "STRING",
24
+ "nullable": true,
25
+ "description": "The size or Type of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."},
26
+ "EmptyReturnReference": {
27
+ "type": "STRING",
28
+ "nullable": true,
29
+ "description": "The reference number or code for the return of the empty container."},
30
+ "Pin": {
31
+ "type": "STRING",
32
+ "nullable": true,
33
+ "description": "The PIN code associated with the container, often used for security or access purposes."},
34
+ "TareWeight": {
35
+ "type": "STRING",
36
+ "nullable": true,
37
+ "description": "The weight of the empty container itself, without any cargo inside. Usually mentioned as T.W or TW or Tare Weight, etc."}
38
+ },
39
+ "required": ["CargoGrossWeight", "ContainerNumber", "EmptyReturnReference", "Pin", "TareWeight"]
28
40
  }
29
- },
30
- "required": []}
41
+ },
42
+ "pickUpTerminal": {
43
+ "type": "STRING",
44
+ "nullable": true,
45
+ "description": "The terminal where the container or cargo is picked up."
46
+ },
47
+ "TransportLeg": {
48
+ "type": "ARRAY",
49
+ "items": {
50
+ "type": "OBJECT",
51
+ "properties": {
52
+ "eta": {
53
+ "type": "STRING",
54
+ "nullable": true,
55
+ "description": "Estimated Time of Arrival (ETA) is the expected date when the shipment will arrive at its destination."},
56
+ "etd": {
57
+ "type": "STRING",
58
+ "nullable": true,
59
+ "description": "Estimated Time of Departure (ETD) is the expected date when the shipment will leave the origin port."},
60
+ "portOfDischarge": {
61
+ "type": "STRING",
62
+ "nullable": true,
63
+ "description": "The port where the goods are discharged from the vessel. This is the destination port for the shipment."},
64
+ "portOfLoading": {
65
+ "type": "STRING",
66
+ "nullable": true,
67
+ "description": "The origin port where the goods are loaded onto the vessel. Find information like 'Ladehafen' or 'Port of Loading' in the invoice."},
68
+ "vesselName": {
69
+ "type": "STRING",
70
+ "nullable": true,
71
+ "description": "The name of the vessel carrying the container or shipment"},
72
+ "voyage": {
73
+ "type": "STRING",
74
+ "nullable": true,
75
+ "description": "The unique voyage number or identifier assigned to a vessel’s specific journey. This typically corresponds to the scheduled sailing associated with the shipment and can often be found near vessel information on shipping documents, labeled for example as voyage, voy. no, or voyage-no."}
76
+ },
77
+ "required": ["eta", "etd", "portOfDischarge", "portOfLoading", "vesselName", "voyage"]
78
+ }
79
+ }
80
+ },
81
+ "required": ["Equipment", "TransportLeg"]
31
82
  }