data-science-document-ai 1.37.0__py3-none-any.whl → 1.51.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_science_document_ai-1.37.0.dist-info → data_science_document_ai-1.51.0.dist-info}/METADATA +3 -3
- data_science_document_ai-1.51.0.dist-info/RECORD +60 -0
- {data_science_document_ai-1.37.0.dist-info → data_science_document_ai-1.51.0.dist-info}/WHEEL +1 -1
- src/constants.py +6 -10
- src/docai.py +14 -5
- src/docai_processor_config.yaml +0 -56
- src/excel_processing.py +34 -13
- src/io.py +69 -1
- src/llm.py +10 -32
- src/pdf_processing.py +192 -57
- src/postprocessing/common.py +252 -590
- src/postprocessing/postprocess_partner_invoice.py +139 -89
- src/prompts/library/arrivalNotice/other/placeholders.json +70 -0
- src/prompts/library/arrivalNotice/other/prompt.txt +40 -0
- src/prompts/library/bookingConfirmation/evergreen/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/evergreen/prompt.txt +1 -0
- src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +18 -18
- src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +1 -1
- src/prompts/library/bookingConfirmation/maersk/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/maersk/prompt.txt +1 -1
- src/prompts/library/bookingConfirmation/msc/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/msc/prompt.txt +1 -1
- src/prompts/library/bookingConfirmation/oocl/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/oocl/prompt.txt +3 -1
- src/prompts/library/bookingConfirmation/other/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/other/prompt.txt +1 -1
- src/prompts/library/bookingConfirmation/yangming/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/yangming/prompt.txt +1 -1
- src/prompts/library/bundeskasse/other/placeholders.json +25 -25
- src/prompts/library/bundeskasse/other/prompt.txt +8 -6
- src/prompts/library/commercialInvoice/other/placeholders.json +125 -0
- src/prompts/library/commercialInvoice/other/prompt.txt +2 -1
- src/prompts/library/customsAssessment/other/placeholders.json +67 -16
- src/prompts/library/customsAssessment/other/prompt.txt +24 -37
- src/prompts/library/customsInvoice/other/placeholders.json +29 -20
- src/prompts/library/customsInvoice/other/prompt.txt +9 -4
- src/prompts/library/deliveryOrder/other/placeholders.json +79 -28
- src/prompts/library/deliveryOrder/other/prompt.txt +26 -40
- src/prompts/library/draftMbl/other/placeholders.json +33 -33
- src/prompts/library/draftMbl/other/prompt.txt +34 -44
- src/prompts/library/finalMbL/other/placeholders.json +34 -34
- src/prompts/library/finalMbL/other/prompt.txt +34 -44
- src/prompts/library/packingList/other/placeholders.json +98 -0
- src/prompts/library/packingList/other/prompt.txt +1 -1
- src/prompts/library/partnerInvoice/other/placeholders.json +2 -23
- src/prompts/library/partnerInvoice/other/prompt.txt +7 -18
- src/prompts/library/preprocessing/carrier/placeholders.json +0 -16
- src/prompts/library/shippingInstruction/other/placeholders.json +115 -0
- src/prompts/library/shippingInstruction/other/prompt.txt +28 -15
- src/setup.py +13 -61
- src/utils.py +189 -29
- data_science_document_ai-1.37.0.dist-info/RECORD +0 -59
- src/prompts/library/draftMbl/hapag-lloyd/prompt.txt +0 -44
- src/prompts/library/draftMbl/maersk/prompt.txt +0 -17
- src/prompts/library/finalMbL/hapag-lloyd/prompt.txt +0 -44
- src/prompts/library/finalMbL/maersk/prompt.txt +0 -17
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"cfsCutOff": {"type": "
|
|
5
|
-
"bookingNumber": {"type": "
|
|
6
|
-
"cyCutOff": {"type": "
|
|
7
|
-
"gateInReference": {"type": "
|
|
8
|
-
"gateInTerminal": {"type": "
|
|
9
|
-
"mblNumber": {"type": "
|
|
10
|
-
"pickUpReference": {"type": "
|
|
11
|
-
"pickUpTerminal": {"type": "
|
|
12
|
-
"siCutOff": {"type": "
|
|
13
|
-
"vgmCutOff": {"type": "
|
|
4
|
+
"cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
|
|
5
|
+
"bookingNumber": {"type": "STRING", "nullable": true},
|
|
6
|
+
"cyCutOff": {"type": "STRING", "nullable": true},
|
|
7
|
+
"gateInReference": {"type": "STRING", "nullable": true},
|
|
8
|
+
"gateInTerminal": {"type": "STRING", "nullable": true},
|
|
9
|
+
"mblNumber": {"type": "STRING", "nullable": true},
|
|
10
|
+
"pickUpReference": {"type": "STRING", "nullable": true},
|
|
11
|
+
"pickUpTerminal": {"type": "STRING", "nullable": true},
|
|
12
|
+
"siCutOff": {"type": "STRING", "nullable": true},
|
|
13
|
+
"vgmCutOff": {"type": "STRING", "nullable": true},
|
|
14
14
|
"transportLegs": {
|
|
15
15
|
"type": "ARRAY",
|
|
16
16
|
"items": {
|
|
17
17
|
"type": "OBJECT",
|
|
18
18
|
"properties": {
|
|
19
|
-
"eta": {"type": "
|
|
20
|
-
"etd": {"type": "
|
|
21
|
-
"portOfDischarge": {"type": "
|
|
22
|
-
"portOfLoading": {"type": "
|
|
23
|
-
"vesselName": {"type": "
|
|
24
|
-
"voyage": {"type": "
|
|
25
|
-
"imoNumber": {"type": "
|
|
19
|
+
"eta": {"type": "STRING", "nullable": true},
|
|
20
|
+
"etd": {"type": "STRING", "nullable": true},
|
|
21
|
+
"portOfDischarge": {"type": "STRING", "nullable": true},
|
|
22
|
+
"portOfLoading": {"type": "STRING", "nullable": true},
|
|
23
|
+
"vesselName": {"type": "STRING", "nullable": true},
|
|
24
|
+
"voyage": {"type": "STRING", "nullable": true},
|
|
25
|
+
"imoNumber": {"type": "STRING", "nullable": true}
|
|
26
26
|
},
|
|
27
27
|
"required": []
|
|
28
28
|
}
|
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
|
|
1
|
+
your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
2
|
+
|
|
3
|
+
bookingNumber: Extract the booking number. This information can be found near the labels "BOOKING ACKNOWLEDGEMENT" or "BOOKING NUMBER".
|
|
2
4
|
gateInReference: This field should have the same value as the bookingNumber.
|
|
3
5
|
cyCutOff: Look for the "INTENDED FCL CY CUT-OFF" label and extract the date and time value.
|
|
4
6
|
vgmCutOff: Look for the "INTENDED VGM CUT-OFF" label and extract the date and time value.
|
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"cfsCutOff": {"type": "
|
|
5
|
-
"bookingNumber": {"type": "
|
|
6
|
-
"cyCutOff": {"type": "
|
|
7
|
-
"gateInReference": {"type": "
|
|
8
|
-
"gateInTerminal": {"type": "
|
|
9
|
-
"mblNumber": {"type": "
|
|
10
|
-
"pickUpReference": {"type": "
|
|
11
|
-
"pickUpTerminal": {"type": "
|
|
12
|
-
"siCutOff": {"type": "
|
|
13
|
-
"vgmCutOff": {"type": "
|
|
4
|
+
"cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
|
|
5
|
+
"bookingNumber": {"type": "STRING", "nullable": true},
|
|
6
|
+
"cyCutOff": {"type": "STRING", "nullable": true},
|
|
7
|
+
"gateInReference": {"type": "STRING", "nullable": true},
|
|
8
|
+
"gateInTerminal": {"type": "STRING", "nullable": true},
|
|
9
|
+
"mblNumber": {"type": "STRING", "nullable": true},
|
|
10
|
+
"pickUpReference": {"type": "STRING", "nullable": true},
|
|
11
|
+
"pickUpTerminal": {"type": "STRING", "nullable": true},
|
|
12
|
+
"siCutOff": {"type": "STRING", "nullable": true},
|
|
13
|
+
"vgmCutOff": {"type": "STRING", "nullable": true},
|
|
14
14
|
"transportLegs": {
|
|
15
15
|
"type": "ARRAY",
|
|
16
16
|
"items": {
|
|
17
17
|
"type": "OBJECT",
|
|
18
18
|
"properties": {
|
|
19
|
-
"eta": {"type": "
|
|
20
|
-
"etd": {"type": "
|
|
21
|
-
"imoNumber": {"type": "
|
|
22
|
-
"portOfDischarge": {"type": "
|
|
23
|
-
"portOfLoading": {"type": "
|
|
24
|
-
"vesselName": {"type": "
|
|
25
|
-
"voyage": {"type": "
|
|
19
|
+
"eta": {"type": "STRING", "nullable": true},
|
|
20
|
+
"etd": {"type": "STRING", "nullable": true},
|
|
21
|
+
"imoNumber": {"type": "STRING", "nullable": true},
|
|
22
|
+
"portOfDischarge": {"type": "STRING", "nullable": true},
|
|
23
|
+
"portOfLoading": {"type": "STRING", "nullable": true},
|
|
24
|
+
"vesselName": {"type": "STRING", "nullable": true},
|
|
25
|
+
"voyage": {"type": "STRING", "nullable": true}
|
|
26
26
|
},
|
|
27
27
|
"required": []
|
|
28
28
|
}
|
|
@@ -18,7 +18,7 @@ transportLegs:
|
|
|
18
18
|
vesselName: The name of the vessel for a specific leg.
|
|
19
19
|
voyage: The journey or route taken by the vessel for a specific leg.
|
|
20
20
|
|
|
21
|
-
your task is to extract the text value of the following entities:
|
|
21
|
+
your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
22
22
|
SCHEMA_PLACEHOLDER
|
|
23
23
|
|
|
24
24
|
Further explanation for the transportLegs part as follows:
|
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"cfsCutOff": {"type": "
|
|
5
|
-
"bookingNumber": {"type": "
|
|
6
|
-
"cyCutOff": {"type": "
|
|
7
|
-
"gateInReference": {"type": "
|
|
8
|
-
"gateInTerminal": {"type": "
|
|
9
|
-
"mblNumber": {"type": "
|
|
10
|
-
"pickUpReference": {"type": "
|
|
11
|
-
"pickUpTerminal": {"type": "
|
|
12
|
-
"siCutOff": {"type": "
|
|
13
|
-
"vgmCutOff": {"type": "
|
|
4
|
+
"cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
|
|
5
|
+
"bookingNumber": {"type": "STRING", "nullable": true},
|
|
6
|
+
"cyCutOff": {"type": "STRING", "nullable": true},
|
|
7
|
+
"gateInReference": {"type": "STRING", "nullable": true},
|
|
8
|
+
"gateInTerminal": {"type": "STRING", "nullable": true},
|
|
9
|
+
"mblNumber": {"type": "STRING", "nullable": true},
|
|
10
|
+
"pickUpReference": {"type": "STRING", "nullable": true},
|
|
11
|
+
"pickUpTerminal": {"type": "STRING", "nullable": true},
|
|
12
|
+
"siCutOff": {"type": "STRING", "nullable": true},
|
|
13
|
+
"vgmCutOff": {"type": "STRING", "nullable": true},
|
|
14
14
|
"transportLegs": {
|
|
15
15
|
"type": "ARRAY",
|
|
16
16
|
"items": {
|
|
17
17
|
"type": "OBJECT",
|
|
18
18
|
"properties": {
|
|
19
|
-
"eta": {"type": "
|
|
20
|
-
"etd": {"type": "
|
|
21
|
-
"imoNumber": {"type": "
|
|
22
|
-
"portOfDischarge": {"type": "
|
|
23
|
-
"portOfLoading": {"type": "
|
|
24
|
-
"vesselName": {"type": "
|
|
25
|
-
"voyage": {"type": "
|
|
19
|
+
"eta": {"type": "STRING", "nullable": true},
|
|
20
|
+
"etd": {"type": "STRING", "nullable": true},
|
|
21
|
+
"imoNumber": {"type": "STRING", "nullable": true},
|
|
22
|
+
"portOfDischarge": {"type": "STRING", "nullable": true},
|
|
23
|
+
"portOfLoading": {"type": "STRING", "nullable": true},
|
|
24
|
+
"vesselName": {"type": "STRING", "nullable": true},
|
|
25
|
+
"voyage": {"type": "STRING", "nullable": true}
|
|
26
26
|
},
|
|
27
27
|
"required": []
|
|
28
28
|
}
|
|
@@ -18,7 +18,7 @@ transportLegs:
|
|
|
18
18
|
vesselName: The name of the vessel for a specific leg.
|
|
19
19
|
voyage: The journey or route taken by the vessel for a specific leg.
|
|
20
20
|
|
|
21
|
-
your task is to extract the text value of the following entities:
|
|
21
|
+
your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
22
22
|
SCHEMA_PLACEHOLDER
|
|
23
23
|
|
|
24
24
|
Keywords for datapoints:
|
|
@@ -1,48 +1,48 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"
|
|
5
|
-
"type": "
|
|
4
|
+
"grandTotal": {
|
|
5
|
+
"type": "STRING",
|
|
6
6
|
"nullable": true,
|
|
7
|
-
"description": "The
|
|
7
|
+
"description": "The overall total amount of the invoice. It can be found with the key words Gesamtabgabenbetrag, Gesamtbetrag, or Zu erstattender Abgabenbetrag"
|
|
8
8
|
},
|
|
9
|
-
"
|
|
10
|
-
"type": "
|
|
9
|
+
"currencyCode": {
|
|
10
|
+
"type": "STRING",
|
|
11
11
|
"nullable": true,
|
|
12
|
-
"description": "The
|
|
12
|
+
"description": "The currency in which the invoice is issued. Extract the currency associated with the grand total (grandTotal) amount. It is mostly given as EUR, Euro or €."
|
|
13
13
|
},
|
|
14
14
|
"issueDate": {
|
|
15
|
-
"type": "
|
|
15
|
+
"type": "STRING",
|
|
16
16
|
"nullable": true,
|
|
17
17
|
"description": "The date the document was issued."
|
|
18
18
|
},
|
|
19
19
|
"recipientAddress": {
|
|
20
|
-
"type": "
|
|
20
|
+
"type": "STRING",
|
|
21
21
|
"nullable": true,
|
|
22
22
|
"description": "The address of the recipient."
|
|
23
23
|
},
|
|
24
24
|
"recipientName": {
|
|
25
|
-
"type": "
|
|
25
|
+
"type": "STRING",
|
|
26
26
|
"nullable": true,
|
|
27
27
|
"description": "The name of the recipient."
|
|
28
28
|
},
|
|
29
29
|
"serviceDate": {
|
|
30
|
-
"type": "
|
|
30
|
+
"type": "STRING",
|
|
31
31
|
"nullable": true,
|
|
32
32
|
"description": "The date of service or transaction."
|
|
33
33
|
},
|
|
34
34
|
"shipmentId": {
|
|
35
|
-
"type": "
|
|
35
|
+
"type": "STRING",
|
|
36
36
|
"nullable": true,
|
|
37
|
-
"description": "Starting with an \"S\" and followed by 6 or
|
|
37
|
+
"description": "Starts with an \"S\" followed by 6 to 8 digits. Example: S1243213 or S12876549"
|
|
38
38
|
},
|
|
39
39
|
"vendorName": {
|
|
40
|
-
"type": "
|
|
40
|
+
"type": "STRING",
|
|
41
41
|
"nullable": true,
|
|
42
42
|
"description": "The name of the vendor."
|
|
43
43
|
},
|
|
44
44
|
"vendorAddress": {
|
|
45
|
-
"type": "
|
|
45
|
+
"type": "STRING",
|
|
46
46
|
"nullable": true,
|
|
47
47
|
"description": "The address of the vendor."
|
|
48
48
|
},
|
|
@@ -52,37 +52,37 @@
|
|
|
52
52
|
"type": "OBJECT",
|
|
53
53
|
"properties": {
|
|
54
54
|
"deferredDutyPayer": {
|
|
55
|
-
"type": "
|
|
55
|
+
"type": "STRING",
|
|
56
56
|
"nullable": true,
|
|
57
|
-
"description": "It can be identified under
|
|
57
|
+
"description": "It can be identified under 'Aufschubnehmer' for each line item"
|
|
58
58
|
},
|
|
59
59
|
"name": {
|
|
60
|
-
"type": "
|
|
60
|
+
"type": "STRING",
|
|
61
61
|
"nullable": true,
|
|
62
62
|
"description": "The name or description of the line item A0000 and B0000"
|
|
63
63
|
},
|
|
64
64
|
"taxType": {
|
|
65
|
-
"type": "
|
|
65
|
+
"type": "STRING",
|
|
66
66
|
"nullable": true,
|
|
67
67
|
"description": "It's a line item mentioned in the invoice. For example; A0000 and B0000"
|
|
68
68
|
},
|
|
69
69
|
"totalAmount": {
|
|
70
|
-
"type": "
|
|
70
|
+
"type": "STRING",
|
|
71
71
|
"nullable": true,
|
|
72
72
|
"description": "The total amount for the line item."
|
|
73
73
|
},
|
|
74
74
|
"totalAmountCurrency": {
|
|
75
|
-
"type": "
|
|
75
|
+
"type": "STRING",
|
|
76
76
|
"nullable": true,
|
|
77
77
|
"description": "The currency of the total amount."
|
|
78
78
|
},
|
|
79
79
|
"vatId": {
|
|
80
|
-
"type": "
|
|
80
|
+
"type": "STRING",
|
|
81
81
|
"nullable": true,
|
|
82
82
|
"description": "The VAT identification number. This is named a Konto-Nummer for each line item."
|
|
83
83
|
},
|
|
84
84
|
"dueDate": {
|
|
85
|
-
"type": "
|
|
85
|
+
"type": "STRING",
|
|
86
86
|
"nullable": true,
|
|
87
87
|
"description": "It's a due date. Due date to pay the amount. It's usually mentioned either in a date or a number of days format"
|
|
88
88
|
}
|
|
@@ -91,20 +91,20 @@
|
|
|
91
91
|
}
|
|
92
92
|
},
|
|
93
93
|
"invoiceNumber": {
|
|
94
|
-
"type": "
|
|
94
|
+
"type": "STRING",
|
|
95
95
|
"nullable": true,
|
|
96
96
|
"description": "Invoice Number is a unique identifier for the invoice, it starts with \"ATC\", \"AT-C\", or \"AT/C\" only (e.g., ATC40, AT-C-40-, AT/C/40/....). Do NOT extract the \"NIZZA-Registrierkennzeichen\" number."
|
|
97
97
|
},
|
|
98
98
|
"containerNumber": {
|
|
99
99
|
"type": "ARRAY",
|
|
100
100
|
"items": {
|
|
101
|
-
"type": "
|
|
101
|
+
"type": "STRING",
|
|
102
102
|
"nullable": true,
|
|
103
103
|
"description": "The unique identifier for each container. It always starts with 4 capital letters and followed by 7 digits. Example: TEMU7972458."
|
|
104
104
|
}
|
|
105
105
|
},
|
|
106
106
|
"creditNoteInvoiceNumber": {
|
|
107
|
-
"type": "
|
|
107
|
+
"type": "STRING",
|
|
108
108
|
"nullable": true,
|
|
109
109
|
"description": "The unique identifier for the associated Invoice. The number usually starts with ATS..."
|
|
110
110
|
}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
<PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
|
|
2
2
|
|
|
3
|
-
<TASK>Your task is to extract data from customs invoice documents as per the given response schema structure
|
|
3
|
+
<TASK> Your task is to extract data from customs invoice documents as per the given response schema structure. <TASK>
|
|
4
4
|
|
|
5
5
|
<CONTEXT>
|
|
6
6
|
The Freight Forwarding company receives Customs invoices from Customs Brokers called Bundeskasse.
|
|
@@ -13,13 +13,14 @@ Your role is to accurately extract specific entities from these Customs invoices
|
|
|
13
13
|
- Populate fields as defined in the response schema.
|
|
14
14
|
- Multiple line item entries may exist, capture all instances under "lineItem".
|
|
15
15
|
- Use the data field description to understand the context of the data.
|
|
16
|
+
- The amounts are always in EUR, both for grandTotal and for the line items.
|
|
16
17
|
|
|
17
18
|
- containerNumber:
|
|
18
19
|
- Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU7222892).
|
|
19
20
|
- A few invoices contain multiple container numbers; in that case, all container numbers should be captured.
|
|
20
21
|
|
|
21
22
|
- shipmentID:
|
|
22
|
-
- Shipment ID is a unique identifier for the shipment, it starts with "S" followed by 6-
|
|
23
|
+
- Shipment ID is a unique identifier for the shipment, it starts with "S" followed by 6-8 digits (e.g., S1230583 or S12305876).
|
|
23
24
|
- It can be found in the top section of the invoice. Sometimes it is embedded inside a longer string of characters (e.g., "FORTO-S136748-").
|
|
24
25
|
- It can also be referred to as "Bezugsnummer" in the invoice.
|
|
25
26
|
|
|
@@ -31,15 +32,16 @@ Your role is to accurately extract specific entities from these Customs invoices
|
|
|
31
32
|
- Credit Note Invoice Number is a unique identifier for the credit note, it starts with "ATS" only (e.g., ATS.....).
|
|
32
33
|
- NIZZA is not a credit note invoice number.
|
|
33
34
|
|
|
34
|
-
- grandTotal
|
|
35
|
+
- grandTotal:
|
|
36
|
+
- It can be found with the keywords Gesamtabgabenbetrag or Gesamtbetrag. In credit notes, it can be found under "Zu erstattender Abgabenbetrag".
|
|
37
|
+
- grandTotal value is always or mostly mentioned in EUR currency as it is issued by German Customs.
|
|
35
38
|
|
|
36
39
|
- serviceDate can also be referred to as "Zollanmeldung" or "Eingangdatum" in the invoice.
|
|
37
40
|
- issueDate can also be referred to as "Einfuhrabgabenbescheid" in the invoice. issueDate and serviceDate can be same in some cases.
|
|
38
41
|
- vendor details can be "Hauptzollamt" details in the top portion of the invoice.
|
|
39
42
|
|
|
40
|
-
- lineItem:
|
|
41
|
-
-
|
|
42
|
-
- totalAmount in the Credit Note is the Differenzbetrag in the line items.
|
|
43
|
+
- lineItem: Each line item should be extracted only once. Give priority to the first occurrence of the line item details in the document.
|
|
44
|
+
- totalAmount in the Credit Note is the Differenzbetrag in the line items. The totalAmount value is always or mostly mentioned in EUR currency.
|
|
43
45
|
- deferredDutyPayer can be identified under "Aufschubnehmer" for each line item. It is a combination of a numeric code and an entity name.
|
|
44
46
|
|
|
45
47
|
You can usually find all the information in the top 2 pages of the invoice.
|
|
@@ -0,0 +1,125 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "OBJECT",
|
|
3
|
+
"properties": {
|
|
4
|
+
"consignee": {
|
|
5
|
+
"type": "string",
|
|
6
|
+
"nullable": true,
|
|
7
|
+
"description": "The receiver or buyer of the goods."
|
|
8
|
+
},
|
|
9
|
+
"currency": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"nullable": true,
|
|
12
|
+
"description": "The currency of the totalAmount."
|
|
13
|
+
},
|
|
14
|
+
"grossWeight": {
|
|
15
|
+
"type": "string",
|
|
16
|
+
"nullable": true,
|
|
17
|
+
"description": "The total gross weight of all the goods. Usually mentioned as G.W or GW or Gross Weight, etc.."
|
|
18
|
+
},
|
|
19
|
+
"incoterm": {
|
|
20
|
+
"type": "string",
|
|
21
|
+
"nullable": true,
|
|
22
|
+
"description": "An Incoterm is a 3 letter standardized trade term defining the responsibilities of buyers and sellers in international shipping and logistics. For example, FOB, CFR, DAP, CIF, etc..."
|
|
23
|
+
},
|
|
24
|
+
"invoiceDate": {
|
|
25
|
+
"type": "string",
|
|
26
|
+
"nullable": true,
|
|
27
|
+
"description": "A date that the invoice was created or issued."
|
|
28
|
+
},
|
|
29
|
+
"invoiceNumber": {
|
|
30
|
+
"type": "string",
|
|
31
|
+
"nullable": true,
|
|
32
|
+
"description": "The invoice number of the commercial invoice document."
|
|
33
|
+
},
|
|
34
|
+
"measurement": {
|
|
35
|
+
"type": "string",
|
|
36
|
+
"nullable": true,
|
|
37
|
+
"description": "The volume of the goods. Usually, it is measured in \"Cubic Meter (cbm)\" or dimensions. But volume in \"cbm\" is preferred."
|
|
38
|
+
},
|
|
39
|
+
"netWeight": {
|
|
40
|
+
"type": "string",
|
|
41
|
+
"nullable": true,
|
|
42
|
+
"description": "The total net weight of all the goods. Usually, mentioned as N.W or NW or Net Weight, etc.."
|
|
43
|
+
},
|
|
44
|
+
"shipper": {
|
|
45
|
+
"type": "string",
|
|
46
|
+
"nullable": true,
|
|
47
|
+
"description": "The seller or shipper of the goods."
|
|
48
|
+
},
|
|
49
|
+
"totalAmount": {
|
|
50
|
+
"type": "string",
|
|
51
|
+
"nullable": true,
|
|
52
|
+
"description": "The total amount of all the goods mentioned in the invoice."
|
|
53
|
+
},
|
|
54
|
+
"skus": {
|
|
55
|
+
"type": "ARRAY",
|
|
56
|
+
"items": {
|
|
57
|
+
"type": "OBJECT",
|
|
58
|
+
"properties": {
|
|
59
|
+
"amount": {
|
|
60
|
+
"type": "string",
|
|
61
|
+
"nullable": true,
|
|
62
|
+
"description": "Amount of the goods."
|
|
63
|
+
},
|
|
64
|
+
"containerNumber": {
|
|
65
|
+
"type": "string",
|
|
66
|
+
"nullable": true,
|
|
67
|
+
"description": "Container Number consists of 4 capital letters followed by 7 digits. Example: TEMU7972458. Usually mentioned as Container Number, CONTAINER NO. Containers, or Container / Truck No"
|
|
68
|
+
},
|
|
69
|
+
"currency": {
|
|
70
|
+
"type": "string",
|
|
71
|
+
"nullable": true,
|
|
72
|
+
"description": "The currency of the Amount. Usually mentioned in USD, EURO, CNY, $, or any other currency units and symbols."
|
|
73
|
+
},
|
|
74
|
+
"goodsDescription": {
|
|
75
|
+
"type": "string",
|
|
76
|
+
"nullable": true,
|
|
77
|
+
"description": "Description of the goods."
|
|
78
|
+
},
|
|
79
|
+
"grossWeight": {
|
|
80
|
+
"type": "string",
|
|
81
|
+
"nullable": true,
|
|
82
|
+
"description": "The gross weight of an individual product/goods. Usually, mentioned as G.W or GW or Gross Weight, etc.."
|
|
83
|
+
},
|
|
84
|
+
"hsCode": {
|
|
85
|
+
"type": "string",
|
|
86
|
+
"nullable": true,
|
|
87
|
+
"description": "The Harmonized System (HS) code of the goods."
|
|
88
|
+
},
|
|
89
|
+
"materialNumber": {
|
|
90
|
+
"type": "string",
|
|
91
|
+
"nullable": true,
|
|
92
|
+
"description": "Material number of the product or goods."
|
|
93
|
+
},
|
|
94
|
+
"netWeight": {
|
|
95
|
+
"type": "string",
|
|
96
|
+
"nullable": true,
|
|
97
|
+
"description": "The net weight of an individual product/goods. Usually, mentioned as N.W or NW or Net Weight, etc.."
|
|
98
|
+
},
|
|
99
|
+
"packagingQuantity": {
|
|
100
|
+
"type": "string",
|
|
101
|
+
"nullable": true,
|
|
102
|
+
"description": "The quantity of the goods. Usually, the quantity is in pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets (PLT) >> Cartons (CTNS) >> Pieces (PCS). Extract the Larger packaging types that will have a lower count."
|
|
103
|
+
},
|
|
104
|
+
"packageType": {
|
|
105
|
+
"type": "string",
|
|
106
|
+
"nullable": true,
|
|
107
|
+
"description": "The packaging type is the unit of packagingQuantity. Example; pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc. Sometimes, the packaging type is available in the column name of the packagingQuantity."
|
|
108
|
+
},
|
|
109
|
+
"poNumber": {
|
|
110
|
+
"type": "string",
|
|
111
|
+
"nullable": true,
|
|
112
|
+
"description": "Purchase order of the goods."
|
|
113
|
+
},
|
|
114
|
+
"skuNumber": {
|
|
115
|
+
"type": "string",
|
|
116
|
+
"nullable": true,
|
|
117
|
+
"description": "SKU number of the goods."
|
|
118
|
+
}
|
|
119
|
+
},
|
|
120
|
+
"required": []
|
|
121
|
+
}
|
|
122
|
+
}
|
|
123
|
+
},
|
|
124
|
+
"required": []
|
|
125
|
+
}
|
|
@@ -2,7 +2,8 @@ Task: You are a document entity extraction specialist. Given a document, your ta
|
|
|
2
2
|
|
|
3
3
|
Extract all the data points from the given document.
|
|
4
4
|
Each data point is part of a master field called skus. There may be multiple skus entries in a document.
|
|
5
|
-
Your
|
|
5
|
+
Your task is to extract the text value of the entities and page numbers starting from 0 where the value was found in the document.
|
|
6
|
+
|
|
6
7
|
|
|
7
8
|
Instructions:
|
|
8
9
|
- Populate fields as defined in the response schema.
|
|
@@ -1,19 +1,70 @@
|
|
|
1
1
|
{
|
|
2
|
-
"
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
{
|
|
10
|
-
"
|
|
11
|
-
"
|
|
12
|
-
"
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
"
|
|
2
|
+
"type": "OBJECT",
|
|
3
|
+
"properties": {
|
|
4
|
+
"consignee": {
|
|
5
|
+
"type": "STRING",
|
|
6
|
+
"nullable": true,
|
|
7
|
+
"description": "The receiver or buyer of the goods. It can be found with keywords like Importeur, Anmelder, Empfanger, Consignee, Buyer, Receiver, etc."
|
|
8
|
+
},
|
|
9
|
+
"countryOfOrigin": {
|
|
10
|
+
"type": "STRING",
|
|
11
|
+
"nullable": true,
|
|
12
|
+
"description": "The country where the goods were manufactured or produced. It can be identified as Land van oorsprong, Ursprungsland in the document."
|
|
13
|
+
},
|
|
14
|
+
"MRN": {
|
|
15
|
+
"type": "STRING",
|
|
16
|
+
"nullable": true,
|
|
17
|
+
"description": "Movement Reference Number (MRN) is a unique identifier assigned to each customs declaration for goods being imported or exported within the European Union (EU). It is used to track and monitor the movement of goods across EU member states. It can be found with MRN, Reg. Nr., Reg. Kennzeichen, etc."
|
|
18
|
+
},
|
|
19
|
+
"shipper": {
|
|
20
|
+
"type": "STRING",
|
|
21
|
+
"nullable": true,
|
|
22
|
+
"description": "The seller or shipper of the goods. It is often indicated by the term Shipper, Speditore, Esportatore, Exporteur, Versender."
|
|
23
|
+
},
|
|
24
|
+
"totalValueOfGoods": {
|
|
25
|
+
"type": "STRING",
|
|
26
|
+
"nullable": true,
|
|
27
|
+
"description": "The total monetary value of the goods being shipped, usually declared for customs purposes. It can be found with Waarde, Warenwert, Factuurwaarde, Invoice Value, etc.."
|
|
28
|
+
},
|
|
29
|
+
"containers": {
|
|
30
|
+
"type": "ARRAY",
|
|
31
|
+
"items": {
|
|
32
|
+
"type": "OBJECT",
|
|
33
|
+
"properties": {
|
|
34
|
+
"containerNumber": {
|
|
35
|
+
"type": "STRING",
|
|
36
|
+
"nullable": true,
|
|
37
|
+
"description": "The unique identifier for each container. It always starts with 4 capital letters and followed by 7 digits. Example: TEMU7972458."
|
|
38
|
+
},
|
|
39
|
+
"goodsDescription": {
|
|
40
|
+
"type": "STRING",
|
|
41
|
+
"nullable": true,
|
|
42
|
+
"description": "A brief description of the goods contained within the container. It can be found with goods description, Bezeichnung, goederenomschrijving."
|
|
43
|
+
},
|
|
44
|
+
"grossWeight": {
|
|
45
|
+
"type": "STRING",
|
|
46
|
+
"nullable": true,
|
|
47
|
+
"description": "The gross weight of the container. Usually mentioned as G.W or GW, Bruto, or Gross Weight, etc.."
|
|
48
|
+
},
|
|
49
|
+
"nettWeight": {
|
|
50
|
+
"type": "STRING",
|
|
51
|
+
"nullable": true,
|
|
52
|
+
"description": "The net weight of the goods inside the container. Usually mentioned as N.W or NW, Net Weight, or Netto, Eigenmasse, etc.."
|
|
53
|
+
},
|
|
54
|
+
"packagingNumber": {
|
|
55
|
+
"type": "STRING",
|
|
56
|
+
"nullable": true,
|
|
57
|
+
"description": "The quantity of the goods. Usually, the quantity is in pallets, PLT, cartons, CTNS, pieces, PCS, packages, boxes, etc. Please prioritize the packaging types based on their size, as follows: Pallets (PLT) >> Cartons (CTNS) >> Pieces (PCS). Extract the Larger packaging types that will have a lower count."
|
|
58
|
+
},
|
|
59
|
+
"packagingType": {
|
|
60
|
+
"type": "STRING",
|
|
61
|
+
"nullable": true,
|
|
62
|
+
"description": "The packaging type is the unit of packagingNumber. Example; pallets, PLT, cartons, CTNS, pieces, PCS, packages, etc. Sometimes, the packaging type is available in the column name of the packagingNumber."
|
|
63
|
+
}
|
|
64
|
+
},
|
|
65
|
+
"required": ["containerNumber", "goodsDescription", "grossWeight", "nettWeight", "packagingNumber", "packagingType"]
|
|
66
|
+
}
|
|
16
67
|
}
|
|
17
|
-
|
|
18
|
-
|
|
68
|
+
},
|
|
69
|
+
"required": ["countryOfOrigin", "MRN", "totalValueOfGoods", "containers"]
|
|
19
70
|
}
|