data-science-document-ai 1.37.0__py3-none-any.whl → 1.51.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_science_document_ai-1.37.0.dist-info → data_science_document_ai-1.51.0.dist-info}/METADATA +3 -3
- data_science_document_ai-1.51.0.dist-info/RECORD +60 -0
- {data_science_document_ai-1.37.0.dist-info → data_science_document_ai-1.51.0.dist-info}/WHEEL +1 -1
- src/constants.py +6 -10
- src/docai.py +14 -5
- src/docai_processor_config.yaml +0 -56
- src/excel_processing.py +34 -13
- src/io.py +69 -1
- src/llm.py +10 -32
- src/pdf_processing.py +192 -57
- src/postprocessing/common.py +252 -590
- src/postprocessing/postprocess_partner_invoice.py +139 -89
- src/prompts/library/arrivalNotice/other/placeholders.json +70 -0
- src/prompts/library/arrivalNotice/other/prompt.txt +40 -0
- src/prompts/library/bookingConfirmation/evergreen/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/evergreen/prompt.txt +1 -0
- src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +18 -18
- src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +1 -1
- src/prompts/library/bookingConfirmation/maersk/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/maersk/prompt.txt +1 -1
- src/prompts/library/bookingConfirmation/msc/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/msc/prompt.txt +1 -1
- src/prompts/library/bookingConfirmation/oocl/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/oocl/prompt.txt +3 -1
- src/prompts/library/bookingConfirmation/other/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/other/prompt.txt +1 -1
- src/prompts/library/bookingConfirmation/yangming/placeholders.json +17 -17
- src/prompts/library/bookingConfirmation/yangming/prompt.txt +1 -1
- src/prompts/library/bundeskasse/other/placeholders.json +25 -25
- src/prompts/library/bundeskasse/other/prompt.txt +8 -6
- src/prompts/library/commercialInvoice/other/placeholders.json +125 -0
- src/prompts/library/commercialInvoice/other/prompt.txt +2 -1
- src/prompts/library/customsAssessment/other/placeholders.json +67 -16
- src/prompts/library/customsAssessment/other/prompt.txt +24 -37
- src/prompts/library/customsInvoice/other/placeholders.json +29 -20
- src/prompts/library/customsInvoice/other/prompt.txt +9 -4
- src/prompts/library/deliveryOrder/other/placeholders.json +79 -28
- src/prompts/library/deliveryOrder/other/prompt.txt +26 -40
- src/prompts/library/draftMbl/other/placeholders.json +33 -33
- src/prompts/library/draftMbl/other/prompt.txt +34 -44
- src/prompts/library/finalMbL/other/placeholders.json +34 -34
- src/prompts/library/finalMbL/other/prompt.txt +34 -44
- src/prompts/library/packingList/other/placeholders.json +98 -0
- src/prompts/library/packingList/other/prompt.txt +1 -1
- src/prompts/library/partnerInvoice/other/placeholders.json +2 -23
- src/prompts/library/partnerInvoice/other/prompt.txt +7 -18
- src/prompts/library/preprocessing/carrier/placeholders.json +0 -16
- src/prompts/library/shippingInstruction/other/placeholders.json +115 -0
- src/prompts/library/shippingInstruction/other/prompt.txt +28 -15
- src/setup.py +13 -61
- src/utils.py +189 -29
- data_science_document_ai-1.37.0.dist-info/RECORD +0 -59
- src/prompts/library/draftMbl/hapag-lloyd/prompt.txt +0 -44
- src/prompts/library/draftMbl/maersk/prompt.txt +0 -17
- src/prompts/library/finalMbL/hapag-lloyd/prompt.txt +0 -44
- src/prompts/library/finalMbL/maersk/prompt.txt +0 -17
|
@@ -1,9 +1,8 @@
|
|
|
1
1
|
"""This module contains the postprocessing functions for the partner invoice."""
|
|
2
|
-
from
|
|
3
|
-
|
|
4
|
-
from fuzzywuzzy import fuzz
|
|
2
|
+
from rapidfuzz import fuzz, process
|
|
5
3
|
|
|
6
4
|
from src.io import logger
|
|
5
|
+
from src.utils import get_tms_mappings
|
|
7
6
|
|
|
8
7
|
|
|
9
8
|
def postprocessing_partner_invoice(partner_invoice):
|
|
@@ -104,9 +103,18 @@ def post_process_bundeskasse(aggregated_data):
|
|
|
104
103
|
)
|
|
105
104
|
|
|
106
105
|
# Check if the deferredDutyPayer is forto
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
106
|
+
KEYWORDS = {"de789147263644738", "forto", "009812"}
|
|
107
|
+
|
|
108
|
+
def is_forto_recipient(line_item: dict) -> bool:
|
|
109
|
+
values_to_check = [
|
|
110
|
+
line_item.get("deferredDutyPayer", {}).get("documentValue", ""),
|
|
111
|
+
line_item.get("vatId", {}).get("documentValue", ""),
|
|
112
|
+
]
|
|
113
|
+
|
|
114
|
+
combined = " ".join(values_to_check).lower()
|
|
115
|
+
return any(keyword in combined for keyword in KEYWORDS)
|
|
116
|
+
|
|
117
|
+
if is_forto_recipient(line_item):
|
|
110
118
|
is_recipient_forto = True
|
|
111
119
|
|
|
112
120
|
update_recipient_and_vendor(aggregated_data, is_recipient_forto)
|
|
@@ -135,38 +143,9 @@ def update_recipient_and_vendor(aggregated_data, is_recipient_forto):
|
|
|
135
143
|
] = "Dasbachstraße 15, 54292 Trier, Germany"
|
|
136
144
|
|
|
137
145
|
|
|
138
|
-
def process_partner_invoice(params, aggregated_data,
|
|
146
|
+
async def process_partner_invoice(params, aggregated_data, document_type_code):
|
|
139
147
|
"""Process the partner invoice data."""
|
|
140
|
-
# Post process
|
|
141
|
-
# TODO: Remove this block of code after migrating to LLM completely and update the placeholder in the prompt library
|
|
142
|
-
if "containerNumber" in aggregated_data and isinstance(
|
|
143
|
-
aggregated_data["containerNumber"], dict
|
|
144
|
-
):
|
|
145
|
-
container_number = aggregated_data.get("containerNumber", {}).get(
|
|
146
|
-
"formattedValue", None
|
|
147
|
-
)
|
|
148
|
-
if container_number:
|
|
149
|
-
aggregated_data["containerNumber"] = (
|
|
150
|
-
[
|
|
151
|
-
{
|
|
152
|
-
"documentValue": aggregated_data.get("containerNumber", {}).get(
|
|
153
|
-
"documentValue", ""
|
|
154
|
-
),
|
|
155
|
-
"formattedValue": ctr_number,
|
|
156
|
-
}
|
|
157
|
-
for ctr_number in container_number
|
|
158
|
-
]
|
|
159
|
-
if isinstance(container_number, list)
|
|
160
|
-
else [
|
|
161
|
-
{
|
|
162
|
-
"documentValue": aggregated_data.get("containerNumber", {}).get(
|
|
163
|
-
"documentValue", ""
|
|
164
|
-
),
|
|
165
|
-
"formattedValue": container_number,
|
|
166
|
-
}
|
|
167
|
-
]
|
|
168
|
-
)
|
|
169
|
-
|
|
148
|
+
# Post process bundeskasse invoices
|
|
170
149
|
if document_type_code == "bundeskasse":
|
|
171
150
|
post_process_bundeskasse(aggregated_data)
|
|
172
151
|
return
|
|
@@ -188,24 +167,84 @@ def process_partner_invoice(params, aggregated_data, embed_manager, document_typ
|
|
|
188
167
|
reverse_charge_info["formattedValue"] = reverse_charge_value
|
|
189
168
|
reverse_charge = aggregated_data.pop("reverseChargeSentence", None)
|
|
190
169
|
|
|
191
|
-
# Process
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
170
|
+
# Process everything in one go
|
|
171
|
+
processed_items = await process_line_items_batch(params, line_items, reverse_charge)
|
|
172
|
+
|
|
173
|
+
# Update your main data structure
|
|
174
|
+
aggregated_data["lineItem"] = processed_items
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
async def process_line_items_batch(
|
|
178
|
+
params: dict, line_items: list[dict], reverse_charge=None
|
|
179
|
+
):
|
|
180
|
+
"""
|
|
181
|
+
Processes all line items efficiently using a "Split-Apply-Combine" strategy.
|
|
182
|
+
"""
|
|
183
|
+
# To store items that need external API lookup
|
|
184
|
+
pending_line_items = {}
|
|
185
|
+
|
|
186
|
+
# Check Fuzzy Matching
|
|
187
|
+
logger.info(f"Mapping line item codes with Fuzzy matching....")
|
|
188
|
+
for i, item in enumerate(line_items):
|
|
189
|
+
description_obj = item.get("lineItemDescription")
|
|
190
|
+
|
|
191
|
+
if not description_obj or not description_obj.get("formattedValue"):
|
|
192
|
+
continue
|
|
193
|
+
# Get the formatted description text
|
|
194
|
+
desc = description_obj["formattedValue"]
|
|
195
|
+
|
|
196
|
+
# Find Fuzzy Match
|
|
197
|
+
matched_code = find_matching_lineitem(
|
|
198
|
+
desc,
|
|
199
|
+
params["lookup_data"]["item_code"],
|
|
200
|
+
params["fuzzy_threshold_item_code"],
|
|
201
|
+
)
|
|
202
|
+
|
|
203
|
+
if matched_code:
|
|
204
|
+
# Set the code to the line item
|
|
205
|
+
item["itemCode"] = {
|
|
206
|
+
"documentValue": desc,
|
|
207
|
+
"formattedValue": matched_code,
|
|
208
|
+
"page": description_obj.get("page"),
|
|
209
|
+
}
|
|
210
|
+
else:
|
|
211
|
+
# Store for batch API call
|
|
212
|
+
pending_line_items[i] = desc
|
|
199
213
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
214
|
+
# Batch API Call for Embedding lookups
|
|
215
|
+
if pending_line_items:
|
|
216
|
+
values_to_fetch = list(set(pending_line_items.values()))
|
|
217
|
+
logger.info(f"Mapping {len(values_to_fetch)} line items from Embedding API...")
|
|
203
218
|
|
|
219
|
+
# Await the batch response {"desc1": "code1", "desc2": "code2"}
|
|
220
|
+
api_results = await get_tms_mappings(
|
|
221
|
+
input_list=values_to_fetch, embedding_type="line_items"
|
|
222
|
+
)
|
|
204
223
|
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
224
|
+
# Merge API results back into original list
|
|
225
|
+
for index, desc in pending_line_items.items():
|
|
226
|
+
# Get result from API response, or None if API failed for that item
|
|
227
|
+
forto_code = api_results.get(desc)
|
|
228
|
+
|
|
229
|
+
# Update the original item
|
|
230
|
+
line_items[index]["itemCode"] = {
|
|
231
|
+
"documentValue": desc,
|
|
232
|
+
"formattedValue": forto_code, # Might be None if API failed
|
|
233
|
+
"page": line_items[index]["lineItemDescription"].get("page"),
|
|
234
|
+
}
|
|
235
|
+
|
|
236
|
+
# Add reverse charge here if exists
|
|
237
|
+
if reverse_charge:
|
|
238
|
+
[
|
|
239
|
+
item.update({"reverseChargeSentence": reverse_charge})
|
|
240
|
+
for item in line_items
|
|
241
|
+
if (
|
|
242
|
+
(item.get("itemCode") and item["itemCode"]["formattedValue"] != "CDU")
|
|
243
|
+
or not item.get("itemCode")
|
|
244
|
+
)
|
|
245
|
+
]
|
|
246
|
+
|
|
247
|
+
return line_items
|
|
209
248
|
|
|
210
249
|
|
|
211
250
|
def get_fuzzy_match_score(target: str, sentences: list, threshold: int):
|
|
@@ -220,16 +259,18 @@ def get_fuzzy_match_score(target: str, sentences: list, threshold: int):
|
|
|
220
259
|
tuple: (best_match, score) if above threshold, else (None, 0)
|
|
221
260
|
"""
|
|
222
261
|
# Use multiprocessing to find the best match
|
|
223
|
-
|
|
224
|
-
|
|
262
|
+
result = process.extractOne(
|
|
263
|
+
target, sentences, scorer=fuzz.WRatio, score_cutoff=threshold
|
|
264
|
+
)
|
|
265
|
+
|
|
266
|
+
if result is None:
|
|
267
|
+
return None, False
|
|
225
268
|
|
|
226
|
-
|
|
227
|
-
best_match, best_score = max(results, key=lambda x: x[1], default=(None, 0))
|
|
269
|
+
match, score, index = result
|
|
228
270
|
|
|
229
|
-
# return best_match
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
return best_match, True
|
|
271
|
+
# return best_match if the best match score is above a threshold (e.g., 80)
|
|
272
|
+
if match:
|
|
273
|
+
return match, True
|
|
233
274
|
|
|
234
275
|
return None, False
|
|
235
276
|
|
|
@@ -261,50 +302,59 @@ def find_matching_lineitem(new_lineitem: str, kvp_dict: dict, threshold=90):
|
|
|
261
302
|
Returns:
|
|
262
303
|
str: The best matching 'Forto SLI' value from the dictionary.
|
|
263
304
|
"""
|
|
264
|
-
new_lineitem = new_lineitem.upper()
|
|
265
|
-
|
|
266
305
|
# Check if the new line item is already in the dictionary
|
|
267
306
|
if new_lineitem in kvp_dict:
|
|
268
307
|
return kvp_dict[new_lineitem]
|
|
269
308
|
|
|
270
309
|
# Get the best fuzzy match score for the extracted line item
|
|
271
|
-
|
|
272
|
-
new_lineitem,
|
|
310
|
+
match, _ = get_fuzzy_match_score(
|
|
311
|
+
new_lineitem,
|
|
312
|
+
list(kvp_dict.keys()),
|
|
313
|
+
threshold,
|
|
273
314
|
)
|
|
274
315
|
|
|
275
|
-
|
|
276
|
-
|
|
316
|
+
if match:
|
|
317
|
+
# find the code from the kvp_dict
|
|
318
|
+
return kvp_dict[match]
|
|
277
319
|
|
|
278
|
-
|
|
279
|
-
"""
|
|
280
|
-
Finds a match for the input string using fuzzy matching first, then embedding fallback.
|
|
320
|
+
return None
|
|
281
321
|
|
|
282
|
-
1. Tries to find a fuzzy match for input_string against the keys in
|
|
283
|
-
mapping_data using RapidFuzz, requiring a score >= fuzzy_threshold.
|
|
284
|
-
2. If found, returns the corresponding value from mapping_data.
|
|
285
|
-
3. If not found above threshold, calls the embedding_fallback function.
|
|
286
322
|
|
|
323
|
+
async def associate_forto_item_code(line_item_data, params):
|
|
324
|
+
"""
|
|
325
|
+
Associates Forto item codes to a list of line item descriptions.
|
|
287
326
|
Args:
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
params: Parameters containing the lookup data and fuzzy threshold.
|
|
327
|
+
line_item_data (dict): A dictionary where keys are original descriptions and values are cleaned descriptions.
|
|
328
|
+
params (dict): Parameters containing lookup data and thresholds.
|
|
291
329
|
|
|
292
330
|
Returns:
|
|
293
|
-
|
|
331
|
+
list: A list of dictionaries with 'description' and 'itemCode' keys.
|
|
294
332
|
"""
|
|
295
|
-
# Get the Forto item code using fuzzy matching
|
|
296
|
-
forto_item_code = find_matching_lineitem(
|
|
297
|
-
new_lineitem=input_string,
|
|
298
|
-
kvp_dict=params["lookup_data"]["item_code"], # TODO: Parse the KVP dictionary
|
|
299
|
-
threshold=params["fuzzy_threshold_item_code"],
|
|
300
|
-
)
|
|
301
333
|
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
334
|
+
result = []
|
|
335
|
+
pending_line_items = {}
|
|
336
|
+
for desc, f_desc in line_item_data.items():
|
|
337
|
+
# Get the Forto item code using fuzzy matching
|
|
338
|
+
code = find_matching_lineitem(
|
|
339
|
+
new_lineitem=f_desc,
|
|
340
|
+
kvp_dict=params["lookup_data"]["item_code"],
|
|
341
|
+
threshold=params["fuzzy_threshold_item_code"],
|
|
307
342
|
)
|
|
343
|
+
if code:
|
|
344
|
+
result.append({"description": desc, "itemCode": code})
|
|
345
|
+
else:
|
|
346
|
+
pending_line_items[desc] = f_desc
|
|
347
|
+
|
|
348
|
+
# Batch API Call for Embedding lookups
|
|
349
|
+
if pending_line_items:
|
|
350
|
+
api_results = await get_tms_mappings(
|
|
351
|
+
input_list=list(pending_line_items.values()),
|
|
352
|
+
embedding_type="line_items",
|
|
353
|
+
)
|
|
354
|
+
|
|
355
|
+
# Merge API results back into original list
|
|
356
|
+
for desc, f_desc in pending_line_items.items():
|
|
357
|
+
code = api_results.get(f_desc)
|
|
358
|
+
result.append({"description": desc, "itemCode": code})
|
|
308
359
|
|
|
309
|
-
result = {"documentValue": input_string, "formattedValue": forto_item_code}
|
|
310
360
|
return result
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
{
|
|
2
|
+
"type": "OBJECT",
|
|
3
|
+
"properties": {
|
|
4
|
+
"bookingNumber": {
|
|
5
|
+
"type": "STRING",
|
|
6
|
+
"nullable": true,
|
|
7
|
+
"description": "The booking number associated with the Arrival Notice document. They are often referred to as 'Booking Number', 'Booking No.', 'Booking Ref.', 'Booking Reference', 'Booking ID', 'carrier's reference' or 'Order Ref'."
|
|
8
|
+
},
|
|
9
|
+
"destinationTerminal": {
|
|
10
|
+
"type": "STRING",
|
|
11
|
+
"nullable": true,
|
|
12
|
+
"description": "The terminal at the destination port where the container will be delivered."
|
|
13
|
+
},
|
|
14
|
+
"eta": {
|
|
15
|
+
"type": "STRING",
|
|
16
|
+
"nullable": true,
|
|
17
|
+
"description": "Estimated Time of Arrival (ETA) is the expected date when the shipment will arrive at its destination."
|
|
18
|
+
},
|
|
19
|
+
"mblNumber": {
|
|
20
|
+
"type": "STRING",
|
|
21
|
+
"nullable": true,
|
|
22
|
+
"description": "Bill of Lading number (B/L NO.), a document issued by the carrier."
|
|
23
|
+
},
|
|
24
|
+
"portOfDischarge": {
|
|
25
|
+
"type": "STRING",
|
|
26
|
+
"nullable": true,
|
|
27
|
+
"description": "The port where the goods are discharged from the vessel. This is the destination port for the shipment."
|
|
28
|
+
},
|
|
29
|
+
"vesselName": {
|
|
30
|
+
"type": "STRING",
|
|
31
|
+
"nullable": true,
|
|
32
|
+
"description": "The name of the vessel carrying the shipment."
|
|
33
|
+
},
|
|
34
|
+
"containers": {
|
|
35
|
+
"type": "ARRAY",
|
|
36
|
+
"items": {
|
|
37
|
+
"type": "OBJECT",
|
|
38
|
+
"properties": {
|
|
39
|
+
"containerNumber": {
|
|
40
|
+
"type": "STRING",
|
|
41
|
+
"nullable": true,
|
|
42
|
+
"description": "The unique identifier for each container. It always starts with 4 capital letters and followed by 7 digits. Example: TEMU7972458."
|
|
43
|
+
},
|
|
44
|
+
"containerType": {
|
|
45
|
+
"type": "STRING",
|
|
46
|
+
"nullable": true,
|
|
47
|
+
"description": "The size of the container associated with the containerNumber, such as 20ft, 40ft, 40HC, 20DC etc."
|
|
48
|
+
},
|
|
49
|
+
"grossWeight": {
|
|
50
|
+
"type": "STRING",
|
|
51
|
+
"nullable": true,
|
|
52
|
+
"description": "The gross weight of the container. Usually mentioned as G.W or GW or Gross Weight, etc.."
|
|
53
|
+
},
|
|
54
|
+
"measurements": {
|
|
55
|
+
"type": "STRING",
|
|
56
|
+
"nullable": true,
|
|
57
|
+
"description": "The volume of the container. Usually, it is measured in 'Cubic Meter (cbm)' or dimensions. But volume in 'cbm' is preferred."
|
|
58
|
+
},
|
|
59
|
+
"sealNumber": {
|
|
60
|
+
"type": "STRING",
|
|
61
|
+
"nullable": true,
|
|
62
|
+
"description": "The seal number associated with the container Number. But it is not same as the container number."
|
|
63
|
+
}
|
|
64
|
+
},
|
|
65
|
+
"required": ["containerNumber", "containerType", "grossWeight"]
|
|
66
|
+
}
|
|
67
|
+
}
|
|
68
|
+
},
|
|
69
|
+
"required": ["bookingNumber", "destinationTerminal", "eta", "portOfDischarge", "vesselName", "containers"]
|
|
70
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
<PERSONA> You are an efficient document entity data extraction specialist working for a Freight Forwarding company. <PERSONA>
|
|
2
|
+
|
|
3
|
+
<TASK> Your task is to extract data from Arrival Notice documents as per the given response schema structure. <TASK>
|
|
4
|
+
|
|
5
|
+
<CONTEXT>
|
|
6
|
+
The Freight Forwarding company receives Arrival Notice from shipping lines.
|
|
7
|
+
These documents contain various details related to arrival of a shipment to the port of destination such as container numbers, estimated time of arrival, vessel details and containers information.
|
|
8
|
+
They may be written in different languages such as English, German, Italian and can appear in a variety of formats and layouts.
|
|
9
|
+
Your role is to accurately extract specific entities from these Arrival Notices to support efficient processing and accurate record-keeping.
|
|
10
|
+
<CONTEXT>
|
|
11
|
+
|
|
12
|
+
<INSTRUCTIONS>
|
|
13
|
+
- Populate fields as defined in the response schema.
|
|
14
|
+
- Multiple container entries may exist; capture all instances under "containers".
|
|
15
|
+
- Use the data field description to understand the context of the data.
|
|
16
|
+
|
|
17
|
+
- bookingNumber:
|
|
18
|
+
- Booking numbers are unique identifiers for shipments. They are often referred to as "Booking Number", "Booking No.", "Booking Ref.", "Booking Reference", "Booking ID", "SACO-Pos.", "Order Ref", "Unsere Referenz", or "Unsere Position"
|
|
19
|
+
- If there is a unique_id that starts with "S" followed by 6 or 8 digits, it is a shipmentID, not a bookingNumber.
|
|
20
|
+
|
|
21
|
+
- destinationTerminal:
|
|
22
|
+
- Destination Terminal can also be referred to as "Destination Terminal", "Pickup Location", "Delivery Location", "Delivery Terminal", "Empfangsort", "Entladeort", or "Abladestelle".
|
|
23
|
+
|
|
24
|
+
- mblNumber:
|
|
25
|
+
- Commonly known as "Bill of Lading Number", "BILL OF LADING NO.", "BL Number", "BL No.", "B/L No.", "BL-Nr.", "B/L", "HBL No.", or "M-AWB Nummer".
|
|
26
|
+
- Bill of Lading Number is known as mblNumber. Not a shipmentID even if it starts with "S".
|
|
27
|
+
- mblNumber from Hapag-Lloyd always starts with HLC.... (e.g., "HLCUTS12303AWNT3") and is named "SEA WAYBILL" or "SWB-NR.".
|
|
28
|
+
|
|
29
|
+
- eta:
|
|
30
|
+
- Estimated Time of Arrival (ETA) is the expected date and time when the shipment will arrive at the destination port.
|
|
31
|
+
- It can be referred to as "ETA", "Estimated Arrival", "Voraussichtliche Ankunft", "Ankunftszeit", "Arrivo", "Due to arrive at Terminal"
|
|
32
|
+
|
|
33
|
+
- vesselName:
|
|
34
|
+
- Vessel Name is the name of the ship carrying the cargo. It can be referred to as "Vessel", "Ship Name", "Schiff", "Schiffsname", "Nave", or "Vessel/Flight No.".
|
|
35
|
+
|
|
36
|
+
- containers: Details of each container on the arrival notice. Make sure to extract each container information separately.
|
|
37
|
+
- containerNumber: Container Number consists of 4 capital letters followed by 7 digits (e.g., TEMU7972458, CAIU 7222892).
|
|
38
|
+
- sealNumber: Seal numbers are unique identifiers for shipping seals. They are usually mentioned as seal numbers in the document but they are definitely not container numbers.
|
|
39
|
+
|
|
40
|
+
<INSTRUCTIONS>
|
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"cfsCutOff": {"type": "
|
|
5
|
-
"bookingNumber": {"type": "
|
|
6
|
-
"cyCutOff": {"type": "
|
|
7
|
-
"gateInReference": {"type": "
|
|
8
|
-
"gateInTerminal": {"type": "
|
|
9
|
-
"mblNumber": {"type": "
|
|
10
|
-
"pickUpReference": {"type": "
|
|
11
|
-
"pickUpTerminal": {"type": "
|
|
12
|
-
"siCutOff": {"type": "
|
|
13
|
-
"vgmCutOff": {"type": "
|
|
4
|
+
"cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
|
|
5
|
+
"bookingNumber": {"type": "STRING", "nullable": true},
|
|
6
|
+
"cyCutOff": {"type": "STRING", "nullable": true},
|
|
7
|
+
"gateInReference": {"type": "STRING", "nullable": true},
|
|
8
|
+
"gateInTerminal": {"type": "STRING", "nullable": true},
|
|
9
|
+
"mblNumber": {"type": "STRING", "nullable": true},
|
|
10
|
+
"pickUpReference": {"type": "STRING", "nullable": true},
|
|
11
|
+
"pickUpTerminal": {"type": "STRING", "nullable": true},
|
|
12
|
+
"siCutOff": {"type": "STRING", "nullable": true},
|
|
13
|
+
"vgmCutOff": {"type": "STRING", "nullable": true},
|
|
14
14
|
"transportLegs": {
|
|
15
15
|
"type": "ARRAY",
|
|
16
16
|
"items": {
|
|
17
17
|
"type": "OBJECT",
|
|
18
18
|
"properties": {
|
|
19
|
-
"eta": {"type": "
|
|
20
|
-
"etd": {"type": "
|
|
21
|
-
"imoNumber": {"type": "
|
|
22
|
-
"portOfDischarge": {"type": "
|
|
23
|
-
"portOfLoading": {"type": "
|
|
24
|
-
"vesselName": {"type": "
|
|
25
|
-
"voyage": {"type": "
|
|
19
|
+
"eta": {"type": "STRING", "nullable": true},
|
|
20
|
+
"etd": {"type": "STRING", "nullable": true},
|
|
21
|
+
"imoNumber": {"type": "STRING", "nullable": true},
|
|
22
|
+
"portOfDischarge": {"type": "STRING", "nullable": true},
|
|
23
|
+
"portOfLoading": {"type": "STRING", "nullable": true},
|
|
24
|
+
"vesselName": {"type": "STRING", "nullable": true},
|
|
25
|
+
"voyage": {"type": "STRING", "nullable": true}
|
|
26
26
|
},
|
|
27
27
|
"required": []
|
|
28
28
|
}
|
|
@@ -1,32 +1,32 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"cfsCutOff": {"type": "
|
|
5
|
-
"bookingNumber": {"type": "
|
|
6
|
-
"cyCutOff": {"type": "
|
|
7
|
-
"gateInReference": {"type": "
|
|
8
|
-
"gateInTerminal": {"type": "
|
|
9
|
-
"mblNumber": {"type": "
|
|
10
|
-
"pickUpReference": {"type": "
|
|
11
|
-
"pickUpTerminal": {"type": "
|
|
12
|
-
"siCutOff": {"type": "
|
|
13
|
-
"vgmCutOff": {"type": "
|
|
4
|
+
"cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
|
|
5
|
+
"bookingNumber": {"type": "STRING", "nullable": true},
|
|
6
|
+
"cyCutOff": {"type": "STRING", "nullable": true},
|
|
7
|
+
"gateInReference": {"type": "STRING", "nullable": true},
|
|
8
|
+
"gateInTerminal": {"type": "STRING", "nullable": true},
|
|
9
|
+
"mblNumber": {"type": "STRING", "nullable": true},
|
|
10
|
+
"pickUpReference": {"type": "STRING", "nullable": true},
|
|
11
|
+
"pickUpTerminal": {"type": "STRING", "nullable": true},
|
|
12
|
+
"siCutOff": {"type": "STRING", "nullable": true},
|
|
13
|
+
"vgmCutOff": {"type": "STRING", "nullable": true},
|
|
14
14
|
"transportLegs": {
|
|
15
15
|
"type": "ARRAY",
|
|
16
16
|
"items": {
|
|
17
17
|
"type": "OBJECT",
|
|
18
18
|
"properties": {
|
|
19
|
-
"eta": {"type": "
|
|
20
|
-
"etd": {"type": "
|
|
21
|
-
"imoNumber": {"type": "
|
|
22
|
-
"portOfDischarge": {"type": "
|
|
23
|
-
"portOfLoading": {"type": "
|
|
24
|
-
"vesselName": {"type": "
|
|
25
|
-
"voyage": {"type": "
|
|
19
|
+
"eta": {"type": "STRING", "nullable": true},
|
|
20
|
+
"etd": {"type": "STRING", "nullable": true},
|
|
21
|
+
"imoNumber": {"type": "STRING", "nullable": true},
|
|
22
|
+
"portOfDischarge": {"type": "STRING", "nullable": true},
|
|
23
|
+
"portOfLoading": {"type": "STRING", "nullable": true},
|
|
24
|
+
"vesselName": {"type": "STRING", "nullable": true},
|
|
25
|
+
"voyage": {"type": "STRING", "nullable": true}
|
|
26
26
|
},
|
|
27
27
|
"required": []
|
|
28
28
|
}
|
|
29
29
|
}
|
|
30
30
|
},
|
|
31
31
|
"required": []
|
|
32
|
-
}
|
|
32
|
+
}
|
|
@@ -18,7 +18,7 @@ transportLegs:
|
|
|
18
18
|
vesselName: The name of the vessel for a specific leg.
|
|
19
19
|
voyage: The journey or route taken by the vessel for a specific leg.
|
|
20
20
|
|
|
21
|
-
your task is to extract the text value of the following entities:
|
|
21
|
+
your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
22
22
|
SCHEMA_PLACEHOLDER
|
|
23
23
|
|
|
24
24
|
Keywords for datapoints:
|
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"bookingNumber": {"type": "
|
|
5
|
-
"cfsCutOff": {"type": "
|
|
6
|
-
"cyCutOff": {"type": "
|
|
7
|
-
"gateInReference": {"type": "
|
|
8
|
-
"gateInTerminal": {"type": "
|
|
9
|
-
"mblNumber": {"type": "
|
|
10
|
-
"pickUpReference": {"type": "
|
|
11
|
-
"pickUpTerminal": {"type": "
|
|
12
|
-
"siCutOff": {"type": "
|
|
13
|
-
"vgmCutOff": {"type": "
|
|
4
|
+
"bookingNumber": {"type": "STRING", "nullable": true},
|
|
5
|
+
"cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
|
|
6
|
+
"cyCutOff": {"type": "STRING", "nullable": true},
|
|
7
|
+
"gateInReference": {"type": "STRING", "nullable": true},
|
|
8
|
+
"gateInTerminal": {"type": "STRING", "nullable": true},
|
|
9
|
+
"mblNumber": {"type": "STRING", "nullable": true},
|
|
10
|
+
"pickUpReference": {"type": "STRING", "nullable": true},
|
|
11
|
+
"pickUpTerminal": {"type": "STRING", "nullable": true},
|
|
12
|
+
"siCutOff": {"type": "STRING", "nullable": true},
|
|
13
|
+
"vgmCutOff": {"type": "STRING", "nullable": true},
|
|
14
14
|
"transportLegs": {
|
|
15
15
|
"type": "ARRAY",
|
|
16
16
|
"items": {
|
|
17
17
|
"type": "OBJECT",
|
|
18
18
|
"properties": {
|
|
19
|
-
"eta": {"type": "
|
|
20
|
-
"etd": {"type": "
|
|
21
|
-
"imoNumber": {"type": "
|
|
22
|
-
"portOfDischarge": {"type": "
|
|
23
|
-
"portOfLoading": {"type": "
|
|
24
|
-
"vesselName": {"type": "
|
|
25
|
-
"voyage": {"type": "
|
|
19
|
+
"eta": {"type": "STRING", "nullable": true},
|
|
20
|
+
"etd": {"type": "STRING", "nullable": true},
|
|
21
|
+
"imoNumber": {"type": "STRING", "nullable": true},
|
|
22
|
+
"portOfDischarge": {"type": "STRING", "nullable": true},
|
|
23
|
+
"portOfLoading": {"type": "STRING", "nullable": true},
|
|
24
|
+
"vesselName": {"type": "STRING", "nullable": true},
|
|
25
|
+
"voyage": {"type": "STRING", "nullable": true}
|
|
26
26
|
},
|
|
27
27
|
"required": []
|
|
28
28
|
}
|
|
@@ -18,7 +18,7 @@ transportLegs:
|
|
|
18
18
|
vesselName: The name of the vessel for a specific leg.
|
|
19
19
|
voyage: The journey or route taken by the vessel for a specific leg.
|
|
20
20
|
|
|
21
|
-
your task is to extract the text value of the following entities:
|
|
21
|
+
your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
22
22
|
SCHEMA_PLACEHOLDER
|
|
23
23
|
|
|
24
24
|
Keywords for datapoints:
|
|
@@ -1,28 +1,28 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "OBJECT",
|
|
3
3
|
"properties": {
|
|
4
|
-
"cfsCutOff": {"type": "
|
|
5
|
-
"bookingNumber": {"type": "
|
|
6
|
-
"cyCutOff": {"type": "
|
|
7
|
-
"gateInReference": {"type": "
|
|
8
|
-
"gateInTerminal": {"type": "
|
|
9
|
-
"mblNumber": {"type": "
|
|
10
|
-
"pickUpReference": {"type": "
|
|
11
|
-
"pickUpTerminal": {"type": "
|
|
12
|
-
"siCutOff": {"type": "
|
|
13
|
-
"vgmCutOff": {"type": "
|
|
4
|
+
"cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
|
|
5
|
+
"bookingNumber": {"type": "STRING", "nullable": true},
|
|
6
|
+
"cyCutOff": {"type": "STRING", "nullable": true},
|
|
7
|
+
"gateInReference": {"type": "STRING", "nullable": true},
|
|
8
|
+
"gateInTerminal": {"type": "STRING", "nullable": true},
|
|
9
|
+
"mblNumber": {"type": "STRING", "nullable": true},
|
|
10
|
+
"pickUpReference": {"type": "STRING", "nullable": true},
|
|
11
|
+
"pickUpTerminal": {"type": "STRING", "nullable": true},
|
|
12
|
+
"siCutOff": {"type": "STRING", "nullable": true},
|
|
13
|
+
"vgmCutOff": {"type": "STRING", "nullable": true},
|
|
14
14
|
"transportLegs": {
|
|
15
15
|
"type": "ARRAY",
|
|
16
16
|
"items": {
|
|
17
17
|
"type": "OBJECT",
|
|
18
18
|
"properties": {
|
|
19
|
-
"eta": {"type": "
|
|
20
|
-
"etd": {"type": "
|
|
21
|
-
"imoNumber": {"type": "
|
|
22
|
-
"portOfDischarge": {"type": "
|
|
23
|
-
"portOfLoading": {"type": "
|
|
24
|
-
"vesselName": {"type": "
|
|
25
|
-
"voyage": {"type": "
|
|
19
|
+
"eta": {"type": "STRING", "nullable": true},
|
|
20
|
+
"etd": {"type": "STRING", "nullable": true},
|
|
21
|
+
"imoNumber": {"type": "STRING", "nullable": true},
|
|
22
|
+
"portOfDischarge": {"type": "STRING", "nullable": true},
|
|
23
|
+
"portOfLoading": {"type": "STRING", "nullable": true},
|
|
24
|
+
"vesselName": {"type": "STRING", "nullable": true},
|
|
25
|
+
"voyage": {"type": "STRING", "nullable": true}
|
|
26
26
|
},
|
|
27
27
|
"required": []
|
|
28
28
|
}
|
|
@@ -18,7 +18,7 @@ transportLegs:
|
|
|
18
18
|
vesselName: The name of the vessel for a specific leg.
|
|
19
19
|
voyage: The journey or route taken by the vessel for a specific leg.
|
|
20
20
|
|
|
21
|
-
your task is to extract the text value of the following entities:
|
|
21
|
+
your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
|
|
22
22
|
SCHEMA_PLACEHOLDER
|
|
23
23
|
|
|
24
24
|
Further explanation and keywords for the transportLegs part are as follows. The two conditions below are crucial, so pay close attention here:
|