data-science-document-ai 1.40.4__py3-none-any.whl → 1.41.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. {data_science_document_ai-1.40.4.dist-info → data_science_document_ai-1.41.0.dist-info}/METADATA +1 -1
  2. data_science_document_ai-1.41.0.dist-info/RECORD +57 -0
  3. src/excel_processing.py +4 -0
  4. src/pdf_processing.py +14 -3
  5. src/postprocessing/common.py +27 -0
  6. src/prompts/library/bookingConfirmation/evergreen/placeholders.json +17 -17
  7. src/prompts/library/bookingConfirmation/evergreen/prompt.txt +1 -0
  8. src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +18 -18
  9. src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +1 -1
  10. src/prompts/library/bookingConfirmation/maersk/placeholders.json +17 -17
  11. src/prompts/library/bookingConfirmation/maersk/prompt.txt +1 -1
  12. src/prompts/library/bookingConfirmation/msc/placeholders.json +17 -17
  13. src/prompts/library/bookingConfirmation/msc/prompt.txt +1 -1
  14. src/prompts/library/bookingConfirmation/oocl/placeholders.json +17 -17
  15. src/prompts/library/bookingConfirmation/oocl/prompt.txt +3 -1
  16. src/prompts/library/bookingConfirmation/other/placeholders.json +17 -17
  17. src/prompts/library/bookingConfirmation/other/prompt.txt +1 -1
  18. src/prompts/library/bookingConfirmation/yangming/placeholders.json +17 -17
  19. src/prompts/library/bookingConfirmation/yangming/prompt.txt +1 -1
  20. src/prompts/library/bundeskasse/other/placeholders.json +19 -19
  21. src/prompts/library/bundeskasse/other/prompt.txt +1 -1
  22. src/prompts/library/commercialInvoice/other/prompt.txt +2 -1
  23. src/prompts/library/customsAssessment/other/prompt.txt +1 -1
  24. src/prompts/library/customsInvoice/other/placeholders.json +19 -19
  25. src/prompts/library/customsInvoice/other/prompt.txt +1 -1
  26. src/prompts/library/deliveryOrder/other/placeholders.json +15 -17
  27. src/prompts/library/deliveryOrder/other/prompt.txt +1 -1
  28. src/prompts/library/draftMbl/hapag-lloyd/prompt.txt +2 -1
  29. src/prompts/library/draftMbl/maersk/prompt.txt +2 -0
  30. src/prompts/library/draftMbl/other/prompt.txt +1 -1
  31. src/prompts/library/finalMbL/hapag-lloyd/prompt.txt +1 -1
  32. src/prompts/library/finalMbL/maersk/prompt.txt +2 -0
  33. src/prompts/library/finalMbL/other/prompt.txt +1 -1
  34. src/prompts/library/packingList/other/prompt.txt +1 -1
  35. src/prompts/library/partnerInvoice/other/placeholders.json +12 -60
  36. src/prompts/library/partnerInvoice/other/prompt.txt +1 -1
  37. src/prompts/library/shippingInstruction/other/prompt.txt +1 -0
  38. src/prompts/prompt_library.py +4 -0
  39. src/utils.py +57 -0
  40. data_science_document_ai-1.40.4.dist-info/RECORD +0 -59
  41. src/prompts/library/customsAssessment/other/placeholders.json +0 -19
  42. src/prompts/library/finalMbL/other/placeholders.json +0 -80
  43. {data_science_document_ai-1.40.4.dist-info → data_science_document_ai-1.41.0.dist-info}/WHEEL +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-science-document-ai
3
- Version: 1.40.4
3
+ Version: 1.41.0
4
4
  Summary: "Document AI repo for data science"
5
5
  Author: Naomi Nguyen
6
6
  Author-email: naomi.nguyen@forto.com
@@ -0,0 +1,57 @@
1
+ src/constants.py,sha256=TF_UblovdXZnKIb1lnyJwUqQncJCbzBVihoelI6foSU,3579
2
+ src/constants_sandbox.py,sha256=Iu6HdjCoNSmOX0AwoL9qUQkhq_ZnIN5U9e-Q2UfNuGc,547
3
+ src/docai.py,sha256=AepGdF3ZuSGkujLpewX393FgOBMy-e4sEudiGKho5EA,5280
4
+ src/docai_processor_config.yaml,sha256=qOMmCIORpLQ_D-ytvejXxFvER0e0uGYuzPVdZBGv4Pc,2105
5
+ src/excel_processing.py,sha256=wArdSxwxdgyj5WVgVTaWsVSmF7z5zK6rq-bUKGENmo4,2660
6
+ src/io.py,sha256=IXz4wWqiHa9mnHNgtrC6X9M2lItYp9eu6rHCThUIh5c,3585
7
+ src/llm.py,sha256=aEK3rL8XvY7CakvkOJQmcHpEKwZRd8PPrLrzHiO-GFk,7827
8
+ src/log_setup.py,sha256=RhHnpXqcl-ii4EJzRt47CF2R-Q3YPF68tepg_Kg7tkw,2895
9
+ src/pdf_processing.py,sha256=GNHQl_ryyVOHu3FK39XzPJzOCrn01NNW3E2HO43Ot_c,15836
10
+ src/postprocessing/common.py,sha256=Vj_NohcgWZRCzipnPGeM-rg11wdDJ-wwCR12QeE6qOY,21451
11
+ src/postprocessing/postprocess_booking_confirmation.py,sha256=nK32eDiBNbauyQz0oCa9eraysku8aqzrcoRFoWVumDU,4827
12
+ src/postprocessing/postprocess_commercial_invoice.py,sha256=3I8ijluTZcOs_sMnFZxfkAPle0UFQ239EMuvZfDZVPg,1028
13
+ src/postprocessing/postprocess_partner_invoice.py,sha256=cM4te4qjOI_bXyrF8Zhb6X7eNf5aMKoRaPCFfqFv-98,11538
14
+ src/prompts/library/bookingConfirmation/evergreen/placeholders.json,sha256=IpM9nmSPdyroliZfXB1-NDCjiHZX_Ff5BH7-scNhGqE,1406
15
+ src/prompts/library/bookingConfirmation/evergreen/prompt.txt,sha256=5ivskCG831M2scW3oqQaoltXIyHV-n6DYUygWycXxjw,2755
16
+ src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json,sha256=hMPNt9s3LuxR85AxYy7bPcCDleug6gSwVjefm3ismWY,1405
17
+ src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt,sha256=XgfhrFTXLJ467L4Cer77K0KTPtWTg_-QJXCsltvLlpI,3430
18
+ src/prompts/library/bookingConfirmation/maersk/placeholders.json,sha256=6p_IQMA1PUgGZqjf_by4ja9jK27ba4loYhEpIa7Oxx4,1406
19
+ src/prompts/library/bookingConfirmation/maersk/prompt.txt,sha256=t-yh1dOrcRa0fm0VPFC1xCRBf0R0Zjp9j_Hb31aZS1w,3223
20
+ src/prompts/library/bookingConfirmation/msc/placeholders.json,sha256=IpM9nmSPdyroliZfXB1-NDCjiHZX_Ff5BH7-scNhGqE,1406
21
+ src/prompts/library/bookingConfirmation/msc/prompt.txt,sha256=_Jfioislp7SNs2BEXoklvnTPVXe6Z0M6myD1IWnBFYQ,4705
22
+ src/prompts/library/bookingConfirmation/oocl/placeholders.json,sha256=JTtWvLSsoxN7huXY8ZNqqPkODM-DOs5wu3YvNHOna3k,1404
23
+ src/prompts/library/bookingConfirmation/oocl/prompt.txt,sha256=xNTrJdUtDalcP3AKkfRiOnHjAdRCbcTvehcBQKurRj0,2201
24
+ src/prompts/library/bookingConfirmation/other/placeholders.json,sha256=IpM9nmSPdyroliZfXB1-NDCjiHZX_Ff5BH7-scNhGqE,1406
25
+ src/prompts/library/bookingConfirmation/other/prompt.txt,sha256=kUK7NgVNDYFMnqOcIblCwWSw2SC0YQEtHsYrspiVUMo,3379
26
+ src/prompts/library/bookingConfirmation/yangming/placeholders.json,sha256=IpM9nmSPdyroliZfXB1-NDCjiHZX_Ff5BH7-scNhGqE,1406
27
+ src/prompts/library/bookingConfirmation/yangming/prompt.txt,sha256=fYKfusDajDFw0v54-nv2iAqUSp2yCeOzc6G7AFe-h2w,3226
28
+ src/prompts/library/bundeskasse/other/placeholders.json,sha256=J57CNPWcz87PV4k3ctb_gVuOu-zGObolqst-y-mESQY,4054
29
+ src/prompts/library/bundeskasse/other/prompt.txt,sha256=t5nsLK-6rpOqcVnfHtU04RxN8wXTi9WpX1f4ASLoZ3E,2923
30
+ src/prompts/library/commercialInvoice/other/prompt.txt,sha256=6sowYMzrKvgmTDpDnAzkeG4OqA44e6-8aUKWRKNziBY,2699
31
+ src/prompts/library/customsAssessment/other/prompt.txt,sha256=XSqWa3k9LM7dTiJtX8AKTp_0x5Z0pCNRKNUWaywwBlY,2191
32
+ src/prompts/library/customsInvoice/other/placeholders.json,sha256=g82I3LbLZr2mwfpFIFoCTK1Y_MEkGaEK4ew7jovr6nw,12172
33
+ src/prompts/library/customsInvoice/other/prompt.txt,sha256=ZMdIysq7B1CvOG93YSHorDJpqNUDxXEnomS4cVNaJ90,9632
34
+ src/prompts/library/deliveryOrder/other/placeholders.json,sha256=7fjqag3kCVMV4mJ52dTjAcLtaBX0paXrDrW48vQVZSk,1250
35
+ src/prompts/library/deliveryOrder/other/prompt.txt,sha256=y3QjN54e8PplEJngNlxoykbdrToBefS3r8gWixCbjfE,2468
36
+ src/prompts/library/draftMbl/hapag-lloyd/prompt.txt,sha256=4FxiO1eHkimZVQZXU6gGNikuDVAWNniYvY8FUdVhpvk,2327
37
+ src/prompts/library/draftMbl/maersk/prompt.txt,sha256=4neW6buJirgoS84iDsy9ZcfQTaMeOFt92Emba01mzJA,2192
38
+ src/prompts/library/draftMbl/other/placeholders.json,sha256=wIN06_NWsESDyNEDfOLPi3F2Vq-XPa4O3U32A32s-_Q,1736
39
+ src/prompts/library/draftMbl/other/prompt.txt,sha256=pj-kgPV51upLhDppSKfhc2s5ylgr06l4IxrkFYjE9uM,2241
40
+ src/prompts/library/finalMbL/hapag-lloyd/prompt.txt,sha256=RhxEJ4eWikAQiE40cuPsssnzizge6AJYFTSJLGUmz_U,2326
41
+ src/prompts/library/finalMbL/maersk/prompt.txt,sha256=4neW6buJirgoS84iDsy9ZcfQTaMeOFt92Emba01mzJA,2192
42
+ src/prompts/library/finalMbL/other/prompt.txt,sha256=pj-kgPV51upLhDppSKfhc2s5ylgr06l4IxrkFYjE9uM,2241
43
+ src/prompts/library/packingList/other/prompt.txt,sha256=6Q9d0KBG6YWmNtzFivvmtQmitaUE2jytfwwc5YwsUgQ,2872
44
+ src/prompts/library/partnerInvoice/other/placeholders.json,sha256=AJNBVKwDGebyNAuyWEwEuaUTL9hbLK0Rjr2H0lNfOBY,8686
45
+ src/prompts/library/partnerInvoice/other/prompt.txt,sha256=HuTUlCpUgDQKUKF5QYYoUoHZ0pkBIqX0g5NzciF_fps,9393
46
+ src/prompts/library/postprocessing/port_code/placeholders.json,sha256=2TiXf3zSzrglOMPtDOlCntIa5RSvyZQAKG2-IgrCY5A,22
47
+ src/prompts/library/postprocessing/port_code/prompt_port_code.txt,sha256=--1wunSqEr2ox958lEhjO-0JFBfOLzA3qfKYIzG_Iok,884
48
+ src/prompts/library/preprocessing/carrier/placeholders.json,sha256=1UmrQNqBEsjLIpOO-a39Az6bQ_g1lxDGlwqZFU3IEt0,408
49
+ src/prompts/library/preprocessing/carrier/prompt.txt,sha256=NLvRZQCZ6aWC1yTr7Q93jK5z7Vi_b4HBaiFYYnIsO-w,134
50
+ src/prompts/library/shippingInstruction/other/prompt.txt,sha256=dT2e-dPuvuz0rVYpwmok_1dWQ2Oa8Qy9NGZ6CCLOUI4,1468
51
+ src/prompts/prompt_library.py,sha256=jPxybNPPGH7mzonqtAOqmw5WcT-RtbGP0pvMqqP22hg,2760
52
+ src/setup.py,sha256=kPSZosrICfaGZeDaajr40Ha7Ok4XK4fo_uq35Omiwr0,7128
53
+ src/tms.py,sha256=UXbIo1QE--hIX6NZi5Qyp2R_CP338syrY9pCTPrfgnE,1741
54
+ src/utils.py,sha256=SIEThJlaXWGoWV7236iNoAlabCPNge5oTBpDywTxJw0,15968
55
+ data_science_document_ai-1.41.0.dist-info/METADATA,sha256=MmKbqDbe9voabVucTrE-GoM192GMqFgD09_KNvp6Wsg,2153
56
+ data_science_document_ai-1.41.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
57
+ data_science_document_ai-1.41.0.dist-info/RECORD,,
src/excel_processing.py CHANGED
@@ -2,6 +2,8 @@
2
2
  # flake8: noqa: E402
3
3
  import logging
4
4
 
5
+ from src.postprocessing.common import llm_prediction_to_tuples
6
+
5
7
  logger = logging.getLogger(__name__)
6
8
 
7
9
  import asyncio
@@ -73,6 +75,8 @@ async def extract_data_from_excel(
73
75
  ]
74
76
  extracted_data = {k: v for k, v in await asyncio.gather(*sheet_extract_tasks)}
75
77
 
78
+ # Convert LLM prediction dictionary to tuples of (value, page_number).
79
+ extracted_data = llm_prediction_to_tuples(extracted_data)
76
80
  stored_data = json.dumps(extracted_data)
77
81
 
78
82
  return extracted_data, stored_data, params["gemini_params"]["model_id"]
src/pdf_processing.py CHANGED
@@ -14,7 +14,7 @@ from google.cloud.documentai_v1 import Document as docaiv1_document
14
14
 
15
15
  from src.docai import _batch_process_pdf_w_docai, _process_pdf_w_docai
16
16
  from src.excel_processing import extract_data_from_excel
17
- from src.postprocessing.common import format_all_entities, remove_none_values
17
+ from src.postprocessing.common import format_all_entities, remove_none_values, llm_prediction_to_tuples
18
18
  from src.postprocessing.postprocess_booking_confirmation import (
19
19
  postprocess_booking_confirmation,
20
20
  )
@@ -31,6 +31,7 @@ from src.utils import (
31
31
  get_processor_name,
32
32
  run_background_tasks,
33
33
  validate_based_on_schema,
34
+ transform_schema_strings
34
35
  )
35
36
 
36
37
 
@@ -104,9 +105,16 @@ async def extract_data_from_pdf_w_docai(
104
105
  # Extract entities from the result
105
106
  for entity in result.entities:
106
107
  value = (
107
- {child.type_: child.mention_text for child in entity.properties}
108
+ {child.type_: (child.mention_text,
109
+ child.page_anchor.page_refs[0].page
110
+ if hasattr(child.page_anchor.page_refs[0], "page")
111
+ else 0)
112
+ for child in entity.properties}
108
113
  if entity.properties
109
- else entity.mention_text
114
+ else (entity.mention_text,
115
+ entity.page_anchor.page_refs[0].page
116
+ if hasattr(entity.page_anchor.page_refs[0], "page")
117
+ else 0)
110
118
  )
111
119
  aggregated_data[entity.type_].append(value)
112
120
 
@@ -220,6 +228,9 @@ async def process_file_w_llm(params, file_content, input_doc_type, llm_client):
220
228
  result = await llm_client.get_unified_json_genai(
221
229
  prompt=prompt, document=document, response_schema=response_schema
222
230
  )
231
+
232
+ result = llm_prediction_to_tuples(result)
233
+
223
234
  return result
224
235
  return {}
225
236
 
@@ -380,6 +380,11 @@ async def format_label(entity_k, entity_value, document_type_code, params):
380
380
  ]
381
381
  )
382
382
  return entity_k, [v for _, v in format_tasks]
383
+ if isinstance(entity_value, tuple):
384
+ page = entity_value[1]
385
+ entity_value = entity_value[0]
386
+ else:
387
+ page = -1
383
388
  entity_key = entity_k.lower()
384
389
  formatted_value = None
385
390
 
@@ -474,6 +479,7 @@ async def format_label(entity_k, entity_value, document_type_code, params):
474
479
  result = {
475
480
  "documentValue": entity_value,
476
481
  "formattedValue": formatted_value,
482
+ "page": page,
477
483
  }
478
484
  return entity_k, result
479
485
 
@@ -616,3 +622,24 @@ def remove_stop_words(lineitem: str):
616
622
  .upper()
617
623
  .strip()
618
624
  )
625
+
626
+
627
def llm_prediction_to_tuples(llm_prediction):
    """Convert LLM prediction dictionary to tuples of (value, page_number).

    Walks the prediction recursively. Any dict that carries both a
    ``"value"`` and a ``"page_number"`` key is treated as a leaf and
    collapsed into a ``(value, page_number)`` tuple; every other dict or
    list is traversed and its children are converted.

    Args:
        llm_prediction: The (possibly nested) prediction. Dicts and lists
            are updated in place; any other object is returned untouched.

    Returns:
        * ``(value, page_number)`` for a leaf dict with a truthy value, where
          ``page_number`` is ``-1`` when it cannot be converted to ``int``;
        * ``None`` for a leaf dict whose value is falsy (e.g. ``None`` or
          an empty string);
        * otherwise the same ``llm_prediction`` object, with its children
          converted.
    """
    if isinstance(llm_prediction, dict):
        if "page_number" in llm_prediction and "value" in llm_prediction:
            value = llm_prediction["value"]
            if not value:
                # Nothing was extracted for this entity.
                return None
            try:
                page_number = int(llm_prediction["page_number"])
            except (TypeError, ValueError, OverflowError):
                # Missing or non-numeric page: fall back to the -1 sentinel
                # instead of swallowing every exception (incl. interrupts).
                page_number = -1
            return (value, page_number)

        # Only existing keys are reassigned, so the dict size is stable
        # and it is safe to update it while iterating over its items.
        for key, value in llm_prediction.items():
            llm_prediction[key] = llm_prediction_to_tuples(value)
    elif isinstance(llm_prediction, list):
        for i, item in enumerate(llm_prediction):
            llm_prediction[i] = llm_prediction_to_tuples(item)
    return llm_prediction
@@ -1,28 +1,28 @@
1
1
  {
2
2
  "type": "OBJECT",
3
3
  "properties": {
4
- "cfsCutOff": {"type": "string", "nullable": true, "description": "he date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
5
- "bookingNumber": {"type": "string", "nullable": true},
6
- "cyCutOff": {"type": "string", "nullable": true},
7
- "gateInReference": {"type": "string", "nullable": true},
8
- "gateInTerminal": {"type": "string", "nullable": true},
9
- "mblNumber": {"type": "string", "nullable": true},
10
- "pickUpReference": {"type": "string", "nullable": true},
11
- "pickUpTerminal": {"type": "string", "nullable": true},
12
- "siCutOff": {"type": "string", "nullable": true},
13
- "vgmCutOff": {"type": "string", "nullable": true},
4
+ "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
5
+ "bookingNumber": {"type": "STRING", "nullable": true},
6
+ "cyCutOff": {"type": "STRING", "nullable": true},
7
+ "gateInReference": {"type": "STRING", "nullable": true},
8
+ "gateInTerminal": {"type": "STRING", "nullable": true},
9
+ "mblNumber": {"type": "STRING", "nullable": true},
10
+ "pickUpReference": {"type": "STRING", "nullable": true},
11
+ "pickUpTerminal": {"type": "STRING", "nullable": true},
12
+ "siCutOff": {"type": "STRING", "nullable": true},
13
+ "vgmCutOff": {"type": "STRING", "nullable": true},
14
14
  "transportLegs": {
15
15
  "type": "ARRAY",
16
16
  "items": {
17
17
  "type": "OBJECT",
18
18
  "properties": {
19
- "eta": {"type": "string", "nullable": true},
20
- "etd": {"type": "string", "nullable": true},
21
- "imoNumber": {"type": "string", "nullable": true},
22
- "portOfDischarge": {"type": "string", "nullable": true},
23
- "portOfLoading": {"type": "string", "nullable": true},
24
- "vesselName": {"type": "string", "nullable": true},
25
- "voyage": {"type": "string", "nullable": true}
19
+ "eta": {"type": "STRING", "nullable": true},
20
+ "etd": {"type": "STRING", "nullable": true},
21
+ "imoNumber": {"type": "STRING", "nullable": true},
22
+ "portOfDischarge": {"type": "STRING", "nullable": true},
23
+ "portOfLoading": {"type": "STRING", "nullable": true},
24
+ "vesselName": {"type": "STRING", "nullable": true},
25
+ "voyage": {"type": "STRING", "nullable": true}
26
26
  },
27
27
  "required": []
28
28
  }
@@ -1,3 +1,4 @@
1
+ your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
1
2
  ```json
2
3
  {
3
4
  "mblNumber": "Extract the value after the label 'BOOKING NO.'.",
@@ -1,32 +1,32 @@
1
1
  {
2
2
  "type": "OBJECT",
3
3
  "properties": {
4
- "cfsCutOff": {"type": "string", "nullable": true, "description": "he date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
5
- "bookingNumber": {"type": "string", "nullable": true},
6
- "cyCutOff": {"type": "string", "nullable": true},
7
- "gateInReference": {"type": "string", "nullable": true},
8
- "gateInTerminal": {"type": "string", "nullable": true},
9
- "mblNumber": {"type": "string", "nullable": true},
10
- "pickUpReference": {"type": "string", "nullable": true},
11
- "pickUpTerminal": {"type": "string", "nullable": true},
12
- "siCutOff": {"type": "string", "nullable": true},
13
- "vgmCutOff": {"type": "string", "nullable": true},
4
+ "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
5
+ "bookingNumber": {"type": "STRING", "nullable": true},
6
+ "cyCutOff": {"type": "STRING", "nullable": true},
7
+ "gateInReference": {"type": "STRING", "nullable": true},
8
+ "gateInTerminal": {"type": "STRING", "nullable": true},
9
+ "mblNumber": {"type": "STRING", "nullable": true},
10
+ "pickUpReference": {"type": "STRING", "nullable": true},
11
+ "pickUpTerminal": {"type": "STRING", "nullable": true},
12
+ "siCutOff": {"type": "STRING", "nullable": true},
13
+ "vgmCutOff": {"type": "STRING", "nullable": true},
14
14
  "transportLegs": {
15
15
  "type": "ARRAY",
16
16
  "items": {
17
17
  "type": "OBJECT",
18
18
  "properties": {
19
- "eta": {"type": "string", "nullable": true},
20
- "etd": {"type": "string", "nullable": true},
21
- "imoNumber": {"type": "string", "nullable": true},
22
- "portOfDischarge": {"type": "string", "nullable": true},
23
- "portOfLoading": {"type": "string", "nullable": true},
24
- "vesselName": {"type": "string", "nullable": true},
25
- "voyage": {"type": "string", "nullable": true}
19
+ "eta": {"type": "STRING", "nullable": true},
20
+ "etd": {"type": "STRING", "nullable": true},
21
+ "imoNumber": {"type": "STRING", "nullable": true},
22
+ "portOfDischarge": {"type": "STRING", "nullable": true},
23
+ "portOfLoading": {"type": "STRING", "nullable": true},
24
+ "vesselName": {"type": "STRING", "nullable": true},
25
+ "voyage": {"type": "STRING", "nullable": true}
26
26
  },
27
27
  "required": []
28
28
  }
29
29
  }
30
30
  },
31
31
  "required": []
32
- }
32
+ }
@@ -18,7 +18,7 @@ transportLegs:
18
18
  vesselName: The name of the vessel for a specific leg.
19
19
  voyage: The journey or route taken by the vessel for a specific leg.
20
20
 
21
- your task is to extract the text value of the following entities:
21
+ your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
22
22
  SCHEMA_PLACEHOLDER
23
23
 
24
24
  Keywords for datapoints:
@@ -1,28 +1,28 @@
1
1
  {
2
2
  "type": "OBJECT",
3
3
  "properties": {
4
- "bookingNumber": {"type": "string", "nullable": true},
5
- "cfsCutOff": {"type": "string", "nullable": true, "description": "he date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
6
- "cyCutOff": {"type": "string", "nullable": true},
7
- "gateInReference": {"type": "string", "nullable": true},
8
- "gateInTerminal": {"type": "string", "nullable": true},
9
- "mblNumber": {"type": "string", "nullable": true},
10
- "pickUpReference": {"type": "string", "nullable": true},
11
- "pickUpTerminal": {"type": "string", "nullable": true},
12
- "siCutOff": {"type": "string", "nullable": true},
13
- "vgmCutOff": {"type": "string", "nullable": true},
4
+ "bookingNumber": {"type": "STRING", "nullable": true},
5
+ "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
6
+ "cyCutOff": {"type": "STRING", "nullable": true},
7
+ "gateInReference": {"type": "STRING", "nullable": true},
8
+ "gateInTerminal": {"type": "STRING", "nullable": true},
9
+ "mblNumber": {"type": "STRING", "nullable": true},
10
+ "pickUpReference": {"type": "STRING", "nullable": true},
11
+ "pickUpTerminal": {"type": "STRING", "nullable": true},
12
+ "siCutOff": {"type": "STRING", "nullable": true},
13
+ "vgmCutOff": {"type": "STRING", "nullable": true},
14
14
  "transportLegs": {
15
15
  "type": "ARRAY",
16
16
  "items": {
17
17
  "type": "OBJECT",
18
18
  "properties": {
19
- "eta": {"type": "string", "nullable": true},
20
- "etd": {"type": "string", "nullable": true},
21
- "imoNumber": {"type": "string", "nullable": true},
22
- "portOfDischarge": {"type": "string", "nullable": true},
23
- "portOfLoading": {"type": "string", "nullable": true},
24
- "vesselName": {"type": "string", "nullable": true},
25
- "voyage": {"type": "string", "nullable": true}
19
+ "eta": {"type": "STRING", "nullable": true},
20
+ "etd": {"type": "STRING", "nullable": true},
21
+ "imoNumber": {"type": "STRING", "nullable": true},
22
+ "portOfDischarge": {"type": "STRING", "nullable": true},
23
+ "portOfLoading": {"type": "STRING", "nullable": true},
24
+ "vesselName": {"type": "STRING", "nullable": true},
25
+ "voyage": {"type": "STRING", "nullable": true}
26
26
  },
27
27
  "required": []
28
28
  }
@@ -18,7 +18,7 @@ transportLegs:
18
18
  vesselName: The name of the vessel for a specific leg.
19
19
  voyage: The journey or route taken by the vessel for a specific leg.
20
20
 
21
- your task is to extract the text value of the following entities:
21
+ your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
22
22
  SCHEMA_PLACEHOLDER
23
23
 
24
24
  Keywords for datapoints:
@@ -1,28 +1,28 @@
1
1
  {
2
2
  "type": "OBJECT",
3
3
  "properties": {
4
- "cfsCutOff": {"type": "string", "nullable": true, "description": "he date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
5
- "bookingNumber": {"type": "string", "nullable": true},
6
- "cyCutOff": {"type": "string", "nullable": true},
7
- "gateInReference": {"type": "string", "nullable": true},
8
- "gateInTerminal": {"type": "string", "nullable": true},
9
- "mblNumber": {"type": "string", "nullable": true},
10
- "pickUpReference": {"type": "string", "nullable": true},
11
- "pickUpTerminal": {"type": "string", "nullable": true},
12
- "siCutOff": {"type": "string", "nullable": true},
13
- "vgmCutOff": {"type": "string", "nullable": true},
4
+ "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
5
+ "bookingNumber": {"type": "STRING", "nullable": true},
6
+ "cyCutOff": {"type": "STRING", "nullable": true},
7
+ "gateInReference": {"type": "STRING", "nullable": true},
8
+ "gateInTerminal": {"type": "STRING", "nullable": true},
9
+ "mblNumber": {"type": "STRING", "nullable": true},
10
+ "pickUpReference": {"type": "STRING", "nullable": true},
11
+ "pickUpTerminal": {"type": "STRING", "nullable": true},
12
+ "siCutOff": {"type": "STRING", "nullable": true},
13
+ "vgmCutOff": {"type": "STRING", "nullable": true},
14
14
  "transportLegs": {
15
15
  "type": "ARRAY",
16
16
  "items": {
17
17
  "type": "OBJECT",
18
18
  "properties": {
19
- "eta": {"type": "string", "nullable": true},
20
- "etd": {"type": "string", "nullable": true},
21
- "imoNumber": {"type": "string", "nullable": true},
22
- "portOfDischarge": {"type": "string", "nullable": true},
23
- "portOfLoading": {"type": "string", "nullable": true},
24
- "vesselName": {"type": "string", "nullable": true},
25
- "voyage": {"type": "string", "nullable": true}
19
+ "eta": {"type": "STRING", "nullable": true},
20
+ "etd": {"type": "STRING", "nullable": true},
21
+ "imoNumber": {"type": "STRING", "nullable": true},
22
+ "portOfDischarge": {"type": "STRING", "nullable": true},
23
+ "portOfLoading": {"type": "STRING", "nullable": true},
24
+ "vesselName": {"type": "STRING", "nullable": true},
25
+ "voyage": {"type": "STRING", "nullable": true}
26
26
  },
27
27
  "required": []
28
28
  }
@@ -18,7 +18,7 @@ transportLegs:
18
18
  vesselName: The name of the vessel for a specific leg.
19
19
  voyage: The journey or route taken by the vessel for a specific leg.
20
20
 
21
- your task is to extract the text value of the following entities:
21
+ your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
22
22
  SCHEMA_PLACEHOLDER
23
23
 
24
24
  Further explanation and Keywords for the transportLegs part as follows. The below 2 conditions is crucial. Take attention here:
@@ -1,28 +1,28 @@
1
1
  {
2
2
  "type": "OBJECT",
3
3
  "properties": {
4
- "cfsCutOff": {"type": "string", "nullable": true, "description": "he date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
5
- "bookingNumber": {"type": "string", "nullable": true},
6
- "cyCutOff": {"type": "string", "nullable": true},
7
- "gateInReference": {"type": "string", "nullable": true},
8
- "gateInTerminal": {"type": "string", "nullable": true},
9
- "mblNumber": {"type": "string", "nullable": true},
10
- "pickUpReference": {"type": "string", "nullable": true},
11
- "pickUpTerminal": {"type": "string", "nullable": true},
12
- "siCutOff": {"type": "string", "nullable": true},
13
- "vgmCutOff": {"type": "string", "nullable": true},
4
+ "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
5
+ "bookingNumber": {"type": "STRING", "nullable": true},
6
+ "cyCutOff": {"type": "STRING", "nullable": true},
7
+ "gateInReference": {"type": "STRING", "nullable": true},
8
+ "gateInTerminal": {"type": "STRING", "nullable": true},
9
+ "mblNumber": {"type": "STRING", "nullable": true},
10
+ "pickUpReference": {"type": "STRING", "nullable": true},
11
+ "pickUpTerminal": {"type": "STRING", "nullable": true},
12
+ "siCutOff": {"type": "STRING", "nullable": true},
13
+ "vgmCutOff": {"type": "STRING", "nullable": true},
14
14
  "transportLegs": {
15
15
  "type": "ARRAY",
16
16
  "items": {
17
17
  "type": "OBJECT",
18
18
  "properties": {
19
- "eta": {"type": "string", "nullable": true},
20
- "etd": {"type": "string", "nullable": true},
21
- "portOfDischarge": {"type": "string", "nullable": true},
22
- "portOfLoading": {"type": "string", "nullable": true},
23
- "vesselName": {"type": "string", "nullable": true},
24
- "voyage": {"type": "string", "nullable": true},
25
- "imoNumber": {"type": "string", "nullable": true}
19
+ "eta": {"type": "STRING", "nullable": true},
20
+ "etd": {"type": "STRING", "nullable": true},
21
+ "portOfDischarge": {"type": "STRING", "nullable": true},
22
+ "portOfLoading": {"type": "STRING", "nullable": true},
23
+ "vesselName": {"type": "STRING", "nullable": true},
24
+ "voyage": {"type": "STRING", "nullable": true},
25
+ "imoNumber": {"type": "STRING", "nullable": true}
26
26
  },
27
27
  "required": []
28
28
  }
@@ -1,4 +1,6 @@
1
- bookingNumber: Extract the booking number. This information can be found near the labels "BOOKING ACKNOWLEDGEMENT" or "BOOKING NUMBER".
1
+ your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
2
+
3
+ bookingNumber: Extract the booking number. This information can be found near the labels "BOOKING ACKNOWLEDGEMENT" or "BOOKING NUMBER".
2
4
  gateInReference: This field should have the same value as the bookingNumber.
3
5
  cyCutOff: Look for the "INTENDED FCL CY CUT-OFF" label and extract the date and time value.
4
6
  vgmCutOff: Look for the "INTENDED VGM CUT-OFF" label and extract the date and time value.
@@ -1,28 +1,28 @@
1
1
  {
2
2
  "type": "OBJECT",
3
3
  "properties": {
4
- "cfsCutOff": {"type": "string", "nullable": true, "description": "he date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
5
- "bookingNumber": {"type": "string", "nullable": true},
6
- "cyCutOff": {"type": "string", "nullable": true},
7
- "gateInReference": {"type": "string", "nullable": true},
8
- "gateInTerminal": {"type": "string", "nullable": true},
9
- "mblNumber": {"type": "string", "nullable": true},
10
- "pickUpReference": {"type": "string", "nullable": true},
11
- "pickUpTerminal": {"type": "string", "nullable": true},
12
- "siCutOff": {"type": "string", "nullable": true},
13
- "vgmCutOff": {"type": "string", "nullable": true},
4
+ "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
5
+ "bookingNumber": {"type": "STRING", "nullable": true},
6
+ "cyCutOff": {"type": "STRING", "nullable": true},
7
+ "gateInReference": {"type": "STRING", "nullable": true},
8
+ "gateInTerminal": {"type": "STRING", "nullable": true},
9
+ "mblNumber": {"type": "STRING", "nullable": true},
10
+ "pickUpReference": {"type": "STRING", "nullable": true},
11
+ "pickUpTerminal": {"type": "STRING", "nullable": true},
12
+ "siCutOff": {"type": "STRING", "nullable": true},
13
+ "vgmCutOff": {"type": "STRING", "nullable": true},
14
14
  "transportLegs": {
15
15
  "type": "ARRAY",
16
16
  "items": {
17
17
  "type": "OBJECT",
18
18
  "properties": {
19
- "eta": {"type": "string", "nullable": true},
20
- "etd": {"type": "string", "nullable": true},
21
- "imoNumber": {"type": "string", "nullable": true},
22
- "portOfDischarge": {"type": "string", "nullable": true},
23
- "portOfLoading": {"type": "string", "nullable": true},
24
- "vesselName": {"type": "string", "nullable": true},
25
- "voyage": {"type": "string", "nullable": true}
19
+ "eta": {"type": "STRING", "nullable": true},
20
+ "etd": {"type": "STRING", "nullable": true},
21
+ "imoNumber": {"type": "STRING", "nullable": true},
22
+ "portOfDischarge": {"type": "STRING", "nullable": true},
23
+ "portOfLoading": {"type": "STRING", "nullable": true},
24
+ "vesselName": {"type": "STRING", "nullable": true},
25
+ "voyage": {"type": "STRING", "nullable": true}
26
26
  },
27
27
  "required": []
28
28
  }
@@ -18,7 +18,7 @@ transportLegs:
18
18
  vesselName: The name of the vessel for a specific leg.
19
19
  voyage: The journey or route taken by the vessel for a specific leg.
20
20
 
21
- your task is to extract the text value of the following entities:
21
+ your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
22
22
  SCHEMA_PLACEHOLDER
23
23
 
24
24
  Further explanation for the transportLegs part as follows:
@@ -1,28 +1,28 @@
1
1
  {
2
2
  "type": "OBJECT",
3
3
  "properties": {
4
- "cfsCutOff": {"type": "string", "nullable": true, "description": "he date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
5
- "bookingNumber": {"type": "string", "nullable": true},
6
- "cyCutOff": {"type": "string", "nullable": true},
7
- "gateInReference": {"type": "string", "nullable": true},
8
- "gateInTerminal": {"type": "string", "nullable": true},
9
- "mblNumber": {"type": "string", "nullable": true},
10
- "pickUpReference": {"type": "string", "nullable": true},
11
- "pickUpTerminal": {"type": "string", "nullable": true},
12
- "siCutOff": {"type": "string", "nullable": true},
13
- "vgmCutOff": {"type": "string", "nullable": true},
4
+ "cfsCutOff": {"type": "STRING", "nullable": true, "description": "the date by which an LCL (Less than Container Load) shipment needs to be checked in to a CFS (Container Freight Station) to meet its scheduled sailing"},
5
+ "bookingNumber": {"type": "STRING", "nullable": true},
6
+ "cyCutOff": {"type": "STRING", "nullable": true},
7
+ "gateInReference": {"type": "STRING", "nullable": true},
8
+ "gateInTerminal": {"type": "STRING", "nullable": true},
9
+ "mblNumber": {"type": "STRING", "nullable": true},
10
+ "pickUpReference": {"type": "STRING", "nullable": true},
11
+ "pickUpTerminal": {"type": "STRING", "nullable": true},
12
+ "siCutOff": {"type": "STRING", "nullable": true},
13
+ "vgmCutOff": {"type": "STRING", "nullable": true},
14
14
  "transportLegs": {
15
15
  "type": "ARRAY",
16
16
  "items": {
17
17
  "type": "OBJECT",
18
18
  "properties": {
19
- "eta": {"type": "string", "nullable": true},
20
- "etd": {"type": "string", "nullable": true},
21
- "imoNumber": {"type": "string", "nullable": true},
22
- "portOfDischarge": {"type": "string", "nullable": true},
23
- "portOfLoading": {"type": "string", "nullable": true},
24
- "vesselName": {"type": "string", "nullable": true},
25
- "voyage": {"type": "string", "nullable": true}
19
+ "eta": {"type": "STRING", "nullable": true},
20
+ "etd": {"type": "STRING", "nullable": true},
21
+ "imoNumber": {"type": "STRING", "nullable": true},
22
+ "portOfDischarge": {"type": "STRING", "nullable": true},
23
+ "portOfLoading": {"type": "STRING", "nullable": true},
24
+ "vesselName": {"type": "STRING", "nullable": true},
25
+ "voyage": {"type": "STRING", "nullable": true}
26
26
  },
27
27
  "required": []
28
28
  }
@@ -18,7 +18,7 @@ transportLegs:
18
18
  vesselName: The name of the vessel for a specific leg.
19
19
  voyage: The journey or route taken by the vessel for a specific leg.
20
20
 
21
- your task is to extract the text value of the following entities:
21
+ your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
22
22
  SCHEMA_PLACEHOLDER
23
23
 
24
24
  Keywords for datapoints: