data-science-document-ai 1.58.0__py3-none-any.whl → 1.60.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (22) hide show
  1. {data_science_document_ai-1.58.0.dist-info → data_science_document_ai-1.60.0.dist-info}/METADATA +1 -1
  2. {data_science_document_ai-1.58.0.dist-info → data_science_document_ai-1.60.0.dist-info}/RECORD +22 -22
  3. src/postprocessing/common.py +0 -35
  4. src/prompts/library/bookingConfirmation/evergreen/placeholders.json +7 -7
  5. src/prompts/library/bookingConfirmation/evergreen/prompt.txt +45 -29
  6. src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +3 -3
  7. src/prompts/library/bookingConfirmation/maersk/placeholders.json +5 -5
  8. src/prompts/library/bookingConfirmation/maersk/prompt.txt +48 -56
  9. src/prompts/library/bookingConfirmation/msc/placeholders.json +9 -9
  10. src/prompts/library/bookingConfirmation/msc/prompt.txt +57 -60
  11. src/prompts/library/bookingConfirmation/oocl/placeholders.json +12 -12
  12. src/prompts/library/bookingConfirmation/oocl/prompt.txt +38 -13
  13. src/prompts/library/bookingConfirmation/other/placeholders.json +11 -11
  14. src/prompts/library/bookingConfirmation/other/prompt.txt +36 -12
  15. src/prompts/library/bookingConfirmation/yangming/placeholders.json +12 -12
  16. src/prompts/library/bookingConfirmation/yangming/prompt.txt +45 -57
  17. src/prompts/library/customsInvoice/other/placeholders.json +1 -1
  18. src/prompts/library/customsInvoice/other/prompt.txt +6 -2
  19. src/prompts/library/partnerInvoice/other/placeholders.json +1 -1
  20. src/prompts/library/partnerInvoice/other/prompt.txt +6 -2
  21. src/utils.py +2 -6
  22. {data_science_document_ai-1.58.0.dist-info → data_science_document_ai-1.60.0.dist-info}/WHEEL +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-science-document-ai
3
- Version: 1.58.0
3
+ Version: 1.60.0
4
4
  Summary: "Document AI repo for data science"
5
5
  Author: Naomi Nguyen
6
6
  Author-email: naomi.nguyen@forto.com
@@ -7,34 +7,34 @@ src/io.py,sha256=rYjXVLlriEacw1uNuPIYhg12bXNu48Qs9GYMY2YcVTE,5563
7
7
  src/llm.py,sha256=a7UYA4ITUNjzct_2fHgM-bma_XWc28VC0FV71g9tnUI,7137
8
8
  src/log_setup.py,sha256=RhHnpXqcl-ii4EJzRt47CF2R-Q3YPF68tepg_Kg7tkw,2895
9
9
  src/pdf_processing.py,sha256=Fx-Glb9niEUU3WUCrBZ02ZYV-E2vWoUM0ifN7-0A1Q4,19961
10
- src/postprocessing/common.py,sha256=tyy97UBfcnSs8Oh5vVDp4D1qDRit32ri9IGqRlNZcaY,27254
10
+ src/postprocessing/common.py,sha256=dagAg0hZGuZc03bXdfOolxekewMEVUfz917IGCiAtWI,26118
11
11
  src/postprocessing/postprocess_booking_confirmation.py,sha256=nK32eDiBNbauyQz0oCa9eraysku8aqzrcoRFoWVumDU,4827
12
12
  src/postprocessing/postprocess_commercial_invoice.py,sha256=3I8ijluTZcOs_sMnFZxfkAPle0UFQ239EMuvZfDZVPg,1028
13
13
  src/postprocessing/postprocess_partner_invoice.py,sha256=WuaTQK5D09dV_QNrh29ZoKX9IvQn2Ub-WnAMyRjCsvI,14240
14
14
  src/prompts/library/arrivalNotice/other/placeholders.json,sha256=1vzly1amgyKt3jr2JJQbb24kNZsnI289iduvoUo5dJU,3061
15
15
  src/prompts/library/arrivalNotice/other/prompt.txt,sha256=QNuU-BvMA8VbdupVNapad4O3WmCotH5cKNxImRMbKDk,2906
16
- src/prompts/library/bookingConfirmation/evergreen/placeholders.json,sha256=5efq6b--KGWeqGbvASZFTqXJgUEAvsC-0ljo-q0Lhew,5855
17
- src/prompts/library/bookingConfirmation/evergreen/prompt.txt,sha256=OxNfXZaWppwsFMprthzJpOOr8ApQL4KYEmlu9fSUvxk,3485
16
+ src/prompts/library/bookingConfirmation/evergreen/placeholders.json,sha256=94OSMKxqgZHyxyGekNV8pnzs7ueCoOpQji8I8_5J1nM,5755
17
+ src/prompts/library/bookingConfirmation/evergreen/prompt.txt,sha256=cDxfiiPVChYAEvhpZ9UbH3qBFi5dChvDu6iAf2mrzTw,4435
18
18
  src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json,sha256=en83Em25e5PF2OAgFJC8w-MONVnketPZ3J_3zCjIVfE,5915
19
- src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt,sha256=bLHQgGR9e8X4UvFpiyd1OasD00XGvUMG6HSLQy4IgQ4,5157
20
- src/prompts/library/bookingConfirmation/maersk/placeholders.json,sha256=5efq6b--KGWeqGbvASZFTqXJgUEAvsC-0ljo-q0Lhew,5855
21
- src/prompts/library/bookingConfirmation/maersk/prompt.txt,sha256=S-C5cq8AkEoGKilCO0XiXLZXgZPwz9udQOTm557GG64,3984
22
- src/prompts/library/bookingConfirmation/msc/placeholders.json,sha256=5efq6b--KGWeqGbvASZFTqXJgUEAvsC-0ljo-q0Lhew,5855
23
- src/prompts/library/bookingConfirmation/msc/prompt.txt,sha256=bojE6BytnEoQfdXrQebaXYTToDF1Fbyn4YdIGMke2Jo,5463
24
- src/prompts/library/bookingConfirmation/oocl/placeholders.json,sha256=LqjzD-8LkX9hAq3eOBMwit6tLrSLmVMUXTIyhBEaYxk,6037
25
- src/prompts/library/bookingConfirmation/oocl/prompt.txt,sha256=pCsj2BNnP-_kwgUEDt8IehO-tyMv6qeD5nyIzXJL3c0,2925
26
- src/prompts/library/bookingConfirmation/other/placeholders.json,sha256=LqjzD-8LkX9hAq3eOBMwit6tLrSLmVMUXTIyhBEaYxk,6037
27
- src/prompts/library/bookingConfirmation/other/prompt.txt,sha256=-629upv9-ciO6eG3A0_2TTjy7iLlInMsmQfSwAukjLg,4919
28
- src/prompts/library/bookingConfirmation/yangming/placeholders.json,sha256=LqjzD-8LkX9hAq3eOBMwit6tLrSLmVMUXTIyhBEaYxk,6037
29
- src/prompts/library/bookingConfirmation/yangming/prompt.txt,sha256=gySDhfRdQHy3IIomOR3qwY49wlO63Xw73GUSPrEkkr4,3990
19
+ src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt,sha256=GobSqeqt_d-OKBMjAddcD0AFzyPXayHSiS1aPNRbtPk,5162
20
+ src/prompts/library/bookingConfirmation/maersk/placeholders.json,sha256=L7MtCAgz7Wc1avh4K0UCuH8JPwXpu_08ApBu16NBhkk,5953
21
+ src/prompts/library/bookingConfirmation/maersk/prompt.txt,sha256=nnzMu4_FGBNNK9jLTsjotKdD9dMtdNgbT2uolAHU4JQ,5479
22
+ src/prompts/library/bookingConfirmation/msc/placeholders.json,sha256=vIkVrK1sjv9pvWM5rVHFSTD64r5--N-QDD-K_hsFXeY,5909
23
+ src/prompts/library/bookingConfirmation/msc/prompt.txt,sha256=iuvN8UMdOlOBjymiyuTBfizsqvto0IphCik-iHzpwyM,5850
24
+ src/prompts/library/bookingConfirmation/oocl/placeholders.json,sha256=21krpZhmr7UHkX-R3l4OAxPUkonHwJK0TvGGJrRoXwg,6222
25
+ src/prompts/library/bookingConfirmation/oocl/prompt.txt,sha256=z0iJXIwgsnrZafav7qqDBffz_L_IieEyfoJ1vpKcnyk,4502
26
+ src/prompts/library/bookingConfirmation/other/placeholders.json,sha256=wxLDMGfN6HG9O4sV882lgxvdAxSDO7Binkni1b5Uz8I,6922
27
+ src/prompts/library/bookingConfirmation/other/prompt.txt,sha256=MDxadz505Ddssa5DNOpssQrNrZxgOKzrQea0S3sLjUA,7578
28
+ src/prompts/library/bookingConfirmation/yangming/placeholders.json,sha256=kXTrHFIdltMB0uEkj74XV_2bqsD5leDfqwKddZvzXjA,6040
29
+ src/prompts/library/bookingConfirmation/yangming/prompt.txt,sha256=c_qHnP7f2JTCumDRN0aQ9QpXmglLPRuaMEiDXq3wMJ0,4605
30
30
  src/prompts/library/bundeskasse/other/placeholders.json,sha256=7xKzi_ypkIICO9nrEl45W9G7-h33uWVRVWnpg2b5lUg,4288
31
31
  src/prompts/library/bundeskasse/other/prompt.txt,sha256=miNYoqRZEd6Z1LNisTahX1-tenzr5kEpRA6gvPH7NCw,3316
32
32
  src/prompts/library/commercialInvoice/other/placeholders.json,sha256=zUK2mg9MnHiEQRYF6VgTiUiq68WGy5f7_4qL63CWyR0,4700
33
33
  src/prompts/library/commercialInvoice/other/prompt.txt,sha256=CJapcVrmcvynJUanETDklkzU-0N9hHdhq5wL4MK7OIY,2683
34
34
  src/prompts/library/customsAssessment/other/placeholders.json,sha256=scIV--C9HNWAQbU9zEz3GT_FoAvJqbfuY85YUtt7t-Q,3850
35
35
  src/prompts/library/customsAssessment/other/prompt.txt,sha256=z3FuoHZ588Pz1WBJDW7ISAC3J6n7hPJCcS92CdHDTFw,2494
36
- src/prompts/library/customsInvoice/other/placeholders.json,sha256=BnWYtl4sPooTHb_EHRIlrPawBrfHI8_QVas8zytbqyY,12172
37
- src/prompts/library/customsInvoice/other/prompt.txt,sha256=hUBDhocFdHTiWdEPgEE8yKHqpIYOfOj-j9CvZd-3YZc,9941
36
+ src/prompts/library/customsInvoice/other/placeholders.json,sha256=hZb6Gl_rUMzmuiXuB39TBHwL08h2EDXa2AASNPgZEeM,12169
37
+ src/prompts/library/customsInvoice/other/prompt.txt,sha256=N1kzaxOlgzc779V4ZjPO9s_djMyiVY5I7uBx7-ZMxUA,10225
38
38
  src/prompts/library/deliveryOrder/other/placeholders.json,sha256=j-9F4V3yDg4610PPsOwU3oOj_S9vAvAB9Ix155WGIwc,3827
39
39
  src/prompts/library/deliveryOrder/other/prompt.txt,sha256=RD076vq0x0IjoEVQfh-G0u4nxITCpgKZGrwMlR9YAvk,2695
40
40
  src/prompts/library/draftMbl/other/placeholders.json,sha256=Gn8kQ8cMmrzRGLSFH7_8wO1_j2jxhqHd4zeivZP2SjU,4304
@@ -43,8 +43,8 @@ src/prompts/library/finalMbL/other/placeholders.json,sha256=Gn8kQ8cMmrzRGLSFH7_8
43
43
  src/prompts/library/finalMbL/other/prompt.txt,sha256=cyeKjK94sepqXiLEeZKB4VpmT0-nqXALP4dih-B67M8,2386
44
44
  src/prompts/library/packingList/other/placeholders.json,sha256=cGUUvEFoi4Lm0BAiyD29KbNFbUgzO1s7eit_qK3F0ig,4478
45
45
  src/prompts/library/packingList/other/prompt.txt,sha256=6Q9d0KBG6YWmNtzFivvmtQmitaUE2jytfwwc5YwsUgQ,2872
46
- src/prompts/library/partnerInvoice/other/placeholders.json,sha256=NX6ADT4gxLpP90uoNCYDbmfBvROxxVWRKK0lRFy1n9s,10897
47
- src/prompts/library/partnerInvoice/other/prompt.txt,sha256=A3nw6QfraU1N6Aui4TC7eFofG3rUyo9cz8Ha1iQbMpU,8141
46
+ src/prompts/library/partnerInvoice/other/placeholders.json,sha256=igfmag8KGQ7106aSG7UHZEksO4LB-qWeq8SS8VDzckM,10894
47
+ src/prompts/library/partnerInvoice/other/prompt.txt,sha256=QegtgBgB6RmeAhBDguuczJ7UIqOfLnCAboYNaB7fLFI,8411
48
48
  src/prompts/library/postprocessing/port_code/placeholders.json,sha256=2TiXf3zSzrglOMPtDOlCntIa5RSvyZQAKG2-IgrCY5A,22
49
49
  src/prompts/library/postprocessing/port_code/prompt_port_code.txt,sha256=--1wunSqEr2ox958lEhjO-0JFBfOLzA3qfKYIzG_Iok,884
50
50
  src/prompts/library/preprocessing/carrier/placeholders.json,sha256=tQeVDtvembhVqvel9vGoy4qcKp1hOvg-bLCgZRdQj0g,192
@@ -54,7 +54,7 @@ src/prompts/library/shippingInstruction/other/prompt.txt,sha256=CbrqlKMtB-sVY-8E
54
54
  src/prompts/prompt_library.py,sha256=VJWHeXN-s501C2GiidIIvQQuZdU6T1R27hE2dKBiI40,2555
55
55
  src/setup.py,sha256=8-vZWjC8Iwa3xxdk3iR4412VCjtNtgzVqkXcFon7UBE,7309
56
56
  src/tms.py,sha256=UXbIo1QE--hIX6NZi5Qyp2R_CP338syrY9pCTPrfgnE,1741
57
- src/utils.py,sha256=8BpuJJLiJZntZAI86cQMNa-FGjl9jbOjlCWIG27mjJo,17418
58
- data_science_document_ai-1.58.0.dist-info/METADATA,sha256=8MWt4KlixrpV8lQhKmFo5i1UZn02o16vMaR3uEe94Js,2152
59
- data_science_document_ai-1.58.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
60
- data_science_document_ai-1.58.0.dist-info/RECORD,,
57
+ src/utils.py,sha256=Ow5_Jals88o8mbZ1BoHfZpHZoCfig_UQb5aalH-mpWE,17278
58
+ data_science_document_ai-1.60.0.dist-info/METADATA,sha256=dHVIkv6YQ-en4vuOYnS_7cpmujx7v0XAF058BHuF0ow,2152
59
+ data_science_document_ai-1.60.0.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
60
+ data_science_document_ai-1.60.0.dist-info/RECORD,,
@@ -723,45 +723,10 @@ async def format_all_entities(result, document_type_code, params, mime_type):
723
723
  if document_type_code in ["partnerInvoice", "bundeskasse"]:
724
724
  await process_partner_invoice(params, aggregated_data, document_type_code)
725
725
 
726
- if document_type_code in ["bookingConfirmation"]:
727
- aggregated_data["legalEntity"] = await get_legal_entity(
728
- aggregated_data.get("carrierName", {}).get("documentValue", None),
729
- aggregated_data.get("carrierAddress", {}).get("documentValue", None),
730
- )
731
-
732
726
  logger.info("Data Extraction completed successfully")
733
727
  return aggregated_data
734
728
 
735
729
 
736
- async def get_legal_entity(name, address):
737
- """Get legal entity mapping from TMS mappings.
738
-
739
- Args:
740
- name (str): The name of the legal entity. Mandatory.
741
- address (str): The address of the legal entity. Optional for better matching.
742
-
743
- Returns:
744
- dict or None: The mapping result from TMS embeddings, or None if not found.
745
- """
746
- # Name is mandatory for legal entity mapping
747
- if not name:
748
- return {"documentValue": None, "mappedValue": None}
749
-
750
- # Build input safely
751
- input_text = name if not address else f"{name} | {address}"
752
-
753
- api_results = await get_tms_mappings(
754
- input_list=[input_text],
755
- embedding_type="legal_entities",
756
- input_key="partnerNameAddress",
757
- )
758
-
759
- return {
760
- "documentValue": None,
761
- "formattedValue": api_results.get(input_text),
762
- }
763
-
764
-
765
730
  def add_text_without_space(text):
766
731
  """If the cleaned text is different from the original text, append it.
767
732
  Useful for port names like QUINHON - Quinhon"""
@@ -19,7 +19,7 @@
19
19
  "gateInTerminalCode": {
20
20
  "type": "STRING",
21
21
  "nullable": true,
22
- "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., Export terminal delivery address, Export terminal location, or Export terminal name."
22
+ "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., FULL RETURN TO or Export terminal name."
23
23
  },
24
24
  "performaDate": {
25
25
  "type": "STRING",
@@ -29,7 +29,7 @@
29
29
  "cyCutOff": {
30
30
  "type": "STRING",
31
31
  "nullable": true,
32
- "description": "The datetime by which the cargo to be delivered to the Container Yard. It can be found with keys FCL delivery cut-off, FCL DG delivery cut-off, CY CUT OFF, CY Closing."
32
+ "description": "The datetime by which the cargo to be delivered to the Container Yard. It can be found with keys CARGO CUT OFF DATE/TIME"
33
33
  },
34
34
  "gateInReference": {
35
35
  "type": "STRING",
@@ -49,7 +49,7 @@
49
49
  "siCutOff": {
50
50
  "type": "STRING",
51
51
  "nullable": true,
52
- "description": "The deadline datetime for submitting the Shipping Instructions (SI) to the carrier. It can be found with keys Shipping Instruction Closing."
52
+ "description": "The deadline datetime for submitting the Shipping Instructions (SI) to the carrier. It can be found with keys DOC CUT OFF DATE/TIME"
53
53
  },
54
54
  "vgmCutOff": {
55
55
  "type": "STRING",
@@ -69,7 +69,7 @@
69
69
  "pickUpDepotCode": {
70
70
  "type": "STRING",
71
71
  "nullable": true,
72
- "description": "The depot code where the empty container will be picked up. It is identified as Empty Pick Up Depot or Export Empty Pick Up Depot(s)."
72
+ "description": "The depot code where the empty container will be picked up. It is identified as Empty Pick Up AT Depot or Export Empty Pick Up Depot(s)."
73
73
  },
74
74
  "dropOffDepotCode": {
75
75
  "type": "STRING",
@@ -113,12 +113,12 @@
113
113
  "vesselName": {
114
114
  "type": "STRING",
115
115
  "nullable": true,
116
- "description": "The name of the vessel carrying the shipment. It can be found at vessel, INTENDED VESSEL/VOYAGE"
116
+ "description": "The name of the vessel carrying the shipment. It can be found at VESSEL/VOYAGE e.g., MOL EMERALD"
117
117
  },
118
118
  "voyage": {
119
119
  "type": "STRING",
120
120
  "nullable": true,
121
- "description": "The journey or route taken by the vessel for a specific leg. It can be found at Voy. no, INTENDED VESSEL/VOYAGE"
121
+ "description": "The journey or route taken by the vessel for a specific leg. It can be found at VESSEL/VOYAGE e.g., 087E"
122
122
  }
123
123
  }
124
124
  },
@@ -139,7 +139,7 @@
139
139
  "carrierName": {
140
140
  "type": "STRING",
141
141
  "nullable": true,
142
- "description": "The name of the carrier who issued the document e,g, Hapag-Lloyd."
142
+ "description": "The name of the carrier who issued the document e,g, Evergreen Line."
143
143
  }
144
144
  },
145
145
  "required": ["bookingNumber", "transportLegs", "containers", "cyCutOff", "vgmCutOff", "siCutOff"]
@@ -7,34 +7,50 @@ The Freight Forwarding company receives Booking Confirmation from EverGreen Carr
7
7
  These Booking Confirmations contain various details related to booking, container pick up and drop off depot details, vessel details, as well as other transport Legs data.
8
8
  They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
9
9
  Your role is to accurately extract specific entities from these Booking Confirmations to support efficient processing and accurate record-keeping.
10
+
11
+ To provide context on the journey of a container for both Export and Import shipments:
12
+ For Export shipment: An empty container is picked up from a depot (pickUpDepotCode) using a pickUpReference, and goods are loaded into it at a warehouse. The loaded container / cargo is then transported back to a Container Yard or gateInTerminal before the cyCutOff date for further shipping processes. The POL of the first transportLeg may be the gateInTerminal or a different port.
13
+ For Import shipment: The loaded container / cargo arrives at a port of discharge and is then picked up at the pickUpTerminal using a pickUpReference. After delivery, the empty container is returned to a depot (dropOffDepotCode).
10
14
  <CONTEXT>
11
15
 
12
- "mblNumber": "Extract the value after the label 'BOOKING NO.'.",
13
- "gateInReference": "Extract the value after the label 'BOOKING NO.'.",
14
- "pickUpReference": "Extract the value after the label 'BOOKING NO.'.",
15
- "bookingNumber": "Extract the value after the label 'BOOKING NO.'.",
16
- "vesselName": "Extract the text after the label 'VESSEL/VOYAGE' and before the hyphen.",
17
- "voyage": "Voyage is a code of numbers and letters sometimes separated by '-'. Extract the text after the label 'VESSEL/VOYAGE' and after the hyphen.",
18
- "portOfLoading": "Extract the text after the label 'PORT OF LOADING'.",
19
- "cyCutOff": "Extract the date and time information after the label 'CARGO CUT OFF DATE/TIME'.",
20
- "siCutOff": "Extract the date and time information after the label 'DOC CUT OFF DATE/TIME'.",
21
- "vgmCutOff": "Extract the date and time information after the label 'VGM CUT OFF via EDI/WEB/APP'.",
22
- "portOfDischarge": "Extract the text after the label 'PORT OF DISCHARGING:' and before 'FINAL DESTINATION'.",
23
- "pickUpTerminal": "Extract the text after the label 'EMPTY PICK UP AT:' removing any extra spaces or line breaks.",
24
- "gateInTerminal": "Extract the text after the label 'FULL RETURN TO:' removing any extra spaces or line breaks.",
25
-
26
- "transportLegs":
27
- "portOfLoading": "For the first leg, use the extracted 'portOfLoading'.",
28
- "portOfDischarge": "Extract the text after the label 'T/S PORT OF LOADING:'.",
29
- "vesselName": "For the first leg, use the extracted 'vesselName'.",
30
- "voyage": "Voyage is a code of numbers and letters sometimes separated by '-'. For the first leg, use the extracted 'voyage'.",
31
- "eta": "Extract the date after the label 'ETA DATE' that appears within the section starting with 'FINAL DESTINATION:' and ending with 'T/S PORT OF LOADING:'.",
32
- "etd": "Extract the date after the label 'ETD DATE' that appears within the section starting with 'PORT OF LOADING:' and ending with 'FINAL DESTINATION:'.",
33
-
34
-
35
- "portOfLoading": "For the second leg, use the 'portOfDischarge' from the previous leg.",
36
- "portOfDischarge": "For the second leg, use the extracted 'portOfDischarge' from the main extraction.",
37
- "vesselName": "Extract the text after the label 'EST. CONNECT VSL/VOY:' and before the hyphen and numbers.",
38
- "voyage": "Voyage is a code of numbers and letters sometimes separated by '-'. Extract the code after the label 'EST. CONNECT VSL/VOY:' and after the vessel name.",
39
- "eta": "Extract the date after the label 'ETA DATE' that is after the line that contains 'T/S PORT OF LOADING'",
40
- "etd": "Extract the date after the label 'ETD DATE' that is related to the 'EST. CONNECT VSL/VOY:'. "
16
+ <INSTRUCTIONS>
17
+ - Populate fields as defined in the response schema.
18
+ - Use the data field description to understand the context of the data.
19
+
20
+ - transportLegs: Multiple Transport Legs entries may exist; capture all instances under "transportLegs". The order of the legs is important, so make sure it is correct.
21
+ - eta: The estimated time of arrival for a specific leg.
22
+ - etd: The estimated time of departure for a specific leg. It is the ETD DATE shown above the PORT OF DISCHARGING information.
23
+ - imoNumber: The International Maritime Organization number for a specific leg.
24
+ - portOfDischarge: The port where cargo is unloaded for a specific leg.
25
+ - portOfLoading: The port where cargo is loaded for a specific leg.
26
+ - vesselName: The name of the vessel for a specific leg. Can be found at VESSEL/VOYAGE (e.g., EVER LAUREL).
27
+ - voyage: The journey or route taken by the vessel for a specific leg. It can be found at VESSEL/VOYAGE e.g., 087E.
28
+
29
+ IMPORTANT explanation for the transportLegs part as follows:
30
+ - There is at least one leg in each document.
31
+ - 'eta' must be equal to or later than 'etd'!
32
+ - Multiple legs are possible. When there are multiple legs,
33
+ - Sequential Sorting: You must manually re-order legs based on etd then eta, regardless of their order in the source text.
34
+ - "T/S PORT OF LOADING" indicates the presence of a multi-leg journey.
35
+ - Transhipment Handling: Treat any mentioned "T/S PORT OF LOADING" as the bridge between two legs (Discharge for Leg A and Loading for Leg B).
36
+ - The Connectivity Rule: For any sequence of legs, the Port of Discharge of the previous leg must match the Port of Loading of the following leg.
37
+ - First T/S PORT OF LOADING is the Port of Discharge for the first transportLegs and Port of Loading for the second transportLegs.
38
+ - Second T/S PORT OF LOADING is the Port of Discharge for the second transportLegs and Port of Loading for the third transportLegs.
39
+ - Timeline Integrity: Ensure a "No Time Travel" policy: The eta of a previous leg must be earlier than or equal to the etd of the following leg.
40
+
41
+ Structure of Multiple Leg Sequence & Mapping
42
+ Leg 1 (Initial):
43
+ - `portOfLoading`: PORT OF LOADING.
44
+ - `portOfDischarge`: T/S PORT OF LOADING.
45
+ - `vesselName`: VESSEL/VOYAGE (ignore parentheses).
46
+ - `etd`: ETD DATE above the PORT OF DISCHARGING information.
47
+ - `eta`: ETA DATE below first T/S PORT OF LOADING
48
+
49
+ Leg 2 (Intermediate): Trigger: Only if T/S PORT OF LOADING exists.
50
+ - `portOfLoading`: First T/S PORT OF LOADING. POD of Leg 1.
51
+ - `portOfDischarge`: Second T/S PORT OF LOADING (if it exists), otherwise PORT OF DISCHARGING before the FINAL DESTINATION.
52
+ - `vesselName`: EST. CONNECTING VESSEL / VOY.
53
+ - `etd`: ETD DATE after first T/S PORT OF LOADING
54
+ - `eta`: ETA DATE after second T/S PORT OF LOADING (if exists), otherwise ETA next to the FINAL DESTINATION section.
55
+
56
+ </INSTRUCTIONS>
@@ -31,8 +31,8 @@ For Import Shipment: The loaded container / cargo arrives at a port of discharge
31
31
  - Extract the name and address of the carrier who is the main parent company in the document.
32
32
  - It can be found in the top section of the document, often near the logo or header.
33
33
  - Example:
34
- - "Hapag-Lloyd" for vendorName
35
- - Hamburg, Germany, Poland, Italy, Vietnam, China etc... for vendorAddress
34
+ - "Hapag-Lloyd" for carrierName
35
+ - Hamburg, Germany, Poland, Italy, Vietnam, China etc... for carrierAddress
36
36
 
37
37
  - transportLegs: Multiple Transport Legs entries may exist, capture all instances under "transportLegs". Make sure the order of the legs are important.
38
38
  - eta: The estimated time of arrival for a specific leg.
@@ -46,7 +46,7 @@ For Import Shipment: The loaded container / cargo arrives at a port of discharge
46
46
  - Containers: Need to extract Depot details per Container Type. Multiple Containers entries may exist, capture all instances under "Containers".
47
47
  - containerType: The type of container (e.g., 20FT, 40FT, 20ft, 40ft, 40HC, 20DC, etc...).
48
48
  - pickupDepotCode: The code of the depot where the empty container is picked up.
49
- - dropOffDepotCode: The code of the depot where the empty container is dropped off.
49
+ - dropOffDepotCode: The code of the depot where the empty container is dropped off.
50
50
 
51
51
  IMPORTANT explanation for the transportLegs part as follows:
52
52
  - There is at least one leg in each document.
@@ -19,12 +19,12 @@
19
19
  "gateInTerminalCode": {
20
20
  "type": "STRING",
21
21
  "nullable": true,
22
- "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., Export terminal delivery address, Export terminal location, or Export terminal name."
22
+ "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., Export terminal delivery address, Export terminal location, or Return Equip Delivery Terminal."
23
23
  },
24
24
  "performaDate": {
25
25
  "type": "STRING",
26
26
  "nullable": true,
27
- "description": "The date considered to apply the rates and charges specified in the booking confirmation"
27
+ "description": "The date considered to apply the rates and charges specified in the booking confirmation. It is mentioned as Price Calculation Date or Performa Date."
28
28
  },
29
29
  "cyCutOff": {
30
30
  "type": "STRING",
@@ -64,12 +64,12 @@
64
64
  "containerType": {
65
65
  "type": "STRING",
66
66
  "nullable": true,
67
- "description": "The size / type of the container, such as 20ft, 40ft, 40HC, 20DC etc under Type/Size column."
67
+ "description": "The size / type of the container, such as 20ft, 40ft, 40HC, 40 DRY, 20DC etc under Type/Size column."
68
68
  },
69
69
  "pickUpDepotCode": {
70
70
  "type": "STRING",
71
71
  "nullable": true,
72
- "description": "The depot code where the empty container will be picked up. It is identified as Empty Pick Up Depot or Export Empty Pick Up Depot(s)."
72
+ "description": "The depot code where the empty container will be picked up. It is identified as Empty Pick Up Depot, Empty Container Depot, or Export Empty Pick Up Depot(s)."
73
73
  },
74
74
  "dropOffDepotCode": {
75
75
  "type": "STRING",
@@ -139,7 +139,7 @@
139
139
  "carrierName": {
140
140
  "type": "STRING",
141
141
  "nullable": true,
142
- "description": "The name of the carrier who issued the document e,g, Hapag-Lloyd."
142
+ "description": "The name of the carrier who issued the document e,g, MAERSK."
143
143
  }
144
144
  },
145
145
  "required": ["bookingNumber", "transportLegs", "containers", "cyCutOff", "vgmCutOff", "siCutOff"]
@@ -7,61 +7,53 @@ The Freight Forwarding company receives Booking Confirmation from MAERSK Carrier
7
7
  These Booking Confirmations contain various details related to booking, container pick up and drop off depot details, vessel details, as well as other transport Legs data.
8
8
  They may be written in different languages such as English, German, Vietnamese, Chinese, and other European languages, and can appear in a variety of formats and layouts.
9
9
  Your role is to accurately extract specific entities from these Booking Confirmations to support efficient processing and accurate record-keeping.
10
- <CONTEXT>
11
-
12
- bookingNumber: A unique identifier for the booking.
13
- cyCutOff: The deadline for cargo to be delivered to the Container Yard.
14
- gateInReference: A reference code for cargo entering the terminal.
15
- gateInTerminal: The specific terminal where cargo is gated in.
16
- mblNumber: The Master Bill of Lading number.
17
- pickUpReference: A reference code for cargo pickup.
18
- pickUpTerminal: The specific terminal for cargo pickup.
19
- siCutOff: The deadline for submitting shipping instructions.
20
- vgmCutOff: The deadline for submitting the Verified Gross Mass of the cargo.
21
- transportLegs:
22
- eta: The estimated time of arrival for a specific leg.
23
- etd: The estimated time of departure for a specific leg.
24
- imoNumber: The International Maritime Organization number for a specific leg.
25
- portOfDischarge: The port where cargo is unloaded for a specific leg.
26
- portOfLoading: The port where cargo is loaded for a specific leg.
27
- vesselName: The name of the vessel for a specific leg.
28
- voyage: The journey or route taken by the vessel for a specific leg.
29
-
30
- your task is to extract the text value of the following entities and page numbers starting from 0 where the value was found in the document:
31
- SCHEMA_PLACEHOLDER
32
10
 
33
- Keywords for datapoints:
34
- - bookingNumber: Booking No.
35
- - cyCutOff: CY CUT OFF, CY Closing - Latest Return Container Date, Cargo Cut-off deadline
36
- - gateInTerminal: Return Equip Delivery Terminal and Location interception
37
- - pickUpTerminal: Empty Container Depot and Location interception
38
- - siCutOff: SI Cut Off, Shipping Instruction deadline
39
- - vgmCutOff: VGM Submission Deadline, Verified Gross Mass deadline
40
- - eta: ETA
41
- - etd: ETD
42
- - portOfDischarge: To
43
- - portOfLoading: From
44
- - vesselName: Vessel
45
- - voyage: Voy No.
46
-
47
- Table Structure to extract TransportLegs:
48
- - transportlegs table has following colum names: From, To, Mode, Vessel, Voy No., ETD, ETA
49
- - The tables can be found under the title of Intended Transport Plan
50
- - use Vessel column for vesselName. Do not mix 'Vessel' column with 'Mode' column. Vessel cannot be 'TRK' or 'RCO'!
51
- - use 'Voy No.' column for voyage. Do not mix 'Voy No.' column with 'Mode' column. voyage cannot be 'TRK' or 'RCO'!
52
- - If vesselName value is ROC, TRK or MVS, RCO it is a Mode not a vesselName.
53
- - Don't use the same vesselName for multiple transport legs!
54
- - The table rows are not uniform, the words can be wrapped.
55
- - To be able to determine the number of row; you can reference the number of ETA and ETD.
56
-
57
- Further explanation for datapoints except transportLegs part as follows:
58
- - If gateInReference is null, assign it the same value as bookingNumber.
59
- - If pickUpReference is null, assign it the same value as bookingNumber.
60
- - If mblNumber is null, assign 'MAEU' + bookingNumber. E.g. if booking number is '244211559', assign 'MAEU244211559'.
11
+ To provide context on the journey of a container for both Export and Import shipments:
12
+ For Export shipment: An empty container is picked up from a depot (pickUpDepotCode) using a pickUpReference, and goods are loaded into it at a warehouse. The loaded container / cargo is then transported back to a Container Yard or gateInTerminal before the cyCutOff date for further shipping processes. The POL of the first transportLeg may be the gateInTerminal or a different port.
13
+ For Import shipment: The loaded container / cargo arrives at a port of discharge and is then picked up at the pickUpTerminal using a pickUpReference. After delivery, the empty container is returned to a depot (dropOffDepotCode).
14
+ <CONTEXT>
61
15
 
62
- You must apply the following rules:
63
- - The JSON schema must be followed during the extraction.
64
- - The values must only include text found in the document
65
- - Do not normalize any entity value.
66
- - If an entity is not found in the document, keep it empty or np.Nan.
67
- - Validate the JSON make sure its a valid JSON ! No extra text, no missing comma!
16
+ <INSTRUCTIONS>
17
+ - bookingNumber: A unique identifier for the booking.
18
+
19
+ - gateInTerminalCode: The specific terminal where cargo is gated in. It can be called Return Equip Delivery Terminal and Location interception. This sometimes can be the same as portOfLoading of the First transportLeg.
20
+ - gateInReference: A reference code for cargo entering the terminal. If not mentioned explicitly and gateInTerminal is extracted, then use bookingNumber as gateInReference.
21
+ - pickUpTerminal: The specific terminal for cargo pickup. It can be found as Import pick up address(es), PORT OF DISCHARGE (after the slash '/').
22
+ - pickUpReference: A reference code for cargo pickup. If not mentioned explicitly and pickUpTerminal is extracted, then use bookingNumber as pickUpReference.
23
+
24
+ - cyCutOff: The deadline for cargo to be delivered to the Container Yard. It can be referred to as Cargo Cut-off deadline, FCL delivery cut-off, CY CUT OFF, CY Closing - Latest Return Container Date.
25
+ - siCutOff: The deadline for submitting shipping instructions. It can be referred to as Shipping Instruction closing, SI Cut Off, Shipping Instruction deadline, INTENDED SI CUT-OFF
26
+ - vgmCutOff: The deadline for submitting the Verified Gross Mass of the cargo. It can be referred to as VGM cut-off, VGM Submission Deadline, Verified Gross Mass deadline
27
+
28
+ - carrierName and carrierAddress:
29
+ - Extract the name and address of the carrier who is the main parent company in the document.
30
+ - It can be found in the top section of the document, often near the logo or header.
31
+ - Example:
32
+ - "MAERSK" for carrierName
33
+ - Hamburg, Germany, NETHERLANDS, Poland, Italy, Vietnam, China SHANGHAI BRANCH etc... for carrierAddress
34
+
35
+ - Containers: Need to extract Depot details per Container Type. Multiple Containers entries may exist, capture all instances under "Containers".
36
+ - containerType: The type of container (e.g., 20FT, 40FT, 20ft, 40ft, 40HC, 20DC, etc...).
37
+ - pickUpDepotCode: The code of the depot where the empty container is picked up. It can be found as Empty Container Depot under the Load Itinerary table.
38
+ - dropOffDepotCode: The code of the depot where the empty container is dropped off.
39
+
40
+ - transportLegs: Multiple Transport Legs entries may exist; capture all instances under "transportLegs". The order of the legs is important, so make sure it is correct.
41
+ - eta: The estimated time of arrival for a specific leg.
42
+ - etd: The estimated time of departure for a specific leg.
43
+ - imoNumber: The International Maritime Organization number for a specific leg.
44
+ - portOfDischarge: The port where cargo is unloaded for a specific leg.
45
+ - portOfLoading: The port where cargo is loaded for a specific leg.
46
+ - vesselName: The name of the vessel for a specific leg. Can be found under 'Vessel' column in the Intended Transport Plan table.
47
+ - voyage: The journey or route taken by the vessel for a specific leg. Can be found under 'Voy No.' column in the Intended Transport Plan table.
48
+
49
+ IMPORTANT explanation for the transportLegs part as follows:
50
+ - There is at least one leg in each document.
51
+ - 'eta' must be equal to or later than 'etd'!
52
+ - Multiple legs are possible. When there are multiple legs,
53
+ - Sequential Sorting: You must manually re-order legs based on etd then eta, regardless of their order in the source text.
54
+ - The Connectivity Rule: For any sequence of legs, the Destination (Port of Discharge) of the previous leg must match the Origin (Port of Loading) of the following leg.
55
+ - Transhipment Handling: Treat any mentioned "Transhipment Port" as the bridge between two legs (Discharge for Leg A / Loading for Leg B).
56
+ - Timeline Integrity: Ensure a "No Time Travel" policy: The eta of a previous leg must be earlier than or equal to the etd of the following leg.
57
+ - Naming Convention: Look for Port Names followed by abbreviations in parentheses, e.g., "Port Name (ABCDE)".
58
+
59
+ </INSTRUCTIONS>
@@ -4,12 +4,12 @@
4
4
  "bookingNumber": {
5
5
  "type": "STRING",
6
6
  "nullable": true,
7
- "description": "A unique identifier assigned to the shipment booking, used for tracking and reference. They are often referred to as 'Booking No.', 'Booking Reference', 'Our Reference', or 'Order Ref'."
7
+ "description": "A unique identifier assigned to the shipment booking, used for tracking and reference. They are often referred to as 'Booking Reference', 'Booking No.', or 'Our Reference'."
8
8
  },
9
9
  "contractNumber": {
10
10
  "type": "STRING",
11
11
  "nullable": true,
12
- "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG."
12
+ "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. It can be found as Service Contract/Rate Reff. No."
13
13
  },
14
14
  "pickUpTerminalCode": {
15
15
  "type": "STRING",
@@ -19,17 +19,17 @@
19
19
  "gateInTerminalCode": {
20
20
  "type": "STRING",
21
21
  "nullable": true,
22
- "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., Export terminal delivery address, Export terminal location, or Export terminal name."
22
+ "description": "The specific terminal where cargo is gated in, especially the Export terminal delivery address. It can be found as GATE IN AT TERMINAL/DEPOT."
23
23
  },
24
24
  "performaDate": {
25
25
  "type": "STRING",
26
26
  "nullable": true,
27
- "description": "The date considered to apply the rates and charges specified in the booking confirmation"
27
+ "description": "The date considered to apply the rates and charges specified in the booking confirmation. It is mentioned as Price Calculation Date or Performa Date."
28
28
  },
29
29
  "cyCutOff": {
30
30
  "type": "STRING",
31
31
  "nullable": true,
32
- "description": "The datetime by which the cargo to be delivered to the Container Yard. It can be found with keys FCL delivery cut-off, FCL DG delivery cut-off, CY CUT OFF, CY Closing."
32
+ "description": "The datetime by which the cargo is to be delivered to the Container Yard. It can be found with keys CUT-OFF(Date/Time), FCL DG delivery cut-off, CY CUT OFF, CY Closing."
33
33
  },
34
34
  "gateInReference": {
35
35
  "type": "STRING",
@@ -64,12 +64,12 @@
64
64
  "containerType": {
65
65
  "type": "STRING",
66
66
  "nullable": true,
67
- "description": "The size / type of the container, such as 20ft, 40ft, 40HC, 20DC etc under Type/Size column."
67
+ "description": "The size / type of the container, such as 20ft, 40ft, 40HC, 40 DRY, 20DC etc under Type/Size column."
68
68
  },
69
69
  "pickUpDepotCode": {
70
70
  "type": "STRING",
71
71
  "nullable": true,
72
- "description": "The depot code where the empty container will be picked up. It is identified as Empty Pick Up Depot or Export Empty Pick Up Depot(s)."
72
+ "description": "The depot code where the empty container will be picked up. It is identified as PICK UP AT DEPOT, Empty Container Depot, or Export Empty Pick Up Depot(s)."
73
73
  },
74
74
  "dropOffDepotCode": {
75
75
  "type": "STRING",
@@ -118,7 +118,7 @@
118
118
  "voyage": {
119
119
  "type": "STRING",
120
120
  "nullable": true,
121
- "description": "The journey or route taken by the vessel for a specific leg. It can be found at Voy. no, INTENDED VESSEL/VOYAGE"
121
+ "description": "The journey or route taken by the vessel for a specific leg. It can be found at Voyage Number"
122
122
  }
123
123
  }
124
124
  },
@@ -139,7 +139,7 @@
139
139
  "carrierName": {
140
140
  "type": "STRING",
141
141
  "nullable": true,
142
- "description": "The name of the carrier who issued the document e,g, Hapag-Lloyd."
142
+ "description": "The name of the carrier who issued the document, e.g., MSC."
143
143
  }
144
144
  },
145
145
  "required": ["bookingNumber", "transportLegs", "containers", "cyCutOff", "vgmCutOff", "siCutOff"]