data-science-document-ai 1.40.1__py3-none-any.whl → 1.40.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-science-document-ai
3
- Version: 1.40.1
3
+ Version: 1.40.3
4
4
  Summary: "Document AI repo for data science"
5
5
  Author: Naomi Nguyen
6
6
  Author-email: naomi.nguyen@forto.com
@@ -7,10 +7,10 @@ src/io.py,sha256=IXz4wWqiHa9mnHNgtrC6X9M2lItYp9eu6rHCThUIh5c,3585
7
7
  src/llm.py,sha256=aEK3rL8XvY7CakvkOJQmcHpEKwZRd8PPrLrzHiO-GFk,7827
8
8
  src/log_setup.py,sha256=RhHnpXqcl-ii4EJzRt47CF2R-Q3YPF68tepg_Kg7tkw,2895
9
9
  src/pdf_processing.py,sha256=S_eTsgaDIIr3SCrEmaQZyc7TDJlRI0GCuP0P9EGF1Xc,15385
10
- src/postprocessing/common.py,sha256=OR9O73gUP4tevIZMnorbiUgzviEJlVr46ArTWMXrYVA,19316
10
+ src/postprocessing/common.py,sha256=ll7VMEJ_51OeczcV8Uw-aVrufV3kd3kNLCmss3kt0Do,19291
11
11
  src/postprocessing/postprocess_booking_confirmation.py,sha256=nK32eDiBNbauyQz0oCa9eraysku8aqzrcoRFoWVumDU,4827
12
12
  src/postprocessing/postprocess_commercial_invoice.py,sha256=3I8ijluTZcOs_sMnFZxfkAPle0UFQ239EMuvZfDZVPg,1028
13
- src/postprocessing/postprocess_partner_invoice.py,sha256=bWm3Miaq_mtX62xSs14vNQCWPHOj2895Bt6TuOVZWZU,11742
13
+ src/postprocessing/postprocess_partner_invoice.py,sha256=cM4te4qjOI_bXyrF8Zhb6X7eNf5aMKoRaPCFfqFv-98,11538
14
14
  src/prompts/library/bookingConfirmation/evergreen/placeholders.json,sha256=Re2wBgZoaJ5yImUUAwZOZxFcKXHxi83TCZwTuqd2v2k,1405
15
15
  src/prompts/library/bookingConfirmation/evergreen/prompt.txt,sha256=qlBMFDHy-gwr2PVeuHrfMEg_8Ibdym243DnaCgINa7g,2614
16
16
  src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json,sha256=Re2wBgZoaJ5yImUUAwZOZxFcKXHxi83TCZwTuqd2v2k,1405
@@ -54,6 +54,6 @@ src/prompts/prompt_library.py,sha256=VJWHeXN-s501C2GiidIIvQQuZdU6T1R27hE2dKBiI40
54
54
  src/setup.py,sha256=kPSZosrICfaGZeDaajr40Ha7Ok4XK4fo_uq35Omiwr0,7128
55
55
  src/tms.py,sha256=UXbIo1QE--hIX6NZi5Qyp2R_CP338syrY9pCTPrfgnE,1741
56
56
  src/utils.py,sha256=-1Yq_5ExZlFQRUPRsQHiBD3TthNSiPVPp46Dvdb9Kf0,13830
57
- data_science_document_ai-1.40.1.dist-info/METADATA,sha256=RhLAfUQIqkJFJByLNyO0C3er2Q4loNhYi9n7T9uAY5Y,2153
58
- data_science_document_ai-1.40.1.dist-info/WHEEL,sha256=M5asmiAlL6HEcOq52Yi5mmk9KmTVjY2RDPtO4p9DMrc,88
59
- data_science_document_ai-1.40.1.dist-info/RECORD,,
57
+ data_science_document_ai-1.40.3.dist-info/METADATA,sha256=ym7EzwlZAar6Qvx0GgwVQM44p30sw74-nrPl7Liyg_8,2153
58
+ data_science_document_ai-1.40.3.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
59
+ data_science_document_ai-1.40.3.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.2.0
2
+ Generator: poetry-core 2.2.1
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -425,10 +425,12 @@ async def format_label(entity_k, entity_value, document_type_code, params):
425
425
  # Remove all non-alphanumeric characters like ' ', '-', etc.
426
426
  formatted_value = convert_container_number(entity_value)
427
427
 
428
- elif (
429
- document_type_code in ["finalMbL", "draftMbl"] and entity_key == "measurements"
428
+ elif any(
429
+ numeric_indicator in entity_key
430
+ for numeric_indicator in ["measurements", "weight"]
430
431
  ):
431
- formatted_value = decimal_convertor(extract_number(entity_value))
432
+ formatted_value = extract_number(entity_value)
433
+
432
434
  elif any(
433
435
  packaging_type in entity_key
434
436
  for packaging_type in ["packagingtype", "packagetype", "currency"]
@@ -447,7 +449,6 @@ async def format_label(entity_k, entity_value, document_type_code, params):
447
449
  elif any(
448
450
  numeric_indicator in entity_key
449
451
  for numeric_indicator in [
450
- "weight",
451
452
  "quantity",
452
453
  "value",
453
454
  "amount",
@@ -301,10 +301,6 @@ def associate_forto_item_code(input_string, params):
301
301
  if forto_item_code is None:
302
302
  # 2. Fallback to embedding function if no good fuzzy match
303
303
  forto_item_code = get_tms_mappings(input_string, "line_items")
304
- # embeddings_dict = embed_manager.embeddings_dict
305
- # forto_item_code = embed_manager._find_most_similar_option(
306
- # input_string, *embeddings_dict["item_codes_label"]
307
- # )
308
304
 
309
305
  result = {"documentValue": input_string, "formattedValue": forto_item_code}
310
306
  return result