data-science-document-ai 1.40.3__py3-none-any.whl → 1.40.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_science_document_ai-1.40.3.dist-info → data_science_document_ai-1.40.4.dist-info}/METADATA +1 -1
- {data_science_document_ai-1.40.3.dist-info → data_science_document_ai-1.40.4.dist-info}/RECORD +4 -4
- src/postprocessing/common.py +39 -17
- {data_science_document_ai-1.40.3.dist-info → data_science_document_ai-1.40.4.dist-info}/WHEEL +0 -0
{data_science_document_ai-1.40.3.dist-info → data_science_document_ai-1.40.4.dist-info}/RECORD
RENAMED
|
@@ -7,7 +7,7 @@ src/io.py,sha256=IXz4wWqiHa9mnHNgtrC6X9M2lItYp9eu6rHCThUIh5c,3585
|
|
|
7
7
|
src/llm.py,sha256=aEK3rL8XvY7CakvkOJQmcHpEKwZRd8PPrLrzHiO-GFk,7827
|
|
8
8
|
src/log_setup.py,sha256=RhHnpXqcl-ii4EJzRt47CF2R-Q3YPF68tepg_Kg7tkw,2895
|
|
9
9
|
src/pdf_processing.py,sha256=S_eTsgaDIIr3SCrEmaQZyc7TDJlRI0GCuP0P9EGF1Xc,15385
|
|
10
|
-
src/postprocessing/common.py,sha256=
|
|
10
|
+
src/postprocessing/common.py,sha256=W4L455j7IvTRZDWiBizoj9KC_UGUflkL_hEkk5P0h0k,20391
|
|
11
11
|
src/postprocessing/postprocess_booking_confirmation.py,sha256=nK32eDiBNbauyQz0oCa9eraysku8aqzrcoRFoWVumDU,4827
|
|
12
12
|
src/postprocessing/postprocess_commercial_invoice.py,sha256=3I8ijluTZcOs_sMnFZxfkAPle0UFQ239EMuvZfDZVPg,1028
|
|
13
13
|
src/postprocessing/postprocess_partner_invoice.py,sha256=cM4te4qjOI_bXyrF8Zhb6X7eNf5aMKoRaPCFfqFv-98,11538
|
|
@@ -54,6 +54,6 @@ src/prompts/prompt_library.py,sha256=VJWHeXN-s501C2GiidIIvQQuZdU6T1R27hE2dKBiI40
|
|
|
54
54
|
src/setup.py,sha256=kPSZosrICfaGZeDaajr40Ha7Ok4XK4fo_uq35Omiwr0,7128
|
|
55
55
|
src/tms.py,sha256=UXbIo1QE--hIX6NZi5Qyp2R_CP338syrY9pCTPrfgnE,1741
|
|
56
56
|
src/utils.py,sha256=-1Yq_5ExZlFQRUPRsQHiBD3TthNSiPVPp46Dvdb9Kf0,13830
|
|
57
|
-
data_science_document_ai-1.40.
|
|
58
|
-
data_science_document_ai-1.40.
|
|
59
|
-
data_science_document_ai-1.40.
|
|
57
|
+
data_science_document_ai-1.40.4.dist-info/METADATA,sha256=zM542Z9wdq9B2SaEmjoAEzj20BYlbL9LxwjZvMXb22w,2153
|
|
58
|
+
data_science_document_ai-1.40.4.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
59
|
+
data_science_document_ai-1.40.4.dist-info/RECORD,,
|
src/postprocessing/common.py
CHANGED
|
@@ -446,10 +446,18 @@ async def format_label(entity_k, entity_value, document_type_code, params):
|
|
|
446
446
|
elif "reversechargesentence" in entity_key:
|
|
447
447
|
formatted_value = clean_item_description(entity_value, remove_numbers=False)
|
|
448
448
|
|
|
449
|
+
elif "quantity" in entity_key:
|
|
450
|
+
if document_type_code in ["partnerInvoice", "customsInvoice", "bundeskasse"]:
|
|
451
|
+
# For these document types (partner invoice, customs invoice, bundeskasse), quantity can be mentioned as a whole number
|
|
452
|
+
formatted_value = decimal_convertor(
|
|
453
|
+
extract_number(entity_value), quantity=True
|
|
454
|
+
)
|
|
455
|
+
else:
|
|
456
|
+
formatted_value = extract_number(entity_value)
|
|
457
|
+
|
|
449
458
|
elif any(
|
|
450
459
|
numeric_indicator in entity_key
|
|
451
460
|
for numeric_indicator in [
|
|
452
|
-
"quantity",
|
|
453
461
|
"value",
|
|
454
462
|
"amount",
|
|
455
463
|
"price",
|
|
@@ -514,7 +522,7 @@ async def get_port_code_llm(port: str, llm_client):
|
|
|
514
522
|
return None
|
|
515
523
|
|
|
516
524
|
|
|
517
|
-
def decimal_convertor(value):
|
|
525
|
+
def decimal_convertor(value, quantity=False):
|
|
518
526
|
"""Convert EU values to English values."""
|
|
519
527
|
if value is None:
|
|
520
528
|
return None
|
|
@@ -522,25 +530,39 @@ def decimal_convertor(value):
|
|
|
522
530
|
# Remove spaces
|
|
523
531
|
value = value.strip().replace(" ", "")
|
|
524
532
|
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
533
|
+
if not quantity:
|
|
534
|
+
# Convert comma to dot for decimal point (e.g., 4.123,45 -> 4123.45)
|
|
535
|
+
if re.match(r"^\d{1,3}(\.\d{3})*,\d{1,2}$", value):
|
|
536
|
+
value = value.replace(".", "").replace(",", ".")
|
|
537
|
+
|
|
538
|
+
# European style integer with thousand separator: 2.500
|
|
539
|
+
elif re.match(r"^\d{1,3}(\.\d{3})+$", value):
|
|
540
|
+
value = value.replace(".", "")
|
|
541
|
+
|
|
542
|
+
# Format english values as well for consistency (e.g., 4,123.45 -> 4123.45)
|
|
543
|
+
elif re.match(r"^\d{1,3}(,\d{3})*\.\d{1,2}$", value):
|
|
544
|
+
value = value.replace(",", "")
|
|
545
|
+
|
|
546
|
+
# English style integer with thousand separator: 2,500
|
|
547
|
+
elif re.match(r"^\d{1,3}(,\d{3})+$", value):
|
|
548
|
+
value = value.replace(",", "")
|
|
528
549
|
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
550
|
+
# Just replace comma decimals with dot (e.g., 65,45 -> 65.45)
|
|
551
|
+
if re.match(r"^\d+,\d{1,2}$", value):
|
|
552
|
+
value = value.replace(",", ".")
|
|
532
553
|
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
554
|
+
# If there are 3 or more digits after the decimal point, keep only the first 2 (truncated, not rounded; e.g., 8.500000 -> 8.50)
|
|
555
|
+
elif re.match(r"^\d+\.\d{3,}$", value):
|
|
556
|
+
value = value[: value.index(".") + 3]
|
|
536
557
|
|
|
537
|
-
#
|
|
538
|
-
|
|
539
|
-
|
|
558
|
+
else: # quantity=True → skip the thousand-separator rules; apply only the decimal-comma and truncation rules
|
|
559
|
+
# Just replace comma decimals with dot (e.g., 65,45 -> 65.45)
|
|
560
|
+
if re.match(r"^\d+,\d{1,2}$", value):
|
|
561
|
+
value = value.replace(",", ".")
|
|
540
562
|
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
563
|
+
# If there are 3 or more digits after the decimal point, keep only the first 2 (truncated, not rounded; e.g., 8.500000 -> 8.50)
|
|
564
|
+
elif re.match(r"^\d+\.\d{3,}$", value):
|
|
565
|
+
value = value[: value.index(".") + 3]
|
|
544
566
|
|
|
545
567
|
return value
|
|
546
568
|
|
{data_science_document_ai-1.40.3.dist-info → data_science_document_ai-1.40.4.dist-info}/WHEEL
RENAMED
|
File without changes
|