data-science-document-ai 1.48.0__tar.gz → 1.49.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/PKG-INFO +1 -1
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/pyproject.toml +1 -1
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/postprocessing/postprocess_partner_invoice.py +12 -3
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/setup.py +9 -16
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/constants.py +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/constants_sandbox.py +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/docai.py +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/docai_processor_config.yaml +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/excel_processing.py +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/io.py +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/llm.py +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/log_setup.py +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/pdf_processing.py +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/postprocessing/common.py +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/postprocessing/postprocess_booking_confirmation.py +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/postprocessing/postprocess_commercial_invoice.py +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/arrivalNotice/other/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/arrivalNotice/other/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/evergreen/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/evergreen/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/maersk/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/maersk/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/msc/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/msc/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/oocl/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/oocl/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/other/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/other/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/yangming/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/yangming/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bundeskasse/other/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bundeskasse/other/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/commercialInvoice/other/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/commercialInvoice/other/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/customsAssessment/other/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/customsAssessment/other/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/customsInvoice/other/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/customsInvoice/other/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/deliveryOrder/other/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/deliveryOrder/other/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/draftMbl/other/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/draftMbl/other/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/finalMbL/other/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/finalMbL/other/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/packingList/other/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/packingList/other/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/partnerInvoice/other/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/partnerInvoice/other/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/postprocessing/port_code/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/postprocessing/port_code/prompt_port_code.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/preprocessing/carrier/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/preprocessing/carrier/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/shippingInstruction/other/placeholders.json +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/shippingInstruction/other/prompt.txt +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/prompt_library.py +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/tms.py +0 -0
- {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "data-science-document-ai"
|
|
3
|
-
version = "1.48.0"
|
|
3
|
+
version = "1.49.0"
|
|
4
4
|
description = "\"Document AI repo for data science\""
|
|
5
5
|
authors = ["Naomi Nguyen <naomi.nguyen@forto.com>", "Kumar Rajendrababu <kumar.rajendrababu@forto.com>", "Igor Tonko <igor.tonko@forto.com>", "Osman Demirel <osman.demirel@forto.com>"]
|
|
6
6
|
packages = [
|
|
@@ -103,9 +103,18 @@ def post_process_bundeskasse(aggregated_data):
|
|
|
103
103
|
)
|
|
104
104
|
|
|
105
105
|
# Check if the deferredDutyPayer is forto
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
106
|
+
KEYWORDS = {"de789147263644738", "forto", "009812"}
|
|
107
|
+
|
|
108
|
+
def is_forto_recipient(line_item: dict) -> bool:
|
|
109
|
+
values_to_check = [
|
|
110
|
+
line_item.get("deferredDutyPayer", {}).get("documentValue", ""),
|
|
111
|
+
line_item.get("vatId", {}).get("documentValue", ""),
|
|
112
|
+
]
|
|
113
|
+
|
|
114
|
+
combined = " ".join(values_to_check).lower()
|
|
115
|
+
return any(keyword in combined for keyword in KEYWORDS)
|
|
116
|
+
|
|
117
|
+
if is_forto_recipient(line_item):
|
|
109
118
|
is_recipient_forto = True
|
|
110
119
|
|
|
111
120
|
update_recipient_and_vendor(aggregated_data, is_recipient_forto)
|
|
@@ -1,11 +1,8 @@
|
|
|
1
1
|
"""Contains project setup parameters and initialization functions."""
|
|
2
2
|
import json
|
|
3
|
-
|
|
4
|
-
# import streamlit as st
|
|
5
3
|
import os
|
|
6
4
|
import random
|
|
7
5
|
import time
|
|
8
|
-
from pathlib import Path
|
|
9
6
|
|
|
10
7
|
import toml
|
|
11
8
|
import vertexai
|
|
@@ -18,7 +15,7 @@ from src.constants import project_parameters
|
|
|
18
15
|
from src.constants_sandbox import project_parameters_sandbox
|
|
19
16
|
|
|
20
17
|
# Parent repos are imported without .
|
|
21
|
-
from src.io import
|
|
18
|
+
from src.io import get_bq_client, get_storage_client, logger
|
|
22
19
|
from src.llm import LlmClient
|
|
23
20
|
|
|
24
21
|
|
|
@@ -133,15 +130,7 @@ def setup_params(args=None):
|
|
|
133
130
|
assert params.keys() & yaml_content.keys() == set()
|
|
134
131
|
params.update(yaml_content)
|
|
135
132
|
|
|
136
|
-
#
|
|
137
|
-
client = get_storage_client(params)
|
|
138
|
-
bucket = client.bucket(params["doc_ai_bucket_name"])
|
|
139
|
-
downloaded_meta = download_dir_from_bucket(
|
|
140
|
-
bucket, params["g_model_data_folder"], Path(params["local_model_data_folder"])
|
|
141
|
-
)
|
|
142
|
-
if not downloaded_meta:
|
|
143
|
-
logger.info(f"Could not load models metadata from cloud.")
|
|
144
|
-
|
|
133
|
+
# Set up LLM clients
|
|
145
134
|
params["LlmClient"] = LlmClient(
|
|
146
135
|
openai_key=os.getenv("OPENAI_KEY"), parameters=params["gemini_params"]
|
|
147
136
|
)
|
|
@@ -149,7 +138,8 @@ def setup_params(args=None):
|
|
|
149
138
|
openai_key=os.getenv("OPENAI_KEY"), parameters=params["gemini_flash_params"]
|
|
150
139
|
)
|
|
151
140
|
|
|
152
|
-
|
|
141
|
+
# Load lookup data from GCS bucket
|
|
142
|
+
setup_lookup_data(params)
|
|
153
143
|
|
|
154
144
|
return params
|
|
155
145
|
|
|
@@ -182,10 +172,13 @@ def setup_vertexai(params):
|
|
|
182
172
|
)
|
|
183
173
|
|
|
184
174
|
|
|
185
|
-
def setup_lookup_data(params, bucket):
|
|
175
|
+
def setup_lookup_data(params):
|
|
186
176
|
"""
|
|
187
177
|
Loads JSON mapping data from given GCP Bucket.
|
|
188
178
|
"""
|
|
179
|
+
client = get_storage_client(params)
|
|
180
|
+
bucket = client.bucket(params["doc_ai_bucket_name"])
|
|
181
|
+
|
|
189
182
|
data = dict()
|
|
190
183
|
|
|
191
184
|
input_path_item_code = (
|
|
@@ -208,4 +201,4 @@ def setup_lookup_data(params, bucket):
|
|
|
208
201
|
input_path_reverse_charge
|
|
209
202
|
)
|
|
210
203
|
|
|
211
|
-
|
|
204
|
+
params["lookup_data"] = data
|
|
File without changes
|
{data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/constants_sandbox.py
RENAMED
|
File without changes
|
|
File without changes
|
{data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/docai_processor_config.yaml
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/postprocessing/common.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/prompt_library.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|