data-science-document-ai 1.42.1__py3-none-any.whl → 1.42.2__py3-none-any.whl
This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {data_science_document_ai-1.42.1.dist-info → data_science_document_ai-1.42.2.dist-info}/METADATA +1 -1
- {data_science_document_ai-1.42.1.dist-info → data_science_document_ai-1.42.2.dist-info}/RECORD +5 -5
- src/excel_processing.py +2 -1
- src/postprocessing/postprocess_partner_invoice.py +5 -30
- {data_science_document_ai-1.42.1.dist-info → data_science_document_ai-1.42.2.dist-info}/WHEEL +0 -0
{data_science_document_ai-1.42.1.dist-info → data_science_document_ai-1.42.2.dist-info}/RECORD
RENAMED
|
@@ -2,7 +2,7 @@ src/constants.py,sha256=TF_UblovdXZnKIb1lnyJwUqQncJCbzBVihoelI6foSU,3579
|
|
|
2
2
|
src/constants_sandbox.py,sha256=Iu6HdjCoNSmOX0AwoL9qUQkhq_ZnIN5U9e-Q2UfNuGc,547
|
|
3
3
|
src/docai.py,sha256=dHuR0ehVjUi1CnoNvdp_yxJtpU_HFXqAZ61ywdz7BEo,5655
|
|
4
4
|
src/docai_processor_config.yaml,sha256=qOMmCIORpLQ_D-ytvejXxFvER0e0uGYuzPVdZBGv4Pc,2105
|
|
5
|
-
src/excel_processing.py,sha256=
|
|
5
|
+
src/excel_processing.py,sha256=8toKsafUvwE5QN3TOQO3zfLo0Wv2sGxZHKPsL7n5LkA,2771
|
|
6
6
|
src/io.py,sha256=tOJpMyI-mP1AaXKG4UFudH47MHWzjWBgVahFJUcjGfs,4749
|
|
7
7
|
src/llm.py,sha256=OE4IEIqcM-hYK9U7e0x1rAfcqdpeo4iXPHBp64L5Qz0,8199
|
|
8
8
|
src/log_setup.py,sha256=RhHnpXqcl-ii4EJzRt47CF2R-Q3YPF68tepg_Kg7tkw,2895
|
|
@@ -10,7 +10,7 @@ src/pdf_processing.py,sha256=dxsYvNnONAjzS-T7K5aSo89rz7QcdW3ZDfeuFyeCeII,16294
|
|
|
10
10
|
src/postprocessing/common.py,sha256=lc95nGvy-KrFFQyX2X3ABMjrx1xVYDjuTBgeAXQTcuU,21570
|
|
11
11
|
src/postprocessing/postprocess_booking_confirmation.py,sha256=nK32eDiBNbauyQz0oCa9eraysku8aqzrcoRFoWVumDU,4827
|
|
12
12
|
src/postprocessing/postprocess_commercial_invoice.py,sha256=3I8ijluTZcOs_sMnFZxfkAPle0UFQ239EMuvZfDZVPg,1028
|
|
13
|
-
src/postprocessing/postprocess_partner_invoice.py,sha256=
|
|
13
|
+
src/postprocessing/postprocess_partner_invoice.py,sha256=koGR7dN37FqJcepdzkrzNBHuBBUuCp_3CrteScASqyE,10590
|
|
14
14
|
src/prompts/library/bookingConfirmation/evergreen/placeholders.json,sha256=IpM9nmSPdyroliZfXB1-NDCjiHZX_Ff5BH7-scNhGqE,1406
|
|
15
15
|
src/prompts/library/bookingConfirmation/evergreen/prompt.txt,sha256=5ivskCG831M2scW3oqQaoltXIyHV-n6DYUygWycXxjw,2755
|
|
16
16
|
src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json,sha256=hMPNt9s3LuxR85AxYy7bPcCDleug6gSwVjefm3ismWY,1405
|
|
@@ -52,6 +52,6 @@ src/prompts/prompt_library.py,sha256=jPxybNPPGH7mzonqtAOqmw5WcT-RtbGP0pvMqqP22hg
|
|
|
52
52
|
src/setup.py,sha256=M-p5c8M9ejKcSZ9N86VtmtPc4TYLxe1_4_dxf6jpfVc,7262
|
|
53
53
|
src/tms.py,sha256=UXbIo1QE--hIX6NZi5Qyp2R_CP338syrY9pCTPrfgnE,1741
|
|
54
54
|
src/utils.py,sha256=nU69zR3TB7IZmCc19DD8H27Riek8GJAldmhJjCSwNEE,16090
|
|
55
|
-
data_science_document_ai-1.42.
|
|
56
|
-
data_science_document_ai-1.42.
|
|
57
|
-
data_science_document_ai-1.42.
|
|
55
|
+
data_science_document_ai-1.42.2.dist-info/METADATA,sha256=HdKZ83beNVAa6P-cWIunfRkjk4ry9FNaZd4-OSIKsCo,2153
|
|
56
|
+
data_science_document_ai-1.42.2.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
|
|
57
|
+
data_science_document_ai-1.42.2.dist-info/RECORD,,
|
src/excel_processing.py
CHANGED
|
@@ -20,7 +20,8 @@ async def extract_data_from_sheet(
|
|
|
20
20
|
params, sheet_name, sheet, response_schema, doc_type=None
|
|
21
21
|
):
|
|
22
22
|
logger.info(f"Processing sheet: {sheet_name}")
|
|
23
|
-
excel_content = pd.DataFrame(sheet.values)
|
|
23
|
+
excel_content = pd.DataFrame(sheet.values).dropna(how="all", axis=1)
|
|
24
|
+
|
|
24
25
|
# Convert to Markdown format for the LLM model
|
|
25
26
|
worksheet = (
|
|
26
27
|
"This is from a excel. Pay attention to the cell position:\n"
|
|
@@ -138,36 +138,7 @@ def update_recipient_and_vendor(aggregated_data, is_recipient_forto):
|
|
|
138
138
|
|
|
139
139
|
def process_partner_invoice(params, aggregated_data, document_type_code):
|
|
140
140
|
"""Process the partner invoice data."""
|
|
141
|
-
# Post process
|
|
142
|
-
# TODO: Remove this block of code after migrating to LLM completely and update the placeholder in the prompt library
|
|
143
|
-
if "containerNumber" in aggregated_data and isinstance(
|
|
144
|
-
aggregated_data["containerNumber"], dict
|
|
145
|
-
):
|
|
146
|
-
container_number = aggregated_data.get("containerNumber", {}).get(
|
|
147
|
-
"formattedValue", None
|
|
148
|
-
)
|
|
149
|
-
if container_number:
|
|
150
|
-
aggregated_data["containerNumber"] = (
|
|
151
|
-
[
|
|
152
|
-
{
|
|
153
|
-
"documentValue": aggregated_data.get("containerNumber", {}).get(
|
|
154
|
-
"documentValue", ""
|
|
155
|
-
),
|
|
156
|
-
"formattedValue": ctr_number,
|
|
157
|
-
}
|
|
158
|
-
for ctr_number in container_number
|
|
159
|
-
]
|
|
160
|
-
if isinstance(container_number, list)
|
|
161
|
-
else [
|
|
162
|
-
{
|
|
163
|
-
"documentValue": aggregated_data.get("containerNumber", {}).get(
|
|
164
|
-
"documentValue", ""
|
|
165
|
-
),
|
|
166
|
-
"formattedValue": container_number,
|
|
167
|
-
}
|
|
168
|
-
]
|
|
169
|
-
)
|
|
170
|
-
|
|
141
|
+
# Post process bundeskasse invoices
|
|
171
142
|
if document_type_code == "bundeskasse":
|
|
172
143
|
post_process_bundeskasse(aggregated_data)
|
|
173
144
|
return
|
|
@@ -197,9 +168,13 @@ def process_partner_invoice(params, aggregated_data, document_type_code):
|
|
|
197
168
|
params,
|
|
198
169
|
)
|
|
199
170
|
|
|
171
|
+
# Add page number for the consistency
|
|
172
|
+
line_item["itemCode"]["page"] = line_item["lineItemDescription"]["page"]
|
|
173
|
+
|
|
200
174
|
if reverse_charge:
|
|
201
175
|
# Distribute reverseChargeSentence to all line items
|
|
202
176
|
line_item["reverseChargeSentence"] = reverse_charge
|
|
177
|
+
line_item["reverseChargeSentence"]["page"] = reverse_charge["page"]
|
|
203
178
|
|
|
204
179
|
|
|
205
180
|
def compute_score(args):
|
{data_science_document_ai-1.42.1.dist-info → data_science_document_ai-1.42.2.dist-info}/WHEEL
RENAMED
|
File without changes
|