data-science-document-ai 1.48.0__tar.gz → 1.49.0__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (59):
  1. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/PKG-INFO +1 -1
  2. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/pyproject.toml +1 -1
  3. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/postprocessing/postprocess_partner_invoice.py +12 -3
  4. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/setup.py +9 -16
  5. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/constants.py +0 -0
  6. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/constants_sandbox.py +0 -0
  7. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/docai.py +0 -0
  8. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/docai_processor_config.yaml +0 -0
  9. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/excel_processing.py +0 -0
  10. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/io.py +0 -0
  11. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/llm.py +0 -0
  12. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/log_setup.py +0 -0
  13. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/pdf_processing.py +0 -0
  14. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/postprocessing/common.py +0 -0
  15. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/postprocessing/postprocess_booking_confirmation.py +0 -0
  16. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/postprocessing/postprocess_commercial_invoice.py +0 -0
  17. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/arrivalNotice/other/placeholders.json +0 -0
  18. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/arrivalNotice/other/prompt.txt +0 -0
  19. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/evergreen/placeholders.json +0 -0
  20. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/evergreen/prompt.txt +0 -0
  21. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +0 -0
  22. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +0 -0
  23. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/maersk/placeholders.json +0 -0
  24. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/maersk/prompt.txt +0 -0
  25. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/msc/placeholders.json +0 -0
  26. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/msc/prompt.txt +0 -0
  27. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/oocl/placeholders.json +0 -0
  28. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/oocl/prompt.txt +0 -0
  29. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/other/placeholders.json +0 -0
  30. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/other/prompt.txt +0 -0
  31. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/yangming/placeholders.json +0 -0
  32. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bookingConfirmation/yangming/prompt.txt +0 -0
  33. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bundeskasse/other/placeholders.json +0 -0
  34. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/bundeskasse/other/prompt.txt +0 -0
  35. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/commercialInvoice/other/placeholders.json +0 -0
  36. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/commercialInvoice/other/prompt.txt +0 -0
  37. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/customsAssessment/other/placeholders.json +0 -0
  38. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/customsAssessment/other/prompt.txt +0 -0
  39. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/customsInvoice/other/placeholders.json +0 -0
  40. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/customsInvoice/other/prompt.txt +0 -0
  41. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/deliveryOrder/other/placeholders.json +0 -0
  42. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/deliveryOrder/other/prompt.txt +0 -0
  43. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/draftMbl/other/placeholders.json +0 -0
  44. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/draftMbl/other/prompt.txt +0 -0
  45. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/finalMbL/other/placeholders.json +0 -0
  46. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/finalMbL/other/prompt.txt +0 -0
  47. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/packingList/other/placeholders.json +0 -0
  48. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/packingList/other/prompt.txt +0 -0
  49. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/partnerInvoice/other/placeholders.json +0 -0
  50. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/partnerInvoice/other/prompt.txt +0 -0
  51. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/postprocessing/port_code/placeholders.json +0 -0
  52. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/postprocessing/port_code/prompt_port_code.txt +0 -0
  53. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/preprocessing/carrier/placeholders.json +0 -0
  54. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/preprocessing/carrier/prompt.txt +0 -0
  55. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/shippingInstruction/other/placeholders.json +0 -0
  56. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/library/shippingInstruction/other/prompt.txt +0 -0
  57. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/prompts/prompt_library.py +0 -0
  58. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/tms.py +0 -0
  59. {data_science_document_ai-1.48.0 → data_science_document_ai-1.49.0}/src/utils.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-science-document-ai
3
- Version: 1.48.0
3
+ Version: 1.49.0
4
4
  Summary: "Document AI repo for data science"
5
5
  Author: Naomi Nguyen
6
6
  Author-email: naomi.nguyen@forto.com
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "data-science-document-ai"
3
- version = "1.48.0"
3
+ version = "1.49.0"
4
4
  description = "\"Document AI repo for data science\""
5
5
  authors = ["Naomi Nguyen <naomi.nguyen@forto.com>", "Kumar Rajendrababu <kumar.rajendrababu@forto.com>", "Igor Tonko <igor.tonko@forto.com>", "Osman Demirel <osman.demirel@forto.com>"]
6
6
  packages = [
@@ -103,9 +103,18 @@ def post_process_bundeskasse(aggregated_data):
103
103
  )
104
104
 
105
105
  # Check if the deferredDutyPayer is forto
106
- deferredDutyPayer = line_item.get("deferredDutyPayer", {})
107
- lower = deferredDutyPayer.get("documentValue", "").lower()
108
- if any(key in lower for key in ["de789147263644738", "forto"]):
106
+ KEYWORDS = {"de789147263644738", "forto", "009812"}
107
+
108
+ def is_forto_recipient(line_item: dict) -> bool:
109
+ values_to_check = [
110
+ line_item.get("deferredDutyPayer", {}).get("documentValue", ""),
111
+ line_item.get("vatId", {}).get("documentValue", ""),
112
+ ]
113
+
114
+ combined = " ".join(values_to_check).lower()
115
+ return any(keyword in combined for keyword in KEYWORDS)
116
+
117
+ if is_forto_recipient(line_item):
109
118
  is_recipient_forto = True
110
119
 
111
120
  update_recipient_and_vendor(aggregated_data, is_recipient_forto)
@@ -1,11 +1,8 @@
1
1
  """Contains project setup parameters and initialization functions."""
2
2
  import json
3
-
4
- # import streamlit as st
5
3
  import os
6
4
  import random
7
5
  import time
8
- from pathlib import Path
9
6
 
10
7
  import toml
11
8
  import vertexai
@@ -18,7 +15,7 @@ from src.constants import project_parameters
18
15
  from src.constants_sandbox import project_parameters_sandbox
19
16
 
20
17
  # Parent repos are imported without .
21
- from src.io import download_dir_from_bucket, get_bq_client, get_storage_client, logger
18
+ from src.io import get_bq_client, get_storage_client, logger
22
19
  from src.llm import LlmClient
23
20
 
24
21
 
@@ -133,15 +130,7 @@ def setup_params(args=None):
133
130
  assert params.keys() & yaml_content.keys() == set()
134
131
  params.update(yaml_content)
135
132
 
136
- # Get models meta data from cloud
137
- client = get_storage_client(params)
138
- bucket = client.bucket(params["doc_ai_bucket_name"])
139
- downloaded_meta = download_dir_from_bucket(
140
- bucket, params["g_model_data_folder"], Path(params["local_model_data_folder"])
141
- )
142
- if not downloaded_meta:
143
- logger.info(f"Could not load models metadata from cloud.")
144
-
133
+ # Set up LLM clients
145
134
  params["LlmClient"] = LlmClient(
146
135
  openai_key=os.getenv("OPENAI_KEY"), parameters=params["gemini_params"]
147
136
  )
@@ -149,7 +138,8 @@ def setup_params(args=None):
149
138
  openai_key=os.getenv("OPENAI_KEY"), parameters=params["gemini_flash_params"]
150
139
  )
151
140
 
152
- params["lookup_data"] = setup_lookup_data(params, bucket)
141
+ # Load lookup data from GCS bucket
142
+ setup_lookup_data(params)
153
143
 
154
144
  return params
155
145
 
@@ -182,10 +172,13 @@ def setup_vertexai(params):
182
172
  )
183
173
 
184
174
 
185
- def setup_lookup_data(params, bucket):
175
+ def setup_lookup_data(params):
186
176
  """
187
177
  Loads JSON mapping data from given GCP Bucket.
188
178
  """
179
+ client = get_storage_client(params)
180
+ bucket = client.bucket(params["doc_ai_bucket_name"])
181
+
189
182
  data = dict()
190
183
 
191
184
  input_path_item_code = (
@@ -208,4 +201,4 @@ def setup_lookup_data(params, bucket):
208
201
  input_path_reverse_charge
209
202
  )
210
203
 
211
- return data
204
+ params["lookup_data"] = data