data-science-document-ai 1.61.0__tar.gz → 1.61.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/PKG-INFO +3 -5
  2. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/pyproject.toml +4 -6
  3. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/constants.py +5 -4
  4. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/llm.py +49 -63
  5. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/postprocessing/common.py +17 -10
  6. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/evergreen/placeholders.json +2 -2
  7. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json +2 -2
  8. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/maersk/placeholders.json +2 -2
  9. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/maersk/prompt.txt +1 -1
  10. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/msc/placeholders.json +2 -2
  11. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/msc/prompt.txt +1 -1
  12. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/oocl/placeholders.json +2 -2
  13. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/oocl/prompt.txt +1 -1
  14. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/other/placeholders.json +2 -2
  15. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/yangming/placeholders.json +2 -2
  16. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/yangming/prompt.txt +1 -1
  17. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/setup.py +10 -5
  18. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/constants_sandbox.py +0 -0
  19. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/docai.py +0 -0
  20. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/docai_processor_config.yaml +0 -0
  21. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/excel_processing.py +0 -0
  22. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/io.py +0 -0
  23. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/log_setup.py +0 -0
  24. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/pdf_processing.py +0 -0
  25. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/postprocessing/postprocess_booking_confirmation.py +0 -0
  26. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/postprocessing/postprocess_commercial_invoice.py +0 -0
  27. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/postprocessing/postprocess_partner_invoice.py +0 -0
  28. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/arrivalNotice/other/placeholders.json +0 -0
  29. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/arrivalNotice/other/prompt.txt +0 -0
  30. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/evergreen/prompt.txt +0 -0
  31. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/hapag-lloyd/prompt.txt +0 -0
  32. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/other/prompt.txt +0 -0
  33. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bundeskasse/other/placeholders.json +0 -0
  34. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bundeskasse/other/prompt.txt +0 -0
  35. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/commercialInvoice/other/placeholders.json +0 -0
  36. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/commercialInvoice/other/prompt.txt +0 -0
  37. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/customsAssessment/other/placeholders.json +0 -0
  38. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/customsAssessment/other/prompt.txt +0 -0
  39. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/customsInvoice/other/placeholders.json +0 -0
  40. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/customsInvoice/other/prompt.txt +0 -0
  41. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/deliveryOrder/other/placeholders.json +0 -0
  42. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/deliveryOrder/other/prompt.txt +0 -0
  43. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/draftMbl/other/placeholders.json +0 -0
  44. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/draftMbl/other/prompt.txt +0 -0
  45. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/finalMbL/other/placeholders.json +0 -0
  46. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/finalMbL/other/prompt.txt +0 -0
  47. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/packingList/other/placeholders.json +0 -0
  48. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/packingList/other/prompt.txt +0 -0
  49. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/partnerInvoice/other/placeholders.json +0 -0
  50. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/partnerInvoice/other/prompt.txt +0 -0
  51. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/postprocessing/port_code/placeholders.json +0 -0
  52. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/postprocessing/port_code/prompt_port_code.txt +0 -0
  53. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/preprocessing/carrier/placeholders.json +0 -0
  54. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/preprocessing/carrier/prompt.txt +0 -0
  55. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/shippingInstruction/other/placeholders.json +0 -0
  56. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/shippingInstruction/other/prompt.txt +0 -0
  57. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/prompt_library.py +0 -0
  58. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/tms.py +0 -0
  59. {data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/utils.py +0 -0
@@ -1,13 +1,11 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: data-science-document-ai
3
- Version: 1.61.0
3
+ Version: 1.61.2
4
4
  Summary: "Document AI repo for data science"
5
5
  Author: Naomi Nguyen
6
6
  Author-email: naomi.nguyen@forto.com
7
- Requires-Python: >=3.9,<3.12
7
+ Requires-Python: >=3.11,<3.12
8
8
  Classifier: Programming Language :: Python :: 3
9
- Classifier: Programming Language :: Python :: 3.9
10
- Classifier: Programming Language :: Python :: 3.10
11
9
  Classifier: Programming Language :: Python :: 3.11
12
10
  Requires-Dist: apscheduler (>=3.10.4,<4.0.0)
13
11
  Requires-Dist: db-dtypes (>=1.2.0,<2.0.0)
@@ -24,8 +22,8 @@ Requires-Dist: google-cloud-bigquery-storage (>=2.20.0,<3.0.0)
24
22
  Requires-Dist: google-cloud-documentai (>=2.23.0,<3.0.0)
25
23
  Requires-Dist: google-cloud-storage (>=2.9.0,<3.0.0)
26
24
  Requires-Dist: google-cloud-vision (>=3.7.1,<4.0.0)
25
+ Requires-Dist: google-genai (>=1.61.0,<2.0.0)
27
26
  Requires-Dist: gspread (>=6.1.0,<7.0.0)
28
- Requires-Dist: httpx (>=0.26.0,<0.27.0)
29
27
  Requires-Dist: jupyter (>=1.0.0,<2.0.0)
30
28
  Requires-Dist: kubernetes (>=30.1.0,<31.0.0)
31
29
  Requires-Dist: nltk (>=3.9.1,<4.0.0)
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "data-science-document-ai"
3
- version = "1.61.0"
3
+ version = "1.61.2"
4
4
  description = "\"Document AI repo for data science\""
5
5
  authors = ["Naomi Nguyen <naomi.nguyen@forto.com>", "Kumar Rajendrababu <kumar.rajendrababu@forto.com>", "Igor Tonko <igor.tonko@forto.com>", "Osman Demirel <osman.demirel@forto.com>"]
6
6
  packages = [
@@ -9,7 +9,7 @@ packages = [
9
9
 
10
10
 
11
11
  [tool.poetry.dependencies]
12
- python = ">=3.9,<3.12"
12
+ python = ">=3.11,<3.12"
13
13
  pandas = "^2.0.3"
14
14
  numpy = "^1.25.1"
15
15
  google-cloud-bigquery-storage = "^2.20.0"
@@ -26,7 +26,6 @@ uvicorn = {extras = ["standard"], version = "^0.27.0.post1"}
26
26
  requests-toolbelt = "^1.0.0"
27
27
  google = "^3.0.0"
28
28
  toml = "^0.10.2"
29
- httpx = "^0.26.0"
30
29
  python-multipart = "^0.0.7"
31
30
  google-cloud-documentai = "^2.23.0"
32
31
  jupyter = "^1.0.0"
@@ -42,6 +41,7 @@ xlrd = "^2.0.1"
42
41
  openpyxl = "^3.1.5"
43
42
  tabulate = "^0.9.0"
44
43
  openai = "^1.53.0"
44
+ google-genai = "^1.61.0"
45
45
  parameterized = "^0.9.0"
46
46
  ddtrace = "^2.20.0"
47
47
  rapidfuzz = "^3.12.2"
@@ -50,12 +50,10 @@ nltk = "^3.9.1"
50
50
  pgzip = "^0.3.5"
51
51
  pypdf = "^6.1.2"
52
52
 
53
- [tool.poetry.dev-dependencies]
53
+ [tool.poetry.group.dev.dependencies]
54
54
  jupyter = "^1.0.0"
55
55
  ipykernel = "^6.23.1"
56
56
  notebook = "^6.5.5"
57
-
58
- [tool.poetry.group.dev.dependencies]
59
57
  deepdiff = "^8.1.1"
60
58
 
61
59
  [build-system]
@@ -9,7 +9,7 @@ project_parameters = {
9
9
  "g_ai_project_id": "738250249861",
10
10
  "g_api_endpoint": "eu-documentai.googleapis.com",
11
11
  "g_location": "eu",
12
- "g_region": "europe-west1",
12
+ "g_region": "global",
13
13
  # Google Cloud Storage
14
14
  "doc_ai_bucket_project_name": "forto-data-science-production",
15
15
  "doc_ai_bucket_name": "ds-document-capture",
@@ -60,7 +60,8 @@ project_parameters = {
60
60
  "top_p": 0.8,
61
61
  "top_k": 40,
62
62
  "seed": 42,
63
- "model_id": "gemini-2.5-pro",
63
+ "model_id": "gemini-3-flash-preview",
64
+ "thinking_level": "medium",
64
65
  },
65
66
  "gemini_flash_params": {
66
67
  "temperature": 0,
@@ -93,8 +94,8 @@ formatting_rules = {
93
94
  "bookingConfirmation": {
94
95
  "pickUpDepotCode": "depot",
95
96
  "dropOffDepotCode": "depot",
96
- "gateInTerminalCode": "terminal",
97
- "pickUpTerminalCode": "terminal",
97
+ "gateInTerminal": "terminal",
98
+ "pickUpTerminal": "terminal",
98
99
  },
99
100
  "deliveryOrder": {"pickUpTerminal": "terminal", "EmptyContainerDepot": "depot"},
100
101
  }
@@ -3,17 +3,11 @@ import logging
3
3
 
4
4
  logger = logging.getLogger(__name__)
5
5
 
6
- import base64
7
6
  import json
8
7
 
8
+ from google import genai
9
+ from google.genai import types
9
10
  from openai import AsyncOpenAI as OpenAI
10
- from vertexai.generative_models import (
11
- GenerationConfig,
12
- GenerativeModel,
13
- HarmBlockThreshold,
14
- HarmCategory,
15
- Part,
16
- )
17
11
 
18
12
  from src.io import get_gcp_labels
19
13
  from src.utils import cache_on_disk
@@ -24,8 +18,10 @@ from src.utils import cache_on_disk
24
18
  class LlmClient:
25
19
  """A client for interacting with large language models (LLMs)."""
26
20
 
27
- def __init__(self, openai_key=None, parameters=None):
21
+ def __init__(self, openai_key=None, parameters=None, genai_client=None):
28
22
  """Initialize the LLM client."""
23
+ self.genai_client = genai_client
24
+
29
25
  # Initialize the model parameters
30
26
  self.model_params = {
31
27
  "temperature": parameters.get("temperature", 0),
@@ -35,31 +31,24 @@ class LlmClient:
35
31
  "seed": parameters.get("seed", 42),
36
32
  }
37
33
  self.model_id = parameters.get("model_id", "gemini-2.5-flash")
38
- # Initialize the safety configuration
39
- self.safety_config = {
40
- HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
41
- HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
42
- HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
43
- HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
44
- }
45
- # Initialize the Gemini client
46
- self.geminy_client = self._initialize_gemini()
34
+ self.thinking_level = parameters.get("thinking_level")
35
+
36
+ # Initialize the safety configuration (new format: list of SafetySetting objects)
37
+ self.safety_settings = [
38
+ types.SafetySetting(
39
+ category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="OFF"
40
+ ),
41
+ types.SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="OFF"),
42
+ types.SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="OFF"),
43
+ types.SafetySetting(
44
+ category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="OFF"
45
+ ),
46
+ ]
47
+
47
48
  if openai_key is not None:
48
49
  # Initialize the ChatGPT client
49
50
  self.chatgpt_client = self._create_client_chatgpt(openai_key)
50
51
 
51
- def _initialize_gemini(self):
52
- """Ask the Gemini model a question.
53
-
54
- Returns:
55
- str: The response from the model.
56
- """
57
- # Initialize the model if it is not already initialized
58
- model_gen = GenerativeModel(model_name=self.model_id)
59
- self.model_config = GenerationConfig(**self.model_params)
60
-
61
- return model_gen
62
-
63
52
  def _create_client_chatgpt(self, openai_key):
64
53
  client = OpenAI(api_key=openai_key)
65
54
  return client
@@ -67,7 +56,7 @@ class LlmClient:
67
56
  async def ask_gemini(
68
57
  self,
69
58
  prompt: str,
70
- document: str = None,
59
+ document: types.Part = None,
71
60
  response_schema: dict = None,
72
61
  response_mime_type: str = "application/json",
73
62
  doc_type: str = None,
@@ -76,7 +65,7 @@ class LlmClient:
76
65
 
77
66
  Args:
78
67
  prompt (str): The prompt to send to the model.
79
- document (str, optional): An optional document to provide context.
68
+ document (types.Part, optional): An optional document to provide context.
80
69
  response_schema (dict, optional): Defines a specific response schema for the model.
81
70
  doc_type (str, optional): Document type for cost tracking labels.
82
71
 
@@ -84,33 +73,35 @@ class LlmClient:
84
73
  str: The response from the model.
85
74
  """
86
75
  try:
76
+ # Build config with all parameters
77
+ config_params = {
78
+ **self.model_params,
79
+ "safety_settings": self.safety_settings,
80
+ "labels": get_gcp_labels(doc_type=doc_type),
81
+ }
82
+
83
+ # Add thinking config for Gemini 3 models
84
+ if self.thinking_level:
85
+ config_params["thinking_config"] = types.ThinkingConfig(
86
+ thinking_level=self.thinking_level
87
+ )
87
88
 
88
- # Start with the default model configuration
89
- config = self.model_config
90
-
91
- # Add response_schema if provided. This is only supported for Gemini 1.5 Flash & Pro models
92
89
  if response_schema is not None:
93
- config = GenerationConfig(
94
- response_schema=response_schema,
95
- response_mime_type=response_mime_type,
96
- **self.model_params,
97
- )
90
+ config_params["response_schema"] = response_schema
91
+ config_params["response_mime_type"] = response_mime_type
98
92
 
99
- # Prepare inputs for the model
100
- inputs = [document, prompt] if document else prompt
93
+ config = types.GenerateContentConfig(**config_params)
94
+ contents = [document, prompt] if document else prompt
101
95
 
102
- # Generate the response with labels for cost tracking
96
+ # Use async client
103
97
  model_response = await cache_on_disk(
104
- self.geminy_client.generate_content_async,
105
- contents=inputs,
106
- generation_config=config,
107
- safety_settings=self.safety_config,
108
- labels=get_gcp_labels(doc_type=doc_type),
98
+ self.genai_client.aio.models.generate_content,
99
+ model=self.model_id,
100
+ contents=contents,
101
+ config=config,
109
102
  )
110
103
 
111
- response_text = model_response.text
112
-
113
- return response_text
104
+ return model_response.text
114
105
 
115
106
  except Exception as e:
116
107
  logger.error(f"Failed to generate response: {str(e)}")
@@ -146,25 +137,20 @@ class LlmClient:
146
137
  logger.error(e)
147
138
  return {}
148
139
 
149
- def prepare_document_for_gemini(self, file_content):
150
- """Prepare a document from file content by encoding it to base64.
140
+ def prepare_document_for_gemini(self, file_content: bytes) -> types.Part:
141
+ """Prepare a document from file content for the Gemini model.
151
142
 
152
143
  Args:
153
144
  file_content (bytes): The binary content of the file to be processed.
154
145
 
155
146
  Returns:
156
- Part: A document object ready for processing by the language model.
147
+ types.Part: A document object ready for processing by the language model.
157
148
  """
158
- # Convert binary file to base64
159
- pdf_base64 = base64.b64encode(file_content).decode("utf-8")
160
-
161
- # Create the document for the model
162
- document = Part.from_data(
163
- mime_type="application/pdf", data=base64.b64decode(pdf_base64)
149
+ return types.Part.from_bytes(
150
+ data=file_content,
151
+ mime_type="application/pdf",
164
152
  )
165
153
 
166
- return document
167
-
168
154
  async def ask_chatgpt(self, prompt: str, document=None, response_schema=None):
169
155
  """Ask the chatgpt model a question.
170
156
 
@@ -723,16 +723,23 @@ async def format_all_entities(result, document_type_code, params, mime_type):
723
723
  if document_type_code in ["partnerInvoice", "bundeskasse"]:
724
724
  await process_partner_invoice(params, aggregated_data, document_type_code)
725
725
 
726
- # TODO: This is a temporary change until the terminal codes are updated
727
- if document_type_code == "bookingConfirmation":
728
- if "gateInTerminalCode" in aggregated_data:
729
- aggregated_data["gateInTerminal"] = aggregated_data.pop(
730
- "gateInTerminalCode"
731
- )
732
- if "pickUpTerminalCode" in aggregated_data:
733
- aggregated_data["pickUpTerminal"] = aggregated_data.pop(
734
- "pickUpTerminalCode"
735
- )
726
+ # Post process terminals with code and name
727
+ for key in list(aggregated_data):
728
+ if "terminal" not in key.lower():
729
+ continue
730
+
731
+ entity = aggregated_data.pop(key)
732
+ formatted = entity.get("formattedValue", {})
733
+
734
+ if formatted is None:
735
+ aggregated_data[key] = {**entity}
736
+ continue
737
+
738
+ aggregated_data[key] = {**entity, "formattedValue": formatted.get("id")}
739
+ aggregated_data[f"{key}Code"] = {
740
+ **entity,
741
+ "formattedValue": formatted.get("code"),
742
+ }
736
743
 
737
744
  logger.info("Data Extraction completed successfully")
738
745
  return aggregated_data
@@ -11,12 +11,12 @@
11
11
  "nullable": true,
12
12
  "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
13
13
  },
14
- "pickUpTerminalCode": {
14
+ "pickUpTerminal": {
15
15
  "type": "STRING",
16
16
  "nullable": true,
17
17
  "description": "The specific terminal for cargo pickup during the import shipment."
18
18
  },
19
- "gateInTerminalCode": {
19
+ "gateInTerminal": {
20
20
  "type": "STRING",
21
21
  "nullable": true,
22
22
  "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., FULL RETURN TO or Export terminal name."
@@ -11,12 +11,12 @@
11
11
  "nullable": true,
12
12
  "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. It can be mentioned as Contract No., Contract Reference, or Quotation No. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
13
13
  },
14
- "pickUpTerminalCode": {
14
+ "pickUpTerminal": {
15
15
  "type": "STRING",
16
16
  "nullable": true,
17
17
  "description": "The specific terminal for cargo pickup during the import shipment."
18
18
  },
19
- "gateInTerminalCode": {
19
+ "gateInTerminal": {
20
20
  "type": "STRING",
21
21
  "nullable": true,
22
22
  "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., Export terminal delivery address, Export terminal location, or Export terminal name."
@@ -11,12 +11,12 @@
11
11
  "nullable": true,
12
12
  "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
13
13
  },
14
- "pickUpTerminalCode": {
14
+ "pickUpTerminal": {
15
15
  "type": "STRING",
16
16
  "nullable": true,
17
17
  "description": "The specific terminal for cargo pickup during the import shipment."
18
18
  },
19
- "gateInTerminalCode": {
19
+ "gateInTerminal": {
20
20
  "type": "STRING",
21
21
  "nullable": true,
22
22
  "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., Export terminal delivery address, Export terminal location, or Return Equip Delivery Terminal."
@@ -16,7 +16,7 @@ For Import Shipment: The loaded container / cargo arrives at a port of discharge
16
16
  <INSTRUCTIONS>
17
17
  - bookingNumber: A unique identifier for the booking.
18
18
 
19
- - gateInTerminalCode: The specific terminal where cargo is gated in. It can be called Return Equip Delivery Terminal and Location interception. This sometimes can be the same as portOfLoading of the First transportLeg.
19
+ - gateInTerminal: The specific terminal where cargo is gated in. It can be called Return Equip Delivery Terminal and Location interception. This sometimes can be the same as portOfLoading of the First transportLeg.
20
20
  - gateInReference: A reference code for cargo entering the terminal. If not mentioned explicitly and gateInTerminal is extracted, then use bookingNumber as gateInReference.
21
21
  - pickUpTerminal: The specific terminal for cargo pickup. It can be found as Import pick up address(es), PORT OF DISCHARGE (after the slash '/').
22
22
  - pickUpReference: A reference code for cargo pickup. If not mentioned explicitly and pickUpTerminal is extracted, then use bookingNumber as pickUpReference.
@@ -11,12 +11,12 @@
11
11
  "nullable": true,
12
12
  "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. It can be found as Service Contract/Rate Reff. No. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
13
13
  },
14
- "pickUpTerminalCode": {
14
+ "pickUpTerminal": {
15
15
  "type": "STRING",
16
16
  "nullable": true,
17
17
  "description": "The specific terminal for cargo pickup during the import shipment."
18
18
  },
19
- "gateInTerminalCode": {
19
+ "gateInTerminal": {
20
20
  "type": "STRING",
21
21
  "nullable": true,
22
22
  "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. It can be found as GATE IN AT TERMINAL/DEPOT"
@@ -14,7 +14,7 @@ For Import Shipment: The loaded container / cargo arrives at a port of discharge
14
14
  <CONTEXT>
15
15
 
16
16
  <INSTRUCTIONS>
17
- - gateInTerminalCode: The specific terminal where cargo is gated in. It can be called Return Equip Delivery Terminal and Location interception. This sometimes can be the same as portOfLoading of the First transportLeg.
17
+ - gateInTerminal: The specific terminal where cargo is gated in. It can be called Return Equip Delivery Terminal and Location interception. This sometimes can be the same as portOfLoading of the First transportLeg.
18
18
  - gateInReference: A reference code for cargo entering the terminal. If not mentioned explicitly and gateInTerminal is extracted, then use bookingNumber as gateInReference.
19
19
  - pickUpTerminal: The specific terminal for cargo pickup. It can be found as Import pick up address(es), PORT OF DISCHARGE (after the slash '/').
20
20
  - pickUpReference: A reference code for cargo pickup. If not mentioned explicitly and pickUpTerminal is extracted, then use bookingNumber as pickUpReference.
@@ -11,12 +11,12 @@
11
11
  "nullable": true,
12
12
  "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. It can be referred as Rate Agreement Number or Contract No.. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
13
13
  },
14
- "pickUpTerminalCode": {
14
+ "pickUpTerminal": {
15
15
  "type": "STRING",
16
16
  "nullable": true,
17
17
  "description": "The specific terminal for cargo pickup during the import shipment."
18
18
  },
19
- "gateInTerminalCode": {
19
+ "gateInTerminal": {
20
20
  "type": "STRING",
21
21
  "nullable": true,
22
22
  "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., FULL RETURN LOCATION"
@@ -19,7 +19,7 @@ For Import Shipment: The loaded container / cargo arrives at a port of discharge
19
19
 
20
20
  - bookingNumber: Extract the booking number. This information can be found at the top section of the document".
21
21
 
22
- - gateInTerminalCode: The specific terminal where cargo is gated in. It can be called as FULL RETURN LOCATION. This sometimes can be the same as portOfLoading of the First transportLeg.
22
+ - gateInTerminal: The specific terminal where cargo is gated in. It can be called as FULL RETURN LOCATION. This sometimes can be the same as portOfLoading of the First transportLeg.
23
23
  - gateInReference: A reference code for cargo entering the terminal. If not mentioned explicitly and gateInTerminal is extracted, then use bookingNumber as gateInReference.
24
24
  - pickUpTerminal: The specific terminal for cargo pickup. It can be found as Import pick up address(es), PORT OF DISCHARGE (after the slash '/').
25
25
  - pickUpReference: A reference code for cargo pickup. If not mentioned explicitly and pickUpTerminal is extracted, then use bookingNumber as pickUpReference.
@@ -11,12 +11,12 @@
11
11
  "nullable": true,
12
12
  "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
13
13
  },
14
- "pickUpTerminalCode": {
14
+ "pickUpTerminal": {
15
15
  "type": "STRING",
16
16
  "nullable": true,
17
17
  "description": "The specific terminal for cargo pickup during the import shipment."
18
18
  },
19
- "gateInTerminalCode": {
19
+ "gateInTerminal": {
20
20
  "type": "STRING",
21
21
  "nullable": true,
22
22
  "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. It can be found as Gate In At Terminal, Gate in Terminal, Full Return To Terminal, Full Return Location, Cargo Delivery At, Delivery Terminal, Full Return CY, eyc.."
@@ -11,12 +11,12 @@
11
11
  "nullable": true,
12
12
  "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
13
13
  },
14
- "pickUpTerminalCode": {
14
+ "pickUpTerminal": {
15
15
  "type": "STRING",
16
16
  "nullable": true,
17
17
  "description": "The specific terminal for cargo pickup during the import shipment."
18
18
  },
19
- "gateInTerminalCode": {
19
+ "gateInTerminal": {
20
20
  "type": "STRING",
21
21
  "nullable": true,
22
22
  "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. It is mentioned as Delivery Terminal."
@@ -17,7 +17,7 @@ For Import Shipment: The loaded container / cargo arrives at a port of discharge
17
17
  - Populate fields as defined in the response schema.
18
18
  - Use the data field description to understand the context of the data.
19
19
 
20
- - gateInTerminalCode: The specific terminal where cargo is gated in. It is mentioned as Delivery Terminal. This sometimes can be the same as portOfLoading of the First transportLeg.
20
+ - gateInTerminal: The specific terminal where cargo is gated in. It is mentioned as Delivery Terminal. This sometimes can be the same as portOfLoading of the First transportLeg.
21
21
  - cyCutOff: The deadline for cargo to be delivered to the Container Yard. It can be found at Cargo Cut Off or FCL delivery cut-off.
22
22
 
23
23
  - transportLegs: Multiple Transport Legs entries may exist, capture all instances under "transportLegs". Make sure the order of the legs are important.
@@ -5,8 +5,8 @@ import random
5
5
  import time
6
6
 
7
7
  import toml
8
- import vertexai
9
8
  import yaml
9
+ from google import genai
10
10
  from google.api_core.client_options import ClientOptions
11
11
  from google.cloud import documentai
12
12
  from google.cloud import documentai_v1beta3 as docai_beta
@@ -134,10 +134,14 @@ def setup_params(args=None):
134
134
 
135
135
  # Set up LLM clients
136
136
  params["LlmClient"] = LlmClient(
137
- openai_key=os.getenv("OPENAI_KEY"), parameters=params["gemini_params"]
137
+ openai_key=os.getenv("OPENAI_KEY"),
138
+ parameters=params["gemini_params"],
139
+ genai_client=params["genai_client"],
138
140
  )
139
141
  params["LlmClient_Flash"] = LlmClient(
140
- openai_key=os.getenv("OPENAI_KEY"), parameters=params["gemini_flash_params"]
142
+ openai_key=os.getenv("OPENAI_KEY"),
143
+ parameters=params["gemini_flash_params"],
144
+ genai_client=params["genai_client"],
141
145
  )
142
146
 
143
147
  # Load lookup data from GCS bucket
@@ -167,8 +171,9 @@ def setup_docai_client_and_path(params):
167
171
 
168
172
 
169
173
  def setup_vertexai(params):
170
- """Initialize the Vertex AI with the specified project and location."""
171
- vertexai.init(
174
+ """Initialize the Google GenAI client with Vertex AI configuration."""
175
+ params["genai_client"] = genai.Client(
176
+ vertexai=True,
172
177
  project=params["g_ai_project_name"],
173
178
  location=params["g_region"],
174
179
  )