PyPI - data-science-document-ai - Versions diffs - 1.61.0__tar.gz → 1.61.2__tar.gz - Mend

data-science-document-ai 1.61.0 (tar.gz) → 1.61.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59)

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/PKG-INFO RENAMED Viewed

@@ -1,13 +1,11 @@
 Metadata-Version: 2.4
 Name: data-science-document-ai
-Version: 1.61.0
+Version: 1.61.2
 Summary: "Document AI repo for data science"
 Author: Naomi Nguyen
 Author-email: naomi.nguyen@forto.com
-Requires-Python: >=3.9,<3.12
+Requires-Python: >=3.11,<3.12
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
-Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Requires-Dist: apscheduler (>=3.10.4,<4.0.0)
 Requires-Dist: db-dtypes (>=1.2.0,<2.0.0)
@@ -24,8 +22,8 @@ Requires-Dist: google-cloud-bigquery-storage (>=2.20.0,<3.0.0)
 Requires-Dist: google-cloud-documentai (>=2.23.0,<3.0.0)
 Requires-Dist: google-cloud-storage (>=2.9.0,<3.0.0)
 Requires-Dist: google-cloud-vision (>=3.7.1,<4.0.0)
+Requires-Dist: google-genai (>=1.61.0,<2.0.0)
 Requires-Dist: gspread (>=6.1.0,<7.0.0)
-Requires-Dist: httpx (>=0.26.0,<0.27.0)
 Requires-Dist: jupyter (>=1.0.0,<2.0.0)
 Requires-Dist: kubernetes (>=30.1.0,<31.0.0)
 Requires-Dist: nltk (>=3.9.1,<4.0.0)

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "data-science-document-ai"
-version = "1.61.0"
+version = "1.61.2"
 description = "\"Document AI repo for data science\""
 authors = ["Naomi Nguyen <naomi.nguyen@forto.com>", "Kumar Rajendrababu <kumar.rajendrababu@forto.com>", "Igor Tonko <igor.tonko@forto.com>", "Osman Demirel <osman.demirel@forto.com>"]
 packages = [
@@ -9,7 +9,7 @@ packages = [
 [tool.poetry.dependencies]
-python = ">=3.9,<3.12"
+python = ">=3.11,<3.12"
 pandas = "^2.0.3"
 numpy = "^1.25.1"
 google-cloud-bigquery-storage = "^2.20.0"
@@ -26,7 +26,6 @@ uvicorn = {extras = ["standard"], version = "^0.27.0.post1"}
 requests-toolbelt = "^1.0.0"
 google = "^3.0.0"
 toml = "^0.10.2"
-httpx = "^0.26.0"
 python-multipart = "^0.0.7"
 google-cloud-documentai = "^2.23.0"
 jupyter = "^1.0.0"
@@ -42,6 +41,7 @@ xlrd = "^2.0.1"
 openpyxl = "^3.1.5"
 tabulate = "^0.9.0"
 openai = "^1.53.0"
+google-genai = "^1.61.0"
 parameterized = "^0.9.0"
 ddtrace = "^2.20.0"
 rapidfuzz = "^3.12.2"
@@ -50,12 +50,10 @@ nltk = "^3.9.1"
 pgzip = "^0.3.5"
 pypdf = "^6.1.2"
-[tool.poetry.dev-dependencies]
+[tool.poetry.group.dev.dependencies]
 jupyter = "^1.0.0"
 ipykernel = "^6.23.1"
 notebook = "^6.5.5"
-[tool.poetry.group.dev.dependencies]
 deepdiff = "^8.1.1"
 [build-system]

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/constants.py RENAMED Viewed

@@ -9,7 +9,7 @@ project_parameters = {
     "g_ai_project_id": "738250249861",
     "g_api_endpoint": "eu-documentai.googleapis.com",
     "g_location": "eu",
-    "g_region": "europe-west1",
+    "g_region": "global",
     # Google Cloud Storage
     "doc_ai_bucket_project_name": "forto-data-science-production",
     "doc_ai_bucket_name": "ds-document-capture",
@@ -60,7 +60,8 @@ project_parameters = {
         "top_p": 0.8,
         "top_k": 40,
         "seed": 42,
-        "model_id": "gemini-2.5-pro",
+        "model_id": "gemini-3-flash-preview",
+        "thinking_level": "medium",
     },
     "gemini_flash_params": {
         "temperature": 0,
@@ -93,8 +94,8 @@ formatting_rules = {
     "bookingConfirmation": {
         "pickUpDepotCode": "depot",
         "dropOffDepotCode": "depot",
-        "gateInTerminalCode": "terminal",
-        "pickUpTerminalCode": "terminal",
+        "gateInTerminal": "terminal",
+        "pickUpTerminal": "terminal",
     },
     "deliveryOrder": {"pickUpTerminal": "terminal", "EmptyContainerDepot": "depot"},
 }

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/llm.py RENAMED Viewed

@@ -3,17 +3,11 @@ import logging
 logger = logging.getLogger(__name__)
-import base64
 import json
+from google import genai
+from google.genai import types
 from openai import AsyncOpenAI as OpenAI
-from vertexai.generative_models import (
-    GenerationConfig,
-    GenerativeModel,
-    HarmBlockThreshold,
-    HarmCategory,
-    Part,
-)
 from src.io import get_gcp_labels
 from src.utils import cache_on_disk
@@ -24,8 +18,10 @@ from src.utils import cache_on_disk
 class LlmClient:
     """A client for interacting with large language models (LLMs)."""
-    def __init__(self, openai_key=None, parameters=None):
+    def __init__(self, openai_key=None, parameters=None, genai_client=None):
         """Initialize the LLM client."""
+        self.genai_client = genai_client
         # Initialize the model parameters
         self.model_params = {
             "temperature": parameters.get("temperature", 0),
@@ -35,31 +31,24 @@ class LlmClient:
             "seed": parameters.get("seed", 42),
         }
         self.model_id = parameters.get("model_id", "gemini-2.5-flash")
-        # Initialize the safety configuration
-        self.safety_config = {
-            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
-            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
-            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
-            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
-        }
-        # Initialize the Gemini client
-        self.geminy_client = self._initialize_gemini()
+        self.thinking_level = parameters.get("thinking_level")
+        # Initialize the safety configuration (new format: list of SafetySetting objects)
+        self.safety_settings = [
+            types.SafetySetting(
+                category="HARM_CATEGORY_DANGEROUS_CONTENT", threshold="OFF"
+            ),
+            types.SafetySetting(category="HARM_CATEGORY_HARASSMENT", threshold="OFF"),
+            types.SafetySetting(category="HARM_CATEGORY_HATE_SPEECH", threshold="OFF"),
+            types.SafetySetting(
+                category="HARM_CATEGORY_SEXUALLY_EXPLICIT", threshold="OFF"
+            ),
+        ]
         if openai_key is not None:
             # Initialize the ChatGPT client
             self.chatgpt_client = self._create_client_chatgpt(openai_key)
-    def _initialize_gemini(self):
-        """Ask the Gemini model a question.
-        Returns:
-            str: The response from the model.
-        """
-        # Initialize the model if it is not already initialized
-        model_gen = GenerativeModel(model_name=self.model_id)
-        self.model_config = GenerationConfig(**self.model_params)
-        return model_gen
     def _create_client_chatgpt(self, openai_key):
         client = OpenAI(api_key=openai_key)
         return client
@@ -67,7 +56,7 @@ class LlmClient:
     async def ask_gemini(
         self,
         prompt: str,
-        document: str = None,
+        document: types.Part = None,
         response_schema: dict = None,
         response_mime_type: str = "application/json",
         doc_type: str = None,
@@ -76,7 +65,7 @@ class LlmClient:
         Args:
             prompt (str): The prompt to send to the model.
-            document (str, optional): An optional document to provide context.
+            document (types.Part, optional): An optional document to provide context.
             response_schema (dict, optional): Defines a specific response schema for the model.
             doc_type (str, optional): Document type for cost tracking labels.
@@ -84,33 +73,35 @@ class LlmClient:
             str: The response from the model.
         """
         try:
+            # Build config with all parameters
+            config_params = {
+                **self.model_params,
+                "safety_settings": self.safety_settings,
+                "labels": get_gcp_labels(doc_type=doc_type),
+            }
+            # Add thinking config for Gemini 3 models
+            if self.thinking_level:
+                config_params["thinking_config"] = types.ThinkingConfig(
+                    thinking_level=self.thinking_level
+                )
-            # Start with the default model configuration
-            config = self.model_config
-            # Add response_schema if provided. This is only supported for Gemini 1.5 Flash & Pro models
             if response_schema is not None:
-                config = GenerationConfig(
-                    response_schema=response_schema,
-                    response_mime_type=response_mime_type,
-                    **self.model_params,
-                )
+                config_params["response_schema"] = response_schema
+                config_params["response_mime_type"] = response_mime_type
-            # Prepare inputs for the model
-            inputs = [document, prompt] if document else prompt
+            config = types.GenerateContentConfig(**config_params)
+            contents = [document, prompt] if document else prompt
-            # Generate the response with labels for cost tracking
+            # Use async client
             model_response = await cache_on_disk(
-                self.geminy_client.generate_content_async,
-                contents=inputs,
-                generation_config=config,
-                safety_settings=self.safety_config,
-                labels=get_gcp_labels(doc_type=doc_type),
+                self.genai_client.aio.models.generate_content,
+                model=self.model_id,
+                contents=contents,
+                config=config,
             )
-            response_text = model_response.text
-            return response_text
+            return model_response.text
         except Exception as e:
             logger.error(f"Failed to generate response: {str(e)}")
@@ -146,25 +137,20 @@ class LlmClient:
             logger.error(e)
             return {}
-    def prepare_document_for_gemini(self, file_content):
-        """Prepare a document from file content by encoding it to base64.
+    def prepare_document_for_gemini(self, file_content: bytes) -> types.Part:
+        """Prepare a document from file content for the Gemini model.
         Args:
             file_content (bytes): The binary content of the file to be processed.
         Returns:
-            Part: A document object ready for processing by the language model.
+            types.Part: A document object ready for processing by the language model.
         """
-        # Convert binary file to base64
-        pdf_base64 = base64.b64encode(file_content).decode("utf-8")
-        # Create the document for the model
-        document = Part.from_data(
-            mime_type="application/pdf", data=base64.b64decode(pdf_base64)
+        return types.Part.from_bytes(
+            data=file_content,
+            mime_type="application/pdf",
         )
-        return document
     async def ask_chatgpt(self, prompt: str, document=None, response_schema=None):
         """Ask the chatgpt model a question.

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/postprocessing/common.py RENAMED Viewed

@@ -723,16 +723,23 @@ async def format_all_entities(result, document_type_code, params, mime_type):
     if document_type_code in ["partnerInvoice", "bundeskasse"]:
         await process_partner_invoice(params, aggregated_data, document_type_code)
-    # TODO: This is a temporary change until the terminal codes are updated
-    if document_type_code == "bookingConfirmation":
-        if "gateInTerminalCode" in aggregated_data:
-            aggregated_data["gateInTerminal"] = aggregated_data.pop(
-                "gateInTerminalCode"
-            )
-        if "pickUpTerminalCode" in aggregated_data:
-            aggregated_data["pickUpTerminal"] = aggregated_data.pop(
-                "pickUpTerminalCode"
-            )
+    # Post process terminals with code and name
+    for key in list(aggregated_data):
+        if "terminal" not in key.lower():
+            continue
+        entity = aggregated_data.pop(key)
+        formatted = entity.get("formattedValue", {})
+        if formatted is None:
+            aggregated_data[key] = {**entity}
+            continue
+        aggregated_data[key] = {**entity, "formattedValue": formatted.get("id")}
+        aggregated_data[f"{key}Code"] = {
+            **entity,
+            "formattedValue": formatted.get("code"),
+        }
     logger.info("Data Extraction completed successfully")
     return aggregated_data

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/evergreen/placeholders.json RENAMED Viewed

@@ -11,12 +11,12 @@
       "nullable": true,
       "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
     },
-    "pickUpTerminalCode": {
+    "pickUpTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal for cargo pickup during the import shipment."
     },
-    "gateInTerminalCode": {
+    "gateInTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., FULL RETURN TO or Export terminal name."

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/hapag-lloyd/placeholders.json RENAMED Viewed

@@ -11,12 +11,12 @@
       "nullable": true,
       "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. It can be mentioned as Contract No., Contract Reference, or Quotation No. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
     },
-    "pickUpTerminalCode": {
+    "pickUpTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal for cargo pickup during the import shipment."
     },
-    "gateInTerminalCode": {
+    "gateInTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., Export terminal delivery address, Export terminal location, or Export terminal name."

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/maersk/placeholders.json RENAMED Viewed

@@ -11,12 +11,12 @@
       "nullable": true,
       "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
     },
-    "pickUpTerminalCode": {
+    "pickUpTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal for cargo pickup during the import shipment."
     },
-    "gateInTerminalCode": {
+    "gateInTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., Export terminal delivery address, Export terminal location, or Return Equip Delivery Terminal."

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/maersk/prompt.txt RENAMED Viewed

@@ -16,7 +16,7 @@ For Import Shipment: The loaded container / cargo arrives at a port of discharge
 <INSTRUCTIONS>
 - bookingNumber: A unique identifier for the booking.
-- gateInTerminalCode: The specific terminal where cargo is gated in. It can be called Return Equip Delivery Terminal and Location interception. This sometimes can be the same as portOfLoading of the First transportLeg.
+- gateInTerminal: The specific terminal where cargo is gated in. It can be called Return Equip Delivery Terminal and Location interception. This sometimes can be the same as portOfLoading of the First transportLeg.
 - gateInReference: A reference code for cargo entering the terminal. If not mentioned explicitly and gateInTerminal is extracted, then use bookingNumber as gateInReference.
 - pickUpTerminal: The specific terminal for cargo pickup. It can be found as Import pick up address(es),  PORT OF DISCHARGE (after the slash '/').
 - pickUpReference: A reference code for cargo pickup. If not mentioned explicitly and pickUpTerminal is extracted, then use bookingNumber as pickUpReference.

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/msc/placeholders.json RENAMED Viewed

@@ -11,12 +11,12 @@
       "nullable": true,
       "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. It can be found as Service Contract/Rate Reff. No. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
     },
-    "pickUpTerminalCode": {
+    "pickUpTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal for cargo pickup during the import shipment."
     },
-    "gateInTerminalCode": {
+    "gateInTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. It can be found as GATE IN AT TERMINAL/DEPOT"

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/msc/prompt.txt RENAMED Viewed

@@ -14,7 +14,7 @@ For Import Shipment: The loaded container / cargo arrives at a port of discharge
 <CONTEXT>
 <INSTRUCTIONS>
-- gateInTerminalCode: The specific terminal where cargo is gated in. It can be called Return Equip Delivery Terminal and Location interception. This sometimes can be the same as portOfLoading of the First transportLeg.
+- gateInTerminal: The specific terminal where cargo is gated in. It can be called Return Equip Delivery Terminal and Location interception. This sometimes can be the same as portOfLoading of the First transportLeg.
 - gateInReference: A reference code for cargo entering the terminal. If not mentioned explicitly and gateInTerminal is extracted, then use bookingNumber as gateInReference.
 - pickUpTerminal: The specific terminal for cargo pickup. It can be found as Import pick up address(es),  PORT OF DISCHARGE (after the slash '/').
 - pickUpReference: A reference code for cargo pickup. If not mentioned explicitly and pickUpTerminal is extracted, then use bookingNumber as pickUpReference.

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/oocl/placeholders.json RENAMED Viewed

@@ -11,12 +11,12 @@
       "nullable": true,
       "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. It can be referred as Rate Agreement Number or Contract No.. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
     },
-    "pickUpTerminalCode": {
+    "pickUpTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal for cargo pickup during the import shipment."
     },
-    "gateInTerminalCode": {
+    "gateInTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. E.g., FULL RETURN LOCATION"

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/oocl/prompt.txt RENAMED Viewed

@@ -19,7 +19,7 @@ For Import Shipment: The loaded container / cargo arrives at a port of discharge
 - bookingNumber: Extract the booking number. This information can be found at the top section of the document".
-- gateInTerminalCode: The specific terminal where cargo is gated in. It can be called as FULL RETURN LOCATION. This sometimes can be the same as portOfLoading of the First transportLeg.
+- gateInTerminal: The specific terminal where cargo is gated in. It can be called as FULL RETURN LOCATION. This sometimes can be the same as portOfLoading of the First transportLeg.
 - gateInReference: A reference code for cargo entering the terminal. If not mentioned explicitly and gateInTerminal is extracted, then use bookingNumber as gateInReference.
 - pickUpTerminal: The specific terminal for cargo pickup. It can be found as Import pick up address(es),  PORT OF DISCHARGE (after the slash '/').
 - pickUpReference: A reference code for cargo pickup. If not mentioned explicitly and pickUpTerminal is extracted, then use bookingNumber as pickUpReference.

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/other/placeholders.json RENAMED Viewed

@@ -11,12 +11,12 @@
       "nullable": true,
       "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
     },
-    "pickUpTerminalCode": {
+    "pickUpTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal for cargo pickup during the import shipment."
     },
-    "gateInTerminalCode": {
+    "gateInTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. It can be found as Gate In At Terminal, Gate in Terminal, Full Return To Terminal, Full Return Location, Cargo Delivery At, Delivery Terminal, Full Return CY, eyc.."

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/yangming/placeholders.json RENAMED Viewed

@@ -11,12 +11,12 @@
       "nullable": true,
       "description": "It's a contract number between the carrier and Forto Logistics SE & Co KG. Shipment Id 'S' followed by 6, 7, or 8 digits e.g. S9486358 is not a contract number."
     },
-    "pickUpTerminalCode": {
+    "pickUpTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal for cargo pickup during the import shipment."
     },
-    "gateInTerminalCode": {
+    "gateInTerminal": {
       "type": "STRING",
       "nullable": true,
       "description": "The specific terminal where cargo is gated in especially Export terminal delivery address. It is mentioned as Delivery Terminal."

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/prompts/library/bookingConfirmation/yangming/prompt.txt RENAMED Viewed

@@ -17,7 +17,7 @@ For Import Shipment: The loaded container / cargo arrives at a port of discharge
 - Populate fields as defined in the response schema.
 - Use the data field description to understand the context of the data.
-- gateInTerminalCode: The specific terminal where cargo is gated in. It is mentioned as Delivery Terminal. This sometimes can be the same as portOfLoading of the First transportLeg.
+- gateInTerminal: The specific terminal where cargo is gated in. It is mentioned as Delivery Terminal. This sometimes can be the same as portOfLoading of the First transportLeg.
 - cyCutOff: The deadline for cargo to be delivered to the Container Yard. It can be found at Cargo Cut Off or FCL delivery cut-off.
 - transportLegs: Multiple Transport Legs entries may exist, capture all instances under "transportLegs". Make sure the order of the legs are important.

{data_science_document_ai-1.61.0 → data_science_document_ai-1.61.2}/src/setup.py RENAMED Viewed

@@ -5,8 +5,8 @@ import random
 import time
 import toml
-import vertexai
 import yaml
+from google import genai
 from google.api_core.client_options import ClientOptions
 from google.cloud import documentai
 from google.cloud import documentai_v1beta3 as docai_beta
@@ -134,10 +134,14 @@ def setup_params(args=None):
     # Set up LLM clients
     params["LlmClient"] = LlmClient(
-        openai_key=os.getenv("OPENAI_KEY"), parameters=params["gemini_params"]
+        openai_key=os.getenv("OPENAI_KEY"),
+        parameters=params["gemini_params"],
+        genai_client=params["genai_client"],
     )
     params["LlmClient_Flash"] = LlmClient(
-        openai_key=os.getenv("OPENAI_KEY"), parameters=params["gemini_flash_params"]
+        openai_key=os.getenv("OPENAI_KEY"),
+        parameters=params["gemini_flash_params"],
+        genai_client=params["genai_client"],
     )
     # Load lookup data from GCS bucket
@@ -167,8 +171,9 @@ def setup_docai_client_and_path(params):
 def setup_vertexai(params):
-    """Initialize the Vertex AI with the specified project and location."""
-    vertexai.init(
+    """Initialize the Google GenAI client with Vertex AI configuration."""
+    params["genai_client"] = genai.Client(
+        vertexai=True,
         project=params["g_ai_project_name"],
         location=params["g_region"],
     )