agent-starter-pack 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of agent-starter-pack might be problematic.

Files changed (72)
  1. {agent_starter_pack-0.2.2.dist-info → agent_starter_pack-0.3.0.dist-info}/METADATA +14 -16
  2. {agent_starter_pack-0.2.2.dist-info → agent_starter_pack-0.3.0.dist-info}/RECORD +69 -54
  3. agents/adk_base/README.md +14 -0
  4. agents/adk_base/app/agent.py +66 -0
  5. agents/adk_base/notebooks/adk_app_testing.ipynb +305 -0
  6. agents/adk_base/template/.templateconfig.yaml +21 -0
  7. agents/adk_base/tests/integration/test_agent.py +58 -0
  8. agents/agentic_rag/README.md +1 -0
  9. agents/agentic_rag/app/agent.py +44 -89
  10. agents/agentic_rag/app/templates.py +0 -25
  11. agents/agentic_rag/notebooks/adk_app_testing.ipynb +305 -0
  12. agents/agentic_rag/template/.templateconfig.yaml +3 -1
  13. agents/agentic_rag/tests/integration/test_agent.py +34 -27
  14. agents/langgraph_base_react/README.md +1 -1
  15. agents/langgraph_base_react/template/.templateconfig.yaml +1 -1
  16. src/base_template/Makefile +15 -4
  17. src/base_template/README.md +8 -2
  18. src/base_template/app/__init__.py +3 -0
  19. src/base_template/app/utils/tracing.py +11 -1
  20. src/base_template/app/utils/typing.py +54 -4
  21. src/base_template/deployment/README.md +4 -1
  22. src/base_template/deployment/cd/deploy-to-prod.yaml +3 -3
  23. src/base_template/deployment/cd/staging.yaml +4 -4
  24. src/base_template/deployment/ci/pr_checks.yaml +1 -1
  25. src/base_template/deployment/terraform/build_triggers.tf +3 -0
  26. src/base_template/deployment/terraform/dev/variables.tf +4 -0
  27. src/base_template/deployment/terraform/dev/vars/env.tfvars +0 -3
  28. src/base_template/deployment/terraform/variables.tf +4 -0
  29. src/base_template/deployment/terraform/vars/env.tfvars +0 -4
  30. src/base_template/pyproject.toml +5 -3
  31. src/{deployment_targets/agent_engine → base_template}/tests/unit/test_dummy.py +2 -1
  32. src/cli/commands/create.py +45 -11
  33. src/cli/commands/setup_cicd.py +25 -6
  34. src/cli/utils/gcp.py +1 -1
  35. src/cli/utils/template.py +27 -25
  36. src/data_ingestion/README.md +37 -50
  37. src/data_ingestion/data_ingestion_pipeline/components/ingest_data.py +2 -1
  38. src/deployment_targets/agent_engine/app/agent_engine_app.py +68 -22
  39. src/deployment_targets/agent_engine/app/utils/gcs.py +1 -1
  40. src/deployment_targets/agent_engine/tests/integration/test_agent_engine_app.py +63 -0
  41. src/deployment_targets/agent_engine/tests/load_test/load_test.py +9 -2
  42. src/deployment_targets/cloud_run/Dockerfile +1 -1
  43. src/deployment_targets/cloud_run/app/server.py +41 -15
  44. src/deployment_targets/cloud_run/tests/integration/test_server_e2e.py +60 -3
  45. src/deployment_targets/cloud_run/tests/load_test/README.md +1 -1
  46. src/deployment_targets/cloud_run/tests/load_test/load_test.py +57 -24
  47. src/frontends/live_api_react/frontend/package-lock.json +3 -3
  48. src/frontends/streamlit/frontend/utils/stream_handler.py +3 -3
  49. src/frontends/streamlit_adk/frontend/side_bar.py +214 -0
  50. src/frontends/streamlit_adk/frontend/streamlit_app.py +314 -0
  51. src/frontends/streamlit_adk/frontend/style/app_markdown.py +37 -0
  52. src/frontends/streamlit_adk/frontend/utils/chat_utils.py +84 -0
  53. src/frontends/streamlit_adk/frontend/utils/local_chat_history.py +110 -0
  54. src/frontends/streamlit_adk/frontend/utils/message_editing.py +61 -0
  55. src/frontends/streamlit_adk/frontend/utils/multimodal_utils.py +223 -0
  56. src/frontends/streamlit_adk/frontend/utils/stream_handler.py +311 -0
  57. src/frontends/streamlit_adk/frontend/utils/title_summary.py +129 -0
  58. src/resources/locks/uv-adk_base-agent_engine.lock +5335 -0
  59. src/resources/locks/uv-adk_base-cloud_run.lock +5927 -0
  60. src/resources/locks/uv-agentic_rag-agent_engine.lock +939 -732
  61. src/resources/locks/uv-agentic_rag-cloud_run.lock +1087 -907
  62. src/resources/locks/uv-crewai_coding_crew-agent_engine.lock +778 -671
  63. src/resources/locks/uv-crewai_coding_crew-cloud_run.lock +852 -753
  64. src/resources/locks/uv-langgraph_base_react-agent_engine.lock +665 -591
  65. src/resources/locks/uv-langgraph_base_react-cloud_run.lock +842 -743
  66. src/resources/locks/uv-live_api-cloud_run.lock +830 -731
  67. agents/agentic_rag/notebooks/evaluating_langgraph_agent.ipynb +0 -1561
  68. src/base_template/tests/unit/test_utils/test_tracing_exporter.py +0 -140
  69. src/deployment_targets/cloud_run/tests/unit/test_server.py +0 -124
  70. {agent_starter_pack-0.2.2.dist-info → agent_starter_pack-0.3.0.dist-info}/WHEEL +0 -0
  71. {agent_starter_pack-0.2.2.dist-info → agent_starter_pack-0.3.0.dist-info}/entry_points.txt +0 -0
  72. {agent_starter_pack-0.2.2.dist-info → agent_starter_pack-0.3.0.dist-info}/licenses/LICENSE +0 -0
src/base_template/deployment/cd/deploy-to-prod.yaml CHANGED
@@ -20,7 +20,7 @@ steps:
  args:
  - -c
  - |
- cd data_ingestion && pip install uv --user && uv sync --frozen && \
+ cd data_ingestion && pip install uv==0.6.12 --user && uv sync --frozen && \
  uv run python data_ingestion_pipeline/submit_pipeline.py
  env:
  - "PIPELINE_ROOT=${_PIPELINE_GCS_ROOT}"
@@ -75,7 +75,7 @@ steps:
  args:
  - "-c"
  - |
- pip install uv --user && uv sync --frozen
+ pip install uv==0.6.12 --user && uv sync --frozen
  env:
  - 'PATH=/usr/local/bin:/usr/bin:~/.local/bin'

@@ -85,7 +85,7 @@ steps:
  args:
  - "-c"
  - |
- uv export --no-hashes --no-sources --no-header --no-emit-project --frozen > .requirements.txt
+ uv export --no-hashes --no-sources --no-header --no-dev --no-emit-project --no-annotate --frozen > .requirements.txt
  uv run app/agent_engine_app.py \
  --project ${_PROD_PROJECT_ID} \
  --location ${_REGION} \
src/base_template/deployment/cd/staging.yaml CHANGED
@@ -20,7 +20,7 @@ steps:
  args:
  - -c
  - |
- cd data_ingestion && pip install uv --user && uv sync --frozen && \
+ cd data_ingestion && pip install uv==0.6.12 --user && uv sync --frozen && \
  uv run python data_ingestion_pipeline/submit_pipeline.py
  env:
  - "PIPELINE_ROOT=${_PIPELINE_GCS_ROOT}"
@@ -108,7 +108,7 @@ steps:
  args:
  - "-c"
  - |
- pip install uv --user && uv sync --frozen
+ pip install uv==0.6.12 --user && uv sync --frozen
  env:
  - 'PATH=/usr/local/bin:/usr/bin:~/.local/bin'

@@ -118,7 +118,7 @@ steps:
  args:
  - "-c"
  - |
- uv export --no-hashes --no-sources --no-header --no-emit-project --frozen > .requirements.txt
+ uv export --no-hashes --no-sources --no-header --no-dev --no-emit-project --no-annotate --frozen > .requirements.txt
  uv run app/agent_engine_app.py \
  --project ${_STAGING_PROJECT_ID} \
  --location ${_REGION} \
@@ -146,7 +146,7 @@ steps:
  {%- if cookiecutter.deployment_target == 'cloud_run' %}
  export _ID_TOKEN=$(cat id_token.txt)
  export _STAGING_URL=$(cat staging_url.txt)
- pip install uv --user && uv sync --frozen
+ pip install uv==0.6.12 --user && uv sync --frozen
  {%- elif cookiecutter.deployment_target == 'agent_engine' %}
  export _AUTH_TOKEN=$(cat auth_token.txt)
  {%- endif %}
src/base_template/deployment/ci/pr_checks.yaml CHANGED
@@ -20,7 +20,7 @@ steps:
  args:
  - "-c"
  - |
- pip install uv --user && uv sync --frozen
+ pip install uv==0.6.12 --user && uv sync --frozen
  env:
  - 'PATH=/usr/local/bin:/usr/bin:~/.local/bin'

src/base_template/deployment/terraform/build_triggers.tf CHANGED
@@ -38,6 +38,7 @@ resource "google_cloudbuild_trigger" "pr_checks" {
  "data_ingestion/**",
  {% endif %}
  ]
+ include_build_logs = "INCLUDE_BUILD_LOGS_WITH_STATUS"
  depends_on = [resource.google_project_service.cicd_services, resource.google_project_service.shared_services]
  }

@@ -64,6 +65,7 @@ resource "google_cloudbuild_trigger" "cd_pipeline" {
  "deployment/**",
  "uv.lock"
  ]
+ include_build_logs = "INCLUDE_BUILD_LOGS_WITH_STATUS"
  substitutions = {
  _STAGING_PROJECT_ID = var.staging_project_id
  _BUCKET_NAME_LOAD_TEST_RESULTS = resource.google_storage_bucket.bucket_load_test_results.name
@@ -104,6 +106,7 @@ resource "google_cloudbuild_trigger" "deploy_to_prod_pipeline" {
  repository = "projects/${var.cicd_runner_project_id}/locations/${var.region}/connections/${var.host_connection_name}/repositories/${var.repository_name}"
  }
  filename = "deployment/cd/deploy-to-prod.yaml"
+ include_build_logs = "INCLUDE_BUILD_LOGS_WITH_STATUS"
  approval_config {
  approval_required = true
  }
src/base_template/deployment/terraform/dev/variables.tf CHANGED
@@ -32,7 +32,11 @@ variable "region" {
  variable "telemetry_logs_filter" {
  type = string
  description = "Log Sink filter for capturing telemetry data. Captures logs with the `traceloop.association.properties.log_type` attribute set to `tracing`."
+ {%- if "adk" in cookiecutter.tags %}
+ default = "labels.service_name=\"{{cookiecutter.project_name}}\" labels.type=\"agent_telemetry\""
+ {%- else %}
  default = "jsonPayload.attributes.\"traceloop.association.properties.log_type\"=\"tracing\" jsonPayload.resource.attributes.\"service.name\"=\"{{cookiecutter.project_name}}\""
+ {%- endif %}
  }

  variable "feedback_logs_filter" {
src/base_template/deployment/terraform/dev/vars/env.tfvars CHANGED
@@ -7,9 +7,6 @@ dev_project_id = "your-dev-project-id"
  # The Google Cloud region you will use to deploy the infrastructure
  region = "us-central1"

- telemetry_logs_filter = "jsonPayload.attributes.\"traceloop.association.properties.log_type\"=\"tracing\" jsonPayload.resource.attributes.\"service.name\"=\"{{cookiecutter.project_name}}\""
- feedback_logs_filter = "jsonPayload.log_type=\"feedback\""
-
  {%- if cookiecutter.data_ingestion %}
  {%- if cookiecutter.datastore_type == "vertex_ai_search" %}
  # The value can only be one of "global", "us" and "eu".
src/base_template/deployment/terraform/variables.tf CHANGED
@@ -52,7 +52,11 @@ variable "repository_name" {
  variable "telemetry_logs_filter" {
  type = string
  description = "Log Sink filter for capturing telemetry data. Captures logs with the `traceloop.association.properties.log_type` attribute set to `tracing`."
+ {%- if "adk" in cookiecutter.tags %}
+ default = "labels.service_name=\"{{cookiecutter.project_name}}\" labels.type=\"agent_telemetry\""
+ {%- else %}
  default = "jsonPayload.attributes.\"traceloop.association.properties.log_type\"=\"tracing\" jsonPayload.resource.attributes.\"service.name\"=\"{{cookiecutter.project_name}}\""
+ {%- endif %}
  }

  variable "feedback_logs_filter" {
src/base_template/deployment/terraform/vars/env.tfvars CHANGED
@@ -19,10 +19,6 @@ repository_name = "repo-{{cookiecutter.project_name}}"
  # The Google Cloud region you will use to deploy the infrastructure
  region = "us-central1"

- telemetry_logs_filter = "jsonPayload.attributes.\"traceloop.association.properties.log_type\"=\"tracing\" jsonPayload.resource.attributes.\"service.name\"=\"{{cookiecutter.project_name}}\""
-
- feedback_logs_filter = "jsonPayload.log_type=\"feedback\""
-
  {%- if cookiecutter.data_ingestion %}
  pipeline_cron_schedule = "0 0 * * 0"

src/base_template/pyproject.toml CHANGED
@@ -9,16 +9,18 @@ dependencies = [
  {%- for dep in cookiecutter.extra_dependencies %}
  "{{ dep }}",
  {%- endfor %}
- "langchain-core~=0.3.9",
  "opentelemetry-exporter-gcp-trace~=1.9.0",
+ {%- if "adk" not in cookiecutter.tags %}
+ "langchain-core~=0.3.9",
  "traceloop-sdk~=0.38.7",
+ {%- endif %}
  "google-cloud-logging~=3.11.4",
  {%- if cookiecutter.deployment_target == 'cloud_run' %}
- "google-cloud-aiplatform[evaluation]~=1.81.0",
+ "google-cloud-aiplatform[evaluation]~=1.88.0",
  "fastapi~=0.115.8",
  "uvicorn~=0.34.0"
  {%- elif cookiecutter.deployment_target == 'agent_engine' %}
- "google-cloud-aiplatform[evaluation,reasoningengine]~=1.81.0"
+ "google-cloud-aiplatform[evaluation,agent-engines]~=1.88.0"
  {%- endif %}
  ]
  {% if cookiecutter.deployment_target == 'cloud_run' %}
src/{deployment_targets/agent_engine → base_template}/tests/unit/test_dummy.py CHANGED
@@ -11,9 +11,10 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
-
  """
  You can add your unit tests here.
+ This is where you test your business logic, including agent functionality,
+ data processing, and other core components of your application.
  """

src/cli/commands/create.py CHANGED
@@ -37,6 +37,40 @@ from ..utils.template import (
  console = Console()


+ def normalize_project_name(project_name: str) -> str:
+     """Normalize project name for better compatibility with cloud resources and tools."""
+
+     needs_normalization = (
+         any(char.isupper() for char in project_name) or "_" in project_name
+     )
+
+     if needs_normalization:
+         normalized_name = project_name
+         console.print(
+             "Note: Project names are normalized (lowercase, hyphens only) for better compatibility with cloud resources and tools.",
+             style="dim",
+         )
+         if any(char.isupper() for char in normalized_name):
+             normalized_name = normalized_name.lower()
+             console.print(
+                 f"Info: Converting to lowercase for compatibility: '{project_name}' -> '{normalized_name}'",
+                 style="bold yellow",
+             )
+
+         if "_" in normalized_name:
+             # Capture the name state before this specific change
+             name_before_hyphenation = normalized_name
+             normalized_name = normalized_name.replace("_", "-")
+             console.print(
+                 f"Info: Replacing underscores with hyphens for compatibility: '{name_before_hyphenation}' -> '{normalized_name}'",
+                 style="yellow",
+             )
+
+         return normalized_name
+
+     return project_name
+
+
  @click.command()
  @click.pass_context
  @click.argument("project_name")
@@ -110,15 +144,7 @@ def create(
  )
  return

- # Convert project name to lowercase
- if any(char.isupper() for char in project_name):
-     original_name = project_name
-     project_name = project_name.lower()
-     console.print(
-         f"Warning: Project name '{original_name}' contains uppercase characters. "
-         f"Converting to lowercase: '{project_name}'",
-         style="bold yellow",
-     )
+ project_name = normalize_project_name(project_name)

  # Setup debug logging if enabled
  if debug:
@@ -280,10 +306,10 @@ def create(
  f"This agent uses a datastore for grounded responses.\n"
  f"The agent will work without data, but for optimal results:\n"
  f"1. Set up dev environment:\n"
- f" [white italic]`export PROJECT_ID={project_id} && cd {cd_path} && make setup-dev-env`[/white italic]\n\n"
+ f" [white italic]export PROJECT_ID={project_id} && cd {cd_path} && make setup-dev-env[/white italic]\n\n"
  f" See deployment/README.md for more info\n"
  f"2. Run the data ingestion pipeline:\n"
- f" [white italic]`export PROJECT_ID={project_id} && cd {cd_path} && make data-ingestion`[/white italic]\n\n"
+ f" [white italic]export PROJECT_ID={project_id} && cd {cd_path} && make data-ingestion[/white italic]\n\n"
  f" See data_ingestion/README.md for more info\n"
  f"[bold white]=================================[/bold white]\n"
  )
@@ -621,6 +647,14 @@ def replace_region_in_files(
  "_DATA_STORE_REGION: us", f"_DATA_STORE_REGION: {data_store_region}"
  )
  modified = True
+ elif '"DATA_STORE_REGION", "us"' in content:
+     if debug:
+         logging.debug(f"Replacing DATA_STORE_REGION in {file_path}")
+     content = content.replace(
+         '"DATA_STORE_REGION", "us"',
+         f'"DATA_STORE_REGION", "{data_store_region}"',
+     )
+     modified = True

  if modified:
  file_path.write_text(content)
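For context, the normalization rule introduced above boils down to lowercasing the name and replacing underscores with hyphens. A minimal standalone sketch of that rule (illustrative only; the real helper also prints informational messages via rich's `Console`):

```python
# Minimal sketch of the project-name normalization rule added in create.py.
# The actual helper keeps the same result but emits console notes along the way.
def normalize_project_name(project_name: str) -> str:
    return project_name.lower().replace("_", "-")

assert normalize_project_name("My_Agent_Project") == "my-agent-project"
assert normalize_project_name("already-normalized") == "already-normalized"
```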
src/cli/commands/setup_cicd.py CHANGED
@@ -356,9 +356,11 @@ console = Console()

  @click.command()
  @click.option("--dev-project", help="Development project ID")
- @click.option("--staging-project", required=True, help="Staging project ID")
- @click.option("--prod-project", required=True, help="Production project ID")
- @click.option("--cicd-project", required=True, help="CICD project ID")
+ @click.option("--staging-project", help="Staging project ID")
+ @click.option("--prod-project", help="Production project ID")
+ @click.option(
+     "--cicd-project", help="CICD project ID (defaults to prod project if not specified)"
+ )
  @click.option("--region", default="us-central1", help="GCP region")
  @click.option("--repository-name", help="Repository name (optional)")
  @click.option(
@@ -402,9 +404,9 @@ console = Console()
  )
  def setup_cicd(
  dev_project: str | None,
- staging_project: str,
- prod_project: str,
- cicd_project: str,
+ staging_project: str | None,
+ prod_project: str | None,
+ cicd_project: str | None,
  region: str,
  repository_name: str | None,
  repository_owner: str | None,
@@ -426,6 +428,20 @@ def setup_cicd(
  "Make sure you are in the folder created by agent-starter-pack."
  )

+ # Prompt for staging and prod projects if not provided
+ if staging_project is None:
+     staging_project = click.prompt(
+         "Enter your staging project ID (where tests will be run)", type=str
+     )
+
+ if prod_project is None:
+     prod_project = click.prompt("Enter your production project ID", type=str)
+
+ # If cicd_project is not provided, default to prod_project
+ if cicd_project is None:
+     cicd_project = prod_project
+     console.print(f"Using production project '{prod_project}' for CI/CD resources")
+
  console.print(
  "\n⚠️ WARNING: The setup-cicd command is experimental and may have unexpected behavior.",
  style="bold yellow",
@@ -799,6 +815,9 @@ def setup_cicd(

  repo_url = f"https://github.com/{github_username}/{config.repository_name}"
  cloud_build_url = f"https://console.cloud.google.com/cloud-build/builds?project={config.cicd_project_id}"
+ # Sleep to allow resources to propagate
+ console.print("\n⏳ Waiting for resources to propagate...")
+ time.sleep(10)

  # Print final summary
  print_cicd_summary(config, github_username, repo_url, cloud_build_url)
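A rough standalone sketch of the new prompt-and-default flow for project IDs, using click directly (command name and output are placeholders, not the actual CLI):

```python
import click

@click.command()
@click.option("--staging-project", default=None)
@click.option("--prod-project", default=None)
@click.option("--cicd-project", default=None)
def setup(staging_project, prod_project, cicd_project):
    # Mirrors the new behavior: prompt for missing IDs, default CI/CD to prod.
    if staging_project is None:
        staging_project = click.prompt(
            "Enter your staging project ID (where tests will be run)", type=str
        )
    if prod_project is None:
        prod_project = click.prompt("Enter your production project ID", type=str)
    if cicd_project is None:
        cicd_project = prod_project
        click.echo(f"Using production project '{prod_project}' for CI/CD resources")
    click.echo(f"staging={staging_project} prod={prod_project} cicd={cicd_project}")

if __name__ == "__main__":
    setup()
```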
src/cli/utils/gcp.py CHANGED
@@ -45,7 +45,7 @@ def get_dummy_request(project_id: str, location: str) -> CountTokensRequest:
  """Creates a simple test request for Gemini."""
  return CountTokensRequest(
  contents=[{"role": "user", "parts": [{"text": "Hi"}]}],
- endpoint=f"projects/{project_id}/locations/{location}/publishers/google/models/gemini-1.5-flash-002",
+ endpoint=f"projects/{project_id}/locations/{location}/publishers/google/models/gemini-2.0-flash",
  )

src/cli/utils/template.py CHANGED
@@ -29,6 +29,9 @@ from src.cli.utils.version import get_current_version

  from .datastores import DATASTORES

+ ADK_FILES = ["app/__init__.py"]
+ NON_ADK_FILES: list[str] = []
+

  @dataclass
  class TemplateConfig:
@@ -77,12 +80,10 @@ def get_available_agents(deployment_target: str | None = None) -> dict:
  deployment_target: Optional deployment target to filter agents
  """
  # Define priority agents that should appear first
- PRIORITY_AGENTS = [
-     "langgraph_base_react"  # Add other priority agents here as needed
- ]
+ PRIORITY_AGENTS = ["adk_base", "agentic_rag", "langgraph_base_react"]

  agents_list = []
- priority_agents = []
+ priority_agents_dict = dict.fromkeys(PRIORITY_AGENTS)  # Track priority agents
  agents_dir = pathlib.Path(__file__).parent.parent.parent.parent / "agents"

  for agent_dir in agents_dir.iterdir():
@@ -109,16 +110,21 @@ def get_available_agents(deployment_target: str | None = None) -> dict:

  # Add to priority list or regular list based on agent name
  if agent_name in PRIORITY_AGENTS:
- priority_agents.append(agent_info)
+ priority_agents_dict[agent_name] = agent_info
  else:
  agents_list.append(agent_info)
  except Exception as e:
  logging.warning(f"Could not load agent from {agent_dir}: {e}")

- # Only sort the non-priority agents
+ # Sort the non-priority agents
  agents_list.sort(key=lambda x: x["name"])

- # Combine priority agents with regular agents (no sorting of priority_agents)
+ # Create priority agents list in the exact order specified
+ priority_agents = [
+     info for name, info in priority_agents_dict.items() if info is not None
+ ]
+
+ # Combine priority agents with regular agents
  combined_agents = priority_agents + agents_list

  # Convert to numbered dictionary starting from 1
@@ -497,29 +503,27 @@ def process_template(
  extra_deps = template_config.get("settings", {}).get(
  "extra_dependencies", []
  )
- otel_instrumentations = get_otel_instrumentations(dependencies=extra_deps)
-
  # Get frontend type from template config
  frontend_type = template_config.get("settings", {}).get(
  "frontend_type", DEFAULT_FRONTEND
  )
-
+ tags = template_config.get("settings", {}).get("tags", ["None"])
  cookiecutter_config = {
  "project_name": "my-project",
  "agent_name": agent_name,
  "package_version": get_current_version(),
  "agent_description": template_config.get("description", ""),
+ "tags": tags,
  "deployment_target": deployment_target or "",
  "frontend_type": frontend_type,
  "extra_dependencies": [extra_deps],
- "otel_instrumentations": otel_instrumentations,
  "data_ingestion": include_data_ingestion, # Use explicit flag for cookiecutter
  "datastore_type": datastore if datastore else "",
  "_copy_without_render": [
  "*.ipynb", # Don't render notebooks
  "*.json", # Don't render JSON files
  "frontend/*", # Don't render frontend directory
- "tests/*", # Don't render tests directory
+ # "tests/*", # Don't render tests directory
  "notebooks/*", # Don't render notebooks directory
  ".git/*", # Don't render git directory
  "__pycache__/*", # Don't render cache
@@ -566,6 +570,17 @@ def process_template(
  shutil.copytree(output_dir, final_destination, dirs_exist_ok=True)
  logging.debug(f"Project successfully created at {final_destination}")

+ # Delete appropriate files based on ADK tag
+ if "adk" in tags:
+     files_to_delete = [final_destination / f for f in NON_ADK_FILES]
+ else:
+     files_to_delete = [final_destination / f for f in ADK_FILES]
+
+ for file_path in files_to_delete:
+     if file_path.exists():
+         file_path.unlink()
+         logging.debug(f"Deleted {file_path}")
+
  # After copying template files, handle the lock file
  if deployment_target:
  # Get the source lock file path
@@ -715,16 +730,3 @@ def copy_deployment_files(
  )
  else:
  logging.warning(f"Deployment target directory not found: {deployment_path}")
-
-
- def get_otel_instrumentations(dependencies: list) -> list[list[str]]:
-     """Returns OpenTelemetry instrumentation statements for enabled dependencies."""
-     otel_deps = {
-         "langgraph": "Instruments.LANGCHAIN",
-         "crewai": "Instruments.CREW",
-     }
-     imports = []
-     for dep in dependencies:
-         if any(otel_dep in dep for otel_dep in otel_deps):
-             imports.append(otel_deps[next(key for key in otel_deps if key in dep)])
-     return [imports]
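The ordering change above relies on `dict.fromkeys` preserving insertion order, so the priority agents always list first in the exact order given. A small self-contained sketch of that listing behavior (the discovered agent names here are hypothetical):

```python
# Sketch of the priority-ordering logic from get_available_agents: priority agents
# appear first in PRIORITY_AGENTS order, the remaining agents alphabetically.
PRIORITY_AGENTS = ["adk_base", "agentic_rag", "langgraph_base_react"]
discovered = ["zeta_agent", "langgraph_base_react", "adk_base", "alpha_agent"]  # hypothetical scan

priority_slots = dict.fromkeys(PRIORITY_AGENTS)  # name -> None, order preserved
others = []
for name in discovered:
    agent_info = {"name": name}
    if name in PRIORITY_AGENTS:
        priority_slots[name] = agent_info
    else:
        others.append(agent_info)

others.sort(key=lambda x: x["name"])
ordered = [info for info in priority_slots.values() if info is not None] + others
print([a["name"] for a in ordered])
# ['adk_base', 'langgraph_base_react', 'alpha_agent', 'zeta_agent']
```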
src/data_ingestion/README.md CHANGED
@@ -1,84 +1,71 @@
+ {%- if cookiecutter.datastore_type == "vertex_ai_search" -%}
+ {%- set datastore_service_name = "Vertex AI Search" -%}
+ {%- elif cookiecutter.datastore_type == "vertex_ai_vector_search" -%}
+ {%- set datastore_service_name = "Vertex AI Vector Search" -%}
+ {%- else -%}
+ {%- set datastore_service_name = "Your Configured Datastore" -%}
+ {%- endif -%}
+
  # Data Ingestion Pipeline

- This pipeline automates the ingestion of data into Vertex AI{%- if cookiecutter.datastore_type == "vertex_ai_vector_search" %} Vector{%- endif %} Search, streamlining the process of building Retrieval Augmented Generation (RAG) applications.
+ This pipeline automates the ingestion of data into {{ datastore_service_name }}, streamlining the process of building Retrieval Augmented Generation (RAG) applications.

- It orchestrates the complete workflow: loading data, chunking it into manageable segments, generating embeddings using Vertex AI Embeddings, and importing the processed data into your Vertex AI{%- if cookiecutter.datastore_type == "vertex_ai_vector_search" %} Vector{%- endif %} Search datastore.
+ It orchestrates the complete workflow: loading data, chunking it into manageable segments, generating embeddings using Vertex AI Embeddings, and importing the processed data into your {{ datastore_service_name }} datastore.

  You can trigger the pipeline for an initial data load or schedule it to run periodically, ensuring your search index remains current. Vertex AI Pipelines provides the orchestration and monitoring capabilities for this process.

  ## Prerequisites

- Before running the data ingestion pipeline, ensure you have completed the following:
+ Before running any commands, ensure you have set your Google Cloud Project ID as an environment variable. This variable will be used by the subsequent `make` commands.

- 1. **Set up Dev Terraform:** Follow the instructions in the parent [deployment/README.md - Dev Deployment section](../deployment/README.md#dev-deployment) to provision the necessary resources in your development environment using Terraform. This includes deploying a datastore and configuring the required permissions.
+ ```bash
+ export PROJECT_ID="YOUR_PROJECT_ID"
+ ```
+ Replace `"YOUR_PROJECT_ID"` with your actual Google Cloud Project ID.

- ## Running the Data Ingestion Pipeline
+ Now, you can set up the development environment:

- After setting up the Terraform infrastructure, you can test the data ingestion pipeline.
+ 1. **Set up Dev Environment:** Use the following command from the root of the repository to provision the necessary resources in your development environment using Terraform. This includes deploying a datastore and configuring the required permissions.

- > **Note:** The initial pipeline execution might take longer as your project is configured for Vertex AI Pipelines.
+ ```bash
+ make setup-dev-env
+ ```
+ This command requires `terraform` to be installed and configured.

- **Steps:**
+ ## Running the Data Ingestion Pipeline

- **a. Navigate to the `data_ingestion` directory:**
+ After setting up the infrastructure using `make setup-dev-env`, you can run the data ingestion pipeline.

- ```bash
- cd data_ingestion
- ```
+ > **Note:** The initial pipeline execution might take longer as your project is configured for Vertex AI Pipelines.

- **b. Install Dependencies:**
+ **Steps:**

- Install the required Python dependencies using uv:
+ **a. Execute the Pipeline:**
+ Run the following command from the root of the repository. Ensure the `PROJECT_ID` environment variable is still set in your current shell session (as configured in Prerequisites).

  ```bash
- uv sync --frozen
+ make data-ingestion
  ```

- **c. Execute the Pipeline:**
-
- Run the following command to execute the data ingestion pipeline. Replace the placeholder values with your actual project details.
+ This command handles installing dependencies (if needed via `make install`) and submits the pipeline job using the configuration derived from your project setup. The specific parameters passed to the underlying script depend on the `datastore_type` selected during project generation:
  {%- if cookiecutter.datastore_type == "vertex_ai_search" %}
- ```bash
- PROJECT_ID="YOUR_PROJECT_ID"
- REGION="us-central1"
- DATA_STORE_REGION="us"
- uv run data_ingestion_pipeline/submit_pipeline.py \
- --project-id=$PROJECT_ID \
- --region=$REGION \
- --data-store-region=$DATA_STORE_REGION \
- --data-store-id="sample-datastore" \
- --service-account="{{cookiecutter.project_name}}-rag@$PROJECT_ID.iam.gserviceaccount.com" \
- --pipeline-root="gs://$PROJECT_ID-{{cookiecutter.project_name}}-rag" \
- --pipeline-name="data-ingestion-pipeline"
- ```
+ * It will use parameters like `--data-store-id`, `--data-store-region`.
  {%- elif cookiecutter.datastore_type == "vertex_ai_vector_search" %}
- ```bash
- PROJECT_ID="YOUR_PROJECT_ID"
- REGION="us-central1"
- VECTOR_SEARCH_INDEX="YOUR_VECTOR_SEARCH_INDEX"
- VECTOR_SEARCH_INDEX_ENDPOINT="YOUR_VECTOR_SEARCH_INDEX_ENDPOINT"
- uv run data_ingestion_pipeline/submit_pipeline.py \
- --project-id=$PROJECT_ID \
- --region=$REGION \
- --vector-search-index=$VECTOR_SEARCH_INDEX \
- --vector-search-index-endpoint=$VECTOR_SEARCH_INDEX_ENDPOINT \
- --service-account="{{cookiecutter.project_name}}-rag@$PROJECT_ID.iam.gserviceaccount.com" \
- --pipeline-root="gs://$PROJECT_ID-{{cookiecutter.project_name}}-rag" \
- --pipeline-name="data-ingestion-pipeline"
- ```
+ * It will use parameters like `--vector-search-index`, `--vector-search-index-endpoint`, `--vector-search-data-bucket-name`.
  {%- endif %}
+ * Common parameters include `--project-id`, `--region`, `--service-account`, `--pipeline-root`, and `--pipeline-name`.

- **d. Pipeline Scheduling and Execution:**
+ **b. Pipeline Scheduling:**

- The pipeline, by default, executes immediately. To schedule the pipeline for periodic execution without immediate initiation, use the `--schedule-only` flag in conjunction with `--cron-schedule`. If a schedule doesn't exist, it will be created. If a schedule already exists, its cron expression will be updated to the provided value.
+ The `make data-ingestion` command triggers an immediate pipeline run. For production environments, the underlying `submit_pipeline.py` script also supports scheduling options with flags like `--schedule-only` and `--cron-schedule` for periodic execution.

- **e. Monitoring Pipeline Progress:**
+ **c. Monitoring Pipeline Progress:**

- The pipeline's configuration and execution status will be printed to the console. For detailed monitoring, use the Vertex AI Pipelines dashboard in the Google Cloud Console. This dashboard provides real-time insights into the pipeline's progress, logs, and any potential issues.
+ The pipeline's configuration and execution status link will be printed to the console upon submission. For detailed monitoring, use the Vertex AI Pipelines dashboard in the Google Cloud Console.

  ## Testing Your RAG Application

- Once the data ingestion pipeline completes successfully, you can test your RAG application with Vertex AI{%- if cookiecutter.datastore_type == "vertex_ai_vector_search" %} Vector{%- endif %} Search.
+ Once the data ingestion pipeline completes successfully, you can test your RAG application with {{ datastore_service_name }}.
  {%- if cookiecutter.datastore_type == "vertex_ai_search" %}
  > **Troubleshooting:** If you encounter the error `"google.api_core.exceptions.InvalidArgument: 400 The embedding field path: embedding not found in schema"` after the initial data ingestion, wait a few minutes and try again. This delay allows Vertex AI Search to fully index the ingested data.
  {%- endif %}
src/data_ingestion/data_ingestion_pipeline/components/ingest_data.py CHANGED
@@ -109,7 +109,8 @@ def ingest_data(
  operation = schema_client.update_schema(
  request=discoveryengine.UpdateSchemaRequest(
  schema=new_schema, allow_missing=True
- )
+ ),
+ timeout=1800,
  )
  logging.info(f"Waiting for schema update operation: {operation.operation.name}")
  operation.result()