agent-starter-pack 0.2.2__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of agent-starter-pack might be problematic.

Files changed (72)
  1. {agent_starter_pack-0.2.2.dist-info → agent_starter_pack-0.3.0.dist-info}/METADATA +14 -16
  2. {agent_starter_pack-0.2.2.dist-info → agent_starter_pack-0.3.0.dist-info}/RECORD +69 -54
  3. agents/adk_base/README.md +14 -0
  4. agents/adk_base/app/agent.py +66 -0
  5. agents/adk_base/notebooks/adk_app_testing.ipynb +305 -0
  6. agents/adk_base/template/.templateconfig.yaml +21 -0
  7. agents/adk_base/tests/integration/test_agent.py +58 -0
  8. agents/agentic_rag/README.md +1 -0
  9. agents/agentic_rag/app/agent.py +44 -89
  10. agents/agentic_rag/app/templates.py +0 -25
  11. agents/agentic_rag/notebooks/adk_app_testing.ipynb +305 -0
  12. agents/agentic_rag/template/.templateconfig.yaml +3 -1
  13. agents/agentic_rag/tests/integration/test_agent.py +34 -27
  14. agents/langgraph_base_react/README.md +1 -1
  15. agents/langgraph_base_react/template/.templateconfig.yaml +1 -1
  16. src/base_template/Makefile +15 -4
  17. src/base_template/README.md +8 -2
  18. src/base_template/app/__init__.py +3 -0
  19. src/base_template/app/utils/tracing.py +11 -1
  20. src/base_template/app/utils/typing.py +54 -4
  21. src/base_template/deployment/README.md +4 -1
  22. src/base_template/deployment/cd/deploy-to-prod.yaml +3 -3
  23. src/base_template/deployment/cd/staging.yaml +4 -4
  24. src/base_template/deployment/ci/pr_checks.yaml +1 -1
  25. src/base_template/deployment/terraform/build_triggers.tf +3 -0
  26. src/base_template/deployment/terraform/dev/variables.tf +4 -0
  27. src/base_template/deployment/terraform/dev/vars/env.tfvars +0 -3
  28. src/base_template/deployment/terraform/variables.tf +4 -0
  29. src/base_template/deployment/terraform/vars/env.tfvars +0 -4
  30. src/base_template/pyproject.toml +5 -3
  31. src/{deployment_targets/agent_engine → base_template}/tests/unit/test_dummy.py +2 -1
  32. src/cli/commands/create.py +45 -11
  33. src/cli/commands/setup_cicd.py +25 -6
  34. src/cli/utils/gcp.py +1 -1
  35. src/cli/utils/template.py +27 -25
  36. src/data_ingestion/README.md +37 -50
  37. src/data_ingestion/data_ingestion_pipeline/components/ingest_data.py +2 -1
  38. src/deployment_targets/agent_engine/app/agent_engine_app.py +68 -22
  39. src/deployment_targets/agent_engine/app/utils/gcs.py +1 -1
  40. src/deployment_targets/agent_engine/tests/integration/test_agent_engine_app.py +63 -0
  41. src/deployment_targets/agent_engine/tests/load_test/load_test.py +9 -2
  42. src/deployment_targets/cloud_run/Dockerfile +1 -1
  43. src/deployment_targets/cloud_run/app/server.py +41 -15
  44. src/deployment_targets/cloud_run/tests/integration/test_server_e2e.py +60 -3
  45. src/deployment_targets/cloud_run/tests/load_test/README.md +1 -1
  46. src/deployment_targets/cloud_run/tests/load_test/load_test.py +57 -24
  47. src/frontends/live_api_react/frontend/package-lock.json +3 -3
  48. src/frontends/streamlit/frontend/utils/stream_handler.py +3 -3
  49. src/frontends/streamlit_adk/frontend/side_bar.py +214 -0
  50. src/frontends/streamlit_adk/frontend/streamlit_app.py +314 -0
  51. src/frontends/streamlit_adk/frontend/style/app_markdown.py +37 -0
  52. src/frontends/streamlit_adk/frontend/utils/chat_utils.py +84 -0
  53. src/frontends/streamlit_adk/frontend/utils/local_chat_history.py +110 -0
  54. src/frontends/streamlit_adk/frontend/utils/message_editing.py +61 -0
  55. src/frontends/streamlit_adk/frontend/utils/multimodal_utils.py +223 -0
  56. src/frontends/streamlit_adk/frontend/utils/stream_handler.py +311 -0
  57. src/frontends/streamlit_adk/frontend/utils/title_summary.py +129 -0
  58. src/resources/locks/uv-adk_base-agent_engine.lock +5335 -0
  59. src/resources/locks/uv-adk_base-cloud_run.lock +5927 -0
  60. src/resources/locks/uv-agentic_rag-agent_engine.lock +939 -732
  61. src/resources/locks/uv-agentic_rag-cloud_run.lock +1087 -907
  62. src/resources/locks/uv-crewai_coding_crew-agent_engine.lock +778 -671
  63. src/resources/locks/uv-crewai_coding_crew-cloud_run.lock +852 -753
  64. src/resources/locks/uv-langgraph_base_react-agent_engine.lock +665 -591
  65. src/resources/locks/uv-langgraph_base_react-cloud_run.lock +842 -743
  66. src/resources/locks/uv-live_api-cloud_run.lock +830 -731
  67. agents/agentic_rag/notebooks/evaluating_langgraph_agent.ipynb +0 -1561
  68. src/base_template/tests/unit/test_utils/test_tracing_exporter.py +0 -140
  69. src/deployment_targets/cloud_run/tests/unit/test_server.py +0 -124
  70. {agent_starter_pack-0.2.2.dist-info → agent_starter_pack-0.3.0.dist-info}/WHEEL +0 -0
  71. {agent_starter_pack-0.2.2.dist-info → agent_starter_pack-0.3.0.dist-info}/entry_points.txt +0 -0
  72. {agent_starter_pack-0.2.2.dist-info → agent_starter_pack-0.3.0.dist-info}/licenses/LICENSE +0 -0
src/base_template/deployment/cd/deploy-to-prod.yaml CHANGED
@@ -20,7 +20,7 @@ steps:
  args:
  - -c
  - |
- cd data_ingestion && pip install uv --user && uv sync --frozen && \
+ cd data_ingestion && pip install uv==0.6.12 --user && uv sync --frozen && \
  uv run python data_ingestion_pipeline/submit_pipeline.py
  env:
  - "PIPELINE_ROOT=${_PIPELINE_GCS_ROOT}"
@@ -75,7 +75,7 @@ steps:
  args:
  - "-c"
  - |
- pip install uv --user && uv sync --frozen
+ pip install uv==0.6.12 --user && uv sync --frozen
  env:
  - 'PATH=/usr/local/bin:/usr/bin:~/.local/bin'

@@ -85,7 +85,7 @@ steps:
  args:
  - "-c"
  - |
- uv export --no-hashes --no-sources --no-header --no-emit-project --frozen > .requirements.txt
+ uv export --no-hashes --no-sources --no-header --no-dev --no-emit-project --no-annotate --frozen > .requirements.txt
  uv run app/agent_engine_app.py \
  --project ${_PROD_PROJECT_ID} \
  --location ${_REGION} \
src/base_template/deployment/cd/staging.yaml CHANGED
@@ -20,7 +20,7 @@ steps:
  args:
  - -c
  - |
- cd data_ingestion && pip install uv --user && uv sync --frozen && \
+ cd data_ingestion && pip install uv==0.6.12 --user && uv sync --frozen && \
  uv run python data_ingestion_pipeline/submit_pipeline.py
  env:
  - "PIPELINE_ROOT=${_PIPELINE_GCS_ROOT}"
@@ -108,7 +108,7 @@ steps:
  args:
  - "-c"
  - |
- pip install uv --user && uv sync --frozen
+ pip install uv==0.6.12 --user && uv sync --frozen
  env:
  - 'PATH=/usr/local/bin:/usr/bin:~/.local/bin'

@@ -118,7 +118,7 @@ steps:
  args:
  - "-c"
  - |
- uv export --no-hashes --no-sources --no-header --no-emit-project --frozen > .requirements.txt
+ uv export --no-hashes --no-sources --no-header --no-dev --no-emit-project --no-annotate --frozen > .requirements.txt
  uv run app/agent_engine_app.py \
  --project ${_STAGING_PROJECT_ID} \
  --location ${_REGION} \
@@ -146,7 +146,7 @@ steps:
  {%- if cookiecutter.deployment_target == 'cloud_run' %}
  export _ID_TOKEN=$(cat id_token.txt)
  export _STAGING_URL=$(cat staging_url.txt)
- pip install uv --user && uv sync --frozen
+ pip install uv==0.6.12 --user && uv sync --frozen
  {%- elif cookiecutter.deployment_target == 'agent_engine' %}
  export _AUTH_TOKEN=$(cat auth_token.txt)
  {%- endif %}
src/base_template/deployment/ci/pr_checks.yaml CHANGED
@@ -20,7 +20,7 @@ steps:
  args:
  - "-c"
  - |
- pip install uv --user && uv sync --frozen
+ pip install uv==0.6.12 --user && uv sync --frozen
  env:
  - 'PATH=/usr/local/bin:/usr/bin:~/.local/bin'

src/base_template/deployment/terraform/build_triggers.tf CHANGED
@@ -38,6 +38,7 @@ resource "google_cloudbuild_trigger" "pr_checks" {
  "data_ingestion/**",
  {% endif %}
  ]
+ include_build_logs = "INCLUDE_BUILD_LOGS_WITH_STATUS"
  depends_on = [resource.google_project_service.cicd_services, resource.google_project_service.shared_services]
  }

@@ -64,6 +65,7 @@ resource "google_cloudbuild_trigger" "cd_pipeline" {
  "deployment/**",
  "uv.lock"
  ]
+ include_build_logs = "INCLUDE_BUILD_LOGS_WITH_STATUS"
  substitutions = {
  _STAGING_PROJECT_ID = var.staging_project_id
  _BUCKET_NAME_LOAD_TEST_RESULTS = resource.google_storage_bucket.bucket_load_test_results.name
@@ -104,6 +106,7 @@ resource "google_cloudbuild_trigger" "deploy_to_prod_pipeline" {
  repository = "projects/${var.cicd_runner_project_id}/locations/${var.region}/connections/${var.host_connection_name}/repositories/${var.repository_name}"
  }
  filename = "deployment/cd/deploy-to-prod.yaml"
+ include_build_logs = "INCLUDE_BUILD_LOGS_WITH_STATUS"
  approval_config {
  approval_required = true
  }
src/base_template/deployment/terraform/dev/variables.tf CHANGED
@@ -32,7 +32,11 @@ variable "region" {
  variable "telemetry_logs_filter" {
  type = string
  description = "Log Sink filter for capturing telemetry data. Captures logs with the `traceloop.association.properties.log_type` attribute set to `tracing`."
+ {%- if "adk" in cookiecutter.tags %}
+ default = "labels.service_name=\"{{cookiecutter.project_name}}\" labels.type=\"agent_telemetry\""
+ {%- else %}
  default = "jsonPayload.attributes.\"traceloop.association.properties.log_type\"=\"tracing\" jsonPayload.resource.attributes.\"service.name\"=\"{{cookiecutter.project_name}}\""
+ {%- endif %}
  }

  variable "feedback_logs_filter" {
src/base_template/deployment/terraform/dev/vars/env.tfvars CHANGED
@@ -7,9 +7,6 @@ dev_project_id = "your-dev-project-id"
  # The Google Cloud region you will use to deploy the infrastructure
  region = "us-central1"

- telemetry_logs_filter = "jsonPayload.attributes.\"traceloop.association.properties.log_type\"=\"tracing\" jsonPayload.resource.attributes.\"service.name\"=\"{{cookiecutter.project_name}}\""
- feedback_logs_filter = "jsonPayload.log_type=\"feedback\""
-
  {%- if cookiecutter.data_ingestion %}
  {%- if cookiecutter.datastore_type == "vertex_ai_search" %}
  # The value can only be one of "global", "us" and "eu".
src/base_template/deployment/terraform/variables.tf CHANGED
@@ -52,7 +52,11 @@ variable "repository_name" {
  variable "telemetry_logs_filter" {
  type = string
  description = "Log Sink filter for capturing telemetry data. Captures logs with the `traceloop.association.properties.log_type` attribute set to `tracing`."
+ {%- if "adk" in cookiecutter.tags %}
+ default = "labels.service_name=\"{{cookiecutter.project_name}}\" labels.type=\"agent_telemetry\""
+ {%- else %}
  default = "jsonPayload.attributes.\"traceloop.association.properties.log_type\"=\"tracing\" jsonPayload.resource.attributes.\"service.name\"=\"{{cookiecutter.project_name}}\""
+ {%- endif %}
  }

  variable "feedback_logs_filter" {
src/base_template/deployment/terraform/vars/env.tfvars CHANGED
@@ -19,10 +19,6 @@ repository_name = "repo-{{cookiecutter.project_name}}"
  # The Google Cloud region you will use to deploy the infrastructure
  region = "us-central1"

- telemetry_logs_filter = "jsonPayload.attributes.\"traceloop.association.properties.log_type\"=\"tracing\" jsonPayload.resource.attributes.\"service.name\"=\"{{cookiecutter.project_name}}\""
-
- feedback_logs_filter = "jsonPayload.log_type=\"feedback\""
-
  {%- if cookiecutter.data_ingestion %}
  pipeline_cron_schedule = "0 0 * * 0"

src/base_template/pyproject.toml CHANGED
@@ -9,16 +9,18 @@ dependencies = [
  {%- for dep in cookiecutter.extra_dependencies %}
  "{{ dep }}",
  {%- endfor %}
- "langchain-core~=0.3.9",
  "opentelemetry-exporter-gcp-trace~=1.9.0",
+ {%- if "adk" not in cookiecutter.tags %}
+ "langchain-core~=0.3.9",
  "traceloop-sdk~=0.38.7",
+ {%- endif %}
  "google-cloud-logging~=3.11.4",
  {%- if cookiecutter.deployment_target == 'cloud_run' %}
- "google-cloud-aiplatform[evaluation]~=1.81.0",
+ "google-cloud-aiplatform[evaluation]~=1.88.0",
  "fastapi~=0.115.8",
  "uvicorn~=0.34.0"
  {%- elif cookiecutter.deployment_target == 'agent_engine' %}
- "google-cloud-aiplatform[evaluation,reasoningengine]~=1.81.0"
+ "google-cloud-aiplatform[evaluation,agent-engines]~=1.88.0"
  {%- endif %}
  ]
  {% if cookiecutter.deployment_target == 'cloud_run' %}
src/{deployment_targets/agent_engine → base_template}/tests/unit/test_dummy.py CHANGED
@@ -11,9 +11,10 @@
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
-
  """
  You can add your unit tests here.
+ This is where you test your business logic, including agent functionality,
+ data processing, and other core components of your application.
  """

src/cli/commands/create.py CHANGED
@@ -37,6 +37,40 @@ from ..utils.template import (
  console = Console()


+ def normalize_project_name(project_name: str) -> str:
+     """Normalize project name for better compatibility with cloud resources and tools."""
+
+     needs_normalization = (
+         any(char.isupper() for char in project_name) or "_" in project_name
+     )
+
+     if needs_normalization:
+         normalized_name = project_name
+         console.print(
+             "Note: Project names are normalized (lowercase, hyphens only) for better compatibility with cloud resources and tools.",
+             style="dim",
+         )
+         if any(char.isupper() for char in normalized_name):
+             normalized_name = normalized_name.lower()
+             console.print(
+                 f"Info: Converting to lowercase for compatibility: '{project_name}' -> '{normalized_name}'",
+                 style="bold yellow",
+             )
+
+         if "_" in normalized_name:
+             # Capture the name state before this specific change
+             name_before_hyphenation = normalized_name
+             normalized_name = normalized_name.replace("_", "-")
+             console.print(
+                 f"Info: Replacing underscores with hyphens for compatibility: '{name_before_hyphenation}' -> '{normalized_name}'",
+                 style="yellow",
+             )
+
+         return normalized_name
+
+     return project_name
+
+
  @click.command()
  @click.pass_context
  @click.argument("project_name")
@@ -110,15 +144,7 @@ def create(
  )
  return

- # Convert project name to lowercase
- if any(char.isupper() for char in project_name):
-     original_name = project_name
-     project_name = project_name.lower()
-     console.print(
-         f"Warning: Project name '{original_name}' contains uppercase characters. "
-         f"Converting to lowercase: '{project_name}'",
-         style="bold yellow",
-     )
+ project_name = normalize_project_name(project_name)

  # Setup debug logging if enabled
  if debug:
@@ -280,10 +306,10 @@ def create(
  f"This agent uses a datastore for grounded responses.\n"
  f"The agent will work without data, but for optimal results:\n"
  f"1. Set up dev environment:\n"
- f" [white italic]`export PROJECT_ID={project_id} && cd {cd_path} && make setup-dev-env`[/white italic]\n\n"
+ f" [white italic]export PROJECT_ID={project_id} && cd {cd_path} && make setup-dev-env[/white italic]\n\n"
  f" See deployment/README.md for more info\n"
  f"2. Run the data ingestion pipeline:\n"
- f" [white italic]`export PROJECT_ID={project_id} && cd {cd_path} && make data-ingestion`[/white italic]\n\n"
+ f" [white italic]export PROJECT_ID={project_id} && cd {cd_path} && make data-ingestion[/white italic]\n\n"
  f" See data_ingestion/README.md for more info\n"
  f"[bold white]=================================[/bold white]\n"
  )
@@ -621,6 +647,14 @@ def replace_region_in_files(
  "_DATA_STORE_REGION: us", f"_DATA_STORE_REGION: {data_store_region}"
  )
  modified = True
+ elif '"DATA_STORE_REGION", "us"' in content:
+     if debug:
+         logging.debug(f"Replacing DATA_STORE_REGION in {file_path}")
+     content = content.replace(
+         '"DATA_STORE_REGION", "us"',
+         f'"DATA_STORE_REGION", "{data_store_region}"',
+     )
+     modified = True

  if modified:
  file_path.write_text(content)
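For context, the normalization rule introduced above boils down to lowercasing the name and replacing underscores with hyphens. A minimal standalone sketch of that rule (illustrative only; the real helper also prints informational messages via rich's `Console`):

```python
# Minimal sketch of the project-name normalization rule added in create.py.
# The actual helper keeps the same result but emits console notes along the way.
def normalize_project_name(project_name: str) -> str:
    return project_name.lower().replace("_", "-")

assert normalize_project_name("My_Agent_Project") == "my-agent-project"
assert normalize_project_name("already-normalized") == "already-normalized"
```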
src/cli/commands/setup_cicd.py CHANGED
@@ -356,9 +356,11 @@ console = Console()

  @click.command()
  @click.option("--dev-project", help="Development project ID")
- @click.option("--staging-project", required=True, help="Staging project ID")
- @click.option("--prod-project", required=True, help="Production project ID")
- @click.option("--cicd-project", required=True, help="CICD project ID")
+ @click.option("--staging-project", help="Staging project ID")
+ @click.option("--prod-project", help="Production project ID")
+ @click.option(
+     "--cicd-project", help="CICD project ID (defaults to prod project if not specified)"
+ )
  @click.option("--region", default="us-central1", help="GCP region")
  @click.option("--repository-name", help="Repository name (optional)")
  @click.option(
@@ -402,9 +404,9 @@ console = Console()
  )
  def setup_cicd(
  dev_project: str | None,
- staging_project: str,
- prod_project: str,
- cicd_project: str,
+ staging_project: str | None,
+ prod_project: str | None,
+ cicd_project: str | None,
  region: str,
  repository_name: str | None,
  repository_owner: str | None,
@@ -426,6 +428,20 @@ def setup_cicd(
  "Make sure you are in the folder created by agent-starter-pack."
  )

+ # Prompt for staging and prod projects if not provided
+ if staging_project is None:
+     staging_project = click.prompt(
+         "Enter your staging project ID (where tests will be run)", type=str
+     )
+
+ if prod_project is None:
+     prod_project = click.prompt("Enter your production project ID", type=str)
+
+ # If cicd_project is not provided, default to prod_project
+ if cicd_project is None:
+     cicd_project = prod_project
+     console.print(f"Using production project '{prod_project}' for CI/CD resources")
+
  console.print(
  "\n⚠️ WARNING: The setup-cicd command is experimental and may have unexpected behavior.",
  style="bold yellow",
@@ -799,6 +815,9 @@ def setup_cicd(

  repo_url = f"https://github.com/{github_username}/{config.repository_name}"
  cloud_build_url = f"https://console.cloud.google.com/cloud-build/builds?project={config.cicd_project_id}"
+ # Sleep to allow resources to propagate
+ console.print("\n⏳ Waiting for resources to propagate...")
+ time.sleep(10)

  # Print final summary
  print_cicd_summary(config, github_username, repo_url, cloud_build_url)
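A rough standalone sketch of the new prompt-and-default flow for project IDs, using click directly (command name and output are placeholders, not the actual CLI):

```python
import click

@click.command()
@click.option("--staging-project", default=None)
@click.option("--prod-project", default=None)
@click.option("--cicd-project", default=None)
def setup(staging_project, prod_project, cicd_project):
    # Mirrors the new behavior: prompt for missing IDs, default CI/CD to prod.
    if staging_project is None:
        staging_project = click.prompt(
            "Enter your staging project ID (where tests will be run)", type=str
        )
    if prod_project is None:
        prod_project = click.prompt("Enter your production project ID", type=str)
    if cicd_project is None:
        cicd_project = prod_project
        click.echo(f"Using production project '{prod_project}' for CI/CD resources")
    click.echo(f"staging={staging_project} prod={prod_project} cicd={cicd_project}")

if __name__ == "__main__":
    setup()
```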
src/cli/utils/gcp.py CHANGED
@@ -45,7 +45,7 @@ def get_dummy_request(project_id: str, location: str) -> CountTokensRequest:
  """Creates a simple test request for Gemini."""
  return CountTokensRequest(
  contents=[{"role": "user", "parts": [{"text": "Hi"}]}],
- endpoint=f"projects/{project_id}/locations/{location}/publishers/google/models/gemini-1.5-flash-002",
+ endpoint=f"projects/{project_id}/locations/{location}/publishers/google/models/gemini-2.0-flash",
  )

src/cli/utils/template.py CHANGED
@@ -29,6 +29,9 @@ from src.cli.utils.version import get_current_version

  from .datastores import DATASTORES

+ ADK_FILES = ["app/__init__.py"]
+ NON_ADK_FILES: list[str] = []
+

  @dataclass
  class TemplateConfig:
@@ -77,12 +80,10 @@ def get_available_agents(deployment_target: str | None = None) -> dict:
  deployment_target: Optional deployment target to filter agents
  """
  # Define priority agents that should appear first
- PRIORITY_AGENTS = [
-     "langgraph_base_react"  # Add other priority agents here as needed
- ]
+ PRIORITY_AGENTS = ["adk_base", "agentic_rag", "langgraph_base_react"]

  agents_list = []
- priority_agents = []
+ priority_agents_dict = dict.fromkeys(PRIORITY_AGENTS)  # Track priority agents
  agents_dir = pathlib.Path(__file__).parent.parent.parent.parent / "agents"

  for agent_dir in agents_dir.iterdir():
@@ -109,16 +110,21 @@ def get_available_agents(deployment_target: str | None = None) -> dict:

  # Add to priority list or regular list based on agent name
  if agent_name in PRIORITY_AGENTS:
- priority_agents.append(agent_info)
+ priority_agents_dict[agent_name] = agent_info
  else:
  agents_list.append(agent_info)
  except Exception as e:
  logging.warning(f"Could not load agent from {agent_dir}: {e}")

- # Only sort the non-priority agents
+ # Sort the non-priority agents
  agents_list.sort(key=lambda x: x["name"])

- # Combine priority agents with regular agents (no sorting of priority_agents)
+ # Create priority agents list in the exact order specified
+ priority_agents = [
+     info for name, info in priority_agents_dict.items() if info is not None
+ ]
+
+ # Combine priority agents with regular agents
  combined_agents = priority_agents + agents_list

  # Convert to numbered dictionary starting from 1
@@ -497,29 +503,27 @@ def process_template(
  extra_deps = template_config.get("settings", {}).get(
  "extra_dependencies", []
  )
- otel_instrumentations = get_otel_instrumentations(dependencies=extra_deps)
-
  # Get frontend type from template config
  frontend_type = template_config.get("settings", {}).get(
  "frontend_type", DEFAULT_FRONTEND
  )
-
+ tags = template_config.get("settings", {}).get("tags", ["None"])
  cookiecutter_config = {
  "project_name": "my-project",
  "agent_name": agent_name,
  "package_version": get_current_version(),
  "agent_description": template_config.get("description", ""),
+ "tags": tags,
  "deployment_target": deployment_target or "",
  "frontend_type": frontend_type,
  "extra_dependencies": [extra_deps],
- "otel_instrumentations": otel_instrumentations,
  "data_ingestion": include_data_ingestion, # Use explicit flag for cookiecutter
  "datastore_type": datastore if datastore else "",
  "_copy_without_render": [
  "*.ipynb", # Don't render notebooks
  "*.json", # Don't render JSON files
  "frontend/*", # Don't render frontend directory
- "tests/*", # Don't render tests directory
+ # "tests/*", # Don't render tests directory
  "notebooks/*", # Don't render notebooks directory
  ".git/*", # Don't render git directory
  "__pycache__/*", # Don't render cache
@@ -566,6 +570,17 @@ def process_template(
  shutil.copytree(output_dir, final_destination, dirs_exist_ok=True)
  logging.debug(f"Project successfully created at {final_destination}")

+ # Delete appropriate files based on ADK tag
+ if "adk" in tags:
+     files_to_delete = [final_destination / f for f in NON_ADK_FILES]
+ else:
+     files_to_delete = [final_destination / f for f in ADK_FILES]
+
+ for file_path in files_to_delete:
+     if file_path.exists():
+         file_path.unlink()
+         logging.debug(f"Deleted {file_path}")
+
  # After copying template files, handle the lock file
  if deployment_target:
  # Get the source lock file path
@@ -715,16 +730,3 @@ def copy_deployment_files(
  )
  else:
  logging.warning(f"Deployment target directory not found: {deployment_path}")
-
-
- def get_otel_instrumentations(dependencies: list) -> list[list[str]]:
-     """Returns OpenTelemetry instrumentation statements for enabled dependencies."""
-     otel_deps = {
-         "langgraph": "Instruments.LANGCHAIN",
-         "crewai": "Instruments.CREW",
-     }
-     imports = []
-     for dep in dependencies:
-         if any(otel_dep in dep for otel_dep in otel_deps):
-             imports.append(otel_deps[next(key for key in otel_deps if key in dep)])
-     return [imports]
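The ordering change above relies on `dict.fromkeys` preserving insertion order, so the priority agents always list first in the exact order given. A small self-contained sketch of that listing behavior (the discovered agent names here are hypothetical):

```python
# Sketch of the priority-ordering logic from get_available_agents: priority agents
# appear first in PRIORITY_AGENTS order, the remaining agents alphabetically.
PRIORITY_AGENTS = ["adk_base", "agentic_rag", "langgraph_base_react"]
discovered = ["zeta_agent", "langgraph_base_react", "adk_base", "alpha_agent"]  # hypothetical scan

priority_slots = dict.fromkeys(PRIORITY_AGENTS)  # name -> None, order preserved
others = []
for name in discovered:
    agent_info = {"name": name}
    if name in PRIORITY_AGENTS:
        priority_slots[name] = agent_info
    else:
        others.append(agent_info)

others.sort(key=lambda x: x["name"])
ordered = [info for info in priority_slots.values() if info is not None] + others
print([a["name"] for a in ordered])
# ['adk_base', 'langgraph_base_react', 'alpha_agent', 'zeta_agent']
```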
src/data_ingestion/README.md CHANGED
@@ -1,84 +1,71 @@
+ {%- if cookiecutter.datastore_type == "vertex_ai_search" -%}
+ {%- set datastore_service_name = "Vertex AI Search" -%}
+ {%- elif cookiecutter.datastore_type == "vertex_ai_vector_search" -%}
+ {%- set datastore_service_name = "Vertex AI Vector Search" -%}
+ {%- else -%}
+ {%- set datastore_service_name = "Your Configured Datastore" -%}
+ {%- endif -%}
+
  # Data Ingestion Pipeline

- This pipeline automates the ingestion of data into Vertex AI{%- if cookiecutter.datastore_type == "vertex_ai_vector_search" %} Vector{%- endif %} Search, streamlining the process of building Retrieval Augmented Generation (RAG) applications.
+ This pipeline automates the ingestion of data into {{ datastore_service_name }}, streamlining the process of building Retrieval Augmented Generation (RAG) applications.

- It orchestrates the complete workflow: loading data, chunking it into manageable segments, generating embeddings using Vertex AI Embeddings, and importing the processed data into your Vertex AI{%- if cookiecutter.datastore_type == "vertex_ai_vector_search" %} Vector{%- endif %} Search datastore.
+ It orchestrates the complete workflow: loading data, chunking it into manageable segments, generating embeddings using Vertex AI Embeddings, and importing the processed data into your {{ datastore_service_name }} datastore.

  You can trigger the pipeline for an initial data load or schedule it to run periodically, ensuring your search index remains current. Vertex AI Pipelines provides the orchestration and monitoring capabilities for this process.

  ## Prerequisites

- Before running the data ingestion pipeline, ensure you have completed the following:
+ Before running any commands, ensure you have set your Google Cloud Project ID as an environment variable. This variable will be used by the subsequent `make` commands.

- 1. **Set up Dev Terraform:** Follow the instructions in the parent [deployment/README.md - Dev Deployment section](../deployment/README.md#dev-deployment) to provision the necessary resources in your development environment using Terraform. This includes deploying a datastore and configuring the required permissions.
+ ```bash
+ export PROJECT_ID="YOUR_PROJECT_ID"
+ ```
+ Replace `"YOUR_PROJECT_ID"` with your actual Google Cloud Project ID.

- ## Running the Data Ingestion Pipeline
+ Now, you can set up the development environment:

- After setting up the Terraform infrastructure, you can test the data ingestion pipeline.
+ 1. **Set up Dev Environment:** Use the following command from the root of the repository to provision the necessary resources in your development environment using Terraform. This includes deploying a datastore and configuring the required permissions.

- > **Note:** The initial pipeline execution might take longer as your project is configured for Vertex AI Pipelines.
+ ```bash
+ make setup-dev-env
+ ```
+ This command requires `terraform` to be installed and configured.

- **Steps:**
+ ## Running the Data Ingestion Pipeline

- **a. Navigate to the `data_ingestion` directory:**
+ After setting up the infrastructure using `make setup-dev-env`, you can run the data ingestion pipeline.

- ```bash
- cd data_ingestion
- ```
+ > **Note:** The initial pipeline execution might take longer as your project is configured for Vertex AI Pipelines.

- **b. Install Dependencies:**
+ **Steps:**

- Install the required Python dependencies using uv:
+ **a. Execute the Pipeline:**
+ Run the following command from the root of the repository. Ensure the `PROJECT_ID` environment variable is still set in your current shell session (as configured in Prerequisites).

  ```bash
- uv sync --frozen
+ make data-ingestion
  ```

- **c. Execute the Pipeline:**
-
- Run the following command to execute the data ingestion pipeline. Replace the placeholder values with your actual project details.
+ This command handles installing dependencies (if needed via `make install`) and submits the pipeline job using the configuration derived from your project setup. The specific parameters passed to the underlying script depend on the `datastore_type` selected during project generation:
  {%- if cookiecutter.datastore_type == "vertex_ai_search" %}
- ```bash
- PROJECT_ID="YOUR_PROJECT_ID"
- REGION="us-central1"
- DATA_STORE_REGION="us"
- uv run data_ingestion_pipeline/submit_pipeline.py \
- --project-id=$PROJECT_ID \
- --region=$REGION \
- --data-store-region=$DATA_STORE_REGION \
- --data-store-id="sample-datastore" \
- --service-account="{{cookiecutter.project_name}}-rag@$PROJECT_ID.iam.gserviceaccount.com" \
- --pipeline-root="gs://$PROJECT_ID-{{cookiecutter.project_name}}-rag" \
- --pipeline-name="data-ingestion-pipeline"
- ```
+ * It will use parameters like `--data-store-id`, `--data-store-region`.
  {%- elif cookiecutter.datastore_type == "vertex_ai_vector_search" %}
- ```bash
- PROJECT_ID="YOUR_PROJECT_ID"
- REGION="us-central1"
- VECTOR_SEARCH_INDEX="YOUR_VECTOR_SEARCH_INDEX"
- VECTOR_SEARCH_INDEX_ENDPOINT="YOUR_VECTOR_SEARCH_INDEX_ENDPOINT"
- uv run data_ingestion_pipeline/submit_pipeline.py \
- --project-id=$PROJECT_ID \
- --region=$REGION \
- --vector-search-index=$VECTOR_SEARCH_INDEX \
- --vector-search-index-endpoint=$VECTOR_SEARCH_INDEX_ENDPOINT \
- --service-account="{{cookiecutter.project_name}}-rag@$PROJECT_ID.iam.gserviceaccount.com" \
- --pipeline-root="gs://$PROJECT_ID-{{cookiecutter.project_name}}-rag" \
- --pipeline-name="data-ingestion-pipeline"
- ```
+ * It will use parameters like `--vector-search-index`, `--vector-search-index-endpoint`, `--vector-search-data-bucket-name`.
  {%- endif %}
+ * Common parameters include `--project-id`, `--region`, `--service-account`, `--pipeline-root`, and `--pipeline-name`.

- **d. Pipeline Scheduling and Execution:**
+ **b. Pipeline Scheduling:**

- The pipeline, by default, executes immediately. To schedule the pipeline for periodic execution without immediate initiation, use the `--schedule-only` flag in conjunction with `--cron-schedule`. If a schedule doesn't exist, it will be created. If a schedule already exists, its cron expression will be updated to the provided value.
+ The `make data-ingestion` command triggers an immediate pipeline run. For production environments, the underlying `submit_pipeline.py` script also supports scheduling options with flags like `--schedule-only` and `--cron-schedule` for periodic execution.

- **e. Monitoring Pipeline Progress:**
+ **c. Monitoring Pipeline Progress:**

- The pipeline's configuration and execution status will be printed to the console. For detailed monitoring, use the Vertex AI Pipelines dashboard in the Google Cloud Console. This dashboard provides real-time insights into the pipeline's progress, logs, and any potential issues.
+ The pipeline's configuration and execution status link will be printed to the console upon submission. For detailed monitoring, use the Vertex AI Pipelines dashboard in the Google Cloud Console.

  ## Testing Your RAG Application

- Once the data ingestion pipeline completes successfully, you can test your RAG application with Vertex AI{%- if cookiecutter.datastore_type == "vertex_ai_vector_search" %} Vector{%- endif %} Search.
+ Once the data ingestion pipeline completes successfully, you can test your RAG application with {{ datastore_service_name }}.
  {%- if cookiecutter.datastore_type == "vertex_ai_search" %}
  > **Troubleshooting:** If you encounter the error `"google.api_core.exceptions.InvalidArgument: 400 The embedding field path: embedding not found in schema"` after the initial data ingestion, wait a few minutes and try again. This delay allows Vertex AI Search to fully index the ingested data.
  {%- endif %}
src/data_ingestion/data_ingestion_pipeline/components/ingest_data.py CHANGED
@@ -109,7 +109,8 @@ def ingest_data(
  operation = schema_client.update_schema(
  request=discoveryengine.UpdateSchemaRequest(
  schema=new_schema, allow_missing=True
- )
+ ),
+ timeout=1800,
  )
  logging.info(f"Waiting for schema update operation: {operation.operation.name}")
  operation.result()