agent-starter-pack 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Potentially problematic release: this version of agent-starter-pack might be problematic.
- {agent_starter_pack-0.2.1.dist-info → agent_starter_pack-0.2.3.dist-info}/METADATA +7 -13
- {agent_starter_pack-0.2.1.dist-info → agent_starter_pack-0.2.3.dist-info}/RECORD +27 -27
- src/base_template/Makefile +6 -4
- src/base_template/README.md +7 -1
- src/base_template/deployment/README.md +4 -1
- src/base_template/deployment/cd/deploy-to-prod.yaml +4 -4
- src/base_template/deployment/cd/staging.yaml +5 -5
- src/base_template/deployment/ci/pr_checks.yaml +2 -2
- src/base_template/deployment/terraform/build_triggers.tf +3 -0
- src/base_template/pyproject.toml +2 -2
- src/cli/commands/create.py +35 -9
- src/cli/commands/setup_cicd.py +23 -6
- src/cli/utils/cicd.py +1 -1
- src/data_ingestion/README.md +37 -50
- src/deployment_targets/agent_engine/app/agent_engine_app.py +7 -12
- src/deployment_targets/cloud_run/Dockerfile +1 -1
- src/frontends/streamlit/frontend/utils/stream_handler.py +3 -3
- src/resources/locks/uv-agentic_rag-agent_engine.lock +128 -127
- src/resources/locks/uv-agentic_rag-cloud_run.lock +174 -173
- src/resources/locks/uv-crewai_coding_crew-agent_engine.lock +149 -148
- src/resources/locks/uv-crewai_coding_crew-cloud_run.lock +195 -194
- src/resources/locks/uv-langgraph_base_react-agent_engine.lock +125 -124
- src/resources/locks/uv-langgraph_base_react-cloud_run.lock +171 -170
- src/resources/locks/uv-live_api-cloud_run.lock +162 -161
- {agent_starter_pack-0.2.1.dist-info → agent_starter_pack-0.2.3.dist-info}/WHEEL +0 -0
- {agent_starter_pack-0.2.1.dist-info → agent_starter_pack-0.2.3.dist-info}/entry_points.txt +0 -0
- {agent_starter_pack-0.2.1.dist-info → agent_starter_pack-0.2.3.dist-info}/licenses/LICENSE +0 -0
src/data_ingestion/README.md
CHANGED
````diff
@@ -1,84 +1,71 @@
+{%- if cookiecutter.datastore_type == "vertex_ai_search" -%}
+{%- set datastore_service_name = "Vertex AI Search" -%}
+{%- elif cookiecutter.datastore_type == "vertex_ai_vector_search" -%}
+{%- set datastore_service_name = "Vertex AI Vector Search" -%}
+{%- else -%}
+{%- set datastore_service_name = "Your Configured Datastore" -%}
+{%- endif -%}
+
 # Data Ingestion Pipeline
 
-This pipeline automates the ingestion of data into
+This pipeline automates the ingestion of data into {{ datastore_service_name }}, streamlining the process of building Retrieval Augmented Generation (RAG) applications.
 
-It orchestrates the complete workflow: loading data, chunking it into manageable segments, generating embeddings using Vertex AI Embeddings, and importing the processed data into your
+It orchestrates the complete workflow: loading data, chunking it into manageable segments, generating embeddings using Vertex AI Embeddings, and importing the processed data into your {{ datastore_service_name }} datastore.
 
 You can trigger the pipeline for an initial data load or schedule it to run periodically, ensuring your search index remains current. Vertex AI Pipelines provides the orchestration and monitoring capabilities for this process.
 
 ## Prerequisites
 
-Before running
+Before running any commands, ensure you have set your Google Cloud Project ID as an environment variable. This variable will be used by the subsequent `make` commands.
 
-
+```bash
+export PROJECT_ID="YOUR_PROJECT_ID"
+```
+Replace `"YOUR_PROJECT_ID"` with your actual Google Cloud Project ID.
 
-
+Now, you can set up the development environment:
 
-
+1. **Set up Dev Environment:** Use the following command from the root of the repository to provision the necessary resources in your development environment using Terraform. This includes deploying a datastore and configuring the required permissions.
 
-
+```bash
+make setup-dev-env
+```
+This command requires `terraform` to be installed and configured.
 
-
+## Running the Data Ingestion Pipeline
 
-
+After setting up the infrastructure using `make setup-dev-env`, you can run the data ingestion pipeline.
 
-
-cd data_ingestion
-```
+> **Note:** The initial pipeline execution might take longer as your project is configured for Vertex AI Pipelines.
 
-**
+**Steps:**
 
-
+**a. Execute the Pipeline:**
+Run the following command from the root of the repository. Ensure the `PROJECT_ID` environment variable is still set in your current shell session (as configured in Prerequisites).
 
 ```bash
-
+make data-ingestion
 ```
 
-
-
-Run the following command to execute the data ingestion pipeline. Replace the placeholder values with your actual project details.
+This command handles installing dependencies (if needed via `make install`) and submits the pipeline job using the configuration derived from your project setup. The specific parameters passed to the underlying script depend on the `datastore_type` selected during project generation:
 {%- if cookiecutter.datastore_type == "vertex_ai_search" %}
-
-PROJECT_ID="YOUR_PROJECT_ID"
-REGION="us-central1"
-DATA_STORE_REGION="us"
-uv run data_ingestion_pipeline/submit_pipeline.py \
---project-id=$PROJECT_ID \
---region=$REGION \
---data-store-region=$DATA_STORE_REGION \
---data-store-id="sample-datastore" \
---service-account="{{cookiecutter.project_name}}-rag@$PROJECT_ID.iam.gserviceaccount.com" \
---pipeline-root="gs://$PROJECT_ID-{{cookiecutter.project_name}}-rag" \
---pipeline-name="data-ingestion-pipeline"
-```
+* It will use parameters like `--data-store-id`, `--data-store-region`.
 {%- elif cookiecutter.datastore_type == "vertex_ai_vector_search" %}
-
-PROJECT_ID="YOUR_PROJECT_ID"
-REGION="us-central1"
-VECTOR_SEARCH_INDEX="YOUR_VECTOR_SEARCH_INDEX"
-VECTOR_SEARCH_INDEX_ENDPOINT="YOUR_VECTOR_SEARCH_INDEX_ENDPOINT"
-uv run data_ingestion_pipeline/submit_pipeline.py \
---project-id=$PROJECT_ID \
---region=$REGION \
---vector-search-index=$VECTOR_SEARCH_INDEX \
---vector-search-index-endpoint=$VECTOR_SEARCH_INDEX_ENDPOINT \
---service-account="{{cookiecutter.project_name}}-rag@$PROJECT_ID.iam.gserviceaccount.com" \
---pipeline-root="gs://$PROJECT_ID-{{cookiecutter.project_name}}-rag" \
---pipeline-name="data-ingestion-pipeline"
-```
+* It will use parameters like `--vector-search-index`, `--vector-search-index-endpoint`, `--vector-search-data-bucket-name`.
 {%- endif %}
+* Common parameters include `--project-id`, `--region`, `--service-account`, `--pipeline-root`, and `--pipeline-name`.
 
-**
+**b. Pipeline Scheduling:**
 
-The
+The `make data-ingestion` command triggers an immediate pipeline run. For production environments, the underlying `submit_pipeline.py` script also supports scheduling options with flags like `--schedule-only` and `--cron-schedule` for periodic execution.
 
-**
+**c. Monitoring Pipeline Progress:**
 
-The pipeline's configuration and execution status will be printed to the console. For detailed monitoring, use the Vertex AI Pipelines dashboard in the Google Cloud Console.
+The pipeline's configuration and execution status link will be printed to the console upon submission. For detailed monitoring, use the Vertex AI Pipelines dashboard in the Google Cloud Console.
 
 ## Testing Your RAG Application
 
-Once the data ingestion pipeline completes successfully, you can test your RAG application with
+Once the data ingestion pipeline completes successfully, you can test your RAG application with {{ datastore_service_name }}.
 {%- if cookiecutter.datastore_type == "vertex_ai_search" %}
 > **Troubleshooting:** If you encounter the error `"google.api_core.exceptions.InvalidArgument: 400 The embedding field path: embedding not found in schema"` after the initial data ingestion, wait a few minutes and try again. This delay allows Vertex AI Search to fully index the ingested data.
 {%- endif %}
````
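The new README names the scheduling flags but never shows an invocation. As a rough sketch only: combining the `--schedule-only` and `--cron-schedule` flags named in the text above with the parameter set from the removed 0.2.1 example might look like the following; the cron value and the `my-agent` project name are placeholders, and the exact flag behavior should be verified against `submit_pipeline.py`.

```bash
# Hypothetical scheduled invocation; --schedule-only and --cron-schedule are
# named in the README text, the remaining flags come from the removed example.
export PROJECT_ID="YOUR_PROJECT_ID"
uv run data_ingestion_pipeline/submit_pipeline.py \
  --project-id="$PROJECT_ID" \
  --region="us-central1" \
  --data-store-region="us" \
  --data-store-id="sample-datastore" \
  --service-account="my-agent-rag@$PROJECT_ID.iam.gserviceaccount.com" \
  --pipeline-root="gs://$PROJECT_ID-my-agent-rag" \
  --pipeline-name="data-ingestion-pipeline" \
  --schedule-only \
  --cron-schedule="0 4 * * *"
```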
src/deployment_targets/agent_engine/app/agent_engine_app.py
CHANGED
````diff
@@ -26,16 +26,12 @@ import vertexai
 from google.cloud import logging as google_cloud_logging
 from langchain_core.runnables import RunnableConfig
 from traceloop.sdk import Instruments, Traceloop
-from vertexai
+from vertexai import agent_engines
 
 from app.utils.gcs import create_bucket_if_not_exists
 from app.utils.tracing import CloudTraceLoggingSpanExporter
 from app.utils.typing import Feedback, InputChat, dumpd, ensure_valid_config
 
-logging.basicConfig(
-    level=logging.INFO,
-)
-
 
 class AgentEngineApp:
     """Class for managing agent engine functionality."""
@@ -124,6 +120,8 @@ class AgentEngineApp:
         **kwargs: Any,
     ) -> Any:
         """Process a single input and return the agent's response."""
+        config = ensure_valid_config(config)
+        self.set_tracing_properties(config=config)
         return dumpd(self.runnable.invoke(input=input, config=config, **kwargs))
 
     def register_operations(self) -> Mapping[str, Sequence]:
@@ -151,7 +149,7 @@ def deploy_agent_engine_app(
     requirements_file: str = ".requirements.txt",
     extra_packages: list[str] = ["./app"],
     env_vars: dict[str, str] | None = None,
-) ->
+) -> agent_engines.AgentEngine:
     """Deploy the agent engine app to Vertex AI."""
 
     staging_bucket = f"gs://{project}-agent-engine"
@@ -169,7 +167,7 @@ def deploy_agent_engine_app(
 
     # Common configuration for both create and update operations
     agent_config = {
-        "
+        "agent_engine": agent,
         "display_name": agent_name,
         "description": "This is a sample custom application in Agent Engine that uses LangGraph",
         "extra_packages": extra_packages,
@@ -178,10 +176,7 @@ def deploy_agent_engine_app(
     agent_config["requirements"] = requirements
 
     # Check if an agent with this name already exists
-    existing_agents =
-        filter=f"display_name={agent_name}"
-    )
-
+    existing_agents = list(agent_engines.list(filter=f"display_name={agent_name}"))
     if existing_agents:
         # Update the existing agent with new configuration
         logging.info(f"Updating existing agent: {agent_name}")
@@ -189,7 +184,7 @@ def deploy_agent_engine_app(
     else:
         # Create a new agent if none exists
         logging.info(f"Creating new agent: {agent_name}")
-        remote_agent =
+        remote_agent = agent_engines.create(**agent_config)
 
     config = {
         "remote_agent_engine_id": remote_agent.resource_name,
````
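Taken together, these hunks move the deploy flow onto the `vertexai.agent_engines` module. A condensed sketch of the resulting create-or-update logic follows; only `agent_engines.list` and `agent_engines.create` appear verbatim in this diff, so the `update()` call on an existing engine is an assumption inferred from the "Updating existing agent" branch.

```python
# Condensed from the changed lines above. list() and create() appear verbatim
# in the diff; the update() call on an existing engine is assumed.
from vertexai import agent_engines


def create_or_update_agent(agent_name: str, agent_config: dict) -> agent_engines.AgentEngine:
    # Check if an agent with this name already exists
    existing_agents = list(agent_engines.list(filter=f"display_name={agent_name}"))
    if existing_agents:
        # Update the existing agent with the new configuration (assumed API)
        return existing_agents[0].update(**agent_config)
    # Create a new agent if none exists
    return agent_engines.create(**agent_config)
```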
src/frontends/streamlit/frontend/utils/stream_handler.py
CHANGED
````diff
@@ -28,7 +28,7 @@ import streamlit as st
 import vertexai
 from google.auth.exceptions import DefaultCredentialsError
 from langchain_core.messages import AIMessage, ToolMessage
-from vertexai
+from vertexai import agent_engines
 
 from frontend.utils.multimodal_utils import format_content
 
@@ -43,7 +43,7 @@ def get_remote_agent(remote_agent_engine_id: str) -> Any:
     project_id = parts[1]
     location = parts[3]
     vertexai.init(project=project_id, location=location)
-    return
+    return agent_engines.AgentEngine(remote_agent_engine_id)
 
 
 @st.cache_resource
@@ -159,7 +159,7 @@ class Client:
         if self.authenticate_request:
             headers["Authorization"] = f"Bearer {self.id_token}"
         with requests.post(
-            self.url, json=data, headers=headers, stream=True, timeout=
+            self.url, json=data, headers=headers, stream=True, timeout=60
        ) as response:
             for line in response.iter_lines():
                 if line:
````