awslabs.cdk-mcp-server 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- awslabs/cdk_mcp_server/core/resources.py +104 -15
- awslabs/cdk_mcp_server/core/server.py +4 -3
- awslabs/cdk_mcp_server/core/tools.py +6 -1
- awslabs/cdk_mcp_server/data/genai_cdk_loader.py +508 -349
- {awslabs_cdk_mcp_server-0.1.1.dist-info → awslabs_cdk_mcp_server-0.1.3.dist-info}/METADATA +24 -1
- awslabs_cdk_mcp_server-0.1.3.dist-info/RECORD +33 -0
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/actiongroups.md +0 -137
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/alias.md +0 -39
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/collaboration.md +0 -91
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/creation.md +0 -149
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/custom_orchestration.md +0 -74
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/overview.md +0 -78
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/prompt_override.md +0 -70
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/bedrockguardrails.md +0 -188
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/chunking.md +0 -137
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/datasources.md +0 -225
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/kendra.md +0 -81
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/overview.md +0 -116
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/parsing.md +0 -36
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/transformation.md +0 -30
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/vector/aurora.md +0 -185
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/vector/creation.md +0 -80
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/vector/opensearch.md +0 -56
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/vector/pinecone.md +0 -66
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/profiles.md +0 -153
- awslabs/cdk_mcp_server/static/genai_cdk/opensearch-vectorindex/overview.md +0 -135
- awslabs/cdk_mcp_server/static/genai_cdk/opensearchserverless/overview.md +0 -17
- awslabs_cdk_mcp_server-0.1.1.dist-info/RECORD +0 -54
- {awslabs_cdk_mcp_server-0.1.1.dist-info → awslabs_cdk_mcp_server-0.1.3.dist-info}/WHEEL +0 -0
- {awslabs_cdk_mcp_server-0.1.1.dist-info → awslabs_cdk_mcp_server-0.1.3.dist-info}/entry_points.txt +0 -0
- {awslabs_cdk_mcp_server-0.1.1.dist-info → awslabs_cdk_mcp_server-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {awslabs_cdk_mcp_server-0.1.1.dist-info → awslabs_cdk_mcp_server-0.1.3.dist-info}/licenses/NOTICE +0 -0
|
@@ -1,116 +0,0 @@
|
|
|
1
|
-
# Amazon Bedrock Knowledge Bases
|
|
2
|
-
|
|
3
|
-
Amazon Bedrock Knowledge Bases enable you to provide foundation models and agents with contextual information from your company's private data sources. This enhances the relevance, accuracy, and customization of their responses.
|
|
4
|
-
|
|
5
|
-
## Table of Contents
|
|
6
|
-
|
|
7
|
-
- [Amazon Bedrock Knowledge Bases](#amazon-bedrock-knowledge-bases)
|
|
8
|
-
- [Table of Contents](#table-of-contents)
|
|
9
|
-
- [Key Concepts](#key-concepts)
|
|
10
|
-
- [Knowledge Base Types](#knowledge-base-types)
|
|
11
|
-
- [Knowledge Base Components](#knowledge-base-components)
|
|
12
|
-
- [When to Use Knowledge Bases](#when-to-use-knowledge-bases)
|
|
13
|
-
- [Related Resources](#related-resources)
|
|
14
|
-
- [Quick Start Example](#quick-start-example)
|
|
15
|
-
- [Next Steps](#next-steps)
|
|
16
|
-
|
|
17
|
-
## Key Concepts
|
|
18
|
-
|
|
19
|
-
- **Knowledge Base**: A repository of information that foundation models can access to provide context-aware responses
|
|
20
|
-
- **Vector Store**: A specialized database that stores and retrieves text as vector embeddings
|
|
21
|
-
- **Chunking**: The process of breaking documents into smaller pieces for efficient storage and retrieval
|
|
22
|
-
- **Data Source**: The origin of information ingested into a knowledge base (S3, web crawler, etc.)
|
|
23
|
-
- **Embeddings Model**: A model that converts text into vector representations
|
|
24
|
-
|
|
25
|
-
## Knowledge Base Types
|
|
26
|
-
|
|
27
|
-
- **Vector Knowledge Base**: Uses vector embeddings to find semantically similar content (most common)
|
|
28
|
-
- **Kendra Knowledge Base**: Leverages Amazon Kendra's semantic search capabilities
|
|
29
|
-
- **Structured Data Retrieval**: Enables querying structured data sources (coming soon)
|
|
30
|
-
|
|
31
|
-
> **Note**: Vector Knowledge Base is the most common type and currently has the most comprehensive support in the GenAI CDK. See [Vector Knowledge Base Creation](vector/creation.md) to get started.
|
|
32
|
-
|
|
33
|
-
## Knowledge Base Components
|
|
34
|
-
|
|
35
|
-
```mermaid
|
|
36
|
-
graph TD
|
|
37
|
-
A[Knowledge Base] --> B[Vector Store]
|
|
38
|
-
A --> C[Data Sources]
|
|
39
|
-
A --> D[Embeddings Model]
|
|
40
|
-
B --> E[OpenSearch Serverless]
|
|
41
|
-
B --> F[Aurora PostgreSQL]
|
|
42
|
-
B --> G[Pinecone]
|
|
43
|
-
C --> H[S3]
|
|
44
|
-
C --> I[Web Crawler]
|
|
45
|
-
C --> J[Confluence]
|
|
46
|
-
C --> K[SharePoint]
|
|
47
|
-
C --> L[Salesforce]
|
|
48
|
-
A --> M[Chunking Strategy]
|
|
49
|
-
M --> N[Fixed Size]
|
|
50
|
-
M --> O[Hierarchical]
|
|
51
|
-
M --> P[Semantic]
|
|
52
|
-
```
|
|
53
|
-
|
|
54
|
-
## When to Use Knowledge Bases
|
|
55
|
-
|
|
56
|
-
- **Domain-Specific Knowledge**: Provide specialized information not in the model's training data
|
|
57
|
-
- **Private Information**: Allow models to access your organization's proprietary information
|
|
58
|
-
- **Up-to-Date Information**: Supply models with the latest information beyond their training cutoff
|
|
59
|
-
- **RAG Applications**: Implement Retrieval Augmented Generation for more accurate responses
|
|
60
|
-
|
|
61
|
-
## Related Resources
|
|
62
|
-
|
|
63
|
-
- **Chunking Strategies**: `genai-cdk-constructs://bedrock/knowledgebases/chunking`
|
|
64
|
-
- **Data Sources**: `genai-cdk-constructs://bedrock/knowledgebases/datasources`
|
|
65
|
-
- **Kendra Integration**: `genai-cdk-constructs://bedrock/knowledgebases/kendra`
|
|
66
|
-
- **Parsing Strategies**: `genai-cdk-constructs://bedrock/knowledgebases/parsing`
|
|
67
|
-
- **Custom Transformations**: `genai-cdk-constructs://bedrock/knowledgebases/transformation`
|
|
68
|
-
- **Vector Stores**:
|
|
69
|
-
- `genai-cdk-constructs://bedrock/knowledgebases/vector/creation`
|
|
70
|
-
- `genai-cdk-constructs://bedrock/knowledgebases/vector/opensearch`
|
|
71
|
-
- `genai-cdk-constructs://bedrock/knowledgebases/vector/aurora`
|
|
72
|
-
- `genai-cdk-constructs://bedrock/knowledgebases/vector/pinecone`
|
|
73
|
-
|
|
74
|
-
## Quick Start Example
|
|
75
|
-
|
|
76
|
-
```typescript
|
|
77
|
-
import { bedrock } from '@cdklabs/generative-ai-cdk-constructs';
|
|
78
|
-
import * as s3 from 'aws-cdk-lib/aws-s3';
|
|
79
|
-
|
|
80
|
-
// Create a vector knowledge base
|
|
81
|
-
const kb = new bedrock.VectorKnowledgeBase(this, 'KnowledgeBase', {
|
|
82
|
-
// Use TITAN_EMBED_TEXT_V2 with appropriate dimension size based on your needs:
|
|
83
|
-
// - 256: Fastest, lowest storage requirements, good for simple use cases
|
|
84
|
-
// - 512: Balanced performance and accuracy, recommended for most use cases
|
|
85
|
-
// - 1024: Highest accuracy, best for complex semantic relationships
|
|
86
|
-
embeddingsModel: bedrock.BedrockFoundationModel.TITAN_EMBED_TEXT_V2_512,
|
|
87
|
-
instruction: 'Use this knowledge base to answer questions about our company policies.',
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
// Create a secure S3 bucket for data
|
|
91
|
-
const dataBucket = new s3.Bucket(this, 'DataBucket');
|
|
92
|
-
|
|
93
|
-
// Add an S3 data source with chunking strategy
|
|
94
|
-
new bedrock.S3DataSource(this, 'DataSource', {
|
|
95
|
-
bucket: dataBucket,
|
|
96
|
-
knowledgeBase: kb,
|
|
97
|
-
dataSourceName: 'CompanyPolicies',
|
|
98
|
-
chunkingStrategy: bedrock.ChunkingStrategy.fixedSize({
|
|
99
|
-
maxTokens: 300,
|
|
100
|
-
overlapPercentage: 20,
|
|
101
|
-
}),
|
|
102
|
-
});
|
|
103
|
-
|
|
104
|
-
// Note: When choosing an embedding model version:
|
|
105
|
-
// - TITAN_EMBED_TEXT_V2_256: Lower dimension, faster, smaller storage footprint
|
|
106
|
-
// - TITAN_EMBED_TEXT_V2_512: Balanced performance and accuracy (recommended)
|
|
107
|
-
// - TITAN_EMBED_TEXT_V2_1024: Higher dimension, more accurate but more expensive
|
|
108
|
-
|
|
109
|
-
```

### Next Steps
|
|
110
|
-
|
|
111
|
-
For more detailed information:
|
|
112
|
-
|
|
113
|
-
- [Vector Knowledge Base Creation](vector/creation.md) - Detailed properties and configuration options
|
|
114
|
-
- [Vector Stores](vector/opensearch.md) - Different vector store options (OpenSearch, Aurora, Pinecone)
|
|
115
|
-
- [Chunking Strategies](chunking.md) - Options for chunking your data
|
|
116
|
-
- [Data Sources](datasources.md) - Different data source types and configuration
|
|
@@ -1,36 +0,0 @@
|
|
|
1
|
-
# Vector Knowledge Base - Parsing Strategy
|
|
2
|
-
|
|
3
|
-
## Overview
|
|
4
|
-
|
|
5
|
-
A parsing strategy in Amazon Bedrock is a configuration that determines how the service
|
|
6
|
-
processes and interprets the contents of a document. It involves converting the document's
|
|
7
|
-
contents into text and splitting it into smaller chunks for analysis. Amazon Bedrock offers
|
|
8
|
-
two parsing strategies:
|
|
9
|
-
|
|
10
|
-
### Default Parsing Strategy
|
|
11
|
-
|
|
12
|
-
This strategy converts the document's contents into text
|
|
13
|
-
and splits it into chunks using a predefined approach. It is suitable for most use cases
|
|
14
|
-
but may not be optimal for specific document types or requirements.
|
|
15
|
-
|
|
16
|
-
### Foundation Model Parsing Strategy
|
|
17
|
-
|
|
18
|
-
This strategy uses a foundation model to describe
|
|
19
|
-
the contents of the document. It is particularly useful for improved processing of PDF files
|
|
20
|
-
with tables and images. To use this strategy, set the `parsingStrategy` in a data source as below.
|
|
21
|
-
|
|
22
|
-
#### TypeScript
|
|
23
|
-
|
|
24
|
-
```ts
|
|
25
|
-
bedrock.ParsingStategy.foundationModel({
|
|
26
|
-
model: BedrockFoundationModel.ANTHROPIC_CLAUDE_SONNET_V1_0,
|
|
27
|
-
});
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
#### Python
|
|
31
|
-
|
|
32
|
-
```python
|
|
33
|
-
bedrock.ParsingStategy.foundation_model(
|
|
34
|
-
parsing_model=BedrockFoundationModel.ANTHROPIC_CLAUDE_SONNET_V1_0
|
|
35
|
-
)
|
|
36
|
-
```
|
|
@@ -1,30 +0,0 @@
|
|
|
1
|
-
# Knowledge Base - Custom Transformation
|
|
2
|
-
|
|
3
|
-
## Overview
|
|
4
|
-
|
|
5
|
-
Custom Transformation in Amazon Bedrock is a feature that allows you to create and apply
|
|
6
|
-
custom processing steps to documents moving through a data source ingestion pipeline.
|
|
7
|
-
|
|
8
|
-
Custom Transformation uses AWS Lambda functions to process documents, enabling you to
|
|
9
|
-
perform custom operations such as data extraction, normalization, or enrichment. To
|
|
10
|
-
create a custom transformation, set the `customTransformation` in a data source as below.
|
|
11
|
-
|
|
12
|
-
## Example
|
|
13
|
-
|
|
14
|
-
### TypeScript
|
|
15
|
-
|
|
16
|
-
```ts
|
|
17
|
-
CustomTransformation.lambda({
|
|
18
|
-
lambdaFunction: lambdaFunction,
|
|
19
|
-
s3BucketUri: `s3://${bucket.bucketName}/chunk-processor/`,
|
|
20
|
-
}),
|
|
21
|
-
```
|
|
22
|
-
|
|
23
|
-
### Python
|
|
24
|
-
|
|
25
|
-
```python
|
|
26
|
-
CustomTransformation.lambda_(
|
|
27
|
-
lambda_function= function,
|
|
28
|
-
s3_bucket_uri= f's3://{docBucket.bucket_name}/chunk-processor/'
|
|
29
|
-
)
|
|
30
|
-
```
|
|
@@ -1,185 +0,0 @@
|
|
|
1
|
-
#### Example of `Amazon RDS Aurora PostgreSQL`:
|
|
2
|
-
|
|
3
|
-
##### TypeScript
|
|
4
|
-
|
|
5
|
-
```ts
|
|
6
|
-
import * as s3 from 'aws-cdk-lib/aws-s3';
|
|
7
|
-
import { amazonaurora, bedrock } from '@cdklabs/generative-ai-cdk-constructs';
|
|
8
|
-
|
|
9
|
-
// Dimension of your vector embedding
|
|
10
|
-
const embeddingsModelVectorDimension = 1024;
|
|
11
|
-
const auroraDb = new amazonaurora.AmazonAuroraVectorStore(stack, 'AuroraDefaultVectorStore', {
|
|
12
|
-
embeddingsModelVectorDimension: embeddingsModelVectorDimension,
|
|
13
|
-
});
|
|
14
|
-
|
|
15
|
-
const kb = new bedrock.VectorKnowledgeBase(this, 'KnowledgeBase', {
|
|
16
|
-
vectorStore: auroraDb,
|
|
17
|
-
embeddingsModel: bedrock.BedrockFoundationModel.TITAN_EMBED_TEXT_V1,
|
|
18
|
-
instruction: 'Use this knowledge base to answer questions about books. ' + 'It contains the full text of novels.',
|
|
19
|
-
});
|
|
20
|
-
|
|
21
|
-
const docBucket = new s3.Bucket(this, 'DocBucket');
|
|
22
|
-
|
|
23
|
-
new bedrock.S3DataSource(this, 'DataSource', {
|
|
24
|
-
bucket: docBucket,
|
|
25
|
-
knowledgeBase: kb,
|
|
26
|
-
dataSourceName: 'books',
|
|
27
|
-
chunkingStrategy: bedrock.ChunkingStrategy.FIXED_SIZE,
|
|
28
|
-
});
|
|
29
|
-
```
|
|
30
|
-
|
|
31
|
-
##### Python
|
|
32
|
-
|
|
33
|
-
```python
|
|
34
|
-
|
|
35
|
-
from aws_cdk import (
|
|
36
|
-
aws_s3 as s3,
|
|
37
|
-
aws_rds as rds,
|
|
38
|
-
aws_ec2 as ec2,
|
|
39
|
-
Stack,
|
|
40
|
-
ArnFormat
|
|
41
|
-
)
|
|
42
|
-
from cdklabs.generative_ai_cdk_constructs import (
|
|
43
|
-
bedrock,
|
|
44
|
-
amazonaurora,
|
|
45
|
-
)
|
|
46
|
-
|
|
47
|
-
# Dimension of your vector embedding
|
|
48
|
-
embeddings_model_vector_dimension = 1024
|
|
49
|
-
aurora_db = amazonaurora.AmazonAuroraVectorStore(self, 'AuroraDefaultVectorStore',
|
|
50
|
-
embeddings_model_vector_dimension=embeddings_model_vector_dimension
|
|
51
|
-
)
|
|
52
|
-
|
|
53
|
-
kb = bedrock.VectorKnowledgeBase(self, 'KnowledgeBase',
|
|
54
|
-
vector_store= aurora_db,
|
|
55
|
-
embeddings_model= bedrock.BedrockFoundationModel.TITAN_EMBED_TEXT_V1,
|
|
56
|
-
instruction= 'Use this knowledge base to answer questions about books. ' +
|
|
57
|
-
'It contains the full text of novels.'
|
|
58
|
-
)
|
|
59
|
-
|
|
60
|
-
docBucket = s3.Bucket(self, 'DockBucket')
|
|
61
|
-
|
|
62
|
-
bedrock.S3DataSource(self, 'DataSource',
|
|
63
|
-
bucket= docBucket,
|
|
64
|
-
knowledge_base=kb,
|
|
65
|
-
data_source_name='books',
|
|
66
|
-
chunking_strategy= bedrock.ChunkingStrategy.FIXED_SIZE,
|
|
67
|
-
)
|
|
68
|
-
|
|
69
|
-
```
|
|
70
|
-
|
|
71
|
-
#### Example of importing existing `Amazon RDS Aurora PostgreSQL` using `fromExistingAuroraVectorStore()` method.
|
|
72
|
-
|
|
73
|
-
**Note** - you need to provide `clusterIdentifier`, `databaseName`, `vpc`, `secret` and `auroraSecurityGroupId` used in deployment of your existing RDS Amazon Aurora DB, as well as `embeddingsModel` that you want to be used by a Knowledge Base for chunking:
|
|
74
|
-
|
|
75
|
-
##### TypeScript
|
|
76
|
-
|
|
77
|
-
```ts
|
|
78
|
-
import * as s3 from "aws-cdk-lib/aws-s3";
|
|
79
|
-
import { amazonaurora, bedrock } from '@cdklabs/generative-ai-cdk-constructs';
|
|
80
|
-
|
|
81
|
-
const auroraDb = amazonaurora.AmazonAuroraVectorStore.fromExistingAuroraVectorStore(stack, 'ExistingAuroraVectorStore', {
|
|
82
|
-
clusterIdentifier: 'aurora-serverless-vector-cluster',
|
|
83
|
-
databaseName: 'bedrock_vector_db',
|
|
84
|
-
schemaName: 'bedrock_integration',
|
|
85
|
-
tableName: 'bedrock_kb',
|
|
86
|
-
vectorField: 'embedding',
|
|
87
|
-
textField: 'chunks',
|
|
88
|
-
metadataField: 'metadata',
|
|
89
|
-
primaryKeyField: 'id',
|
|
90
|
-
embeddingsModel: bedrock.BedrockFoundationModel.COHERE_EMBED_ENGLISH_V3,
|
|
91
|
-
vpc: cdk.aws_ec2.Vpc.fromLookup(stack, 'VPC', {
|
|
92
|
-
vpcId: 'vpc-0c1a234567ee8bc90',
|
|
93
|
-
}),
|
|
94
|
-
auroraSecurityGroupId: 'sg-012ef345678c98a76',
|
|
95
|
-
secret: cdk.aws_rds.DatabaseSecret.fromSecretCompleteArn(
|
|
96
|
-
stack,
|
|
97
|
-
'Secret',
|
|
98
|
-
cdk.Stack.of(stack).formatArn({
|
|
99
|
-
service: 'secretsmanager',
|
|
100
|
-
resource: 'secret',
|
|
101
|
-
resourceName: 'rds-db-credentials/cluster-1234567890',
|
|
102
|
-
region: cdk.Stack.of(stack).region,
|
|
103
|
-
account: cdk.Stack.of(stack).account,
|
|
104
|
-
arnFormat: cdk.ArnFormat.COLON_RESOURCE_NAME,
|
|
105
|
-
}),
|
|
106
|
-
),
|
|
107
|
-
});
|
|
108
|
-
|
|
109
|
-
const kb = new bedrock.VectorKnowledgeBase(this, "KnowledgeBase", {
|
|
110
|
-
vectorStore: auroraDb,
|
|
111
|
-
embeddingsModel: bedrock.BedrockFoundationModel.COHERE_EMBED_ENGLISH_V3,
|
|
112
|
-
instruction:
|
|
113
|
-
"Use this knowledge base to answer questions about books. " +
|
|
114
|
-
"It contains the full text of novels.",
|
|
115
|
-
});
|
|
116
|
-
|
|
117
|
-
const docBucket = new s3.Bucket(this, "DocBucket");
|
|
118
|
-
|
|
119
|
-
new bedrock.S3DataSource(this, "DataSource", {
|
|
120
|
-
bucket: docBucket,
|
|
121
|
-
knowledgeBase: kb,
|
|
122
|
-
dataSourceName: "books",
|
|
123
|
-
chunkingStrategy: bedrock.ChunkingStrategy.FIXED_SIZE,
|
|
124
|
-
});
|
|
125
|
-
```
|
|
126
|
-
|
|
127
|
-
##### Python
|
|
128
|
-
|
|
129
|
-
```python
|
|
130
|
-
|
|
131
|
-
from aws_cdk import (
|
|
132
|
-
aws_s3 as s3,
|
|
133
|
-
aws_rds as rds,
|
|
134
|
-
aws_ec2 as ec2,
|
|
135
|
-
Stack,
|
|
136
|
-
ArnFormat
|
|
137
|
-
)
|
|
138
|
-
from cdklabs.generative_ai_cdk_constructs import (
|
|
139
|
-
bedrock,
|
|
140
|
-
amazonaurora,
|
|
141
|
-
)
|
|
142
|
-
|
|
143
|
-
aurora_db = amazonaurora.AmazonAuroraVectorStore.from_existing_aurora_vector_store(
|
|
144
|
-
self, 'ExistingAuroraVectorStore',
|
|
145
|
-
cluster_identifier='aurora-serverless-vector-cluster',
|
|
146
|
-
database_name='bedrock_vector_db',
|
|
147
|
-
schema_name='bedrock_integration',
|
|
148
|
-
table_name='bedrock_kb',
|
|
149
|
-
vector_field='embedding',
|
|
150
|
-
text_field='chunks',
|
|
151
|
-
metadata_field='metadata',
|
|
152
|
-
primary_key_field='id',
|
|
153
|
-
embeddings_model=bedrock.BedrockFoundationModel.COHERE_EMBED_ENGLISH_V3,
|
|
154
|
-
vpc=ec2.Vpc.from_lookup(self, 'VPC', vpc_id='vpc-0c1a234567ee8bc90'),
|
|
155
|
-
aurora_security_group_id='sg-012ef345678c98a76',
|
|
156
|
-
secret=rds.DatabaseSecret.from_secret_complete_arn(
|
|
157
|
-
self,
|
|
158
|
-
'Secret',
|
|
159
|
-
Stack.of(self).format_arn(
|
|
160
|
-
service= 'secretsmanager',
|
|
161
|
-
resource= 'secret',
|
|
162
|
-
resource_name= 'rds-db-credentials/cluster-1234567890',
|
|
163
|
-
region= Stack.of(self).region,
|
|
164
|
-
account= Stack.of(self).account,
|
|
165
|
-
arn_format= ArnFormat.COLON_RESOURCE_NAME
|
|
166
|
-
)
|
|
167
|
-
)
|
|
168
|
-
)
|
|
169
|
-
|
|
170
|
-
kb = bedrock.VectorKnowledgeBase(self, 'KnowledgeBase',
|
|
171
|
-
vector_store= aurora_db,
|
|
172
|
-
embeddings_model= bedrock.BedrockFoundationModel.COHERE_EMBED_ENGLISH_V3,
|
|
173
|
-
instruction= 'Use this knowledge base to answer questions about books. ' +
|
|
174
|
-
'It contains the full text of novels.'
|
|
175
|
-
)
|
|
176
|
-
|
|
177
|
-
docBucket = s3.Bucket(self, 'DockBucket')
|
|
178
|
-
|
|
179
|
-
bedrock.S3DataSource(self, 'DataSource',
|
|
180
|
-
bucket= docBucket,
|
|
181
|
-
knowledge_base=kb,
|
|
182
|
-
data_source_name='books',
|
|
183
|
-
chunking_strategy= bedrock.ChunkingStrategy.FIXED_SIZE,
|
|
184
|
-
)
|
|
185
|
-
```
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
# Vector Knowledge Base Properties
|
|
2
|
-
|
|
3
|
-
| Name | Type | Required | Description |
|
|
4
|
-
|---|---|---|---|
|
|
5
|
-
| embeddingsModel | BedrockFoundationModel | Yes | The embeddings model for the knowledge base |
|
|
6
|
-
| name | string | No | The name of the knowledge base |
|
|
7
|
-
| vectorType | VectorType | No | The vector type to store vector embeddings |
|
|
8
|
-
| description | string | No | The description of the knowledge base |
|
|
9
|
-
| instruction | string | No | Instructions for agents based on the design and type of information of the Knowledge Base that will impact how Agents interact with the Knowledge Base |
|
|
10
|
-
| existingRole | iam.IRole | No | Existing IAM role with a policy statement granting permission to invoke the specific embeddings model |
|
|
11
|
-
| indexName | string | No | The name of the vector index (only applicable if vectorStore is of type VectorCollection) |
|
|
12
|
-
| vectorField | string | No | The name of the field in the vector index (only applicable if vectorStore is of type VectorCollection) |
|
|
13
|
-
| vectorStore | VectorCollection \| PineconeVectorStore \| AmazonAuroraVectorStore \| ExistingAmazonAuroraVectorStore | No | The vector store for the knowledge base |
|
|
14
|
-
| vectorIndex | VectorIndex | No | The vector index for the OpenSearch Serverless backed knowledge base |
|
|
15
|
-
| knowledgeBaseState | string | No | Specifies whether to use the knowledge base or not when sending an InvokeAgent request |
|
|
16
|
-
| tags | Record<string, string> | No | Tag (KEY-VALUE) bedrock agent resource |
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
### Vector Knowledge Base - Vector Type
|
|
20
|
-
|
|
21
|
-
The data type for the vectors when using a model to convert text into vector embeddings. Embeddings type may impact the availability of some embeddings models and vector stores. The following vector types are available:
|
|
22
|
-
|
|
23
|
-
- Floating point: More precise vector representation of the text, but more costly in storage.
|
|
24
|
-
- Binary: Not as precise vector representation of the text, but not as costly in storage as a standard floating-point (float32). Not all embedding models and vector stores support binary embeddings.
|
|
25
|
-
|
|
26
|
-
See [Supported embeddings models](https://docs.aws.amazon.com/bedrock/latest/userguide/knowledge-base-supported.html) for information on the available models and their vector data types.
|
|
27
|
-
|
|
28
|
-
#### Example
|
|
29
|
-
|
|
30
|
-
##### TypeScript
|
|
31
|
-
|
|
32
|
-
```ts
|
|
33
|
-
const app = new cdk.App();
|
|
34
|
-
const stack = new cdk.Stack(app, 'aws-cdk-bedrock-data-sources-integ-test');
|
|
35
|
-
|
|
36
|
-
const kb = new VectorKnowledgeBase(stack, 'MyKnowledgeBase', {
|
|
37
|
-
name: 'MyKnowledgeBase',
|
|
38
|
-
vectorType: bedrock.VectorType.BINARY,
|
|
39
|
-
embeddingsModel: BedrockFoundationModel.COHERE_EMBED_MULTILINGUAL_V3,
|
|
40
|
-
});
|
|
41
|
-
```
|
|
42
|
-
|
|
43
|
-
##### Python
|
|
44
|
-
|
|
45
|
-
```python
|
|
46
|
-
|
|
47
|
-
from aws_cdk import (
|
|
48
|
-
aws_s3 as s3,
|
|
49
|
-
)
|
|
50
|
-
from cdklabs.generative_ai_cdk_constructs import (
|
|
51
|
-
bedrock
|
|
52
|
-
)
|
|
53
|
-
|
|
54
|
-
kb = bedrock.VectorKnowledgeBase(self, 'KnowledgeBase',
|
|
55
|
-
name= 'MyKnowledgeBase',
|
|
56
|
-
vector_type= bedrock.VectorType.BINARY,
|
|
57
|
-
embeddings_model= bedrock.BedrockFoundationModel.COHERE_EMBED_MULTILINGUAL_V3,
|
|
58
|
-
)
|
|
59
|
-
```
|
|
60
|
-
|
|
61
|
-
### Vector Knowledge Base - Data Sources
|
|
62
|
-
|
|
63
|
-
Data sources are the various repositories or systems from which information is extracted and ingested into the
|
|
64
|
-
knowledge base. These sources provide the raw content that will be processed, indexed, and made available for
|
|
65
|
-
querying within the knowledge base system. Data sources can include various types of systems such as document
|
|
66
|
-
management systems, databases, file storage systems, and content management platforms. Supported Data Sources
|
|
67
|
-
include Amazon S3 buckets, Web Crawlers, SharePoint sites, Salesforce instances, and Confluence spaces.
|
|
68
|
-
|
|
69
|
-
- **Amazon S3**. You can either create a new data source using the `bedrock.S3DataSource(..)` class, or using the
|
|
70
|
-
`kb.addS3DataSource(..)`.
|
|
71
|
-
- **Web Crawler**. You can either create a new data source using the `bedrock.WebCrawlerDataSource(..)` class, or using the
|
|
72
|
-
`kb.addWebCrawlerDataSource(..)`.
|
|
73
|
-
- **Confluence**. You can either create a new data source using the `bedrock.ConfluenceDataSource(..)` class, or using the
|
|
74
|
-
`kb.addConfluenceDataSource(..)`.
|
|
75
|
-
- **SharePoint**. You can either create a new data source using the `bedrock.SharePointDataSource(..)` class, or using the
|
|
76
|
-
`kb.addSharePointDataSource(..)`.
|
|
77
|
-
- **Salesforce**. You can either create a new data source using the `bedrock.SalesforceDataSource(..)` class, or using the
|
|
78
|
-
`kb.addSalesforceDataSource(..)`.
|
|
79
|
-
- **Custom**. You can either create a new data source using the `bedrock.CustomDataSource(..)` class, or using the
|
|
80
|
-
`kb.addCustomDataSource(..)`. This allows you to add your own custom data source to the knowledge base.
|
|
@@ -1,56 +0,0 @@
|
|
|
1
|
-
|
|
2
|
-
# OpenSearch Serverless Vector Store
|
|
3
|
-
|
|
4
|
-
## Example
|
|
5
|
-
|
|
6
|
-
### TypeScript
|
|
7
|
-
|
|
8
|
-
```ts
|
|
9
|
-
import * as s3 from 'aws-cdk-lib/aws-s3';
|
|
10
|
-
import { bedrock } from '@cdklabs/generative-ai-cdk-constructs';
|
|
11
|
-
|
|
12
|
-
const kb = new bedrock.VectorKnowledgeBase(this, 'KnowledgeBase', {
|
|
13
|
-
embeddingsModel: bedrock.BedrockFoundationModel.TITAN_EMBED_TEXT_V1,
|
|
14
|
-
instruction: 'Use this knowledge base to answer questions about books. ' + 'It contains the full text of novels.',
|
|
15
|
-
});
|
|
16
|
-
|
|
17
|
-
const docBucket = new s3.Bucket(this, 'DocBucket');
|
|
18
|
-
|
|
19
|
-
new bedrock.S3DataSource(this, 'DataSource', {
|
|
20
|
-
bucket: docBucket,
|
|
21
|
-
knowledgeBase: kb,
|
|
22
|
-
dataSourceName: 'books',
|
|
23
|
-
chunkingStrategy: bedrock.ChunkingStrategy.fixedSize({
|
|
24
|
-
maxTokens: 500,
|
|
25
|
-
overlapPercentage: 20,
|
|
26
|
-
}),
|
|
27
|
-
});
|
|
28
|
-
```
|
|
29
|
-
|
|
30
|
-
### Python
|
|
31
|
-
|
|
32
|
-
```python
|
|
33
|
-
|
|
34
|
-
from aws_cdk import (
|
|
35
|
-
aws_s3 as s3,
|
|
36
|
-
)
|
|
37
|
-
from cdklabs.generative_ai_cdk_constructs import (
|
|
38
|
-
bedrock
|
|
39
|
-
)
|
|
40
|
-
|
|
41
|
-
kb = bedrock.VectorKnowledgeBase(self, 'KnowledgeBase',
|
|
42
|
-
embeddings_model= bedrock.BedrockFoundationModel.TITAN_EMBED_TEXT_V1,
|
|
43
|
-
instruction= 'Use this knowledge base to answer questions about books. ' +
|
|
44
|
-
'It contains the full text of novels.'
|
|
45
|
-
)
|
|
46
|
-
|
|
47
|
-
docBucket = s3.Bucket(self, 'DockBucket')
|
|
48
|
-
|
|
49
|
-
bedrock.S3DataSource(self, 'DataSource',
|
|
50
|
-
bucket= docBucket,
|
|
51
|
-
knowledge_base=kb,
|
|
52
|
-
data_source_name='books',
|
|
53
|
-
chunking_strategy= bedrock.ChunkingStrategy.FIXED_SIZE,
|
|
54
|
-
)
|
|
55
|
-
|
|
56
|
-
```
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
#### Example of `Pinecone` (manual, you must have Pinecone vector store created):
|
|
2
|
-
|
|
3
|
-
##### TypeScript
|
|
4
|
-
|
|
5
|
-
```ts
|
|
6
|
-
import * as s3 from 'aws-cdk-lib/aws-s3';
|
|
7
|
-
import { pinecone, bedrock } from '@cdklabs/generative-ai-cdk-constructs';
|
|
8
|
-
|
|
9
|
-
const pineconeds = new pinecone.PineconeVectorStore({
|
|
10
|
-
connectionString: 'https://your-index-1234567.svc.gcp-starter.pinecone.io',
|
|
11
|
-
credentialsSecretArn: 'arn:aws:secretsmanager:your-region:123456789876:secret:your-key-name', // pragma: allowlist secret
|
|
12
|
-
textField: 'question',
|
|
13
|
-
metadataField: 'metadata',
|
|
14
|
-
});
|
|
15
|
-
|
|
16
|
-
const kb = new bedrock.VectorKnowledgeBase(this, 'KnowledgeBase', {
|
|
17
|
-
vectorStore: pineconeds,
|
|
18
|
-
embeddingsModel: bedrock.BedrockFoundationModel.TITAN_EMBED_TEXT_V1,
|
|
19
|
-
instruction: 'Use this knowledge base to answer questions about books. ' + 'It contains the full text of novels.',
|
|
20
|
-
});
|
|
21
|
-
|
|
22
|
-
const docBucket = new s3.Bucket(this, 'DocBucket');
|
|
23
|
-
|
|
24
|
-
new bedrock.S3DataSource(this, 'DataSource', {
|
|
25
|
-
bucket: docBucket,
|
|
26
|
-
knowledgeBase: kb,
|
|
27
|
-
dataSourceName: 'books',
|
|
28
|
-
chunkingStrategy: bedrock.ChunkingStrategy.FIXED_SIZE,
|
|
29
|
-
});
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
##### Python
|
|
33
|
-
|
|
34
|
-
```python
|
|
35
|
-
|
|
36
|
-
from aws_cdk import (
|
|
37
|
-
aws_s3 as s3,
|
|
38
|
-
)
|
|
39
|
-
from cdklabs.generative_ai_cdk_constructs import (
|
|
40
|
-
bedrock,
|
|
41
|
-
pinecone,
|
|
42
|
-
)
|
|
43
|
-
|
|
44
|
-
pineconevs = pinecone.PineconeVectorStore(
|
|
45
|
-
connection_string='https://your-index-1234567.svc.gcp-starter.pinecone.io',
|
|
46
|
-
credentials_secret_arn='arn:aws:secretsmanager:your-region:123456789876:secret:your-key-name', # pragma: allowlist secret
|
|
47
|
-
text_field='question',
|
|
48
|
-
metadata_field='metadata'
|
|
49
|
-
)
|
|
50
|
-
|
|
51
|
-
kb = bedrock.VectorKnowledgeBase(self, 'KnowledgeBase',
|
|
52
|
-
vector_store= pineconevs,
|
|
53
|
-
embeddings_model= bedrock.BedrockFoundationModel.COHERE_EMBED_ENGLISH_V3,
|
|
54
|
-
instruction= 'Use this knowledge base to answer questions about books. ' +
|
|
55
|
-
'It contains the full text of novels.'
|
|
56
|
-
)
|
|
57
|
-
|
|
58
|
-
docBucket = s3.Bucket(self, 'DockBucket')
|
|
59
|
-
|
|
60
|
-
bedrock.S3DataSource(self, 'DataSource',
|
|
61
|
-
bucket= docBucket,
|
|
62
|
-
knowledge_base=kb,
|
|
63
|
-
data_source_name='books',
|
|
64
|
-
chunking_strategy= bedrock.ChunkingStrategy.FIXED_SIZE,
|
|
65
|
-
)
|
|
66
|
-
```
|