awslabs.cdk-mcp-server 0.0.10417__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. awslabs/__init__.py +2 -0
  2. awslabs/cdk_mcp_server/__init__.py +8 -0
  3. awslabs/cdk_mcp_server/core/__init__.py +1 -0
  4. awslabs/cdk_mcp_server/core/resources.py +271 -0
  5. awslabs/cdk_mcp_server/core/search_utils.py +182 -0
  6. awslabs/cdk_mcp_server/core/server.py +74 -0
  7. awslabs/cdk_mcp_server/core/tools.py +324 -0
  8. awslabs/cdk_mcp_server/data/__init__.py +1 -0
  9. awslabs/cdk_mcp_server/data/cdk_nag_parser.py +331 -0
  10. awslabs/cdk_mcp_server/data/construct_descriptions.py +32 -0
  11. awslabs/cdk_mcp_server/data/genai_cdk_loader.py +423 -0
  12. awslabs/cdk_mcp_server/data/lambda_powertools_loader.py +48 -0
  13. awslabs/cdk_mcp_server/data/schema_generator.py +666 -0
  14. awslabs/cdk_mcp_server/data/solutions_constructs_parser.py +782 -0
  15. awslabs/cdk_mcp_server/server.py +7 -0
  16. awslabs/cdk_mcp_server/static/CDK_GENERAL_GUIDANCE.md +232 -0
  17. awslabs/cdk_mcp_server/static/CDK_NAG_GUIDANCE.md +192 -0
  18. awslabs/cdk_mcp_server/static/__init__.py +5 -0
  19. awslabs/cdk_mcp_server/static/bedrock/agent/actiongroups.md +137 -0
  20. awslabs/cdk_mcp_server/static/bedrock/agent/alias.md +39 -0
  21. awslabs/cdk_mcp_server/static/bedrock/agent/collaboration.md +91 -0
  22. awslabs/cdk_mcp_server/static/bedrock/agent/creation.md +149 -0
  23. awslabs/cdk_mcp_server/static/bedrock/agent/custom_orchestration.md +74 -0
  24. awslabs/cdk_mcp_server/static/bedrock/agent/overview.md +78 -0
  25. awslabs/cdk_mcp_server/static/bedrock/agent/prompt_override.md +70 -0
  26. awslabs/cdk_mcp_server/static/bedrock/bedrockguardrails.md +188 -0
  27. awslabs/cdk_mcp_server/static/bedrock/knowledgebases/chunking.md +137 -0
  28. awslabs/cdk_mcp_server/static/bedrock/knowledgebases/datasources.md +225 -0
  29. awslabs/cdk_mcp_server/static/bedrock/knowledgebases/kendra.md +81 -0
  30. awslabs/cdk_mcp_server/static/bedrock/knowledgebases/overview.md +116 -0
  31. awslabs/cdk_mcp_server/static/bedrock/knowledgebases/parsing.md +36 -0
  32. awslabs/cdk_mcp_server/static/bedrock/knowledgebases/transformation.md +30 -0
  33. awslabs/cdk_mcp_server/static/bedrock/knowledgebases/vector/aurora.md +185 -0
  34. awslabs/cdk_mcp_server/static/bedrock/knowledgebases/vector/creation.md +80 -0
  35. awslabs/cdk_mcp_server/static/bedrock/knowledgebases/vector/opensearch.md +56 -0
  36. awslabs/cdk_mcp_server/static/bedrock/knowledgebases/vector/pinecone.md +66 -0
  37. awslabs/cdk_mcp_server/static/bedrock/profiles.md +153 -0
  38. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/actiongroups.md +137 -0
  39. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/alias.md +39 -0
  40. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/collaboration.md +91 -0
  41. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/creation.md +149 -0
  42. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/custom_orchestration.md +74 -0
  43. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/overview.md +78 -0
  44. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/prompt_override.md +70 -0
  45. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/bedrockguardrails.md +188 -0
  46. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/chunking.md +137 -0
  47. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/datasources.md +225 -0
  48. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/kendra.md +81 -0
  49. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/overview.md +116 -0
  50. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/parsing.md +36 -0
  51. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/transformation.md +30 -0
  52. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/vector/aurora.md +185 -0
  53. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/vector/creation.md +80 -0
  54. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/vector/opensearch.md +56 -0
  55. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/vector/pinecone.md +66 -0
  56. awslabs/cdk_mcp_server/static/genai_cdk/bedrock/profiles.md +153 -0
  57. awslabs/cdk_mcp_server/static/genai_cdk/opensearch-vectorindex/overview.md +135 -0
  58. awslabs/cdk_mcp_server/static/genai_cdk/opensearchserverless/overview.md +17 -0
  59. awslabs/cdk_mcp_server/static/lambda_powertools/bedrock.md +127 -0
  60. awslabs/cdk_mcp_server/static/lambda_powertools/cdk.md +99 -0
  61. awslabs/cdk_mcp_server/static/lambda_powertools/dependencies.md +45 -0
  62. awslabs/cdk_mcp_server/static/lambda_powertools/index.md +36 -0
  63. awslabs/cdk_mcp_server/static/lambda_powertools/insights.md +95 -0
  64. awslabs/cdk_mcp_server/static/lambda_powertools/logging.md +43 -0
  65. awslabs/cdk_mcp_server/static/lambda_powertools/metrics.md +93 -0
  66. awslabs/cdk_mcp_server/static/lambda_powertools/tracing.md +63 -0
  67. awslabs/cdk_mcp_server/static/opensearch-vectorindex/overview.md +135 -0
  68. awslabs/cdk_mcp_server/static/opensearchserverless/overview.md +17 -0
  69. awslabs_cdk_mcp_server-0.0.10417.dist-info/METADATA +14 -0
  70. awslabs_cdk_mcp_server-0.0.10417.dist-info/RECORD +72 -0
  71. awslabs_cdk_mcp_server-0.0.10417.dist-info/WHEEL +4 -0
  72. awslabs_cdk_mcp_server-0.0.10417.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,137 @@
1
+ # Vector Knowledge Base - Chunking Strategies
2
+
3
+ ## Available Strategies
4
+
5
+ ### Default Chunking
6
+
7
+ Applies Fixed Chunking with the default chunk size of 300 tokens and 20% overlap.
8
+
9
+ #### TypeScript
10
+
11
+ ```ts
12
+ ChunkingStrategy.DEFAULT;
13
+ ```
14
+
15
+ #### Python
16
+
17
+ ```python
18
+ ChunkingStrategy.DEFAULT
19
+ ```
20
+
21
+ ### Fixed Size Chunking
22
+
23
+ This method divides the data into fixed-size chunks, with each chunk
24
+ containing a predetermined number of tokens. This strategy is useful when the data is uniform
25
+ in size and structure.
26
+
27
+ #### TypeScript
28
+
29
+ ```ts
30
+ // Fixed Size Chunking with sane defaults.
31
+ ChunkingStrategy.FIXED_SIZE;
32
+
33
+ // Fixed Size Chunking with custom values.
34
+ ChunkingStrategy.fixedSize({ maxTokens: 200, overlapPercentage: 25 });
35
+ ```
36
+
37
+ #### Python
38
+
39
+ ```python
40
+ # Fixed Size Chunking with sane defaults.
41
+ ChunkingStrategy.FIXED_SIZE
42
+
43
+ # Fixed Size Chunking with custom values.
44
+ ChunkingStrategy.fixed_size(
45
+ max_tokens= 200,
46
+ overlap_percentage= 25
47
+ )
48
+ ```
49
+
50
+ ### Hierarchical Chunking
51
+
52
+ This strategy organizes data into layers of chunks, with the first
53
+ layer containing large chunks and the second layer containing smaller chunks derived from the first.
54
+ It is ideal for data with inherent hierarchies or nested structures.
55
+
56
+ #### TypeScript
57
+
58
+ ```ts
59
+ // Hierarchical Chunking with the default for Cohere Models.
60
+ ChunkingStrategy.HIERARCHICAL_COHERE;
61
+
62
+ // Hierarchical Chunking with the default for Titan Models.
63
+ ChunkingStrategy.HIERARCHICAL_TITAN;
64
+
65
+ // Hierarchical Chunking with custom values. The maximum chunk size depends on the model.
66
+ // Amazon Titan Text Embeddings: 8192. Cohere Embed models: 512
67
+ ChunkingStrategy.hierarchical({
68
+ overlapTokens: 60,
69
+ maxParentTokenSize: 1500,
70
+ maxChildTokenSize: 300,
71
+ });
72
+ ```
73
+
74
+ #### Python
75
+
76
+ ```python
77
+ # Hierarchical Chunking with the default for Cohere Models.
78
+ ChunkingStrategy.HIERARCHICAL_COHERE
79
+
80
+ # Hierarchical Chunking with the default for Titan Models.
81
+ ChunkingStrategy.HIERARCHICAL_TITAN
82
+
83
+ # Hierarchical Chunking with custom values. The maximum chunk size depends on the model.
84
+ # Amazon Titan Text Embeddings: 8192. Cohere Embed models: 512
85
+ chunking_strategy= ChunkingStrategy.hierarchical(
86
+ overlap_tokens=60,
87
+ max_parent_token_size=1500,
88
+ max_child_token_size=300
89
+ )
90
+ ```
91
+
92
+ ### Semantic Chunking
93
+
94
+ This method splits data into smaller documents based on groups of similar
95
+ content derived from the text using natural language processing. It helps preserve contextual
96
+ relationships and ensures accurate and contextually appropriate results.
97
+
98
+ #### TypeScript
99
+
100
+ ```ts
101
+ // Semantic Chunking with sane defaults.
102
+ ChunkingStrategy.SEMANTIC;
103
+
104
+ // Semantic Chunking with custom values.
105
+ ChunkingStrategy.semantic({ bufferSize: 0, breakpointPercentileThreshold: 95, maxTokens: 300 });
106
+ ```
107
+
108
+ #### Python
109
+
110
+ ```python
111
+ # Semantic Chunking with sane defaults.
112
+ ChunkingStrategy.SEMANTIC
113
+
114
+ # Semantic Chunking with custom values.
115
+ ChunkingStrategy.semantic(
116
+ buffer_size=0,
117
+ breakpoint_percentile_threshold=95,
118
+ max_tokens=300
119
+ )
120
+ ```
121
+
122
+ ### No Chunking
123
+
124
+ This strategy treats each file as one chunk. If you choose this option,
125
+ you may want to pre-process your documents by splitting them into separate files.
126
+
127
+ #### TypeScript
128
+
129
+ ```ts
130
+ ChunkingStrategy.NONE;
131
+ ```
132
+
133
+ #### Python
134
+
135
+ ```python
136
+ ChunkingStrategy.NONE
137
+ ```
@@ -0,0 +1,225 @@
1
+ # Knowledge Base Data Sources
2
+
3
+ ## Overview
4
+
5
+ This document provides examples of adding various data sources to a Knowledge Base in Amazon Bedrock.
6
+
7
+ ## Example
8
+
9
+ ### TypeScript
10
+
11
+ ```ts
12
+ const app = new cdk.App();
13
+ const stack = new cdk.Stack(app, 'aws-cdk-bedrock-data-sources-integ-test');
14
+
15
+ const kb = new VectorKnowledgeBase(stack, 'MyKnowledgeBase', {
16
+ name: 'MyKnowledgeBase',
17
+ embeddingsModel: BedrockFoundationModel.COHERE_EMBED_MULTILINGUAL_V3,
18
+ });
19
+
20
+ const bucket = new Bucket(stack, 'Bucket', {});
21
+ const lambdaFunction = new Function(stack, 'MyFunction', {
22
+ runtime: cdk.aws_lambda.Runtime.PYTHON_3_9,
23
+ handler: 'index.handler',
24
+ code: cdk.aws_lambda.Code.fromInline('print("Hello, World!")'),
25
+ });
26
+
27
+ const secret = new Secret(stack, 'Secret');
28
+ const key = new Key(stack, 'Key');
29
+
30
+ kb.addWebCrawlerDataSource({
31
+ sourceUrls: ['https://docs.aws.amazon.com/'],
32
+ chunkingStrategy: ChunkingStrategy.HIERARCHICAL_COHERE,
33
+ customTransformation: CustomTransformation.lambda({
34
+ lambdaFunction: lambdaFunction,
35
+ s3BucketUri: `s3://${bucket.bucketName}/chunk-processor/`,
36
+ }),
37
+ });
38
+
39
+ kb.addS3DataSource({
40
+ bucket,
41
+ chunkingStrategy: ChunkingStrategy.SEMANTIC,
42
+ parsingStrategy: ParsingStategy.foundationModel({
43
+ model: BedrockFoundationModel.ANTHROPIC_CLAUDE_SONNET_V1_0,
44
+ }),
45
+ });
46
+
47
+ kb.addConfluenceDataSource({
48
+ dataSourceName: 'TestDataSource',
49
+ authSecret: secret,
50
+ kmsKey: key,
51
+ confluenceUrl: 'https://example.atlassian.net',
52
+ filters: [
53
+ {
54
+ objectType: ConfluenceObjectType.ATTACHMENT,
55
+ includePatterns: ['.*\\.pdf'],
56
+ excludePatterns: ['.*private.*\\.pdf'],
57
+ },
58
+ {
59
+ objectType: ConfluenceObjectType.PAGE,
60
+ includePatterns: ['.*public.*\\.pdf'],
61
+ excludePatterns: ['.*confidential.*\\.pdf'],
62
+ },
63
+ ],
64
+ });
65
+
66
+ kb.addSalesforceDataSource({
67
+ authSecret: secret,
68
+ endpoint: 'https://your-instance.my.salesforce.com',
69
+ kmsKey: key,
70
+ filters: [
71
+ {
72
+ objectType: SalesforceObjectType.ATTACHMENT,
73
+ includePatterns: ['.*\\.pdf'],
74
+ excludePatterns: ['.*private.*\\.pdf'],
75
+ },
76
+ {
77
+ objectType: SalesforceObjectType.CONTRACT,
78
+ includePatterns: ['.*public.*\\.pdf'],
79
+ excludePatterns: ['.*confidential.*\\.pdf'],
80
+ },
81
+ ],
82
+ });
83
+
84
+ kb.addSharePointDataSource({
85
+ dataSourceName: 'SharepointDataSource',
86
+ authSecret: secret,
87
+ kmsKey: key,
88
+ domain: 'yourdomain',
89
+ siteUrls: ['https://yourdomain.sharepoint.com/sites/mysite'],
90
+ tenantId: '888d0b57-69f1-4fb8-957f-e1f0bedf64de',
91
+ filters: [
92
+ {
93
+ objectType: SharePointObjectType.PAGE,
94
+ includePatterns: ['.*\\.pdf'],
95
+ excludePatterns: ['.*private.*\\.pdf'],
96
+ },
97
+ {
98
+ objectType: SharePointObjectType.FILE,
99
+ includePatterns: ['.*public.*\\.pdf'],
100
+ excludePatterns: ['.*confidential.*\\.pdf'],
101
+ },
102
+ ],
103
+ });
104
+
105
+ kb.addCustomDataSource({
106
+ dataSourceName: 'CustomDataSource',
107
+ chunkingStrategy: ChunkingStrategy.FIXED_SIZE,
108
+ });
109
+ ```
110
+
111
+ ### Python
112
+
113
+ ```python
114
+ from aws_cdk import (
115
+ Stack,
116
+ aws_s3 as s3,
117
+ aws_lambda as _lambda,
118
+ aws_secretsmanager as secretsmanager,
119
+ aws_kms as kms
120
+ )
121
+ from constructs import Construct
122
+ from cdklabs.generative_ai_cdk_constructs import (
123
+ bedrock
124
+ )
125
+
126
+ class PythonTestStack(Stack):
127
+
128
+ def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
129
+ super().__init__(scope, construct_id, **kwargs)
130
+
131
+ kb = bedrock.VectorKnowledgeBase(self, 'MyKnowledgeBase',
132
+ embeddings_model= bedrock.BedrockFoundationModel.COHERE_EMBED_MULTILINGUAL_V3,
133
+ )
134
+
135
+ docBucket = s3.Bucket(self, 'Bucket')
136
+
137
+ function = _lambda.Function(self, 'MyFunction',
138
+ runtime=_lambda.Runtime.PYTHON_3_12,
139
+ handler='index.handler',
140
+ code=_lambda.Code.from_inline('print("Hello, World!")'),
141
+ )
142
+
143
+ kb.add_web_crawler_data_source(
144
+ source_urls= ['https://docs.aws.amazon.com/'],
145
+ chunking_strategy= bedrock.ChunkingStrategy.HIERARCHICAL_COHERE,
146
+ custom_transformation= bedrock.CustomTransformation.lambda_(
147
+ lambda_function= function,
148
+ s3_bucket_uri= f's3://{docBucket.bucket_name}/chunk-processor/'
149
+ )
150
+ )
151
+
152
+ kb.add_s3_data_source(
153
+ bucket= docBucket,
154
+ chunking_strategy= bedrock.ChunkingStrategy.SEMANTIC,
155
+ parsing_strategy= bedrock.ParsingStategy.foundation_model(
156
+ parsing_model= bedrock.BedrockFoundationModel.ANTHROPIC_CLAUDE_SONNET_V1_0.as_i_model(self)
157
+ )
158
+ )
159
+
160
+ secret = secretsmanager.Secret(self, 'Secret')
161
+ key = kms.Key(self, 'Key')
162
+
163
+ kb.add_confluence_data_source(
164
+ data_source_name='TestDataSource',
165
+ auth_secret=secret,
166
+ kms_key=key,
167
+ confluence_url='https://example.atlassian.net',
168
+ filters=[
169
+ bedrock.ConfluenceCrawlingFilters(
170
+ object_type=bedrock.ConfluenceObjectType.ATTACHMENT,
171
+ include_patterns= [".*\\.pdf"],
172
+ exclude_patterns= [".*private.*\\.pdf"],
173
+ ),
174
+ bedrock.ConfluenceCrawlingFilters(
175
+ object_type=bedrock.ConfluenceObjectType.PAGE,
176
+ include_patterns= [".*public.*\\.pdf"],
177
+ exclude_patterns= [".*confidential.*\\.pdf"],
178
+ ),
179
+ ]
180
+ )
181
+
182
+ kb.add_salesforce_data_source(
183
+ auth_secret=secret,
184
+ endpoint='https://your-instance.my.salesforce.com',
185
+ kms_key=key,
186
+ filters=[
187
+ bedrock.SalesforceCrawlingFilters(
188
+ object_type=bedrock.SalesforceObjectType.ATTACHMENT,
189
+ include_patterns= [".*\\.pdf"],
190
+ exclude_patterns= [".*private.*\\.pdf"],
191
+ ),
192
+ bedrock.SalesforceCrawlingFilters(
193
+ object_type=bedrock.SalesforceObjectType.CONTRACT,
194
+ include_patterns= [".*public.*\\.pdf"],
195
+ exclude_patterns= [".*confidential.*\\.pdf"],
196
+ ),
197
+ ]
198
+ )
199
+
200
+ kb.add_share_point_data_source(
201
+ data_source_name='SharepointDataSource',
202
+ auth_secret=secret,
203
+ kms_key=key,
204
+ domain='yourDomain',
205
+ site_urls= ['https://yourdomain.sharepoint.com/sites/mysite'],
206
+ tenant_id='888d0b57-69f1-4fb8-957f-e1f0bedf64de',
207
+ filters=[
208
+ bedrock.SharePointCrawlingFilters(
209
+ object_type=bedrock.SharePointObjectType.PAGE,
210
+ include_patterns= [".*\\.pdf"],
211
+ exclude_patterns= [".*private.*\\.pdf"],
212
+ ),
213
+ bedrock.SharePointCrawlingFilters(
214
+ object_type=bedrock.SharePointObjectType.FILE,
215
+ include_patterns= [".*public.*\\.pdf"],
216
+ exclude_patterns= [".*confidential.*\\.pdf"],
217
+ ),
218
+ ]
219
+ )
220
+
221
+ kb.add_custom_data_source(
222
+ data_source_name='CustomDataSource',
223
+ chunking_strategy=bedrock.ChunkingStrategy.FIXED_SIZE,
224
+ )
225
+ ```
@@ -0,0 +1,81 @@
1
+ # Kendra Knowledge Base
2
+
3
+ ## Overview
4
+
5
+ With Amazon Bedrock Knowledge Bases, you can build a knowledge base from an Amazon Kendra GenAI index to create more sophisticated and accurate Retrieval Augmented Generation (RAG)-powered digital assistants. By combining an Amazon Kendra GenAI index with Amazon Bedrock Knowledge Bases, you can:
6
+
7
+ - Reuse your indexed content across multiple Amazon Bedrock applications without rebuilding indexes or re-ingesting data.
8
+ - Leverage the advanced GenAI capabilities of Amazon Bedrock while benefiting from the high-accuracy information retrieval of Amazon Kendra.
9
+ - Customize your digital assistant's behavior using the tools of Amazon Bedrock while maintaining the semantic accuracy of an Amazon Kendra GenAI index.
10
+
11
+ ## Kendra Knowledge Base Properties
12
+
13
+ | Name | Type | Required | Description |
14
+ |------|------|----------|-------------|
15
+ | kendraIndex | IKendraGenAiIndex | Yes | The Kendra Index to use for the knowledge base. |
16
+ | name | string | No | The name of the knowledge base. If not provided, a name will be auto-generated. |
17
+ | description | string | No | Description of the knowledge base. |
18
+ | instruction | string | No | Instructions for the knowledge base. |
19
+ | existingRole | iam.IRole | No | An existing IAM role to use for the knowledge base. If not provided, a new role will be created. |
20
+
21
+ ## Example
22
+
23
+ ### TypeScript
24
+
25
+ ```ts
26
+ import * as s3 from 'aws-cdk-lib/aws-s3';
27
+ import { bedrock, kendra } from '@cdklabs/generative-ai-cdk-constructs';
28
+
29
+ const cmk = new kms.Key(stack, 'cmk', {});
30
+
31
+ // you can create a new index using the api below
32
+ const index = new kendra.KendraGenAiIndex(this, 'index', {
33
+ name: 'kendra-index-cdk',
34
+ kmsKey: cmk,
35
+ documentCapacityUnits: 1, // 40K documents
36
+ queryCapacityUnits: 1, // 0.2 QPS
37
+ });
38
+
39
+ // or import an existing one
40
+ const index = kendra.KendraGenAiIndex.fromAttrs(this, 'myindex', {
41
+ indexId: 'myindex',
42
+ role: myRole
43
+ });
44
+
45
+ new bedrock.KendraKnowledgeBase(this, 'kb', {
46
+ name: 'kendra-kb-cdk',
47
+ kendraIndex: index,
48
+ });
49
+ ```
50
+
51
+ ### Python
52
+
53
+ ```py
54
+ from aws_cdk import aws_kms as kms
55
+ from cdklabs.generative_ai_cdk_constructs import bedrock, kendra
56
+
57
+ # Create a KMS key
58
+ cmk = kms.Key(stack, 'cmk')
59
+
60
+ # Create a new Kendra index
61
+ index = kendra.KendraGenAiIndex(self, 'index',
62
+ name='kendra-index-cdk',
63
+ kms_key=cmk,
64
+ document_capacity_units=1, # 40K documents
65
+ query_capacity_units=1 # 0.2 QPS
66
+ )
67
+
68
+ # Or import an existing index
69
+ index = kendra.KendraGenAiIndex.from_attrs(self, 'myindex',
70
+ index_id='myindex',
71
+ role=my_role
72
+ )
73
+
74
+ # Create a Kendra Knowledge Base
75
+ kb = bedrock.KendraKnowledgeBase(self, 'kb',
76
+ name='kendra-kb-cdk',
77
+ kendra_index=index
78
+ )
79
+ ```
80
+
81
+ [View full documentation](https://github.com/awslabs/generative-ai-cdk-constructs/blob/main/src/cdk-lib/bedrock/README.md)
@@ -0,0 +1,116 @@
1
+ # Amazon Bedrock Knowledge Bases
2
+
3
+ Amazon Bedrock Knowledge Bases enable you to provide foundation models and agents with contextual information from your company's private data sources. This enhances the relevance, accuracy, and customization of their responses.
4
+
5
+ ## Table of Contents
6
+
7
+ - [Amazon Bedrock Knowledge Bases](#amazon-bedrock-knowledge-bases)
8
+ - [Table of Contents](#table-of-contents)
9
+ - [Key Concepts](#key-concepts)
10
+ - [Knowledge Base Types](#knowledge-base-types)
11
+ - [Knowledge Base Components](#knowledge-base-components)
12
+ - [When to Use Knowledge Bases](#when-to-use-knowledge-bases)
13
+ - [Related Resources](#related-resources)
14
+ - [Quick Start Example](#quick-start-example)
15
+ - [Next Steps](#next-steps)
16
+
17
+ ## Key Concepts
18
+
19
+ - **Knowledge Base**: A repository of information that foundation models can access to provide context-aware responses
20
+ - **Vector Store**: A specialized database that stores and retrieves text as vector embeddings
21
+ - **Chunking**: The process of breaking documents into smaller pieces for efficient storage and retrieval
22
+ - **Data Source**: The origin of information ingested into a knowledge base (S3, web crawler, etc.)
23
+ - **Embeddings Model**: A model that converts text into vector representations
24
+
25
+ ## Knowledge Base Types
26
+
27
+ - **Vector Knowledge Base**: Uses vector embeddings to find semantically similar content (most common)
28
+ - **Kendra Knowledge Base**: Leverages Amazon Kendra's semantic search capabilities
29
+ - **Structured Data Retrieval**: Enables querying structured data sources (coming soon)
30
+
31
+ > **Note**: Vector Knowledge Base is the most common type and currently has the most comprehensive support in the GenAI CDK. See [Vector Knowledge Base Creation](vector/creation.md) to get started.
32
+
33
+ ## Knowledge Base Components
34
+
35
+ ```mermaid
36
+ graph TD
37
+ A[Knowledge Base] --> B[Vector Store]
38
+ A --> C[Data Sources]
39
+ A --> D[Embeddings Model]
40
+ B --> E[OpenSearch Serverless]
41
+ B --> F[Aurora PostgreSQL]
42
+ B --> G[Pinecone]
43
+ C --> H[S3]
44
+ C --> I[Web Crawler]
45
+ C --> J[Confluence]
46
+ C --> K[SharePoint]
47
+ C --> L[Salesforce]
48
+ A --> M[Chunking Strategy]
49
+ M --> N[Fixed Size]
50
+ M --> O[Hierarchical]
51
+ M --> P[Semantic]
52
+ ```
53
+
54
+ ## When to Use Knowledge Bases
55
+
56
+ - **Domain-Specific Knowledge**: Provide specialized information not in the model's training data
57
+ - **Private Information**: Allow models to access your organization's proprietary information
58
+ - **Up-to-Date Information**: Supply models with the latest information beyond their training cutoff
59
+ - **RAG Applications**: Implement Retrieval Augmented Generation for more accurate responses
60
+
61
+ ## Related Resources
62
+
63
+ - **Chunking Strategies**: `genai-cdk-constructs://bedrock/knowledgebases/chunking`
64
+ - **Data Sources**: `genai-cdk-constructs://bedrock/knowledgebases/datasources`
65
+ - **Kendra Integration**: `genai-cdk-constructs://bedrock/knowledgebases/kendra`
66
+ - **Parsing Strategies**: `genai-cdk-constructs://bedrock/knowledgebases/parsing`
67
+ - **Custom Transformations**: `genai-cdk-constructs://bedrock/knowledgebases/transformation`
68
+ - **Vector Stores**:
69
+ - `genai-cdk-constructs://bedrock/knowledgebases/vector/creation`
70
+ - `genai-cdk-constructs://bedrock/knowledgebases/vector/opensearch`
71
+ - `genai-cdk-constructs://bedrock/knowledgebases/vector/aurora`
72
+ - `genai-cdk-constructs://bedrock/knowledgebases/vector/pinecone`
73
+
74
+ ## Quick Start Example
75
+
76
+ ```typescript
77
+ import { bedrock } from '@cdklabs/generative-ai-cdk-constructs';
78
+ import * as s3 from 'aws-cdk-lib/aws-s3';
79
+
80
+ // Create a vector knowledge base
81
+ const kb = new bedrock.VectorKnowledgeBase(this, 'KnowledgeBase', {
82
+ // Use TITAN_EMBED_TEXT_V2 with appropriate dimension size based on your needs:
83
+ // - 256: Fastest, lowest storage requirements, good for simple use cases
84
+ // - 512: Balanced performance and accuracy, recommended for most use cases
85
+ // - 1024: Highest accuracy, best for complex semantic relationships
86
+ embeddingsModel: bedrock.BedrockFoundationModel.TITAN_EMBED_TEXT_V2_512,
87
+ instruction: 'Use this knowledge base to answer questions about our company policies.',
88
+ });
89
+
90
+ // Create a secure S3 bucket for data
91
+ const dataBucket = new s3.Bucket(this, 'DataBucket');
92
+
93
+ // Add an S3 data source with chunking strategy
94
+ new bedrock.S3DataSource(this, 'DataSource', {
95
+ bucket: dataBucket,
96
+ knowledgeBase: kb,
97
+ dataSourceName: 'CompanyPolicies',
98
+ chunkingStrategy: bedrock.ChunkingStrategy.fixedSize({
99
+ maxTokens: 300,
100
+ overlapPercentage: 20,
101
+ }),
102
+ });
103
+
104
+ // Note: When choosing an embedding model version:
105
+ // - TITAN_EMBED_TEXT_V2_256: Lower dimension, faster, smaller storage footprint
106
+ // - TITAN_EMBED_TEXT_V2_512: Balanced performance and accuracy (recommended)
107
+ // - TITAN_EMBED_TEXT_V2_1024: Higher dimension, more accurate but more expensive
108
+
109
+ ```
+
+ ## Next Steps
110
+
111
+ For more detailed information:
112
+
113
+ - [Vector Knowledge Base Creation](vector/creation.md) - Detailed properties and configuration options
114
+ - [Vector Stores](vector/opensearch.md) - Different vector store options (OpenSearch, Aurora, Pinecone)
115
+ - [Chunking Strategies](chunking.md) - Options for chunking your data
116
+ - [Data Sources](datasources.md) - Different data source types and configuration
@@ -0,0 +1,36 @@
1
+ # Vector Knowledge Base - Parsing Strategy
2
+
3
+ ## Overview
4
+
5
+ A parsing strategy in Amazon Bedrock is a configuration that determines how the service
6
+ processes and interprets the contents of a document. It involves converting the document's
7
+ contents into text and splitting it into smaller chunks for analysis. Amazon Bedrock offers
8
+ two parsing strategies:
9
+
10
+ ### Default Parsing Strategy
11
+
12
+ This strategy converts the document's contents into text
13
+ and splits it into chunks using a predefined approach. It is suitable for most use cases
14
+ but may not be optimal for specific document types or requirements.
15
+
16
+ ### Foundation Model Parsing Strategy
17
+
18
+ This strategy uses a foundation model to describe
19
+ the contents of the document. It is particularly useful for improved processing of PDF files
20
+ with tables and images. To use this strategy, set the `parsingStrategy` in a data source as below.
21
+
22
+ #### TypeScript
23
+
24
+ ```ts
25
+ bedrock.ParsingStategy.foundationModel({
26
+ model: BedrockFoundationModel.ANTHROPIC_CLAUDE_SONNET_V1_0,
27
+ });
28
+ ```
29
+
30
+ #### Python
31
+
32
+ ```python
33
+ bedrock.ParsingStategy.foundation_model(
34
+ parsing_model=BedrockFoundationModel.ANTHROPIC_CLAUDE_SONNET_V1_0
35
+ )
36
+ ```
@@ -0,0 +1,30 @@
1
+ # Knowledge Base - Custom Transformation
2
+
3
+ ## Overview
4
+
5
+ Custom Transformation in Amazon Bedrock is a feature that allows you to create and apply
6
+ custom processing steps to documents moving through a data source ingestion pipeline.
7
+
8
+ Custom Transformation uses AWS Lambda functions to process documents, enabling you to
9
+ perform custom operations such as data extraction, normalization, or enrichment. To
10
+ create a custom transformation, set the `customTransformation` in a data source as below.
11
+
12
+ ## Example
13
+
14
+ ### TypeScript
15
+
16
+ ```ts
17
+ CustomTransformation.lambda({
18
+ lambdaFunction: lambdaFunction,
19
+ s3BucketUri: `s3://${bucket.bucketName}/chunk-processor/`,
20
+ }),
21
+ ```
22
+
23
+ ### Python
24
+
25
+ ```python
26
+ CustomTransformation.lambda_(
27
+ lambda_function= function,
28
+ s3_bucket_uri= f's3://{docBucket.bucket_name}/chunk-processor/'
29
+ )
30
+ ```