awslabs.cdk-mcp-server 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- awslabs/cdk_mcp_server/core/resources.py +104 -15
- awslabs/cdk_mcp_server/core/server.py +4 -3
- awslabs/cdk_mcp_server/core/tools.py +6 -1
- awslabs/cdk_mcp_server/data/genai_cdk_loader.py +508 -349
- {awslabs_cdk_mcp_server-0.1.1.dist-info → awslabs_cdk_mcp_server-0.1.3.dist-info}/METADATA +24 -1
- awslabs_cdk_mcp_server-0.1.3.dist-info/RECORD +33 -0
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/actiongroups.md +0 -137
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/alias.md +0 -39
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/collaboration.md +0 -91
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/creation.md +0 -149
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/custom_orchestration.md +0 -74
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/overview.md +0 -78
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/agent/prompt_override.md +0 -70
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/bedrockguardrails.md +0 -188
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/chunking.md +0 -137
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/datasources.md +0 -225
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/kendra.md +0 -81
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/overview.md +0 -116
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/parsing.md +0 -36
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/transformation.md +0 -30
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/vector/aurora.md +0 -185
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/vector/creation.md +0 -80
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/vector/opensearch.md +0 -56
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/knowledgebases/vector/pinecone.md +0 -66
- awslabs/cdk_mcp_server/static/genai_cdk/bedrock/profiles.md +0 -153
- awslabs/cdk_mcp_server/static/genai_cdk/opensearch-vectorindex/overview.md +0 -135
- awslabs/cdk_mcp_server/static/genai_cdk/opensearchserverless/overview.md +0 -17
- awslabs_cdk_mcp_server-0.1.1.dist-info/RECORD +0 -54
- {awslabs_cdk_mcp_server-0.1.1.dist-info → awslabs_cdk_mcp_server-0.1.3.dist-info}/WHEEL +0 -0
- {awslabs_cdk_mcp_server-0.1.1.dist-info → awslabs_cdk_mcp_server-0.1.3.dist-info}/entry_points.txt +0 -0
- {awslabs_cdk_mcp_server-0.1.1.dist-info → awslabs_cdk_mcp_server-0.1.3.dist-info}/licenses/LICENSE +0 -0
- {awslabs_cdk_mcp_server-0.1.1.dist-info → awslabs_cdk_mcp_server-0.1.3.dist-info}/licenses/NOTICE +0 -0
|
@@ -1,70 +0,0 @@
|
|
|
1
|
-
# Prompt Overrides
|
|
2
|
-
|
|
3
|
-
## Overview
|
|
4
|
-
|
|
5
|
-
Bedrock Agents allows you to customize the prompts and LLM configuration for its different steps. You can disable steps or create a new prompt template. Prompt templates can be inserted from plain text files.
|
|
6
|
-
|
|
7
|
-
## Example
|
|
8
|
-
|
|
9
|
-
### TypeScript
|
|
10
|
-
|
|
11
|
-
```ts
|
|
12
|
-
import { readFileSync } from 'fs';
|
|
13
|
-
|
|
14
|
-
const file = readFileSync(prompt_path, 'utf-8');
|
|
15
|
-
|
|
16
|
-
const agent = new bedrock.Agent(this, 'Agent', {
|
|
17
|
-
foundationModel: bedrock.BedrockFoundationModel.AMAZON_NOVA_LITE_V1,
|
|
18
|
-
instruction: 'You are a helpful and friendly agent that answers questions about literature.',
|
|
19
|
-
userInputEnabled: true,
|
|
20
|
-
codeInterpreterEnabled: false,
|
|
21
|
-
shouldPrepareAgent:true,
|
|
22
|
-
promptOverrideConfiguration: bedrock.PromptOverrideConfiguration.fromSteps(
|
|
23
|
-
[
|
|
24
|
-
{
|
|
25
|
-
stepType: bedrock.AgentStepType.PRE_PROCESSING,
|
|
26
|
-
stepEnabled: true,
|
|
27
|
-
customPromptTemplate: file,
|
|
28
|
-
inferenceConfig: {
|
|
29
|
-
temperature: 0.0,
|
|
30
|
-
topP: 1,
|
|
31
|
-
topK: 250,
|
|
32
|
-
maximumLength: 1,
|
|
33
|
-
stopSequences: ["\n\nHuman:"],
|
|
34
|
-
},
|
|
35
|
-
foundationModel: bedrock.BedrockFoundationModel.AMAZON_NOVA_LITE_V1
|
|
36
|
-
}
|
|
37
|
-
]
|
|
38
|
-
)
|
|
39
|
-
});
|
|
40
|
-
```
|
|
41
|
-
|
|
42
|
-
### Python
|
|
43
|
-
|
|
44
|
-
```python
|
|
45
|
-
orchestration = open('prompts/orchestration.txt', encoding="utf-8").read()
|
|
46
|
-
agent = bedrock.Agent(self, "Agent",
|
|
47
|
-
foundation_model=bedrock.BedrockFoundationModel.ANTHROPIC_CLAUDE_V2_1,
|
|
48
|
-
instruction="You are a helpful and friendly agent that answers questions about insurance claims.",
|
|
49
|
-
user_input_enabled=True,
|
|
50
|
-
code_interpreter_enabled=False,
|
|
51
|
-
should_prepare_agent=True,
|
|
52
|
-
prompt_override_configuration= bedrock.PromptOverrideConfiguration.from_steps(
|
|
53
|
-
steps=[
|
|
54
|
-
bedrock.PromptStepConfiguration(
|
|
55
|
-
step_type=bedrock.AgentStepType.PRE_PROCESSING,
|
|
56
|
-
step_enabled= True,
|
|
57
|
-
custom_prompt_template= orchestration,
|
|
58
|
-
inference_config=bedrock.InferenceConfiguration(
|
|
59
|
-
temperature=0.0,
|
|
60
|
-
top_k=250,
|
|
61
|
-
top_p=1,
|
|
62
|
-
maximum_length=1,
|
|
63
|
-
stop_sequences=['\n\nHuman:'],
|
|
64
|
-
),
|
|
65
|
-
foundation_model=bedrock.BedrockFoundationModel.AMAZON_NOVA_LITE_V1
|
|
66
|
-
),
|
|
67
|
-
]
|
|
68
|
-
),
|
|
69
|
-
)
|
|
70
|
-
```
|
|
@@ -1,188 +0,0 @@
|
|
|
1
|
-
# Bedrock Guardrails
|
|
2
|
-
|
|
3
|
-
Amazon Bedrock's Guardrails feature enables you to implement robust governance and control mechanisms for your generative AI applications, ensuring alignment with your specific use cases and responsible AI policies. Guardrails empowers you to create multiple tailored policy configurations, each designed to address the unique requirements and constraints of different use cases. These policy configurations can then be seamlessly applied across multiple foundation models (FMs) and Agents, ensuring a consistent user experience and standardizing safety, security, and privacy controls throughout your generative AI ecosystem.
|
|
4
|
-
|
|
5
|
-
With Guardrails, you can define and enforce granular, customizable policies to precisely govern the behavior of your generative AI applications. You can configure the following policies in a guardrail to avoid undesirable and harmful content and remove sensitive information for privacy protection.
|
|
6
|
-
|
|
7
|
-
- Content filters – Adjust filter strengths to block input prompts or model responses containing harmful content.
|
|
8
|
-
|
|
9
|
-
- Denied topics – Define a set of topics that are undesirable in the context of your application. These topics will be blocked if detected in user queries or model responses.
|
|
10
|
-
|
|
11
|
-
- Word filters – Configure filters to block undesirable words, phrases, and profanity. Such words can include offensive terms, competitor names, etc.
|
|
12
|
-
|
|
13
|
-
- Sensitive information filters – Block or mask sensitive information such as personally identifiable information (PII) or custom regex in user inputs and model responses.
|
|
14
|
-
|
|
15
|
-
You can create a Guardrail with a minimum of blockedInputMessaging, blockedOutputsMessaging, and a default content filter policy.
|
|
16
|
-
|
|
17
|
-
## Examples
|
|
18
|
-
|
|
19
|
-
### TypeScript
|
|
20
|
-
|
|
21
|
-
```ts
|
|
22
|
-
const guardrails = new bedrock.Guardrail(this, 'bedrockGuardrails', {
|
|
23
|
-
name: 'my-BedrockGuardrails',
|
|
24
|
-
description: 'Legal ethical guardrails.',
|
|
25
|
-
});
|
|
26
|
-
|
|
27
|
-
// Optional - Add Sensitive information filters
|
|
28
|
-
|
|
29
|
-
guardrails.addPIIFilter({
|
|
30
|
-
type: PIIType.General.ADDRESS,
|
|
31
|
-
action: GuardrailAction.ANONYMIZE,
|
|
32
|
-
});
|
|
33
|
-
|
|
34
|
-
guardrails.addRegexFilter({
|
|
35
|
-
name: 'TestRegexFilter',
|
|
36
|
-
description: 'This is a test regex filter',
|
|
37
|
-
pattern: '/^[A-Z]{2}d{6}$/',
|
|
38
|
-
action: bedrock.GuardrailAction.ANONYMIZE,
|
|
39
|
-
});
|
|
40
|
-
|
|
41
|
-
// Optional - Add contextual grounding
|
|
42
|
-
|
|
43
|
-
guardrails.addContextualGroundingFilter({
|
|
44
|
-
type: ContextualGroundingFilterType.GROUNDING,
|
|
45
|
-
threshold: 0.95,
|
|
46
|
-
});
|
|
47
|
-
|
|
48
|
-
guardrails.addContextualGroundingFilter({
|
|
49
|
-
type: ContextualGroundingFilterType.RELEVANCE,
|
|
50
|
-
threshold: 0.95,
|
|
51
|
-
});
|
|
52
|
-
|
|
53
|
-
// Optional - Add Denied topics. You can use a predefined Topic or create your own custom Topic.
|
|
54
|
-
|
|
55
|
-
guardrails.addDeniedTopicFilter(Topic.FINANCIAL_ADVICE);
|
|
56
|
-
guardrails.addDeniedTopicFilter(
|
|
57
|
-
Topic.custom({
|
|
58
|
-
name: 'Legal_Advice',
|
|
59
|
-
definition:
|
|
60
|
-
'Offering guidance or suggestions on legal matters, legal actions, interpretation of laws, or legal rights and responsibilities.',
|
|
61
|
-
examples: [
|
|
62
|
-
'Can I sue someone for this?',
|
|
63
|
-
'What are my legal rights in this situation?',
|
|
64
|
-
'Is this action against the law?',
|
|
65
|
-
'What should I do to file a legal complaint?',
|
|
66
|
-
'Can you explain this law to me?',
|
|
67
|
-
],
|
|
68
|
-
})
|
|
69
|
-
);
|
|
70
|
-
|
|
71
|
-
// Optional - Add Word filters. You can upload words from a file with the addWordFilterFromFile function.
|
|
72
|
-
guardrails.addWordFilter('drugs');
|
|
73
|
-
guardrails.addManagedWordListFilter(ManagedWordFilterType.PROFANITY);
|
|
74
|
-
guardrails.addWordFilterFromFile('./scripts/wordsPolicy.csv');
|
|
75
|
-
|
|
76
|
-
// versioning - if you change any guardrail configuration, a new version will be created
|
|
77
|
-
guardrails.createVersion('testversion');
|
|
78
|
-
|
|
79
|
-
// Importing existing guardrail
|
|
80
|
-
const importedGuardrail = bedrock.Guardrail.fromGuardrailAttributes(stack, 'TestGuardrail', {
|
|
81
|
-
guardrailArn: 'arn:aws:bedrock:us-east-1:123456789012:guardrail/oygh3o8g7rtl',
|
|
82
|
-
guardrailVersion: '1', //optional
|
|
83
|
-
kmsKey: kmsKey, //optional
|
|
84
|
-
});
|
|
85
|
-
|
|
86
|
-
// Importing Guardrails created through the L1 CDK CfnGuardrail construct
|
|
87
|
-
const cfnGuardrail = new CfnGuardrail(this, 'MyCfnGuardrail', {
|
|
88
|
-
blockedInputMessaging: 'blockedInputMessaging',
|
|
89
|
-
blockedOutputsMessaging: 'blockedOutputsMessaging',
|
|
90
|
-
name: 'namemycfnguardrails',
|
|
91
|
-
wordPolicyConfig: {
|
|
92
|
-
wordsConfig: [
|
|
93
|
-
{
|
|
94
|
-
text: 'drugs',
|
|
95
|
-
},
|
|
96
|
-
],
|
|
97
|
-
},
|
|
98
|
-
});
|
|
99
|
-
|
|
100
|
-
const importedGuardrail = bedrock.Guardrail.fromCfnGuardrail(cfnGuardrail);
|
|
101
|
-
```
|
|
102
|
-
|
|
103
|
-
### Python
|
|
104
|
-
|
|
105
|
-
```python
|
|
106
|
-
guardrail = bedrock.Guardrail(self, 'myGuardrails',
|
|
107
|
-
name='my-BedrockGuardrails',
|
|
108
|
-
description= "Legal ethical guardrails.")
|
|
109
|
-
|
|
110
|
-
# Optional - Add Sensitive information filters
|
|
111
|
-
|
|
112
|
-
guardrail.add_pii_filter(
|
|
113
|
-
type= bedrock.pii_type.General.ADDRESS,
|
|
114
|
-
action= bedrock.GuardrailAction.ANONYMIZE,
|
|
115
|
-
)
|
|
116
|
-
|
|
117
|
-
guardrail.add_regex_filter(
|
|
118
|
-
name= "TestRegexFilter",
|
|
119
|
-
description= "This is a test regex filter",
|
|
120
|
-
pattern= "/^[A-Z]{2}d{6}$/",
|
|
121
|
-
action= bedrock.GuardrailAction.ANONYMIZE,
|
|
122
|
-
)
|
|
123
|
-
|
|
124
|
-
# Optional - Add contextual grounding
|
|
125
|
-
|
|
126
|
-
guardrail.add_contextual_grounding_filter(
|
|
127
|
-
type= bedrock.ContextualGroundingFilterType.GROUNDING,
|
|
128
|
-
threshold= 0.95,
|
|
129
|
-
)
|
|
130
|
-
|
|
131
|
-
# Optional - Add Denied topics. You can use a default Topic or create your own custom Topic with bedrock.Topic.custom. The default Topics can also be overwritten.
|
|
132
|
-
|
|
133
|
-
guardrail.add_contextual_grounding_filter(
|
|
134
|
-
type= bedrock.ContextualGroundingFilterType.RELEVANCE,
|
|
135
|
-
threshold= 0.95,
|
|
136
|
-
)
|
|
137
|
-
|
|
138
|
-
guardrail.add_denied_topic_filter(bedrock.Topic.FINANCIAL_ADVICE)
|
|
139
|
-
|
|
140
|
-
guardrail.add_denied_topic_filter(
|
|
141
|
-
bedrock.Topic.custom(
|
|
142
|
-
name= "Legal_Advice",
|
|
143
|
-
definition=
|
|
144
|
-
"Offering guidance or suggestions on legal matters, legal actions, interpretation of laws, or legal rights and responsibilities.",
|
|
145
|
-
examples= [
|
|
146
|
-
"Can I sue someone for this?",
|
|
147
|
-
"What are my legal rights in this situation?",
|
|
148
|
-
"Is this action against the law?",
|
|
149
|
-
"What should I do to file a legal complaint?",
|
|
150
|
-
"Can you explain this law to me?",
|
|
151
|
-
]
|
|
152
|
-
)
|
|
153
|
-
)
|
|
154
|
-
|
|
155
|
-
# Optional - Add Word filters. You can upload words from a file with the add_word_filter_from_file function.
|
|
156
|
-
guardrail.add_word_filter("drugs")
|
|
157
|
-
guardrail.add_managed_word_list_filter(bedrock.ManagedWordFilterType.PROFANITY)
|
|
158
|
-
guardrail.add_word_filter_from_file("./scripts/wordsPolicy.csv")
|
|
159
|
-
|
|
160
|
-
# versioning - if you change any guardrail configuration, a new version will be created
|
|
161
|
-
guardrail.create_version("testversion")
|
|
162
|
-
|
|
163
|
-
# Importing existing guardrail
|
|
164
|
-
imported_guardrail = bedrock.Guardrail.from_guardrail_attributes(self, "TestGuardrail",
|
|
165
|
-
guardrail_arn="arn:aws:bedrock:us-east-1:123456789012:guardrail/oygh3o8g7rtl",
|
|
166
|
-
guardrail_version="1",
|
|
167
|
-
kms_key=kms_key
|
|
168
|
-
)
|
|
169
|
-
|
|
170
|
-
# Importing Guardrails created through the L1 CDK CfnGuardrail construct
|
|
171
|
-
cfn_guardrail = cfnbedrock.CfnGuardrail(self, "MyCfnGuardrail",
|
|
172
|
-
blocked_input_messaging="blockedInputMessaging",
|
|
173
|
-
blocked_outputs_messaging="blockedOutputsMessaging",
|
|
174
|
-
name="name",
|
|
175
|
-
|
|
176
|
-
# the properties below are optional
|
|
177
|
-
word_policy_config=cfnbedrock.CfnGuardrail.WordPolicyConfigProperty(
|
|
178
|
-
words_config=[cfnbedrock.CfnGuardrail.WordConfigProperty(
|
|
179
|
-
text="drugs"
|
|
180
|
-
)]
|
|
181
|
-
)
|
|
182
|
-
)
|
|
183
|
-
|
|
184
|
-
imported_guardrail = bedrock.Guardrail.from_cfn_guardrail(cfn_guardrail)
|
|
185
|
-
```
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
[View full documentation](https://github.com/awslabs/generative-ai-cdk-constructs/tree/main/src/cdk-lib/bedrock#bedrock-guardrails)
|
|
@@ -1,137 +0,0 @@
|
|
|
1
|
-
# Vector Knowledge Base - Chunking Strategies
|
|
2
|
-
|
|
3
|
-
## Available Strategies
|
|
4
|
-
|
|
5
|
-
### Default Chunking
|
|
6
|
-
|
|
7
|
-
Applies Fixed Chunking with the default chunk size of 300 tokens and 20% overlap.
|
|
8
|
-
|
|
9
|
-
#### TypeScript
|
|
10
|
-
|
|
11
|
-
```ts
|
|
12
|
-
ChunkingStrategy.DEFAULT;
|
|
13
|
-
```
|
|
14
|
-
|
|
15
|
-
#### Python
|
|
16
|
-
|
|
17
|
-
```python
|
|
18
|
-
ChunkingStrategy.DEFAULT
|
|
19
|
-
```
|
|
20
|
-
|
|
21
|
-
### Fixed Size Chunking
|
|
22
|
-
|
|
23
|
-
This method divides the data into fixed-size chunks, with each chunk
|
|
24
|
-
containing a predetermined number of tokens. This strategy is useful when the data is uniform
|
|
25
|
-
in size and structure.
|
|
26
|
-
|
|
27
|
-
#### TypeScript
|
|
28
|
-
|
|
29
|
-
```ts
|
|
30
|
-
// Fixed Size Chunking with sane defaults.
|
|
31
|
-
ChunkingStrategy.FIXED_SIZE;
|
|
32
|
-
|
|
33
|
-
// Fixed Size Chunking with custom values.
|
|
34
|
-
ChunkingStrategy.fixedSize({ maxTokens: 200, overlapPercentage: 25 });
|
|
35
|
-
```
|
|
36
|
-
|
|
37
|
-
#### Python
|
|
38
|
-
|
|
39
|
-
```python
|
|
40
|
-
# Fixed Size Chunking with sane defaults.
|
|
41
|
-
ChunkingStrategy.FIXED_SIZE
|
|
42
|
-
|
|
43
|
-
# Fixed Size Chunking with custom values.
|
|
44
|
-
ChunkingStrategy.fixed_size(
|
|
45
|
-
max_tokens= 200,
|
|
46
|
-
overlap_percentage= 25
|
|
47
|
-
)
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
### Hierarchical Chunking
|
|
51
|
-
|
|
52
|
-
This strategy organizes data into layers of chunks, with the first
|
|
53
|
-
layer containing large chunks and the second layer containing smaller chunks derived from the first.
|
|
54
|
-
It is ideal for data with inherent hierarchies or nested structures.
|
|
55
|
-
|
|
56
|
-
#### TypeScript
|
|
57
|
-
|
|
58
|
-
```ts
|
|
59
|
-
// Hierarchical Chunking with the default for Cohere Models.
|
|
60
|
-
ChunkingStrategy.HIERARCHICAL_COHERE;
|
|
61
|
-
|
|
62
|
-
// Hierarchical Chunking with the default for Titan Models.
|
|
63
|
-
ChunkingStrategy.HIERARCHICAL_TITAN;
|
|
64
|
-
|
|
65
|
-
// Hierarchical Chunking with custom values. The maximum chunk size depends on the model.
|
|
66
|
-
// Amazon Titan Text Embeddings: 8192. Cohere Embed models: 512
|
|
67
|
-
ChunkingStrategy.hierarchical({
|
|
68
|
-
overlapTokens: 60,
|
|
69
|
-
maxParentTokenSize: 1500,
|
|
70
|
-
maxChildTokenSize: 300,
|
|
71
|
-
});
|
|
72
|
-
```
|
|
73
|
-
|
|
74
|
-
#### Python
|
|
75
|
-
|
|
76
|
-
```python
|
|
77
|
-
# Hierarchical Chunking with the default for Cohere Models.
|
|
78
|
-
ChunkingStrategy.HIERARCHICAL_COHERE
|
|
79
|
-
|
|
80
|
-
# Hierarchical Chunking with the default for Titan Models.
|
|
81
|
-
ChunkingStrategy.HIERARCHICAL_TITAN
|
|
82
|
-
|
|
83
|
-
# Hierarchical Chunking with custom values. The maximum chunk size depends on the model.
|
|
84
|
-
# Amazon Titan Text Embeddings: 8192. Cohere Embed models: 512
|
|
85
|
-
chunking_strategy= ChunkingStrategy.hierarchical(
|
|
86
|
-
overlap_tokens=60,
|
|
87
|
-
max_parent_token_size=1500,
|
|
88
|
-
max_child_token_size=300
|
|
89
|
-
)
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
### Semantic Chunking
|
|
93
|
-
|
|
94
|
-
This method splits data into smaller documents based on groups of similar
|
|
95
|
-
content derived from the text using natural language processing. It helps preserve contextual
|
|
96
|
-
relationships and ensures accurate and contextually appropriate results.
|
|
97
|
-
|
|
98
|
-
#### TypeScript
|
|
99
|
-
|
|
100
|
-
```ts
|
|
101
|
-
// Semantic Chunking with sane defaults.
|
|
102
|
-
ChunkingStrategy.SEMANTIC;
|
|
103
|
-
|
|
104
|
-
// Semantic Chunking with custom values.
|
|
105
|
-
ChunkingStrategy.semantic({ bufferSize: 0, breakpointPercentileThreshold: 95, maxTokens: 300 });
|
|
106
|
-
```
|
|
107
|
-
|
|
108
|
-
#### Python
|
|
109
|
-
|
|
110
|
-
```python
|
|
111
|
-
# Semantic Chunking with sane defaults.
|
|
112
|
-
ChunkingStrategy.SEMANTIC
|
|
113
|
-
|
|
114
|
-
# Semantic Chunking with custom values.
|
|
115
|
-
ChunkingStrategy.semantic(
|
|
116
|
-
buffer_size=0,
|
|
117
|
-
breakpoint_percentile_threshold=95,
|
|
118
|
-
max_tokens=300
|
|
119
|
-
)
|
|
120
|
-
```
|
|
121
|
-
|
|
122
|
-
### No Chunking
|
|
123
|
-
|
|
124
|
-
This strategy treats each file as one chunk. If you choose this option,
|
|
125
|
-
you may want to pre-process your documents by splitting them into separate files.
|
|
126
|
-
|
|
127
|
-
#### TypeScript
|
|
128
|
-
|
|
129
|
-
```ts
|
|
130
|
-
ChunkingStrategy.NONE;
|
|
131
|
-
```
|
|
132
|
-
|
|
133
|
-
#### Python
|
|
134
|
-
|
|
135
|
-
```python
|
|
136
|
-
ChunkingStrategy.NONE
|
|
137
|
-
```
|
|
@@ -1,225 +0,0 @@
|
|
|
1
|
-
# Knowledge Base Data Sources
|
|
2
|
-
|
|
3
|
-
## Overview
|
|
4
|
-
|
|
5
|
-
This document provides examples of adding various data sources to a Knowledge Base in Amazon Bedrock.
|
|
6
|
-
|
|
7
|
-
## Example
|
|
8
|
-
|
|
9
|
-
### TypeScript
|
|
10
|
-
|
|
11
|
-
```ts
|
|
12
|
-
const app = new cdk.App();
|
|
13
|
-
const stack = new cdk.Stack(app, 'aws-cdk-bedrock-data-sources-integ-test');
|
|
14
|
-
|
|
15
|
-
const kb = new VectorKnowledgeBase(stack, 'MyKnowledgeBase', {
|
|
16
|
-
name: 'MyKnowledgeBase',
|
|
17
|
-
embeddingsModel: BedrockFoundationModel.COHERE_EMBED_MULTILINGUAL_V3,
|
|
18
|
-
});
|
|
19
|
-
|
|
20
|
-
const bucket = new Bucket(stack, 'Bucket', {});
|
|
21
|
-
const lambdaFunction = new Function(stack, 'MyFunction', {
|
|
22
|
-
runtime: cdk.aws_lambda.Runtime.PYTHON_3_9,
|
|
23
|
-
handler: 'index.handler',
|
|
24
|
-
code: cdk.aws_lambda.Code.fromInline('print("Hello, World!")'),
|
|
25
|
-
});
|
|
26
|
-
|
|
27
|
-
const secret = new Secret(stack, 'Secret');
|
|
28
|
-
const key = new Key(stack, 'Key');
|
|
29
|
-
|
|
30
|
-
kb.addWebCrawlerDataSource({
|
|
31
|
-
sourceUrls: ['https://docs.aws.amazon.com/'],
|
|
32
|
-
chunkingStrategy: ChunkingStrategy.HIERARCHICAL_COHERE,
|
|
33
|
-
customTransformation: CustomTransformation.lambda({
|
|
34
|
-
lambdaFunction: lambdaFunction,
|
|
35
|
-
s3BucketUri: `s3://${bucket.bucketName}/chunk-processor/`,
|
|
36
|
-
}),
|
|
37
|
-
});
|
|
38
|
-
|
|
39
|
-
kb.addS3DataSource({
|
|
40
|
-
bucket,
|
|
41
|
-
chunkingStrategy: ChunkingStrategy.SEMANTIC,
|
|
42
|
-
parsingStrategy: ParsingStategy.foundationModel({
|
|
43
|
-
model: BedrockFoundationModel.ANTHROPIC_CLAUDE_SONNET_V1_0,
|
|
44
|
-
}),
|
|
45
|
-
});
|
|
46
|
-
|
|
47
|
-
kb.addConfluenceDataSource({
|
|
48
|
-
dataSourceName: 'TestDataSource',
|
|
49
|
-
authSecret: secret,
|
|
50
|
-
kmsKey: key,
|
|
51
|
-
confluenceUrl: 'https://example.atlassian.net',
|
|
52
|
-
filters: [
|
|
53
|
-
{
|
|
54
|
-
objectType: ConfluenceObjectType.ATTACHMENT,
|
|
55
|
-
includePatterns: ['.*\\.pdf'],
|
|
56
|
-
excludePatterns: ['.*private.*\\.pdf'],
|
|
57
|
-
},
|
|
58
|
-
{
|
|
59
|
-
objectType: ConfluenceObjectType.PAGE,
|
|
60
|
-
includePatterns: ['.*public.*\\.pdf'],
|
|
61
|
-
excludePatterns: ['.*confidential.*\\.pdf'],
|
|
62
|
-
},
|
|
63
|
-
],
|
|
64
|
-
});
|
|
65
|
-
|
|
66
|
-
kb.addSalesforceDataSource({
|
|
67
|
-
authSecret: secret,
|
|
68
|
-
endpoint: 'https://your-instance.my.salesforce.com',
|
|
69
|
-
kmsKey: key,
|
|
70
|
-
filters: [
|
|
71
|
-
{
|
|
72
|
-
objectType: SalesforceObjectType.ATTACHMENT,
|
|
73
|
-
includePatterns: ['.*\\.pdf'],
|
|
74
|
-
excludePatterns: ['.*private.*\\.pdf'],
|
|
75
|
-
},
|
|
76
|
-
{
|
|
77
|
-
objectType: SalesforceObjectType.CONTRACT,
|
|
78
|
-
includePatterns: ['.*public.*\\.pdf'],
|
|
79
|
-
excludePatterns: ['.*confidential.*\\.pdf'],
|
|
80
|
-
},
|
|
81
|
-
],
|
|
82
|
-
});
|
|
83
|
-
|
|
84
|
-
kb.addSharePointDataSource({
|
|
85
|
-
dataSourceName: 'SharepointDataSource',
|
|
86
|
-
authSecret: secret,
|
|
87
|
-
kmsKey: key,
|
|
88
|
-
domain: 'yourdomain',
|
|
89
|
-
siteUrls: ['https://yourdomain.sharepoint.com/sites/mysite'],
|
|
90
|
-
tenantId: '888d0b57-69f1-4fb8-957f-e1f0bedf64de',
|
|
91
|
-
filters: [
|
|
92
|
-
{
|
|
93
|
-
objectType: SharePointObjectType.PAGE,
|
|
94
|
-
includePatterns: ['.*\\.pdf'],
|
|
95
|
-
excludePatterns: ['.*private.*\\.pdf'],
|
|
96
|
-
},
|
|
97
|
-
{
|
|
98
|
-
objectType: SharePointObjectType.FILE,
|
|
99
|
-
includePatterns: ['.*public.*\\.pdf'],
|
|
100
|
-
excludePatterns: ['.*confidential.*\\.pdf'],
|
|
101
|
-
},
|
|
102
|
-
],
|
|
103
|
-
});
|
|
104
|
-
|
|
105
|
-
kb.addCustomDataSource({
|
|
106
|
-
dataSourceName: 'CustomDataSource',
|
|
107
|
-
chunkingStrategy: ChunkingStrategy.FIXED_SIZE,
|
|
108
|
-
});
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
### Python
|
|
112
|
-
|
|
113
|
-
```python
|
|
114
|
-
from aws_cdk import (
|
|
115
|
-
Stack,
|
|
116
|
-
aws_s3 as s3,
|
|
117
|
-
aws_lambda as _lambda,
|
|
118
|
-
aws_secretsmanager as secretsmanager,
|
|
119
|
-
aws_kms as kms
|
|
120
|
-
)
|
|
121
|
-
from constructs import Construct
|
|
122
|
-
from cdklabs.generative_ai_cdk_constructs import (
|
|
123
|
-
bedrock
|
|
124
|
-
)
|
|
125
|
-
|
|
126
|
-
class PythonTestStack(Stack):
|
|
127
|
-
|
|
128
|
-
def __init__(self, scope: Construct, construct_id: str, **kwargs) -> None:
|
|
129
|
-
super().__init__(scope, construct_id, **kwargs)
|
|
130
|
-
|
|
131
|
-
kb = bedrock.VectorKnowledgeBase(self, 'MyKnowledgeBase',
|
|
132
|
-
embeddings_model= bedrock.BedrockFoundationModel.COHERE_EMBED_MULTILINGUAL_V3,
|
|
133
|
-
)
|
|
134
|
-
|
|
135
|
-
docBucket = s3.Bucket(self, 'Bucket')
|
|
136
|
-
|
|
137
|
-
function = _lambda.Function(self, 'MyFunction',
|
|
138
|
-
runtime=_lambda.Runtime.PYTHON_3_12,
|
|
139
|
-
handler='index.handler',
|
|
140
|
-
code=_lambda.Code.from_inline('print("Hello, World!")'),
|
|
141
|
-
)
|
|
142
|
-
|
|
143
|
-
kb.add_web_crawler_data_source(
|
|
144
|
-
source_urls= ['https://docs.aws.amazon.com/'],
|
|
145
|
-
chunking_strategy= bedrock.ChunkingStrategy.HIERARCHICAL_COHERE,
|
|
146
|
-
custom_transformation= bedrock.CustomTransformation.lambda_(
|
|
147
|
-
lambda_function= function,
|
|
148
|
-
s3_bucket_uri= f's3://{docBucket.bucket_name}/chunk-processor/'
|
|
149
|
-
)
|
|
150
|
-
)
|
|
151
|
-
|
|
152
|
-
kb.add_s3_data_source(
|
|
153
|
-
bucket= docBucket,
|
|
154
|
-
chunking_strategy= bedrock.ChunkingStrategy.SEMANTIC,
|
|
155
|
-
parsing_strategy= bedrock.ParsingStategy.foundation_model(
|
|
156
|
-
parsing_model= bedrock.BedrockFoundationModel.ANTHROPIC_CLAUDE_SONNET_V1_0.as_i_model(self)
|
|
157
|
-
)
|
|
158
|
-
)
|
|
159
|
-
|
|
160
|
-
secret = secretsmanager.Secret(self, 'Secret')
|
|
161
|
-
key = kms.Key(self, 'Key')
|
|
162
|
-
|
|
163
|
-
kb.add_confluence_data_source(
|
|
164
|
-
data_source_name='TestDataSource',
|
|
165
|
-
auth_secret=secret,
|
|
166
|
-
kms_key=key,
|
|
167
|
-
confluence_url='https://example.atlassian.net',
|
|
168
|
-
filters=[
|
|
169
|
-
bedrock.ConfluenceCrawlingFilters(
|
|
170
|
-
object_type=bedrock.ConfluenceObjectType.ATTACHMENT,
|
|
171
|
-
include_patterns= [".*\\.pdf"],
|
|
172
|
-
exclude_patterns= [".*private.*\\.pdf"],
|
|
173
|
-
),
|
|
174
|
-
bedrock.ConfluenceCrawlingFilters(
|
|
175
|
-
object_type=bedrock.ConfluenceObjectType.PAGE,
|
|
176
|
-
include_patterns= [".*public.*\\.pdf"],
|
|
177
|
-
exclude_patterns= [".*confidential.*\\.pdf"],
|
|
178
|
-
),
|
|
179
|
-
]
|
|
180
|
-
)
|
|
181
|
-
|
|
182
|
-
kb.add_salesforce_data_source(
|
|
183
|
-
auth_secret=secret,
|
|
184
|
-
endpoint='https://your-instance.my.salesforce.com',
|
|
185
|
-
kms_key=key,
|
|
186
|
-
filters=[
|
|
187
|
-
bedrock.SalesforceCrawlingFilters(
|
|
188
|
-
object_type=bedrock.SalesforceObjectType.ATTACHMENT,
|
|
189
|
-
include_patterns= [".*\\.pdf"],
|
|
190
|
-
exclude_patterns= [".*private.*\\.pdf"],
|
|
191
|
-
),
|
|
192
|
-
bedrock.SalesforceCrawlingFilters(
|
|
193
|
-
object_type=bedrock.SalesforceObjectType.CONTRACT,
|
|
194
|
-
include_patterns= [".*public.*\\.pdf"],
|
|
195
|
-
exclude_patterns= [".*confidential.*\\.pdf"],
|
|
196
|
-
),
|
|
197
|
-
]
|
|
198
|
-
)
|
|
199
|
-
|
|
200
|
-
kb.add_share_point_data_source(
|
|
201
|
-
data_source_name='SharepointDataSource',
|
|
202
|
-
auth_secret=secret,
|
|
203
|
-
kms_key=key,
|
|
204
|
-
domain='yourDomain',
|
|
205
|
-
site_urls= ['https://yourdomain.sharepoint.com/sites/mysite'],
|
|
206
|
-
tenant_id='888d0b57-69f1-4fb8-957f-e1f0bedf64de',
|
|
207
|
-
filters=[
|
|
208
|
-
bedrock.SharePointCrawlingFilters(
|
|
209
|
-
object_type=bedrock.SharePointObjectType.PAGE,
|
|
210
|
-
include_patterns= [".*\\.pdf"],
|
|
211
|
-
exclude_patterns= [".*private.*\\.pdf"],
|
|
212
|
-
),
|
|
213
|
-
bedrock.SharePointCrawlingFilters(
|
|
214
|
-
object_type=bedrock.SharePointObjectType.FILE,
|
|
215
|
-
include_patterns= [".*public.*\\.pdf"],
|
|
216
|
-
exclude_patterns= [".*confidential.*\\.pdf"],
|
|
217
|
-
),
|
|
218
|
-
]
|
|
219
|
-
)
|
|
220
|
-
|
|
221
|
-
kb.add_custom_data_source(
|
|
222
|
-
data_source_name='CustomDataSource',
|
|
223
|
-
chunking_strategy=bedrock.ChunkingStrategy.FIXED_SIZE,
|
|
224
|
-
)
|
|
225
|
-
```
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
# Kendra Knowledge Base
|
|
2
|
-
|
|
3
|
-
## Overview
|
|
4
|
-
|
|
5
|
-
With Amazon Bedrock Knowledge Bases, you can build a knowledge base from an Amazon Kendra GenAI index to create more sophisticated and accurate Retrieval Augmented Generation (RAG)-powered digital assistants. By combining an Amazon Kendra GenAI index with Amazon Bedrock Knowledge Bases, you can:
|
|
6
|
-
|
|
7
|
-
- Reuse your indexed content across multiple Amazon Bedrock applications without rebuilding indexes or re-ingesting data.
|
|
8
|
-
- Leverage the advanced GenAI capabilities of Amazon Bedrock while benefiting from the high-accuracy information retrieval of Amazon Kendra.
|
|
9
|
-
- Customize your digital assistant's behavior using the tools of Amazon Bedrock while maintaining the semantic accuracy of an Amazon Kendra GenAI index.
|
|
10
|
-
|
|
11
|
-
## Kendra Knowledge Base Properties
|
|
12
|
-
|
|
13
|
-
| Name | Type | Required | Description |
|
|
14
|
-
|------|------|----------|-------------|
|
|
15
|
-
| kendraIndex | IKendraGenAiIndex | Yes | The Kendra Index to use for the knowledge base. |
|
|
16
|
-
| name | string | No | The name of the knowledge base. If not provided, a name will be auto-generated. |
|
|
17
|
-
| description | string | No | Description of the knowledge base. |
|
|
18
|
-
| instruction | string | No | Instructions for the knowledge base. |
|
|
19
|
-
| existingRole | iam.IRole | No | An existing IAM role to use for the knowledge base. If not provided, a new role will be created. |
|
|
20
|
-
|
|
21
|
-
## Example
|
|
22
|
-
|
|
23
|
-
### TypeScript
|
|
24
|
-
|
|
25
|
-
```ts
|
|
26
|
-
import * as kms from 'aws-cdk-lib/aws-kms';
|
|
27
|
-
import { bedrock, kendra } from '@cdklabs/generative-ai-cdk-constructs';
|
|
28
|
-
|
|
29
|
-
const cmk = new kms.Key(stack, 'cmk', {});
|
|
30
|
-
|
|
31
|
-
// you can create a new index using the api below
|
|
32
|
-
const index = new kendra.KendraGenAiIndex(this, 'index', {
|
|
33
|
-
name: 'kendra-index-cdk',
|
|
34
|
-
kmsKey: cmk,
|
|
35
|
-
documentCapacityUnits: 1, // 40K documents
|
|
36
|
-
queryCapacityUnits: 1, // 0.2 QPS
|
|
37
|
-
});
|
|
38
|
-
|
|
39
|
-
// or import an existing one
|
|
40
|
-
const index = kendra.KendraGenAiIndex.fromAttrs(this, 'myindex', {
|
|
41
|
-
indexId: 'myindex',
|
|
42
|
-
role: myRole
|
|
43
|
-
});
|
|
44
|
-
|
|
45
|
-
new bedrock.KendraKnowledgeBase(this, 'kb', {
|
|
46
|
-
name: 'kendra-kb-cdk',
|
|
47
|
-
kendraIndex: index,
|
|
48
|
-
});
|
|
49
|
-
```
|
|
50
|
-
|
|
51
|
-
### Python
|
|
52
|
-
|
|
53
|
-
```py
|
|
54
|
-
from aws_cdk import aws_kms as kms
|
|
55
|
-
from cdklabs.generative_ai_cdk_constructs import bedrock, kendra
|
|
56
|
-
|
|
57
|
-
# Create a KMS key
|
|
58
|
-
cmk = kms.Key(stack, 'cmk')
|
|
59
|
-
|
|
60
|
-
# Create a new Kendra index
|
|
61
|
-
index = kendra.KendraGenAiIndex(self, 'index',
|
|
62
|
-
name='kendra-index-cdk',
|
|
63
|
-
kms_key=cmk,
|
|
64
|
-
document_capacity_units=1, # 40K documents
|
|
65
|
-
query_capacity_units=1 # 0.2 QPS
|
|
66
|
-
)
|
|
67
|
-
|
|
68
|
-
# Or import an existing index
|
|
69
|
-
index = kendra.KendraGenAiIndex.from_attrs(self, 'myindex',
|
|
70
|
-
index_id='myindex',
|
|
71
|
-
role=my_role
|
|
72
|
-
)
|
|
73
|
-
|
|
74
|
-
# Create a Kendra Knowledge Base
|
|
75
|
-
kb = bedrock.KendraKnowledgeBase(self, 'kb',
|
|
76
|
-
name='kendra-kb-cdk',
|
|
77
|
-
kendra_index=index
|
|
78
|
-
)
|
|
79
|
-
```
|
|
80
|
-
|
|
81
|
-
[View full documentation](https://github.com/awslabs/generative-ai-cdk-constructs/blob/main/src/cdk-lib/bedrock/README.md)
|