@xdev-asia/xdev-knowledge-mcp 1.0.41 → 1.0.43

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (23)
  1. package/content/series/luyen-thi/luyen-thi-aws-ai-practitioner/chapters/01-domain-1-fundamentals-ai-ml/lessons/01-bai-1-ai-ml-deep-learning-concepts.md +287 -0
  2. package/content/series/luyen-thi/luyen-thi-aws-ai-practitioner/chapters/01-domain-1-fundamentals-ai-ml/lessons/02-bai-2-ml-lifecycle-aws-services.md +258 -0
  3. package/content/series/luyen-thi/luyen-thi-aws-ai-practitioner/chapters/02-domain-2-fundamentals-generative-ai/lessons/03-bai-3-generative-ai-foundation-models.md +218 -0
  4. package/content/series/luyen-thi/luyen-thi-aws-ai-practitioner/chapters/02-domain-2-fundamentals-generative-ai/lessons/04-bai-4-llm-transformers-multimodal.md +232 -0
  5. package/content/series/luyen-thi/luyen-thi-aws-ai-practitioner/chapters/03-domain-3-applications-foundation-models/lessons/05-bai-5-prompt-engineering-techniques.md +254 -0
  6. package/content/series/luyen-thi/luyen-thi-aws-ai-practitioner/chapters/03-domain-3-applications-foundation-models/lessons/06-bai-6-rag-vector-databases-knowledge-bases.md +244 -0
  7. package/content/series/luyen-thi/luyen-thi-aws-ai-practitioner/chapters/03-domain-3-applications-foundation-models/lessons/07-bai-7-fine-tuning-model-customization.md +247 -0
  8. package/content/series/luyen-thi/luyen-thi-aws-ai-practitioner/chapters/03-domain-3-applications-foundation-models/lessons/08-bai-8-amazon-bedrock-deep-dive.md +276 -0
  9. package/content/series/luyen-thi/luyen-thi-aws-ai-practitioner/chapters/04-domain-4-responsible-ai/lessons/09-bai-9-responsible-ai-fairness-bias-transparency.md +224 -0
  10. package/content/series/luyen-thi/luyen-thi-aws-ai-practitioner/chapters/04-domain-4-responsible-ai/lessons/10-bai-10-aws-responsible-ai-tools.md +252 -0
  11. package/content/series/luyen-thi/luyen-thi-aws-ai-practitioner/chapters/05-domain-5-security-compliance/lessons/11-bai-11-ai-security-data-privacy-compliance.md +279 -0
  12. package/content/series/luyen-thi/luyen-thi-aws-ai-practitioner/chapters/05-domain-5-security-compliance/lessons/12-bai-12-exam-strategy-cheat-sheet.md +229 -0
  13. package/content/series/luyen-thi/luyen-thi-aws-ai-practitioner/index.md +257 -0
  14. package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/01-phan-1-data-engineering/lessons/01-bai-1-data-repositories-ingestion.md +193 -0
  15. package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/01-phan-1-data-engineering/lessons/02-bai-2-data-transformation.md +178 -0
  16. package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/index.md +240 -0
  17. package/content/series/luyen-thi/luyen-thi-gcp-ml-engineer/index.md +225 -0
  18. package/data/categories.json +16 -4
  19. package/data/quizzes/aws-ai-practitioner.json +362 -0
  20. package/data/quizzes/aws-ml-specialty.json +200 -0
  21. package/data/quizzes/gcp-ml-engineer.json +200 -0
  22. package/data/quizzes.json +764 -0
  23. package/package.json +1 -1
@@ -0,0 +1,362 @@
1
+ {
2
+ "id": "aws-ai-practitioner",
3
+ "title": "AWS Certified AI Practitioner (AIF-C01)",
4
+ "slug": "aws-ai-practitioner",
5
+ "description": "Practice exam for AWS Certified AI Practitioner — 20 questions covering all 5 domains",
6
+ "icon": "award",
7
+ "provider": "AWS",
8
+ "level": "Foundational",
9
+ "duration_minutes": 30,
10
+ "passing_score": 70,
11
+ "questions_count": 20,
12
+ "tags": [
13
+ "AWS",
14
+ "AI",
15
+ "Cloud",
16
+ "Bedrock",
17
+ "GenAI"
18
+ ],
19
+ "series_slug": "luyen-thi-aws-ai-practitioner",
20
+ "domains": [
21
+ {
22
+ "name": "Domain 1: Fundamentals of AI and ML",
23
+ "weight": 20,
24
+ "lessons": [
25
+ {
26
+ "title": "Bài 1: AI, ML & Deep Learning Concepts",
27
+ "slug": "01-bai-1-ai-ml-deep-learning-concepts"
28
+ },
29
+ {
30
+ "title": "Bài 2: ML Lifecycle & AWS AI Services",
31
+ "slug": "02-bai-2-ml-lifecycle-aws-services"
32
+ }
33
+ ]
34
+ },
35
+ {
36
+ "name": "Domain 2: Fundamentals of Generative AI",
37
+ "weight": 24,
38
+ "lessons": [
39
+ {
40
+ "title": "Bài 3: Generative AI & Foundation Models",
41
+ "slug": "03-bai-3-generative-ai-foundation-models"
42
+ },
43
+ {
44
+ "title": "Bài 4: LLMs, Transformers & Multi-modal",
45
+ "slug": "04-bai-4-llm-transformers-multimodal"
46
+ }
47
+ ]
48
+ },
49
+ {
50
+ "name": "Domain 3: Applications of Foundation Models",
51
+ "weight": 28,
52
+ "lessons": [
53
+ {
54
+ "title": "Bài 5: Prompt Engineering",
55
+ "slug": "05-bai-5-prompt-engineering-techniques"
56
+ },
57
+ {
58
+ "title": "Bài 6: RAG & Knowledge Bases",
59
+ "slug": "06-bai-6-rag-vector-databases-knowledge-bases"
60
+ },
61
+ {
62
+ "title": "Bài 7: Fine-tuning & Model Customization",
63
+ "slug": "07-bai-7-fine-tuning-model-customization"
64
+ },
65
+ {
66
+ "title": "Bài 8: Amazon Bedrock Deep Dive",
67
+ "slug": "08-bai-8-amazon-bedrock-deep-dive"
68
+ }
69
+ ]
70
+ },
71
+ {
72
+ "name": "Domain 4: Guidelines for Responsible AI",
73
+ "weight": 14,
74
+ "lessons": [
75
+ {
76
+ "title": "Bài 9: Responsible AI — Fairness & Bias",
77
+ "slug": "09-bai-9-responsible-ai-fairness-bias-transparency"
78
+ },
79
+ {
80
+ "title": "Bài 10: AWS Responsible AI Tools",
81
+ "slug": "10-bai-10-aws-responsible-ai-tools"
82
+ }
83
+ ]
84
+ },
85
+ {
86
+ "name": "Domain 5: Security, Compliance & Governance",
87
+ "weight": 14,
88
+ "lessons": [
89
+ {
90
+ "title": "Bài 11: AI Security & Data Privacy",
91
+ "slug": "11-bai-11-ai-security-data-privacy-compliance"
92
+ },
93
+ {
94
+ "title": "Bài 12: Exam Strategy & Cheat Sheet",
95
+ "slug": "12-bai-12-exam-strategy-cheat-sheet"
96
+ }
97
+ ]
98
+ }
99
+ ],
100
+ "questions": [
101
+ {
102
+ "id": 1,
103
+ "domain": "Domain 2: Fundamentals of Generative AI",
104
+ "question": "What is a Foundation Model?",
105
+ "options": [
106
+ "A model designed for only one specific task",
107
+ "A large AI model pre-trained on broad data that can be adapted to many downstream tasks",
108
+ "A model that only processes structured tabular data",
109
+ "A model trained entirely using Reinforcement Learning"
110
+ ],
111
+ "correct": 1,
112
+ "explanation": "A Foundation Model is a large AI model pre-trained on vast, diverse datasets. It can be adapted to many downstream tasks through fine-tuning, RAG, or prompt engineering."
113
+ },
114
+ {
115
+ "id": 2,
116
+ "domain": "Domain 3: Applications of Foundation Models",
117
+ "question": "What is the PRIMARY purpose of Amazon Bedrock?",
118
+ "options": [
119
+ "Managing relational databases",
120
+ "Deploying containers on the cloud",
121
+ "Accessing and using Foundation Models from multiple providers through a single API",
122
+ "Monitoring cloud costs"
123
+ ],
124
+ "correct": 2,
125
+ "explanation": "Amazon Bedrock is a fully managed service that provides access to Foundation Models from multiple providers (Anthropic, Meta, Amazon, Mistral, etc.) through a single API for building generative AI applications."
126
+ },
127
+ {
128
+ "id": 3,
129
+ "domain": "Domain 1: Fundamentals of AI and ML",
130
+ "question": "How does Supervised Learning differ from Unsupervised Learning?",
131
+ "options": [
132
+ "Supervised Learning does not require any data",
133
+ "Supervised Learning uses labeled data to train the model",
134
+ "Unsupervised Learning always produces more accurate results",
135
+ "Supervised Learning can only be used for classification tasks"
136
+ ],
137
+ "correct": 1,
138
+ "explanation": "Supervised Learning uses labeled data (input-output pairs) to train models for classification or regression, while Unsupervised Learning discovers hidden patterns in unlabeled data (e.g., clustering)."
139
+ },
140
+ {
141
+ "id": 4,
142
+ "domain": "Domain 3: Applications of Foundation Models",
143
+ "question": "What problem does RAG (Retrieval-Augmented Generation) solve for LLMs?",
144
+ "options": [
145
+ "It increases inference speed",
146
+ "It reduces training costs",
147
+ "It reduces hallucination by grounding responses in external knowledge sources",
148
+ "It increases the context window size"
149
+ ],
150
+ "correct": 2,
151
+ "explanation": "RAG combines retrieval of relevant external data with generation, helping LLMs produce more accurate, fact-based answers by grounding responses in retrieved documents rather than relying solely on training knowledge."
152
+ },
153
+ {
154
+ "id": 5,
155
+ "domain": "Domain 2: Fundamentals of Generative AI",
156
+ "question": "A customer support chatbot gives inconsistent and overly creative answers to factual questions. Which inference parameter should be adjusted?",
157
+ "options": [
158
+ "Increase temperature to 1.0",
159
+ "Decrease temperature closer to 0",
160
+ "Increase max tokens",
161
+ "Increase top-k to 500"
162
+ ],
163
+ "correct": 1,
164
+ "explanation": "Lower temperature values (closer to 0) make the model more deterministic and focused, producing consistent and factual responses. Higher temperature values increase randomness and creativity."
165
+ },
166
+ {
167
+ "id": 6,
168
+ "domain": "Domain 3: Applications of Foundation Models",
169
+ "question": "Which prompting technique is MOST effective for improving a model's accuracy on complex mathematical reasoning tasks?",
170
+ "options": [
171
+ "Zero-shot prompting",
172
+ "Negative prompting",
173
+ "Chain-of-Thought (CoT) prompting",
174
+ "System prompting"
175
+ ],
176
+ "correct": 2,
177
+ "explanation": "Chain-of-Thought prompting instructs the model to reason step by step before giving a final answer. This significantly improves accuracy on math, logic, and multi-step reasoning tasks."
178
+ },
179
+ {
180
+ "id": 7,
181
+ "domain": "Domain 3: Applications of Foundation Models",
182
+ "question": "A company wants to build a Q&A assistant that answers questions from internal documents stored in Amazon S3. The documents are updated weekly. Which approach is MOST suitable?",
183
+ "options": [
184
+ "Fine-tune a foundation model on the documents",
185
+ "Use RAG with Amazon Bedrock Knowledge Bases",
186
+ "Pre-train a custom model from scratch",
187
+ "Use zero-shot prompting with a large context window"
188
+ ],
189
+ "correct": 1,
190
+ "explanation": "Amazon Bedrock Knowledge Bases provides managed RAG — it automatically chunks, embeds, and indexes S3 documents, retrieves relevant information per query, and stays current via auto-sync without model retraining."
191
+ },
192
+ {
193
+ "id": 8,
194
+ "domain": "Domain 2: Fundamentals of Generative AI",
195
+ "question": "Which Transformer architecture type is BEST suited for text generation tasks such as chatbots and content creation?",
196
+ "options": [
197
+ "Encoder-only (e.g., BERT)",
198
+ "Decoder-only (e.g., GPT, Claude)",
199
+ "Encoder-Decoder (e.g., T5)",
200
+ "Convolutional Neural Network (CNN)"
201
+ ],
202
+ "correct": 1,
203
+ "explanation": "Decoder-only architectures (like GPT, Claude, Llama) generate text autoregressively one token at a time and are the basis for most modern chatbots and text generators."
204
+ },
205
+ {
206
+ "id": 9,
207
+ "domain": "Domain 3: Applications of Foundation Models",
208
+ "question": "A retail company wants to build an AI assistant that can check inventory, process returns, and answer product questions from their catalog. Which Amazon Bedrock feature should they use?",
209
+ "options": [
210
+ "Bedrock Guardrails",
211
+ "Bedrock Knowledge Bases only",
212
+ "Bedrock Agents with Action Groups and Knowledge Bases",
213
+ "Bedrock Model Evaluation"
214
+ ],
215
+ "correct": 2,
216
+ "explanation": "Bedrock Agents can orchestrate multi-step tasks by calling APIs (action groups for inventory/returns) and retrieving information (knowledge bases for product catalog) — combining reasoning with actions."
217
+ },
218
+ {
219
+ "id": 10,
220
+ "domain": "Domain 3: Applications of Foundation Models",
221
+ "question": "Which technique allows fine-tuning a large language model while updating only a small fraction of the model's parameters?",
222
+ "options": [
223
+ "Full fine-tuning",
224
+ "LoRA (Low-Rank Adaptation)",
225
+ "Continued pre-training",
226
+ "RLHF (Reinforcement Learning from Human Feedback)"
227
+ ],
228
+ "correct": 1,
229
+ "explanation": "LoRA is a Parameter-Efficient Fine-Tuning (PEFT) technique that adds small trainable adapter matrices while freezing the original model weights — typically updating less than 1% of total parameters, reducing cost significantly."
230
+ },
231
+ {
232
+ "id": 11,
233
+ "domain": "Domain 4: Guidelines for Responsible AI",
234
+ "question": "What is the PRIMARY purpose of Amazon Bedrock Guardrails?",
235
+ "options": [
236
+ "Accelerating model inference",
237
+ "Implementing safety controls such as content filtering, denied topics, and PII detection for AI applications",
238
+ "Compressing model size for deployment",
239
+ "Managing billing and costs"
240
+ ],
241
+ "correct": 1,
242
+ "explanation": "Bedrock Guardrails implement safety controls including content filters (hate, violence, sexual), denied topics, word filters, PII detection/redaction, and contextual grounding checks — applied to both model inputs and outputs."
243
+ },
244
+ {
245
+ "id": 12,
246
+ "domain": "Domain 4: Guidelines for Responsible AI",
247
+ "question": "A hiring AI system consistently ranks male candidates higher than equally qualified female candidates. What is the MOST likely cause?",
248
+ "options": [
249
+ "Measurement bias in data collection",
250
+ "Selection bias in the training data reflecting historical hiring patterns",
251
+ "The model's architecture is too complex",
252
+ "The inference temperature is set too high"
253
+ ],
254
+ "correct": 1,
255
+ "explanation": "If training data contained historical hiring decisions that favored male candidates, the model would learn and reproduce that selection bias — the training data didn't represent the qualified population fairly."
256
+ },
257
+ {
258
+ "id": 13,
259
+ "domain": "Domain 4: Guidelines for Responsible AI",
260
+ "question": "Which AWS service can detect bias in ML model predictions and provide per-prediction explainability using SHAP values?",
261
+ "options": [
262
+ "Amazon Rekognition",
263
+ "Amazon SageMaker Clarify",
264
+ "Amazon Bedrock Guardrails",
265
+ "Amazon Comprehend"
266
+ ],
267
+ "correct": 1,
268
+ "explanation": "SageMaker Clarify provides pre-training bias detection (data analysis), post-training bias detection (prediction analysis across demographic groups), and model explainability through SHAP values."
269
+ },
270
+ {
271
+ "id": 14,
272
+ "domain": "Domain 4: Guidelines for Responsible AI",
273
+ "question": "A document processing application needs human review when AI-extracted data has low confidence. Which AWS service provides this human-in-the-loop capability?",
274
+ "options": [
275
+ "Amazon SageMaker Ground Truth",
276
+ "Amazon Augmented AI (A2I)",
277
+ "Amazon Mechanical Turk directly",
278
+ "Amazon Bedrock Agents"
279
+ ],
280
+ "correct": 1,
281
+ "explanation": "Amazon A2I provides human-in-the-loop workflows with built-in integration for Amazon Textract and Rekognition. It automatically triggers human review when AI confidence falls below a defined threshold."
282
+ },
283
+ {
284
+ "id": 15,
285
+ "domain": "Domain 5: Security, Compliance & Governance",
286
+ "question": "A financial services company wants to ensure Amazon Bedrock API calls do NOT traverse the public internet. What should they configure?",
287
+ "options": [
288
+ "AWS Direct Connect only",
289
+ "VPC endpoint (AWS PrivateLink) for Amazon Bedrock",
290
+ "A VPN connection",
291
+ "Amazon CloudFront distribution"
292
+ ],
293
+ "correct": 1,
294
+ "explanation": "A VPC interface endpoint (AWS PrivateLink) for Amazon Bedrock allows private connectivity from within a VPC without any traffic going through the public internet."
295
+ },
296
+ {
297
+ "id": 16,
298
+ "domain": "Domain 5: Security, Compliance & Governance",
299
+ "question": "According to the AWS Shared Responsibility Model, who is responsible for ensuring ML training data does not contain bias?",
300
+ "options": [
301
+ "AWS",
302
+ "The foundation model provider",
303
+ "The customer",
304
+ "Both AWS and the customer equally"
305
+ ],
306
+ "correct": 2,
307
+ "explanation": "Under the Shared Responsibility Model, customers are responsible for 'security IN the cloud' — including training data quality, bias detection, model selection, IAM, and ethical AI practices."
308
+ },
309
+ {
310
+ "id": 17,
311
+ "domain": "Domain 5: Security, Compliance & Governance",
312
+ "question": "A chatbot must NEVER reveal customer credit card numbers in responses. Which approach provides the STRONGEST guarantee?",
313
+ "options": [
314
+ "Add 'never output credit card numbers' to the system prompt",
315
+ "Fine-tune the model to avoid outputting PII",
316
+ "Use Amazon Bedrock Guardrails with PII filters set to BLOCK",
317
+ "Remove credit card numbers from the knowledge base"
318
+ ],
319
+ "correct": 2,
320
+ "explanation": "Bedrock Guardrails with PII filters provide programmatic detection and blocking of credit card numbers — this cannot be bypassed by prompt injection, unlike system prompts which are soft constraints."
321
+ },
322
+ {
323
+ "id": 18,
324
+ "domain": "Domain 5: Security, Compliance & Governance",
325
+ "question": "A company needs to discover which Amazon S3 buckets contain personally identifiable information (PII) before using the data for ML training. Which AWS service should they use?",
326
+ "options": [
327
+ "Amazon Comprehend",
328
+ "Amazon Macie",
329
+ "Amazon Inspector",
330
+ "AWS Config"
331
+ ],
332
+ "correct": 1,
333
+ "explanation": "Amazon Macie uses ML to automatically discover and classify sensitive data (including PII) stored in Amazon S3 buckets. Comprehend detects PII in text at runtime, but Macie is designed for S3-level data discovery."
334
+ },
335
+ {
336
+ "id": 19,
337
+ "domain": "Domain 3: Applications of Foundation Models",
338
+ "question": "A company wants to process 50,000 customer reviews overnight using a foundation model for sentiment analysis. Which Amazon Bedrock pricing model is MOST cost-effective?",
339
+ "options": [
340
+ "On-Demand pricing",
341
+ "Provisioned Throughput",
342
+ "Batch Inference",
343
+ "Free tier"
344
+ ],
345
+ "correct": 2,
346
+ "explanation": "Batch Inference is designed for large-scale, non-real-time workloads and offers up to 50% cost savings compared to on-demand pricing. Ideal for processing large datasets overnight."
347
+ },
348
+ {
349
+ "id": 20,
350
+ "domain": "Domain 3: Applications of Foundation Models",
351
+ "question": "A non-technical marketing team wants to experiment with generative AI applications without an AWS account or coding skills. Which AWS service should they use?",
352
+ "options": [
353
+ "Amazon SageMaker Canvas",
354
+ "Amazon Bedrock Console",
355
+ "Amazon PartyRock",
356
+ "Amazon Q Business"
357
+ ],
358
+ "correct": 2,
359
+ "explanation": "Amazon PartyRock is a free, no-code playground for generative AI that requires no AWS account. Users can build and share GenAI apps with drag-and-drop — ideal for experimentation and learning."
360
+ }
361
+ ]
362
+ }
@@ -0,0 +1,200 @@
1
+ {
2
+ "id": "aws-ml-specialty",
3
+ "title": "AWS Certified Machine Learning - Specialty",
4
+ "slug": "aws-ml-specialty",
5
+ "description": "Luyện thi chứng chỉ AWS ML Specialty — build, train, deploy ML trên AWS",
6
+ "icon": "award",
7
+ "provider": "AWS",
8
+ "level": "Chuyên gia",
9
+ "duration_minutes": 180,
10
+ "passing_score": 75,
11
+ "questions_count": 15,
12
+ "tags": [
13
+ "AWS",
14
+ "ML",
15
+ "SageMaker"
16
+ ],
17
+ "series_slug": "luyen-thi-aws-ml-specialty",
18
+ "questions": [
19
+ {
20
+ "id": 1,
21
+ "question": "SageMaker built-in algorithm nào phù hợp nhất cho bài toán phát hiện bất thường (anomaly detection)?",
22
+ "options": [
23
+ "XGBoost",
24
+ "Random Cut Forest",
25
+ "BlazingText",
26
+ "DeepAR"
27
+ ],
28
+ "correct": 1,
29
+ "explanation": "Random Cut Forest (RCF) là thuật toán unsupervised trong SageMaker, chuyên detect anomaly trong dữ liệu streaming hoặc time series."
30
+ },
31
+ {
32
+ "id": 2,
33
+ "question": "Feature Store trong SageMaker dùng để làm gì?",
34
+ "options": [
35
+ "Lưu trữ mô hình đã train",
36
+ "Quản lý và chia sẻ features giữa các team ML, đảm bảo consistency",
37
+ "Giám sát endpoint inference",
38
+ "Quản lý IAM policies"
39
+ ],
40
+ "correct": 1,
41
+ "explanation": "SageMaker Feature Store là kho lưu trữ features centralized, giúp các team ML chia sẻ features, tránh duplicate work, và đảm bảo tính nhất quán giữa training và inference."
42
+ },
43
+ {
44
+ "id": 3,
45
+ "question": "SageMaker sử dụng mode nào để train trên nhiều instance cùng lúc?",
46
+ "options": [
47
+ "Pipe mode",
48
+ "Distributed training mode",
49
+ "File mode",
50
+ "Batch mode"
51
+ ],
52
+ "correct": 1,
53
+ "explanation": "SageMaker hỗ trợ distributed training cho phép chia workload training ra nhiều instances (data parallelism hoặc model parallelism) để tăng tốc."
54
+ },
55
+ {
56
+ "id": 4,
57
+ "question": "SageMaker Model Monitor phát hiện loại drift nào?",
58
+ "options": [
59
+ "Chỉ concept drift",
60
+ "Data quality, model quality, bias drift, và feature attribution drift",
61
+ "Chỉ data drift",
62
+ "Chỉ bias drift"
63
+ ],
64
+ "correct": 1,
65
+ "explanation": "SageMaker Model Monitor phát hiện 4 loại: Data Quality (thay đổi schema/statistics), Model Quality (độ chính xác giảm), Bias Drift (bias thay đổi), Feature Attribution Drift (feature importance thay đổi)."
66
+ },
67
+ {
68
+ "id": 5,
69
+ "question": "Khi nào nên dùng SageMaker Inference Pipeline?",
70
+ "options": [
71
+ "Khi cần chạy batch transform",
72
+ "Khi cần chain nhiều bước xử lý (preprocessing → model → postprocessing) trong một endpoint",
73
+ "Khi cần train nhiều model",
74
+ "Khi cần A/B testing"
75
+ ],
76
+ "correct": 1,
77
+ "explanation": "Inference Pipeline cho phép chain tối đa 15 containers trong một endpoint — ví dụ: data preprocessing → feature engineering → model prediction → postprocessing."
78
+ },
79
+ {
80
+ "id": 6,
81
+ "question": "BlazingText trong SageMaker được dùng cho tác vụ nào?",
82
+ "options": [
83
+ "Object detection",
84
+ "Word2Vec và text classification",
85
+ "Time series forecasting",
86
+ "Recommender systems"
87
+ ],
88
+ "correct": 1,
89
+ "explanation": "BlazingText là implementation siêu nhanh của Word2Vec và nhận diện text classification. Nó hỗ trợ training trên multi-GPU với tốc độ rất cao."
90
+ },
91
+ {
92
+ "id": 7,
93
+ "question": "SageMaker Ground Truth được sử dụng để?",
94
+ "options": [
95
+ "Deploy model lên production",
96
+ "Tạo labeled datasets với hỗ trợ của human annotators và active learning",
97
+ "Tối ưu hyperparameter",
98
+ "Giám sát chi phí training"
99
+ ],
100
+ "correct": 1,
101
+ "explanation": "Ground Truth là dịch vụ data labeling, kết hợp human annotators (Amazon Mechanical Turk, private team, hoặc vendors) với active learning để giảm chi phí labeling."
102
+ },
103
+ {
104
+ "id": 8,
105
+ "question": "Elastic Inference trong SageMaker dùng để?",
106
+ "options": [
107
+ "Tăng dung lượng storage",
108
+ "Gắn GPU fractional vào instance để giảm chi phí inference",
109
+ "Tự động scale số lượng model",
110
+ "Nén model để deploy nhanh"
111
+ ],
112
+ "correct": 1,
113
+ "explanation": "Elastic Inference cho phép gắn GPU acceleration với chi phí thấp vào SageMaker endpoints hoặc notebook instances — chỉ trả tiền cho GPU resource thực sự dùng."
114
+ },
115
+ {
116
+ "id": 9,
117
+ "question": "Chiến lược nào giúp xử lý dữ liệu mất cân bằng (imbalanced dataset)?",
118
+ "options": [
119
+ "Chỉ dùng accuracy làm metric",
120
+ "SMOTE (oversampling), undersampling, class weights, hoặc ensemble methods",
121
+ "Tăng learning rate",
122
+ "Giảm số epoch training"
123
+ ],
124
+ "correct": 1,
125
+ "explanation": "Imbalanced data cần kỹ thuật đặc biệt: SMOTE tạo thêm sample cho class thiểu số, undersampling giảm class đa số, hoặc điều chỉnh class weights trong loss function."
126
+ },
127
+ {
128
+ "id": 10,
129
+ "question": "SageMaker Clarify dùng để?",
130
+ "options": [
131
+ "Tối ưu hyperparameter",
132
+ "Phát hiện bias trong dữ liệu và mô hình, giải thích dự đoán (explainability)",
133
+ "Quản lý experiment",
134
+ "Xây dựng data pipeline"
135
+ ],
136
+ "correct": 1,
137
+ "explanation": "SageMaker Clarify giúp phát hiện bias trong data và model, cung cấp feature importance (SHAP values), hỗ trợ Responsible AI và regulatory compliance."
138
+ },
139
+ {
140
+ "id": 11,
141
+ "question": "DeepAR trong SageMaker được dùng cho bài toán nào?",
142
+ "options": [
143
+ "Image classification",
144
+ "Dự báo chuỗi thời gian (time series forecasting)",
145
+ "Text summarization",
146
+ "Object detection"
147
+ ],
148
+ "correct": 1,
149
+ "explanation": "DeepAR là thuật toán RNN-based cho time series forecasting, đặc biệt hiệu quả khi có nhiều chuỗi thời gian liên quan (cold-start problem)."
150
+ },
151
+ {
152
+ "id": 12,
153
+ "question": "Multi-Model Endpoint trong SageMaker có ưu điểm gì?",
154
+ "options": [
155
+ "Chỉ hỗ trợ GPU instances",
156
+ "Host nhiều model trên cùng một endpoint, giảm chi phí khi có nhiều model ít traffic",
157
+ "Tăng tốc training",
158
+ "Chỉ support PyTorch"
159
+ ],
160
+ "correct": 1,
161
+ "explanation": "Multi-Model Endpoint cho phép host hàng trăm model trên cùng endpoint, load model on-demand — tiết kiệm chi phí rất lớn so với mỗi model một endpoint riêng."
162
+ },
163
+ {
164
+ "id": 13,
165
+ "question": "Khi nào nên dùng SageMaker Batch Transform thay vì Real-time Endpoint?",
166
+ "options": [
167
+ "Khi cần inference nhanh, real-time",
168
+ "Khi cần xử lý inference cho dataset lớn không cần response ngay lập tức",
169
+ "Khi cần A/B testing",
170
+ "Khi cần model auto-scaling"
171
+ ],
172
+ "correct": 1,
173
+ "explanation": "Batch Transform phù hợp khi cần inference lượng lớn dữ liệu, không cần response real-time — ví dụ: nightly scoring, preprocessing dataset lớn."
174
+ },
175
+ {
176
+ "id": 14,
177
+ "question": "SageMaker Autopilot là gì?",
178
+ "options": [
179
+ "Tool deploy model tự động",
180
+ "AutoML — tự động phân tích data, thử nhiều algorithms, và chọn model tốt nhất",
181
+ "Tool giám sát endpoint",
182
+ "Framework training distributed"
183
+ ],
184
+ "correct": 1,
185
+ "explanation": "SageMaker Autopilot là giải pháp AutoML, tự động phân tích data, feature engineering, thử nhiều algorithms/hyperparameters, và đề xuất model tốt nhất — kèm notebook giải thích."
186
+ },
187
+ {
188
+ "id": 15,
189
+ "question": "Pipe Mode trong SageMaker training có lợi ích gì?",
190
+ "options": [
191
+ "Tăng kích thước model",
192
+ "Stream dữ liệu trực tiếp từ S3 vào training container, không cần download toàn bộ trước",
193
+ "Giảm thời gian deploy",
194
+ "Tự động chọn algorithm"
195
+ ],
196
+ "correct": 1,
197
+ "explanation": "Pipe Mode stream dữ liệu từ S3 vào training container thay vì copy toàn bộ (File Mode) — giảm startup time và disk requirement, đặc biệt hiệu quả với dataset lớn."
198
+ }
199
+ ]
200
+ }