@xdev-asia/xdev-knowledge-mcp 1.0.42 → 1.0.44
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/content/pages/xoa-du-lieu-nguoi-dung.md +68 -0
- package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/01-phan-1-data-engineering/lessons/01-bai-1-data-repositories-ingestion.md +198 -0
- package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/01-phan-1-data-engineering/lessons/02-bai-2-data-transformation.md +183 -0
- package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/01-phan-1-data-engineering/lessons/03-bai-3-data-analysis.md +159 -0
- package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/02-phan-2-modeling/lessons/04-bai-4-sagemaker-built-in-algorithms.md +186 -0
- package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/02-phan-2-modeling/lessons/05-bai-5-training-hyperparameter-tuning.md +159 -0
- package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/02-phan-2-modeling/lessons/06-bai-6-model-evaluation.md +169 -0
- package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/03-phan-3-implementation-operations/lessons/07-bai-7-model-deployment.md +193 -0
- package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/03-phan-3-implementation-operations/lessons/08-bai-8-model-monitoring-mlops.md +184 -0
- package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/03-phan-3-implementation-operations/lessons/09-bai-9-security-cost.md +166 -0
- package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/04-phan-4-on-tap/lessons/10-bai-10-bai-toan-thuong-gap.md +181 -0
- package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/04-phan-4-on-tap/lessons/11-bai-11-cheat-sheet.md +110 -0
- package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/chapters/04-phan-4-on-tap/lessons/12-bai-12-chien-luoc-thi.md +113 -0
- package/content/series/luyen-thi/luyen-thi-aws-ml-specialty/index.md +1 -1
- package/content/series/luyen-thi/luyen-thi-cka/index.md +217 -0
- package/content/series/luyen-thi/luyen-thi-ckad/index.md +199 -0
- package/content/series/luyen-thi/luyen-thi-gcp-ml-engineer/chapters/01-phan-1-problem-framing/lessons/01-bai-1-framing-ml-problems.md +136 -0
- package/content/series/luyen-thi/luyen-thi-gcp-ml-engineer/chapters/01-phan-1-problem-framing/lessons/02-bai-2-gcp-ai-ml-ecosystem.md +160 -0
- package/content/series/luyen-thi/luyen-thi-gcp-ml-engineer/chapters/02-phan-2-data-engineering/lessons/03-bai-3-data-pipeline.md +174 -0
- package/content/series/luyen-thi/luyen-thi-gcp-ml-engineer/chapters/02-phan-2-data-engineering/lessons/04-bai-4-feature-engineering.md +156 -0
- package/content/series/luyen-thi/luyen-thi-gcp-ml-engineer/chapters/03-phan-3-model-development/lessons/05-bai-5-vertex-ai-training.md +155 -0
- package/content/series/luyen-thi/luyen-thi-gcp-ml-engineer/chapters/03-phan-3-model-development/lessons/06-bai-6-bigquery-ml-tensorflow.md +141 -0
- package/content/series/luyen-thi/luyen-thi-gcp-ml-engineer/chapters/04-phan-4-deployment-mlops/lessons/07-bai-7-model-deployment.md +134 -0
- package/content/series/luyen-thi/luyen-thi-gcp-ml-engineer/chapters/04-phan-4-deployment-mlops/lessons/08-bai-8-vertex-ai-pipelines-mlops.md +149 -0
- package/content/series/luyen-thi/luyen-thi-gcp-ml-engineer/chapters/05-phan-5-responsible-ai/lessons/09-bai-9-responsible-ai.md +128 -0
- package/content/series/luyen-thi/luyen-thi-gcp-ml-engineer/chapters/05-phan-5-responsible-ai/lessons/10-bai-10-cheat-sheet-chien-luoc-thi.md +108 -0
- package/content/series/luyen-thi/luyen-thi-gcp-ml-engineer/index.md +1 -1
- package/content/series/luyen-thi/luyen-thi-kcna/index.md +168 -0
- package/data/quizzes/aws-ai-practitioner.json +362 -0
- package/data/quizzes/aws-ml-specialty.json +200 -0
- package/data/quizzes/gcp-ml-engineer.json +200 -0
- package/package.json +1 -1
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: lt-ckad-series-001
|
|
3
|
+
title: "Luyện thi CKAD — Certified Kubernetes Application Developer"
|
|
4
|
+
slug: luyen-thi-ckad
|
|
5
|
+
description: >-
|
|
6
|
+
Lộ trình ôn tập toàn diện cho kỳ thi CKAD (Certified Kubernetes Application Developer).
|
|
7
|
+
Bao phủ đầy đủ 5 domain hands-on: App Environment & Security (25%), App Design & Build (20%),
|
|
8
|
+
App Deployment (20%), Services & Networking (20%), App Observability (15%).
|
|
9
|
+
10 bài học kèm bài tập thực hành terminal.
|
|
10
|
+
|
|
11
|
+
featured_image: null
|
|
12
|
+
level: intermediate
|
|
13
|
+
duration_hours: 28
|
|
14
|
+
lesson_count: 10
|
|
15
|
+
price: '0.00'
|
|
16
|
+
is_free: true
|
|
17
|
+
view_count: 0
|
|
18
|
+
average_rating: '0.00'
|
|
19
|
+
review_count: 0
|
|
20
|
+
enrollment_count: 0
|
|
21
|
+
meta: null
|
|
22
|
+
published_at: '2026-04-05T10:00:00.000000Z'
|
|
23
|
+
created_at: '2026-04-05T10:00:00.000000Z'
|
|
24
|
+
|
|
25
|
+
author:
|
|
26
|
+
id: 019c9616-d2b4-713f-9b2c-40e2e92a05cf
|
|
27
|
+
name: Duy Tran
|
|
28
|
+
avatar: avatars/7e8eb5c6-4cac-455b-a701-4060f085d501.jpeg
|
|
29
|
+
|
|
30
|
+
category:
|
|
31
|
+
id: 019c9616-cat9-7009-a009-000000000009
|
|
32
|
+
name: Luyện thi chứng chỉ
|
|
33
|
+
slug: luyen-thi
|
|
34
|
+
|
|
35
|
+
tags:
|
|
36
|
+
|
|
37
|
+
- name: Kubernetes
|
|
38
|
+
slug: kubernetes
|
|
39
|
+
- name: CKAD
|
|
40
|
+
slug: ckad
|
|
41
|
+
- name: CNCF
|
|
42
|
+
slug: cncf
|
|
43
|
+
- name: Chứng chỉ
|
|
44
|
+
slug: chung-chi
|
|
45
|
+
- name: DevOps
|
|
46
|
+
slug: devops
|
|
47
|
+
- name: Linux Foundation
|
|
48
|
+
slug: linux-foundation
|
|
49
|
+
|
|
50
|
+
quiz_slug: ckad
|
|
51
|
+
|
|
52
|
+
sections:
|
|
53
|
+
|
|
54
|
+
- id: ckad-section-01
|
|
55
|
+
title: "Domain 1: Application Design and Build (20%)"
|
|
56
|
+
description: Multi-container pods, init containers, jobs, CronJobs
|
|
57
|
+
sort_order: 1
|
|
58
|
+
lessons:
|
|
59
|
+
- id: ckad-d1-l01
|
|
60
|
+
title: "Bài 1: Multi-container Pods & Init Containers"
|
|
61
|
+
slug: 01-multi-container-pods
|
|
62
|
+
description: >-
|
|
63
|
+
Sidecar pattern, Ambassador, Adapter patterns.
|
|
64
|
+
Init containers: sequencing, use cases.
|
|
65
|
+
Shared volumes giữa containers. Container ports.
|
|
66
|
+
Ephemeral containers cho debugging.
|
|
67
|
+
duration_minutes: 60
|
|
68
|
+
is_free: true
|
|
69
|
+
sort_order: 1
|
|
70
|
+
video_url: null
|
|
71
|
+
- id: ckad-d1-l02
|
|
72
|
+
title: "Bài 2: Jobs, CronJobs & Resource Management"
|
|
73
|
+
slug: 02-jobs-cronjobs-resources
|
|
74
|
+
description: >-
|
|
75
|
+
Job completions, parallelism, backoffLimit.
|
|
76
|
+
CronJob schedule syntax, concurrencyPolicy.
|
|
77
|
+
Resource requests vs limits. QoS classes: Guaranteed, Burstable, BestEffort.
|
|
78
|
+
LimitRange, ResourceQuota.
|
|
79
|
+
duration_minutes: 55
|
|
80
|
+
is_free: true
|
|
81
|
+
sort_order: 2
|
|
82
|
+
video_url: null
|
|
83
|
+
|
|
84
|
+
- id: ckad-section-02
|
|
85
|
+
title: "Domain 2: Application Deployment (20%)"
|
|
86
|
+
description: Rolling updates, rollbacks, Helm, Kustomize, deployment strategies
|
|
87
|
+
sort_order: 2
|
|
88
|
+
lessons:
|
|
89
|
+
- id: ckad-d2-l01
|
|
90
|
+
title: "Bài 3: Rolling Updates, Rollbacks & Deployment Strategies"
|
|
91
|
+
slug: 03-rolling-updates-rollbacks
|
|
92
|
+
description: >-
|
|
93
|
+
RollingUpdate vs Recreate strategy. maxUnavailable, maxSurge.
|
|
94
|
+
kubectl rollout history/undo/status. Blue-Green deployment.
|
|
95
|
+
Canary deployment với labels. Pause/resume rollouts.
|
|
96
|
+
duration_minutes: 60
|
|
97
|
+
is_free: true
|
|
98
|
+
sort_order: 3
|
|
99
|
+
video_url: null
|
|
100
|
+
- id: ckad-d2-l02
|
|
101
|
+
title: "Bài 4: Helm & Kustomize Basics"
|
|
102
|
+
slug: 04-helm-kustomize
|
|
103
|
+
description: >-
|
|
104
|
+
Helm chart structure: Chart.yaml, values.yaml, templates/.
|
|
105
|
+
helm install/upgrade/rollback. Helm hooks.
|
|
106
|
+
Kustomize: base + overlays, patches, namePrefix.
|
|
107
|
+
kubectl apply -k vs helm install.
|
|
108
|
+
duration_minutes: 55
|
|
109
|
+
is_free: true
|
|
110
|
+
sort_order: 4
|
|
111
|
+
video_url: null
|
|
112
|
+
|
|
113
|
+
- id: ckad-section-03
|
|
114
|
+
title: "Domain 3: Application Observability and Maintenance (15%)"
|
|
115
|
+
description: Probes, logging, monitoring, debugging
|
|
116
|
+
sort_order: 3
|
|
117
|
+
lessons:
|
|
118
|
+
- id: ckad-d3-l01
|
|
119
|
+
title: "Bài 5: Probes, Logging & Debugging"
|
|
120
|
+
slug: 05-probes-logging-debugging
|
|
121
|
+
description: >-
|
|
122
|
+
Liveness, Readiness, Startup probes: httpGet, exec, tcpSocket.
|
|
123
|
+
probe timing: initialDelaySeconds, periodSeconds, failureThreshold.
|
|
124
|
+
kubectl logs, stern. kubectl exec. Debugging crashed containers.
|
|
125
|
+
kubectl top (metrics-server). Events và conditions.
|
|
126
|
+
duration_minutes: 60
|
|
127
|
+
is_free: true
|
|
128
|
+
sort_order: 5
|
|
129
|
+
video_url: null
|
|
130
|
+
|
|
131
|
+
- id: ckad-section-04
|
|
132
|
+
title: "Domain 4: Application Environment, Configuration & Security (25%)"
|
|
133
|
+
description: ConfigMaps, Secrets, SecurityContext, ServiceAccounts, RBAC
|
|
134
|
+
sort_order: 4
|
|
135
|
+
lessons:
|
|
136
|
+
- id: ckad-d4-l01
|
|
137
|
+
title: "Bài 6: ConfigMaps & Secrets"
|
|
138
|
+
slug: 06-configmaps-secrets
|
|
139
|
+
description: >-
|
|
140
|
+
ConfigMap: từ literal, file, env. Inject qua env / envFrom / volume.
|
|
141
|
+
Secret types: Opaque, TLS, dockerconfigjson. Base64 encoding.
|
|
142
|
+
Secrets as volumes vs env vars. External Secrets overview.
|
|
143
|
+
duration_minutes: 55
|
|
144
|
+
is_free: true
|
|
145
|
+
sort_order: 6
|
|
146
|
+
video_url: null
|
|
147
|
+
- id: ckad-d4-l02
|
|
148
|
+
title: "Bài 7: SecurityContext & Pod Security"
|
|
149
|
+
slug: 07-securitycontext-pod-security
|
|
150
|
+
description: >-
|
|
151
|
+
runAsUser, runAsGroup, fsGroup. readOnlyRootFilesystem.
|
|
152
|
+
capabilities: add/drop. allowPrivilegeEscalation.
|
|
153
|
+
Pod Security Standards: Privileged, Baseline, Restricted.
|
|
154
|
+
ServiceAccount: automountServiceAccountToken, projected volumes.
|
|
155
|
+
duration_minutes: 60
|
|
156
|
+
is_free: true
|
|
157
|
+
sort_order: 7
|
|
158
|
+
video_url: null
|
|
159
|
+
- id: ckad-d4-l03
|
|
160
|
+
title: "Bài 8: Resource Requests, Limits & QoS"
|
|
161
|
+
slug: 08-resources-qos
|
|
162
|
+
description: >-
|
|
163
|
+
CPU (millicores) vs Memory (MiB/GiB) units. requests vs limits.
|
|
164
|
+
OOMKilled và CPU throttling. QoS classes chi tiết.
|
|
165
|
+
LimitRange per container/pod. ResourceQuota per namespace.
|
|
166
|
+
Horizontal Pod Autoscaler (HPA) basics.
|
|
167
|
+
duration_minutes: 55
|
|
168
|
+
is_free: true
|
|
169
|
+
sort_order: 8
|
|
170
|
+
video_url: null
|
|
171
|
+
|
|
172
|
+
- id: ckad-section-05
|
|
173
|
+
title: "Domain 5: Services & Networking (20%)"
|
|
174
|
+
description: Services, Ingress, Network Policies
|
|
175
|
+
sort_order: 5
|
|
176
|
+
lessons:
|
|
177
|
+
- id: ckad-d5-l01
|
|
178
|
+
title: "Bài 9: Services & Ingress"
|
|
179
|
+
slug: 09-services-ingress
|
|
180
|
+
description: >-
|
|
181
|
+
ClusterIP, NodePort, LoadBalancer, ExternalName. Headless service.
|
|
182
|
+
port vs targetPort vs nodePort. Ingress rules, path types.
|
|
183
|
+
TLS termination. Ingress class. Service vs Ingress use cases.
|
|
184
|
+
duration_minutes: 60
|
|
185
|
+
is_free: true
|
|
186
|
+
sort_order: 9
|
|
187
|
+
video_url: null
|
|
188
|
+
- id: ckad-d5-l02
|
|
189
|
+
title: "Bài 10: Network Policies & CKAD Exam Strategy"
|
|
190
|
+
slug: 10-networkpolicies-exam-strategy
|
|
191
|
+
description: >-
|
|
192
|
+
NetworkPolicy: podSelector, namespaceSelector, ipBlock.
|
|
193
|
+
Ingress vs Egress rules. Default deny patterns.
|
|
194
|
+
CKAD exam tips: imperative kubectl commands, --dry-run=client,
|
|
195
|
+
time management, bookmarking docs, common task templates.
|
|
196
|
+
duration_minutes: 60
|
|
197
|
+
is_free: true
|
|
198
|
+
sort_order: 10
|
|
199
|
+
video_url: null
|
|
@@ -0,0 +1,136 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: 019c9619-lt03-l01
|
|
3
|
+
title: 'Bài 1: Framing ML Problems — Supervised, Unsupervised, RL'
|
|
4
|
+
slug: bai-1-framing-ml-problems
|
|
5
|
+
description: >-
|
|
6
|
+
Cách xác định bài toán có cần ML không. Chọn đúng loại model.
|
|
7
|
+
Business metrics vs ML metrics. Data availability assessment.
|
|
8
|
+
Google's ML best practices.
|
|
9
|
+
duration_minutes: 50
|
|
10
|
+
is_free: true
|
|
11
|
+
video_url: null
|
|
12
|
+
sort_order: 1
|
|
13
|
+
section_title: "Phần 1: ML Problem Framing & Architecture"
|
|
14
|
+
course:
|
|
15
|
+
id: 019c9619-lt03-7003-c003-lt0300000003
|
|
16
|
+
title: 'Luyện thi Google Cloud Professional Machine Learning Engineer'
|
|
17
|
+
slug: luyen-thi-gcp-ml-engineer
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
<div style="text-align: center; margin: 2rem 0;">
|
|
21
|
+
<img src="/storage/uploads/2026/04/gcp-mle-bai1-problem-framing.png" alt="ML Problem Framing Framework" style="max-width: 800px; width: 100%; border-radius: 12px;" />
|
|
22
|
+
<p><em>ML Problem Framing: xác định bài toán, chọn loại model, và định nghĩa metrics theo chuẩn Google</em></p>
|
|
23
|
+
</div>
|
|
24
|
+
|
|
25
|
+
<h2 id="when-to-use-ml"><strong>1. Khi Nào Cần Dùng ML?</strong></h2>
|
|
26
|
+
|
|
27
|
+
<p>Google ML certification thường hỏi về <strong>problem framing</strong> — tức là xác định xem bài toán có phù hợp để áp dụng ML không, và nếu có thì dùng loại ML nào. Đây là skill quan trọng của một professional ML Engineer.</p>
|
|
28
|
+
|
|
29
|
+
<table>
|
|
30
|
+
<thead><tr><th>Câu hỏi cần đặt ra</th><th>Nếu "Có"</th><th>Nếu "Không"</th></tr></thead>
|
|
31
|
+
<tbody>
|
|
32
|
+
<tr><td>Có pattern phức tạp trong data không?</td><td>ML có thể giúp</td><td>Rules-based logic đủ rồi</td></tr>
|
|
33
|
+
<tr><td>Có đủ data (labels) không?</td><td>Supervised Learning</td><td>Unsupervised hoặc thu thập thêm</td></tr>
|
|
34
|
+
<tr><td>Output có thể định nghĩa rõ ràng không?</td><td>Supervised ML</td><td>Cần clarify với stakeholders</td></tr>
|
|
35
|
+
<tr><td>Bài toán có cần agent tương tác với environment không?</td><td>Reinforcement Learning</td><td>Supervised/Unsupervised</td></tr>
|
|
36
|
+
</tbody>
|
|
37
|
+
</table>
|
|
38
|
+
|
|
39
|
+
<h2 id="ml-types"><strong>2. Các Loại ML và Khi Nào Dùng</strong></h2>
|
|
40
|
+
|
|
41
|
+
<pre><code class="language-text">Problem Framing Decision Tree:
|
|
42
|
+
|
|
43
|
+
Has labeled training data?
|
|
44
|
+
YES → Supervised Learning
|
|
45
|
+
├── Output is category? → Classification
|
|
46
|
+
└── Output is number? → Regression
|
|
47
|
+
|
|
48
|
+
NO → Has examples, no labels?
|
|
49
|
+
YES → Unsupervised Learning
|
|
50
|
+
├── Find groups? → Clustering
|
|
51
|
+
└── Find patterns/anomalies? → Density estimation
|
|
52
|
+
NO → Agent in environment?
|
|
53
|
+
YES → Reinforcement Learning
|
|
54
|
+
NO → Reconsider problem definition
|
|
55
|
+
</code></pre>
|
|
56
|
+
|
|
57
|
+
<table>
|
|
58
|
+
<thead><tr><th>ML Type</th><th>When to Use</th><th>GCP Services</th></tr></thead>
|
|
59
|
+
<tbody>
|
|
60
|
+
<tr><td><strong>Supervised Classification</strong></td><td>Email spam, image labels, churn prediction</td><td>Vertex AI AutoML, BigQuery ML</td></tr>
|
|
61
|
+
<tr><td><strong>Supervised Regression</strong></td><td>Price prediction, demand forecast</td><td>Vertex AI, BigQuery ML (LINEAR_REG, BOOSTED_TREE_REGRESSOR)</td></tr>
|
|
62
|
+
<tr><td><strong>Unsupervised Clustering</strong></td><td>Customer segmentation, topic discovery</td><td>Vertex AI Custom Training (k-means)</td></tr>
|
|
63
|
+
<tr><td><strong>Reinforcement Learning</strong></td><td>Game agents, robotics, ad bidding</td><td>Vertex AI + custom environment</td></tr>
|
|
64
|
+
<tr><td><strong>Self-supervised</strong></td><td>LLMs, foundation models</td><td>Vertex AI Model Garden</td></tr>
|
|
65
|
+
</tbody>
|
|
66
|
+
</table>
|
|
67
|
+
|
|
68
|
+
<h2 id="business-vs-ml-metrics"><strong>3. Business Metrics vs. ML Metrics</strong></h2>
|
|
69
|
+
|
|
70
|
+
<p>Một trong những sai lầm phổ biến là <strong>optimize nhầm metric</strong>. Mục tiêu ML phải align với mục tiêu business.</p>
|
|
71
|
+
|
|
72
|
+
<table>
|
|
73
|
+
<thead><tr><th>Business Goal</th><th>Wrong ML Metric</th><th>Correct ML Metric</th></tr></thead>
|
|
74
|
+
<tbody>
|
|
75
|
+
<tr><td>Giảm thất thoát doanh thu do gian lận</td><td>Accuracy (99%!)</td><td>Recall (bắt được nhiều fraud)</td></tr>
|
|
76
|
+
<tr><td>Lọc email spam mà không làm giảm trải nghiệm người dùng</td><td>Recall</td><td>Precision (ít false positive)</td></tr>
|
|
77
|
+
<tr><td>Dự báo nhu cầu tồn kho</td><td>MSE</td><td>MAPE (scale-independent)</td></tr>
|
|
78
|
+
<tr><td>Ranking sản phẩm trong search</td><td>Accuracy</td><td>NDCG, MRR (ranking metrics)</td></tr>
|
|
79
|
+
</tbody>
|
|
80
|
+
</table>
|
|
81
|
+
|
|
82
|
+
<blockquote>
|
|
83
|
+
<p><strong>Exam tip:</strong> Professional ML Engineer exam thường hỏi "which metric BEST aligns with the business objective". Khi thấy fraud/medical diagnosis → Recall. Khi thấy spam/precision-critical → Precision. Khi thấy class imbalance → F1 hoặc AUC-ROC.</p>
|
|
84
|
+
</blockquote>
|
|
85
|
+
|
|
86
|
+
<h2 id="data-assessment"><strong>4. Data Availability Assessment</strong></h2>
|
|
87
|
+
|
|
88
|
+
<table>
|
|
89
|
+
<thead><tr><th>Data Situation</th><th>ML Approach</th></tr></thead>
|
|
90
|
+
<tbody>
|
|
91
|
+
<tr><td>Nhiều labeled data</td><td>Fully supervised, train from scratch</td></tr>
|
|
92
|
+
<tr><td>Ít labeled data (&lt;1000)</td><td><strong>Transfer Learning</strong> (pre-trained + fine-tune)</td></tr>
|
|
93
|
+
<tr><td>Không có labels</td><td>Unsupervised hoặc thu thập labels (Vertex AI Data Labeling)</td></tr>
|
|
94
|
+
<tr><td>Labels tốn kém</td><td><strong>Active Learning</strong> — label uncertain samples trước</td></tr>
|
|
95
|
+
<tr><td>Dữ liệu không cân bằng</td><td>Oversampling, undersampling, class weights</td></tr>
|
|
96
|
+
</tbody>
|
|
97
|
+
</table>
|
|
98
|
+
|
|
99
|
+
<h2 id="google-ml-practices"><strong>5. Google's ML Best Practices</strong></h2>
|
|
100
|
+
|
|
101
|
+
<ul>
|
|
102
|
+
<li><strong>Start simple</strong>: Bắt đầu với model đơn giản nhất, sau đó phức tạp hóa dần</li>
|
|
103
|
+
<li><strong>Establish baseline</strong>: So sánh với heuristic/rules trước khi dùng ML</li>
|
|
104
|
+
<li><strong>Data quality first</strong>: 80% thời gian ML project là data preparation</li>
|
|
105
|
+
<li><strong>Reproducibility</strong>: Pipeline phải reproducible với cùng data</li>
|
|
106
|
+
<li><strong>Monitor in production</strong>: Model decay theo thời gian — cần continuous monitoring</li>
|
|
107
|
+
</ul>
|
|
108
|
+
|
|
109
|
+
<h2 id="practice"><strong>6. Practice Questions</strong></h2>
|
|
110
|
+
|
|
111
|
+
<p><strong>Q1:</strong> A company wants to identify which of its customers are most likely to cancel their subscription in the next 30 days. They have 3 years of historical customer behavior data with known churn events. Which ML approach should they use?</p>
|
|
112
|
+
<ul>
|
|
113
|
+
<li>A) Unsupervised clustering to find customer groups</li>
|
|
114
|
+
<li>B) Reinforcement learning to optimize retention campaigns</li>
|
|
115
|
+
<li>C) Supervised binary classification with historical churn labels ✓</li>
|
|
116
|
+
<li>D) Anomaly detection to find unusual behavior</li>
|
|
117
|
+
</ul>
|
|
118
|
+
<p><em>Explanation: This is a classic supervised classification problem (churn = yes/no). Historical data with known outcomes (churned/not churned) provides the labels needed. Clustering would not predict individual churn probability. RL is for sequential decision making, not prediction.</em></p>
|
|
119
|
+
|
|
120
|
+
<p><strong>Q2:</strong> A medical imaging ML model achieves 99% accuracy on test data but the business team is unsatisfied. The task is detecting rare cancer cells (1% prevalence). What is the most likely issue?</p>
|
|
121
|
+
<ul>
|
|
122
|
+
<li>A) The model is overfitting to training data</li>
|
|
123
|
+
<li>B) Accuracy is the wrong metric — the model may be predicting "no cancer" for everything ✓</li>
|
|
124
|
+
<li>C) The model needs more training iterations</li>
|
|
125
|
+
<li>D) The test dataset is too small</li>
|
|
126
|
+
</ul>
|
|
127
|
+
<p><em>Explanation: With 1% prevalence, a model always predicting "no cancer" achieves 99% accuracy but has 0% recall — it misses every cancer case. For rare class problems, Recall (sensitivity) is the critical metric, not accuracy.</em></p>
|
|
128
|
+
|
|
129
|
+
<p><strong>Q3:</strong> A startup has 500 labeled product images for a new custom classification task. Which training approach is MOST appropriate?</p>
|
|
130
|
+
<ul>
|
|
131
|
+
<li>A) Train a deep learning CNN from scratch on the 500 images</li>
|
|
132
|
+
<li>B) Use AutoML Tabular on the image metadata</li>
|
|
133
|
+
<li>C) Use Transfer Learning from a pre-trained image model ✓</li>
|
|
134
|
+
<li>D) Apply K-Means clustering since the dataset is too small</li>
|
|
135
|
+
</ul>
|
|
136
|
+
<p><em>Explanation: With only 500 labeled examples, training from scratch would overfit severely. Transfer Learning reuses features from a model pre-trained on millions of images (e.g., ImageNet), requiring far less data to achieve good accuracy on the new task.</em></p>
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: 019c9619-lt03-l02
|
|
3
|
+
title: 'Bài 2: GCP AI/ML Ecosystem Overview'
|
|
4
|
+
slug: bai-2-gcp-ai-ml-ecosystem
|
|
5
|
+
description: >-
|
|
6
|
+
Vertex AI platform tổng quan. AutoML vs Custom Training.
|
|
7
|
+
BigQuery ML. Pre-trained APIs (Vision, NLP, Translation).
|
|
8
|
+
Khi nào dùng service nào — decision tree.
|
|
9
|
+
duration_minutes: 50
|
|
10
|
+
is_free: true
|
|
11
|
+
video_url: null
|
|
12
|
+
sort_order: 2
|
|
13
|
+
section_title: "Phần 1: ML Problem Framing & Architecture"
|
|
14
|
+
course:
|
|
15
|
+
id: 019c9619-lt03-7003-c003-lt0300000003
|
|
16
|
+
title: 'Luyện thi Google Cloud Professional Machine Learning Engineer'
|
|
17
|
+
slug: luyen-thi-gcp-ml-engineer
|
|
18
|
+
---
|
|
19
|
+
|
|
20
|
+
<div style="text-align: center; margin: 2rem 0;">
|
|
21
|
+
<img src="/storage/uploads/2026/04/gcp-mle-bai2-gcp-ecosystem.png" alt="GCP AI/ML Ecosystem" style="max-width: 800px; width: 100%; border-radius: 12px;" />
|
|
22
|
+
<p><em>GCP AI/ML Ecosystem: Vertex AI, AutoML, BigQuery ML, Pre-trained APIs và khi nào dùng cái nào</em></p>
|
|
23
|
+
</div>
|
|
24
|
+
|
|
25
|
+
<h2 id="gcp-ml-landscape"><strong>1. GCP ML Landscape Overview</strong></h2>
|
|
26
|
+
|
|
27
|
+
<pre><code class="language-text">GCP ML Capability Spectrum:
|
|
28
|
+
|
|
29
|
+
LOW CODE ◄────────────────────────────────────► HIGH CONTROL
|
|
30
|
+
│ │ │
|
|
31
|
+
▼ ▼ ▼
|
|
32
|
+
Pre-trained APIs Vertex AI AutoML Custom Training
|
|
33
|
+
(Vision, NLP, (no code needed, (full control,
|
|
34
|
+
Translation) you bring data) you bring code)
|
|
35
|
+
│ │ │
|
|
36
|
+
No ML expertise Some domain ML expertise
|
|
37
|
+
needed expertise required
|
|
38
|
+
|
|
39
|
+
BigQuery ML ────── SQL interface for ML on warehouse data
|
|
40
|
+
</code></pre>
|
|
41
|
+
|
|
42
|
+
<h2 id="vertex-ai"><strong>2. Vertex AI — Unified ML Platform</strong></h2>
|
|
43
|
+
|
|
44
|
+
<p>Vertex AI là nền tảng hợp nhất (unified platform) của GCP cho toàn bộ ML lifecycle. Hiểu rõ các component là bắt buộc cho kỳ thi.</p>
|
|
45
|
+
|
|
46
|
+
<table>
|
|
47
|
+
<thead><tr><th>Component</th><th>Purpose</th></tr></thead>
|
|
48
|
+
<tbody>
|
|
49
|
+
<tr><td><strong>Vertex AI Workbench</strong></td><td>Managed Jupyter notebooks cho data scientists</td></tr>
|
|
50
|
+
<tr><td><strong>Vertex AI Training</strong></td><td>Custom training jobs (CPUs, GPUs, TPUs)</td></tr>
|
|
51
|
+
<tr><td><strong>Vertex AI AutoML</strong></td><td>No-code model training (Tabular, Image, Text, Video)</td></tr>
|
|
52
|
+
<tr><td><strong>Vertex AI Endpoints</strong></td><td>Deploy models cho online prediction</td></tr>
|
|
53
|
+
<tr><td><strong>Vertex AI Batch Prediction</strong></td><td>Asynchronous batch scoring</td></tr>
|
|
54
|
+
<tr><td><strong>Vertex AI Feature Store</strong></td><td>Serve features consistently across training/serving</td></tr>
|
|
55
|
+
<tr><td><strong>Vertex AI Pipelines</strong></td><td>Kubeflow Pipelines-based ML workflow orchestration</td></tr>
|
|
56
|
+
<tr><td><strong>Vertex AI Experiments</strong></td><td>Track runs, compare metrics</td></tr>
|
|
57
|
+
<tr><td><strong>Vertex AI Model Registry</strong></td><td>Version control for models</td></tr>
|
|
58
|
+
<tr><td><strong>Vertex AI Model Monitoring</strong></td><td>Detect feature skew và prediction drift</td></tr>
|
|
59
|
+
</tbody>
|
|
60
|
+
</table>
|
|
61
|
+
|
|
62
|
+
<h2 id="automl-vs-custom"><strong>3. AutoML vs. Custom Training</strong></h2>
|
|
63
|
+
|
|
64
|
+
<table>
|
|
65
|
+
<thead><tr><th>Criteria</th><th>AutoML</th><th>Custom Training</th></tr></thead>
|
|
66
|
+
<tbody>
|
|
67
|
+
<tr><td>ML expertise needed</td><td>Minimal</td><td>Required</td></tr>
|
|
68
|
+
<tr><td>Training time</td><td>Hours (automated)</td><td>Variable (you control)</td></tr>
|
|
69
|
+
<tr><td>Model interpretability</td><td>Limited</td><td>Full control</td></tr>
|
|
70
|
+
<tr><td>Cost</td><td>Higher per model</td><td>Pay per compute used</td></tr>
|
|
71
|
+
<tr><td>Best for</td><td>Quick prototypes, standard tasks</td><td>Custom architectures, research</td></tr>
|
|
72
|
+
<tr><td>Supported data types</td><td>Tabular, Image, Text, Video</td><td>Any (you write the code)</td></tr>
|
|
73
|
+
</tbody>
|
|
74
|
+
</table>
|
|
75
|
+
|
|
76
|
+
<blockquote>
|
|
77
|
+
<p><strong>Exam tip:</strong> Câu hỏi có "team doesn't have ML expertise" hoặc "fastest time to deployment" → AutoML. Câu hỏi có "custom neural architecture" hoặc "full control over training loop" → Custom Training.</p>
|
|
78
|
+
</blockquote>
|
|
79
|
+
|
|
80
|
+
<h2 id="bigquery-ml"><strong>4. BigQuery ML</strong></h2>
|
|
81
|
+
|
|
82
|
+
<p>BigQuery ML cho phép train và serve ML models bằng SQL — không cần export data khỏi BigQuery.</p>
|
|
83
|
+
|
|
84
|
+
<table>
|
|
85
|
+
<thead><tr><th>Model Type</th><th>SQL Keyword</th><th>Use Case</th></tr></thead>
|
|
86
|
+
<tbody>
|
|
87
|
+
<tr><td>Linear Regression</td><td>LINEAR_REG</td><td>Price prediction</td></tr>
|
|
88
|
+
<tr><td>Logistic Regression</td><td>LOGISTIC_REG</td><td>Classification</td></tr>
|
|
89
|
+
<tr><td>K-Means Clustering</td><td>KMEANS</td><td>Customer segmentation</td></tr>
|
|
90
|
+
<tr><td>XGBoost</td><td>BOOSTED_TREE_CLASSIFIER/REGRESSOR</td><td>Tabular classification/regression</td></tr>
|
|
91
|
+
<tr><td>Deep Neural Network</td><td>DNN_CLASSIFIER/DNN_REGRESSOR</td><td>Complex patterns</td></tr>
|
|
92
|
+
<tr><td>Matrix Factorization</td><td>MATRIX_FACTORIZATION</td><td>Recommendations</td></tr>
|
|
93
|
+
<tr><td>Imported TF models</td><td>TENSORFLOW</td><td>Custom TF models</td></tr>
|
|
94
|
+
</tbody>
|
|
95
|
+
</table>
|
|
96
|
+
|
|
97
|
+
<h2 id="pre-trained-apis"><strong>5. Pre-trained AI APIs</strong></h2>
|
|
98
|
+
|
|
99
|
+
<table>
|
|
100
|
+
<thead><tr><th>API</th><th>Capabilities</th><th>Use Case</th></tr></thead>
|
|
101
|
+
<tbody>
|
|
102
|
+
<tr><td><strong>Cloud Vision API</strong></td><td>Labels, OCR, faces, logos, safe search</td><td>Image analysis without training</td></tr>
|
|
103
|
+
<tr><td><strong>Cloud Natural Language API</strong></td><td>Entities, sentiment, syntax, categories</td><td>Text analytics</td></tr>
|
|
104
|
+
<tr><td><strong>Cloud Translation API</strong></td><td>100+ languages</td><td>Multi-language content</td></tr>
|
|
105
|
+
<tr><td><strong>Cloud Speech-to-Text</strong></td><td>Transcription, speaker diarization</td><td>Audio processing</td></tr>
|
|
106
|
+
<tr><td><strong>Cloud Text-to-Speech</strong></td><td>WaveNet voices, SSML</td><td>Voice UI, accessibility</td></tr>
|
|
107
|
+
<tr><td><strong>Document AI</strong></td><td>Form parsing, invoice extraction</td><td>Document automation</td></tr>
|
|
108
|
+
<tr><td><strong>Recommendations AI</strong></td><td>Real-time product recommendations</td><td>E-commerce personalization</td></tr>
|
|
109
|
+
</tbody>
|
|
110
|
+
</table>
|
|
111
|
+
|
|
112
|
+
<h2 id="decision-tree"><strong>6. Service Selection Decision Tree</strong></h2>
|
|
113
|
+
|
|
114
|
+
<pre><code class="language-text">WHICH GCP ML SERVICE?
|
|
115
|
+
|
|
116
|
+
Do you have LABELED DATA?
|
|
117
|
+
│
|
|
118
|
+
├── NO → Pre-trained API sufficient for your task (Vision, NLP)?
|
|
119
|
+
│ YES → Use Pre-trained API
|
|
120
|
+
│ NO → Vertex AI Custom Training (unsupervised)
|
|
121
|
+
│
|
|
122
|
+
└── YES → Is your data already IN BigQuery?
|
|
123
|
+
│
|
|
124
|
+
├── YES → BigQuery ML (SQL-based, fast, no export)
|
|
125
|
+
│
|
|
126
|
+
└── NO → Need rapid prototyping, no ML team?
|
|
127
|
+
│
|
|
128
|
+
├── YES → Vertex AI AutoML
|
|
129
|
+
│
|
|
130
|
+
└── NO → Vertex AI Custom Training
|
|
131
|
+
</code></pre>
|
|
132
|
+
|
|
133
|
+
<h2 id="practice"><strong>7. Practice Questions</strong></h2>
|
|
134
|
+
|
|
135
|
+
<p><strong>Q1:</strong> A data analytics team has petabytes of customer transaction data in BigQuery. They want to build a churn prediction model using their existing SQL skills without data exports. Which approach is BEST?</p>
|
|
136
|
+
<ul>
|
|
137
|
+
<li>A) Export to Cloud Storage, then use Vertex AI Custom Training</li>
|
|
138
|
+
<li>B) Use Cloud Natural Language API</li>
|
|
139
|
+
<li>C) Use BigQuery ML with CREATE MODEL and model_type='LOGISTIC_REG' ✓</li>
|
|
140
|
+
<li>D) Use Vertex AI AutoML Tabular</li>
|
|
141
|
+
</ul>
|
|
142
|
+
<p><em>Explanation: BigQuery ML allows training classification models directly on BigQuery data using SQL, leveraging existing data infrastructure and skills without exporting data. This is the fastest path when data is already in BigQuery.</em></p>
|
|
143
|
+
|
|
144
|
+
<p><strong>Q2:</strong> A small startup needs to add sentiment analysis to customer reviews. They have no ML team and no labeled sentiment data. Which solution requires the LEAST effort?</p>
|
|
145
|
+
<ul>
|
|
146
|
+
<li>A) Vertex AI AutoML Text Sentiment</li>
|
|
147
|
+
<li>B) Train a custom BERT model on Vertex AI</li>
|
|
148
|
+
<li>C) Cloud Natural Language API sentiment analysis ✓</li>
|
|
149
|
+
<li>D) BigQuery ML DNN classifier</li>
|
|
150
|
+
</ul>
|
|
151
|
+
<p><em>Explanation: Cloud Natural Language API is a pre-trained, fully managed service that requires no training data, no ML expertise, and no infrastructure setup. Just call the API. AutoML requires labeled sentiment examples; custom BERT requires significantly more expertise.</em></p>
|
|
152
|
+
|
|
153
|
+
<p><strong>Q3:</strong> Which Vertex AI component should a team use to ensure that feature values used during model training are identical to those served at prediction time?</p>
|
|
154
|
+
<ul>
|
|
155
|
+
<li>A) Vertex AI Experiments</li>
|
|
156
|
+
<li>B) Vertex AI Feature Store ✓</li>
|
|
157
|
+
<li>C) Vertex AI Model Registry</li>
|
|
158
|
+
<li>D) Vertex AI Pipelines</li>
|
|
159
|
+
</ul>
|
|
160
|
+
<p><em>Explanation: Vertex AI Feature Store provides a centralized repository for storing, serving, and sharing ML features. It ensures training-serving consistency by using the same feature definitions and values for both training and online/batch prediction, preventing training-serving skew.</em></p>
|