@xdev-asia/xdev-knowledge-mcp 1.0.52 → 1.0.53
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/data/quizzes/cka.json +319 -0
- package/data/quizzes/ckad.json +318 -0
- package/data/quizzes/gcp-ml-engineer.json +608 -100
- package/data/quizzes/kcna.json +317 -0
- package/data/quizzes.json +49 -4
- package/package.json +1 -1
|
@@ -2,199 +2,707 @@
|
|
|
2
2
|
"id": "gcp-ml-engineer",
|
|
3
3
|
"title": "Google Cloud Professional ML Engineer",
|
|
4
4
|
"slug": "gcp-ml-engineer",
|
|
5
|
-
"description": "
|
|
5
|
+
"description": "Practice exam for Google Cloud Professional Machine Learning Engineer — 50 questions covering all domains",
|
|
6
6
|
"icon": "award",
|
|
7
7
|
"provider": "Google Cloud",
|
|
8
|
-
"level": "
|
|
8
|
+
"level": "Professional",
|
|
9
9
|
"duration_minutes": 120,
|
|
10
10
|
"passing_score": 70,
|
|
11
|
-
"questions_count":
|
|
12
|
-
"tags": [
|
|
13
|
-
"GCP",
|
|
14
|
-
"ML",
|
|
15
|
-
"Vertex AI"
|
|
16
|
-
],
|
|
11
|
+
"questions_count": 50,
|
|
12
|
+
"tags": ["GCP", "ML", "Vertex AI", "BigQuery ML", "MLOps", "TFX"],
|
|
17
13
|
"series_slug": "luyen-thi-gcp-ml-engineer",
|
|
14
|
+
"domains": [
|
|
15
|
+
{
|
|
16
|
+
"name": "Domain 1: ML Problem Framing & Architecture",
|
|
17
|
+
"weight": 20,
|
|
18
|
+
"lessons": [
|
|
19
|
+
{ "title": "Bài 1: Framing ML Problems — Supervised, Unsupervised, RL", "slug": "bai-1-framing-ml-problems" },
|
|
20
|
+
{ "title": "Bài 2: GCP AI/ML Ecosystem Overview", "slug": "bai-2-gcp-ai-ml-ecosystem" }
|
|
21
|
+
]
|
|
22
|
+
},
|
|
23
|
+
{
|
|
24
|
+
"name": "Domain 2: Data Engineering & Feature Engineering",
|
|
25
|
+
"weight": 20,
|
|
26
|
+
"lessons": [
|
|
27
|
+
{ "title": "Bài 3: Data Pipeline — Dataflow, Pub/Sub, Dataproc", "slug": "bai-3-data-pipeline" },
|
|
28
|
+
{ "title": "Bài 4: Feature Engineering & Vertex AI Feature Store", "slug": "bai-4-feature-engineering" }
|
|
29
|
+
]
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"name": "Domain 3: Model Development on Vertex AI",
|
|
33
|
+
"weight": 20,
|
|
34
|
+
"lessons": [
|
|
35
|
+
{ "title": "Bài 5: Vertex AI Training — Custom & AutoML", "slug": "bai-5-vertex-ai-training" },
|
|
36
|
+
{ "title": "Bài 6: BigQuery ML & TensorFlow on GCP", "slug": "bai-6-bigquery-ml-tensorflow" }
|
|
37
|
+
]
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"name": "Domain 4: Model Deployment & MLOps",
|
|
41
|
+
"weight": 20,
|
|
42
|
+
"lessons": [
|
|
43
|
+
{ "title": "Bài 7: Model Deployment & Prediction", "slug": "bai-7-model-deployment" },
|
|
44
|
+
{ "title": "Bài 8: Vertex AI Pipelines & MLOps", "slug": "bai-8-vertex-ai-pipelines-mlops" }
|
|
45
|
+
]
|
|
46
|
+
},
|
|
47
|
+
{
|
|
48
|
+
"name": "Domain 5: Responsible AI, Security & Exam Strategy",
|
|
49
|
+
"weight": 20,
|
|
50
|
+
"lessons": [
|
|
51
|
+
{ "title": "Bài 9: Responsible AI & Security", "slug": "bai-9-responsible-ai" },
|
|
52
|
+
{ "title": "Bài 10: Cheat Sheet & Chiến lược thi", "slug": "bai-10-cheat-sheet-chien-luoc-thi" }
|
|
53
|
+
]
|
|
54
|
+
}
|
|
55
|
+
],
|
|
18
56
|
"questions": [
|
|
19
57
|
{
|
|
20
58
|
"id": 1,
|
|
21
|
-
"
|
|
59
|
+
"domain": "Domain 1: ML Problem Framing & Architecture",
|
|
60
|
+
"question": "A retail company wants to predict which customers will churn in the next 30 days. They have 2 years of historical data with labels. Which ML approach is MOST appropriate?",
|
|
22
61
|
"options": [
|
|
23
|
-
"
|
|
24
|
-
"
|
|
25
|
-
"
|
|
26
|
-
"
|
|
62
|
+
"Unsupervised clustering to group similar customers",
|
|
63
|
+
"Supervised binary classification using historical churn labels",
|
|
64
|
+
"Reinforcement learning to learn optimal retention actions",
|
|
65
|
+
"Anomaly detection on transaction patterns"
|
|
27
66
|
],
|
|
28
67
|
"correct": 1,
|
|
29
|
-
"explanation": "
|
|
68
|
+
"explanation": "This is a classic supervised binary classification problem — predicting a binary outcome (churn/no-churn) using labeled historical data. The target variable is clearly defined and data is available."
|
|
30
69
|
},
|
|
31
70
|
{
|
|
32
71
|
"id": 2,
|
|
33
|
-
"
|
|
72
|
+
"domain": "Domain 1: ML Problem Framing & Architecture",
|
|
73
|
+
"question": "A team wants to build a product recommendation system but has NO historical interaction data. Which approach should they start with?",
|
|
34
74
|
"options": [
|
|
35
|
-
"
|
|
36
|
-
"
|
|
37
|
-
"
|
|
38
|
-
"
|
|
75
|
+
"Collaborative filtering",
|
|
76
|
+
"Content-based filtering using product attributes",
|
|
77
|
+
"Matrix factorization",
|
|
78
|
+
"Deep neural collaborative filtering"
|
|
39
79
|
],
|
|
40
80
|
"correct": 1,
|
|
41
|
-
"explanation": "
|
|
81
|
+
"explanation": "Without user-item interaction data (cold start problem), collaborative filtering won't work. Content-based filtering uses product attributes (category, description, price) to recommend similar items — it doesn't require interaction history."
|
|
42
82
|
},
|
|
43
83
|
{
|
|
44
84
|
"id": 3,
|
|
45
|
-
"
|
|
85
|
+
"domain": "Domain 1: ML Problem Framing & Architecture",
|
|
86
|
+
"question": "Which GCP service should you use for a simple image classification task when you have limited ML expertise and a small labeled dataset?",
|
|
46
87
|
"options": [
|
|
47
|
-
"
|
|
48
|
-
"
|
|
49
|
-
"
|
|
50
|
-
"
|
|
88
|
+
"Vertex AI Custom Training with a custom TensorFlow model",
|
|
89
|
+
"Vertex AI AutoML Vision",
|
|
90
|
+
"Cloud Vision API (pre-trained)",
|
|
91
|
+
"BigQuery ML"
|
|
51
92
|
],
|
|
52
93
|
"correct": 1,
|
|
53
|
-
"explanation": "
|
|
94
|
+
"explanation": "AutoML Vision is ideal for custom image classification when you have labeled data but limited ML expertise. Cloud Vision API only supports pre-defined labels. Custom training requires significant ML knowledge."
|
|
54
95
|
},
|
|
55
96
|
{
|
|
56
97
|
"id": 4,
|
|
57
|
-
"
|
|
98
|
+
"domain": "Domain 1: ML Problem Framing & Architecture",
|
|
99
|
+
"question": "A company needs to extract text from scanned documents and receipts. Which GCP service is MOST appropriate?",
|
|
58
100
|
"options": [
|
|
59
|
-
"
|
|
60
|
-
"
|
|
61
|
-
"
|
|
62
|
-
"
|
|
101
|
+
"Cloud Natural Language API",
|
|
102
|
+
"Document AI",
|
|
103
|
+
"Cloud Vision API OCR only",
|
|
104
|
+
"Vertex AI AutoML Text"
|
|
63
105
|
],
|
|
64
106
|
"correct": 1,
|
|
65
|
-
"explanation": "
|
|
107
|
+
"explanation": "Document AI is purpose-built for extracting structured data from documents (invoices, receipts, forms). It goes beyond simple OCR by understanding document structure, key-value pairs, and tables."
|
|
66
108
|
},
|
|
67
109
|
{
|
|
68
110
|
"id": 5,
|
|
69
|
-
"
|
|
111
|
+
"domain": "Domain 1: ML Problem Framing & Architecture",
|
|
112
|
+
"question": "When framing an ML problem, which metric is MOST important to align with stakeholders FIRST?",
|
|
70
113
|
"options": [
|
|
71
|
-
"
|
|
72
|
-
"
|
|
73
|
-
"
|
|
74
|
-
"
|
|
114
|
+
"Model accuracy on test set",
|
|
115
|
+
"AUC-ROC curve",
|
|
116
|
+
"Business KPI that the model is expected to improve",
|
|
117
|
+
"F1 score"
|
|
75
118
|
],
|
|
76
|
-
"correct":
|
|
77
|
-
"explanation": "
|
|
119
|
+
"correct": 2,
|
|
120
|
+
"explanation": "The first step in framing is defining how ML success maps to business outcomes. ML metrics (accuracy, AUC) are proxies — stakeholders care about business KPIs (revenue, cost reduction, customer satisfaction)."
|
|
78
121
|
},
|
|
79
122
|
{
|
|
80
123
|
"id": 6,
|
|
81
|
-
"
|
|
124
|
+
"domain": "Domain 1: ML Problem Framing & Architecture",
|
|
125
|
+
"question": "A fraud detection system needs to catch 99% of fraud even if some legitimate transactions are flagged. Which metric should be optimized?",
|
|
82
126
|
"options": [
|
|
83
|
-
"
|
|
84
|
-
"
|
|
85
|
-
"
|
|
86
|
-
"
|
|
127
|
+
"Precision",
|
|
128
|
+
"Recall (Sensitivity)",
|
|
129
|
+
"Accuracy",
|
|
130
|
+
"Specificity"
|
|
87
131
|
],
|
|
88
132
|
"correct": 1,
|
|
89
|
-
"explanation": "
|
|
133
|
+
"explanation": "Recall = TP/(TP+FN) measures the fraction of actual positives correctly identified. 99% recall means catching 99% of fraud. Precision would prioritize minimizing false alarms instead."
|
|
90
134
|
},
|
|
91
135
|
{
|
|
92
136
|
"id": 7,
|
|
93
|
-
"
|
|
137
|
+
"domain": "Domain 1: ML Problem Framing & Architecture",
|
|
138
|
+
"question": "Which GCP service decision is correct?",
|
|
94
139
|
"options": [
|
|
95
|
-
"
|
|
96
|
-
"
|
|
97
|
-
"
|
|
98
|
-
"
|
|
140
|
+
"Use BigQuery ML when you need custom PyTorch architectures",
|
|
141
|
+
"Use Vertex AI Custom Training when you need full control over training code, framework, and infrastructure",
|
|
142
|
+
"Use AutoML when you need to implement a custom loss function",
|
|
143
|
+
"Use Cloud Vision API when you need to classify images into custom categories"
|
|
99
144
|
],
|
|
100
145
|
"correct": 1,
|
|
101
|
-
"explanation": "
|
|
146
|
+
"explanation": "Vertex AI Custom Training gives full control: any framework (TF, PyTorch, XGBoost), custom code, custom containers, distributed training, GPU/TPU selection. AutoML and BigQuery ML have constraints on customization."
|
|
102
147
|
},
|
|
103
148
|
{
|
|
104
149
|
"id": 8,
|
|
105
|
-
"
|
|
150
|
+
"domain": "Domain 1: ML Problem Framing & Architecture",
|
|
151
|
+
"question": "A time series forecasting model needs to predict daily sales for 1,000 products. Which GCP approach is MOST scalable?",
|
|
106
152
|
"options": [
|
|
107
|
-
"
|
|
108
|
-
"
|
|
109
|
-
"
|
|
110
|
-
"
|
|
153
|
+
"Train 1,000 individual ARIMA models in Cloud Functions",
|
|
154
|
+
"Use Vertex AI Forecasting (AutoML) which handles multiple time series natively",
|
|
155
|
+
"Use a single linear regression model for all products",
|
|
156
|
+
"Use BigQuery ML's ARIMA_PLUS with a single query"
|
|
111
157
|
],
|
|
112
158
|
"correct": 1,
|
|
113
|
-
"explanation": "Vertex AI
|
|
159
|
+
"explanation": "Vertex AI Forecasting (AutoML) is designed for large-scale time series: it handles thousands of series, automatically selects algorithms, and manages training/serving. BigQuery ML ARIMA_PLUS is also viable but AutoML handles more complexity."
|
|
114
160
|
},
|
|
115
161
|
{
|
|
116
162
|
"id": 9,
|
|
117
|
-
"
|
|
163
|
+
"domain": "Domain 1: ML Problem Framing & Architecture",
|
|
164
|
+
"question": "A model achieves 99.5% accuracy on a fraud dataset where only 0.5% of transactions are fraud. What is the problem?",
|
|
118
165
|
"options": [
|
|
119
|
-
"
|
|
120
|
-
"
|
|
121
|
-
"
|
|
122
|
-
"
|
|
166
|
+
"The model is overfitting",
|
|
167
|
+
"The high accuracy is misleading — the model may just predict 'not fraud' for everything (class imbalance)",
|
|
168
|
+
"The model needs more training data",
|
|
169
|
+
"The learning rate is too high"
|
|
123
170
|
],
|
|
124
171
|
"correct": 1,
|
|
125
|
-
"explanation": "
|
|
172
|
+
"explanation": "With 99.5% negative class, a model that always predicts 'not fraud' gets 99.5% accuracy. This is the class imbalance problem. Use precision, recall, F1, AUC-PR instead of accuracy for imbalanced datasets."
|
|
126
173
|
},
|
|
127
174
|
{
|
|
128
175
|
"id": 10,
|
|
129
|
-
"
|
|
176
|
+
"domain": "Domain 1: ML Problem Framing & Architecture",
|
|
177
|
+
"question": "Which is NOT a valid reason to choose ML over a rule-based system?",
|
|
130
178
|
"options": [
|
|
131
|
-
"
|
|
132
|
-
"
|
|
133
|
-
"
|
|
134
|
-
"
|
|
179
|
+
"The problem involves complex patterns that are hard to specify manually",
|
|
180
|
+
"The relationships in data change over time requiring adaptation",
|
|
181
|
+
"A simple if-else logic with 5 rules can solve the problem with 99% accuracy",
|
|
182
|
+
"The input data is unstructured (images, text, audio)"
|
|
135
183
|
],
|
|
136
|
-
"correct":
|
|
137
|
-
"explanation": "
|
|
184
|
+
"correct": 2,
|
|
185
|
+
"explanation": "If simple rules achieve 99% accuracy, ML adds unnecessary complexity. ML should be chosen when rules are too complex, patterns evolve over time, or data is unstructured — not when simple heuristics already work."
|
|
138
186
|
},
|
|
139
187
|
{
|
|
140
188
|
"id": 11,
|
|
141
|
-
"
|
|
189
|
+
"domain": "Domain 2: Data Engineering & Feature Engineering",
|
|
190
|
+
"question": "You need to build a real-time feature engineering pipeline that processes streaming events and writes to Feature Store. Which GCP architecture is correct?",
|
|
142
191
|
"options": [
|
|
143
|
-
"
|
|
144
|
-
"
|
|
145
|
-
"
|
|
146
|
-
"
|
|
192
|
+
"Cloud Storage → Dataproc Batch → Feature Store",
|
|
193
|
+
"Pub/Sub → Dataflow Streaming → Vertex AI Feature Store",
|
|
194
|
+
"Cloud Functions → BigQuery → Feature Store",
|
|
195
|
+
"Pub/Sub → Cloud Composer → Feature Store"
|
|
147
196
|
],
|
|
148
197
|
"correct": 1,
|
|
149
|
-
"explanation": "
|
|
198
|
+
"explanation": "Pub/Sub ingests streaming events, Dataflow (Apache Beam) processes them in real-time for feature computation, and writes to Vertex AI Feature Store for online serving with low latency."
|
|
150
199
|
},
|
|
151
200
|
{
|
|
152
201
|
"id": 12,
|
|
153
|
-
"
|
|
202
|
+
"domain": "Domain 2: Data Engineering & Feature Engineering",
|
|
203
|
+
"question": "What is the PRIMARY advantage of Vertex AI Feature Store over storing features in a regular database?",
|
|
154
204
|
"options": [
|
|
155
|
-
"
|
|
156
|
-
"
|
|
157
|
-
"
|
|
158
|
-
"
|
|
205
|
+
"Lower storage cost",
|
|
206
|
+
"Training-serving consistency — same features used during training are served in production with low-latency online serving",
|
|
207
|
+
"Better SQL query performance",
|
|
208
|
+
"Automatic model training"
|
|
159
209
|
],
|
|
160
210
|
"correct": 1,
|
|
161
|
-
"explanation": "
|
|
211
|
+
"explanation": "Feature Store ensures training-serving consistency: features computed for training are exactly the same as those served online. It provides both batch (offline) and online serving modes with feature monitoring."
|
|
162
212
|
},
|
|
163
213
|
{
|
|
164
214
|
"id": 13,
|
|
165
|
-
"
|
|
215
|
+
"domain": "Domain 2: Data Engineering & Feature Engineering",
|
|
216
|
+
"question": "When should you use Dataflow over Dataproc for data processing?",
|
|
166
217
|
"options": [
|
|
167
|
-
"
|
|
168
|
-
"
|
|
169
|
-
"
|
|
170
|
-
"
|
|
218
|
+
"When you need to run existing Spark/Hadoop jobs",
|
|
219
|
+
"When you need a serverless, auto-scaling pipeline for both batch and streaming with Apache Beam",
|
|
220
|
+
"When you need to use PySpark",
|
|
221
|
+
"When you already have a Hadoop cluster"
|
|
171
222
|
],
|
|
172
223
|
"correct": 1,
|
|
173
|
-
"explanation": "
|
|
224
|
+
"explanation": "Dataflow is serverless (no cluster management), auto-scales, and uses Apache Beam for unified batch/streaming. Dataproc is for migrating existing Spark/Hadoop workloads that require a managed cluster."
|
|
174
225
|
},
|
|
175
226
|
{
|
|
176
227
|
"id": 14,
|
|
177
|
-
"
|
|
228
|
+
"domain": "Domain 2: Data Engineering & Feature Engineering",
|
|
229
|
+
"question": "A dataset has a categorical feature 'city' with 10,000 unique values. What is the BEST encoding strategy for a deep learning model?",
|
|
178
230
|
"options": [
|
|
179
|
-
"
|
|
180
|
-
"
|
|
181
|
-
"
|
|
182
|
-
"
|
|
231
|
+
"One-hot encoding (creates 10,000 sparse columns)",
|
|
232
|
+
"Embedding layer that learns dense vector representations",
|
|
233
|
+
"Label encoding (assign integer 0-9999)",
|
|
234
|
+
"Remove the feature entirely"
|
|
183
235
|
],
|
|
184
236
|
"correct": 1,
|
|
185
|
-
"explanation": "
|
|
237
|
+
"explanation": "High-cardinality categorical features should use embeddings in deep learning — the model learns a dense, low-dimensional representation. One-hot creates extremely sparse, high-dimensional input. Label encoding implies false ordinal relationships."
|
|
186
238
|
},
|
|
187
239
|
{
|
|
188
240
|
"id": 15,
|
|
189
|
-
"
|
|
241
|
+
"domain": "Domain 2: Data Engineering & Feature Engineering",
|
|
242
|
+
"question": "You need to compute aggregate features (e.g., average order value per customer over 30 days) for training. Which tool is MOST efficient?",
|
|
243
|
+
"options": [
|
|
244
|
+
"Vertex AI Workbench with pandas",
|
|
245
|
+
"BigQuery SQL window functions, then export to Feature Store",
|
|
246
|
+
"Cloud Functions processing individual records",
|
|
247
|
+
"Dataproc with MapReduce"
|
|
248
|
+
],
|
|
249
|
+
"correct": 1,
|
|
250
|
+
"explanation": "BigQuery excels at large-scale aggregate computations using SQL window/analytic functions. Results can be exported to Feature Store for serving. More efficient than processing row-by-row in notebooks."
|
|
251
|
+
},
|
|
252
|
+
{
|
|
253
|
+
"id": 16,
|
|
254
|
+
"domain": "Domain 2: Data Engineering & Feature Engineering",
|
|
255
|
+
"question": "What does the TFX Transform component do?",
|
|
256
|
+
"options": [
|
|
257
|
+
"Trains the model",
|
|
258
|
+
"Applies feature transformations consistently during training AND serving using a saved transform graph",
|
|
259
|
+
"Validates the input data schema",
|
|
260
|
+
"Deploys the model to an endpoint"
|
|
261
|
+
],
|
|
262
|
+
"correct": 1,
|
|
263
|
+
"explanation": "TFX Transform uses tf.Transform to create a transform graph that is applied both during training and serving — preventing training-serving skew in feature engineering (normalization, bucketization, vocabulary mapping, etc.)."
|
|
264
|
+
},
|
|
265
|
+
{
|
|
266
|
+
"id": 17,
|
|
267
|
+
"domain": "Domain 2: Data Engineering & Feature Engineering",
|
|
268
|
+
"question": "How should you handle missing values in a feature used for a gradient boosted tree model (XGBoost)?",
|
|
269
|
+
"options": [
|
|
270
|
+
"Always impute with mean",
|
|
271
|
+
"Always drop rows",
|
|
272
|
+
"XGBoost handles missing values natively — it learns the best direction for missing values at each split",
|
|
273
|
+
"Replace with -999 to signal missingness"
|
|
274
|
+
],
|
|
275
|
+
"correct": 2,
|
|
276
|
+
"explanation": "XGBoost natively handles missing values by learning the optimal split direction for missing entries during training. Forcing imputation may actually reduce performance. This is a key advantage of tree-based models."
|
|
277
|
+
},
|
|
278
|
+
{
|
|
279
|
+
"id": 18,
|
|
280
|
+
"domain": "Domain 2: Data Engineering & Feature Engineering",
|
|
281
|
+
"question": "Pub/Sub guarantees at-least-once delivery. How does this affect an ML data pipeline?",
|
|
282
|
+
"options": [
|
|
283
|
+
"It has no impact on ML pipelines",
|
|
284
|
+
"Dataflow must handle duplicate messages to avoid counting features incorrectly",
|
|
285
|
+
"It guarantees exactly-once processing automatically",
|
|
286
|
+
"It means messages can be lost"
|
|
287
|
+
],
|
|
288
|
+
"correct": 1,
|
|
289
|
+
"explanation": "At-least-once delivery means messages can be delivered multiple times. Without deduplication in Dataflow (using unique message IDs or idempotent operations), features like counts or sums can be inflated by duplicate processing."
|
|
290
|
+
},
|
|
291
|
+
{
|
|
292
|
+
"id": 19,
|
|
293
|
+
"domain": "Domain 2: Data Engineering & Feature Engineering",
|
|
294
|
+
"question": "A feature has a right-skewed distribution (e.g., income). Which transformation is MOST appropriate before using it in a linear model?",
|
|
295
|
+
"options": [
|
|
296
|
+
"Min-max scaling to [0, 1]",
|
|
297
|
+
"Log transformation to reduce skewness, then standardization",
|
|
298
|
+
"One-hot encoding",
|
|
299
|
+
"No transformation needed"
|
|
300
|
+
],
|
|
301
|
+
"correct": 1,
|
|
302
|
+
"explanation": "Right-skewed features benefit from log transformation (or Box-Cox) to make the distribution more normal-like, improving linear model performance. Standardization after log-transform ensures zero mean and unit variance."
|
|
303
|
+
},
|
|
304
|
+
{
|
|
305
|
+
"id": 20,
|
|
306
|
+
"domain": "Domain 2: Data Engineering & Feature Engineering",
|
|
307
|
+
"question": "Which Cloud Composer (Airflow) role is appropriate in an ML pipeline?",
|
|
308
|
+
"options": [
|
|
309
|
+
"Training the model directly",
|
|
310
|
+
"Orchestrating the end-to-end workflow: data ingestion → preprocessing → training → evaluation → deployment",
|
|
311
|
+
"Serving real-time predictions",
|
|
312
|
+
"Storing features"
|
|
313
|
+
],
|
|
314
|
+
"correct": 1,
|
|
315
|
+
"explanation": "Cloud Composer (managed Apache Airflow) orchestrates ML pipeline tasks: scheduling data extraction, triggering Dataflow jobs, launching Vertex AI training, evaluating results, and conditional deployment — but doesn't execute ML compute itself."
|
|
316
|
+
},
|
|
317
|
+
{
|
|
318
|
+
"id": 21,
|
|
319
|
+
"domain": "Domain 3: Model Development on Vertex AI",
|
|
320
|
+
"question": "You need to train a custom PyTorch model on Vertex AI with 4 GPUs. Which approach is correct?",
|
|
321
|
+
"options": [
|
|
322
|
+
"Use AutoML with a custom container",
|
|
323
|
+
"Use Vertex AI Custom Training Job with a pre-built PyTorch container and specify 4 GPUs in machine config",
|
|
324
|
+
"Use BigQuery ML with PyTorch",
|
|
325
|
+
"Use Cloud Functions with GPU support"
|
|
326
|
+
],
|
|
327
|
+
"correct": 1,
|
|
328
|
+
"explanation": "Vertex AI Custom Training supports pre-built containers for PyTorch (and TensorFlow, XGBoost, sklearn). You specify GPU type and count in the worker pool config. Distributed training is also supported with multiple workers."
|
|
329
|
+
},
|
|
330
|
+
{
|
|
331
|
+
"id": 22,
|
|
332
|
+
"domain": "Domain 3: Model Development on Vertex AI",
|
|
333
|
+
"question": "When should you use Vertex AI AutoML vs Custom Training?",
|
|
334
|
+
"options": [
|
|
335
|
+
"AutoML when you need to implement a custom loss function",
|
|
336
|
+
"Custom Training when you just need a quick baseline with minimal ML knowledge",
|
|
337
|
+
"AutoML when you have tabular/image/text data and want a strong model with minimal code",
|
|
338
|
+
"AutoML for any production model"
|
|
339
|
+
],
|
|
340
|
+
"correct": 2,
|
|
341
|
+
"explanation": "AutoML excels when: data fits standard types (tabular, image, text, video), you want a strong baseline fast, or the team has limited ML expertise. Custom Training when: custom architecture, custom loss/metrics, specific frameworks, or research-level requirements."
|
|
342
|
+
},
|
|
343
|
+
{
|
|
344
|
+
"id": 23,
|
|
345
|
+
"domain": "Domain 3: Model Development on Vertex AI",
|
|
346
|
+
"question": "Which BigQuery ML statement creates and trains a logistic regression model?",
|
|
347
|
+
"options": [
|
|
348
|
+
"CREATE MODEL dataset.model OPTIONS(model_type='logistic_reg') AS SELECT ...",
|
|
349
|
+
"TRAIN MODEL dataset.model USING logistic_regression SELECT ...",
|
|
350
|
+
"CREATE ML_MODEL dataset.model AS LOGISTIC_REGRESSION SELECT ...",
|
|
351
|
+
"BUILD MODEL dataset.model OPTIONS(type='classification') AS SELECT ..."
|
|
352
|
+
],
|
|
353
|
+
"correct": 0,
|
|
354
|
+
"explanation": "BigQuery ML uses CREATE MODEL with OPTIONS to specify model type. 'CREATE MODEL my_dataset.my_model OPTIONS(model_type='logistic_reg') AS SELECT features, label FROM ...' trains the model using SQL."
|
|
355
|
+
},
|
|
356
|
+
{
|
|
357
|
+
"id": 24,
|
|
358
|
+
"domain": "Domain 3: Model Development on Vertex AI",
|
|
359
|
+
"question": "What is the purpose of Vertex AI Hyperparameter Tuning?",
|
|
360
|
+
"options": [
|
|
361
|
+
"To automatically select the best features",
|
|
362
|
+
"To systematically search for the best hyperparameters (learning rate, batch size, etc.) using Bayesian optimization or grid search",
|
|
363
|
+
"To clean the training data",
|
|
364
|
+
"To deploy multiple model versions"
|
|
365
|
+
],
|
|
366
|
+
"correct": 1,
|
|
367
|
+
"explanation": "Vertex AI Hyperparameter Tuning uses Vizier (Google's black-box optimization service) to search for optimal hyperparameters. It supports Bayesian optimization, grid search, and random search strategies."
|
|
368
|
+
},
|
|
369
|
+
{
|
|
370
|
+
"id": 25,
|
|
371
|
+
"domain": "Domain 3: Model Development on Vertex AI",
|
|
372
|
+
"question": "A model has high training accuracy but low test accuracy. What does this indicate?",
|
|
373
|
+
"options": [
|
|
374
|
+
"Underfitting",
|
|
375
|
+
"Overfitting — the model memorized training data but doesn't generalize",
|
|
376
|
+
"The model needs more features",
|
|
377
|
+
"The learning rate is too low"
|
|
378
|
+
],
|
|
379
|
+
"correct": 1,
|
|
380
|
+
"explanation": "High train / low test performance = overfitting. Solutions: more training data, regularization (L1/L2, dropout), simpler model, early stopping, cross-validation, or data augmentation."
|
|
381
|
+
},
|
|
382
|
+
{
|
|
383
|
+
"id": 26,
|
|
384
|
+
"domain": "Domain 3: Model Development on Vertex AI",
|
|
385
|
+
"question": "Which technique is MOST effective for improving model performance on a small image dataset (<1,000 images)?",
|
|
386
|
+
"options": [
|
|
387
|
+
"Training a very deep custom CNN from scratch",
|
|
388
|
+
"Transfer learning — fine-tuning a pre-trained model (e.g., ResNet, EfficientNet) on your dataset",
|
|
389
|
+
"Using simpler models like logistic regression on raw pixels",
|
|
390
|
+
"Increasing the learning rate"
|
|
391
|
+
],
|
|
392
|
+
"correct": 1,
|
|
393
|
+
"explanation": "Transfer learning leverages knowledge from models pre-trained on large datasets (ImageNet). Fine-tuning the last few layers on your small dataset typically achieves much better results than training from scratch."
|
|
394
|
+
},
|
|
395
|
+
{
|
|
396
|
+
"id": 27,
|
|
397
|
+
"domain": "Domain 3: Model Development on Vertex AI",
|
|
398
|
+
"question": "When training on Vertex AI, what is the difference between pre-built containers and custom containers?",
|
|
399
|
+
"options": [
|
|
400
|
+
"Pre-built containers are faster to train",
|
|
401
|
+
"Pre-built containers include common frameworks (TF, PyTorch, XGBoost, sklearn); custom containers let you install any dependencies and use any framework",
|
|
402
|
+
"Custom containers can only be used with AutoML",
|
|
403
|
+
"There is no difference"
|
|
404
|
+
],
|
|
405
|
+
"correct": 1,
|
|
406
|
+
"explanation": "Pre-built containers are maintained by Google with popular ML frameworks pre-installed. Custom containers allow you to define your own Docker image with any dependencies, frameworks, or custom code."
|
|
407
|
+
},
|
|
408
|
+
{
|
|
409
|
+
"id": 28,
|
|
410
|
+
"domain": "Domain 3: Model Development on Vertex AI",
|
|
411
|
+
"question": "Which BigQuery ML model type should you use for customer segmentation WITHOUT labels?",
|
|
412
|
+
"options": [
|
|
413
|
+
"logistic_reg",
|
|
414
|
+
"kmeans",
|
|
415
|
+
"linear_reg",
|
|
416
|
+
"boosted_tree_classifier"
|
|
417
|
+
],
|
|
418
|
+
"correct": 1,
|
|
419
|
+
"explanation": "Customer segmentation without labels is an unsupervised learning problem. K-means clustering (model_type='kmeans') groups similar customers into clusters based on feature similarity."
|
|
420
|
+
},
|
|
421
|
+
{
|
|
422
|
+
"id": 29,
|
|
423
|
+
"domain": "Domain 3: Model Development on Vertex AI",
|
|
424
|
+
"question": "Which regularization technique randomly disables neurons during training to prevent co-adaptation?",
|
|
425
|
+
"options": [
|
|
426
|
+
"L1 regularization (Lasso)",
|
|
427
|
+
"L2 regularization (Ridge)",
|
|
428
|
+
"Dropout",
|
|
429
|
+
"Batch normalization"
|
|
430
|
+
],
|
|
431
|
+
"correct": 2,
|
|
432
|
+
"explanation": "Dropout randomly sets a fraction of neurons to zero during each training step, forcing the network to learn redundant representations. This prevents neurons from co-adapting and reduces overfitting."
|
|
433
|
+
},
|
|
434
|
+
{
|
|
435
|
+
"id": 30,
|
|
436
|
+
"domain": "Domain 3: Model Development on Vertex AI",
|
|
437
|
+
"question": "You want to track and compare multiple training experiments on Vertex AI. Which feature should you use?",
|
|
438
|
+
"options": [
|
|
439
|
+
"Vertex AI Endpoints",
|
|
440
|
+
"Vertex AI Experiments with Vertex AI TensorBoard",
|
|
441
|
+
"Vertex AI Feature Store",
|
|
442
|
+
"Vertex AI Model Registry only"
|
|
443
|
+
],
|
|
444
|
+
"correct": 1,
|
|
445
|
+
"explanation": "Vertex AI Experiments logs hyperparameters, metrics, and artifacts for each run. Combined with Vertex AI TensorBoard, you can visualize training curves and compare runs side by side."
|
|
446
|
+
},
|
|
447
|
+
{
|
|
448
|
+
"id": 31,
|
|
449
|
+
"domain": "Domain 4: Model Deployment & MLOps",
|
|
450
|
+
"question": "You deployed a model to a Vertex AI Endpoint. How do you perform an A/B test between model v1 and v2?",
|
|
451
|
+
"options": [
|
|
452
|
+
"Deploy to two separate endpoints and use a load balancer",
|
|
453
|
+
"Use traffic splitting on a single endpoint — route 90% to v1 and 10% to v2",
|
|
454
|
+
"Deploy v2 to a Cloud Function",
|
|
455
|
+
"Use Cloud CDN for routing"
|
|
456
|
+
],
|
|
457
|
+
"correct": 1,
|
|
458
|
+
"explanation": "Vertex AI Endpoints support traffic splitting: deploy multiple model versions to the same endpoint and configure traffic percentages. This enables A/B testing and canary deployments natively."
|
|
459
|
+
},
|
|
460
|
+
{
|
|
461
|
+
"id": 32,
|
|
462
|
+
"domain": "Domain 4: Model Deployment & MLOps",
|
|
463
|
+
"question": "What is the difference between online prediction and batch prediction on Vertex AI?",
|
|
464
|
+
"options": [
|
|
465
|
+
"Online is cheaper than batch",
|
|
466
|
+
"Online returns predictions synchronously with low latency via an endpoint; batch processes large datasets asynchronously and writes results to storage",
|
|
467
|
+
"Batch prediction is more accurate",
|
|
468
|
+
"Online prediction can only handle images"
|
|
469
|
+
],
|
|
470
|
+
"correct": 1,
|
|
471
|
+
"explanation": "Online prediction uses a deployed endpoint for real-time, synchronous, low-latency responses. Batch prediction accepts input files (CSV, JSONL, BigQuery), processes them asynchronously, and writes output to GCS or BigQuery."
|
|
472
|
+
},
|
|
473
|
+
{
|
|
474
|
+
"id": 33,
|
|
475
|
+
"domain": "Domain 4: Model Deployment & MLOps",
|
|
476
|
+
"question": "Vertex AI Model Monitoring detects 'training-serving skew' on a feature. What does this mean?",
|
|
477
|
+
"options": [
|
|
478
|
+
"The model's accuracy has decreased",
|
|
479
|
+
"The feature's distribution in production serving requests differs significantly from training data distribution",
|
|
480
|
+
"The model is serving stale predictions",
|
|
481
|
+
"The endpoint is experiencing high latency"
|
|
482
|
+
],
|
|
483
|
+
"correct": 1,
|
|
484
|
+
"explanation": "Training-serving skew means the statistical distribution of a feature during inference is significantly different from training time. This can cause degraded model performance and may indicate data pipeline issues or real-world distribution shifts."
|
|
485
|
+
},
|
|
486
|
+
{
|
|
487
|
+
"id": 34,
|
|
488
|
+
"domain": "Domain 4: Model Deployment & MLOps",
|
|
489
|
+
"question": "Which TFX component evaluates a trained model against baseline and decides whether to deploy?",
|
|
490
|
+
"options": [
|
|
491
|
+
"Trainer",
|
|
492
|
+
"Evaluator (with TFMA — TensorFlow Model Analysis)",
|
|
493
|
+
"Pusher",
|
|
494
|
+
"ExampleValidator"
|
|
495
|
+
],
|
|
496
|
+
"correct": 1,
|
|
497
|
+
"explanation": "The Evaluator component uses TFMA (TensorFlow Model Analysis) to compute metrics, compare against a baseline model, and produce a 'blessed' or 'not blessed' decision. Only blessed models proceed to Pusher for deployment."
|
|
498
|
+
},
|
|
499
|
+
{
|
|
500
|
+
"id": 35,
|
|
501
|
+
"domain": "Domain 4: Model Deployment & MLOps",
|
|
502
|
+
"question": "Vertex AI Pipelines are built on which open-source framework?",
|
|
503
|
+
"options": [
|
|
504
|
+
"Apache Airflow",
|
|
505
|
+
"Kubeflow Pipelines (KFP) SDK",
|
|
506
|
+
"Apache Spark",
|
|
507
|
+
"Luigi"
|
|
508
|
+
],
|
|
509
|
+
"correct": 1,
|
|
510
|
+
"explanation": "Vertex AI Pipelines uses the Kubeflow Pipelines (KFP) v2 SDK for pipeline definition. It also supports TFX pipelines. The pipeline runs on Google's serverless infrastructure — no Kubernetes cluster management required."
|
|
511
|
+
},
|
|
512
|
+
{
|
|
513
|
+
"id": 36,
|
|
514
|
+
"domain": "Domain 4: Model Deployment & MLOps",
|
|
515
|
+
"question": "A model endpoint needs to handle spiky traffic (many requests during business hours, near zero at night). Which configuration is BEST?",
|
|
516
|
+
"options": [
|
|
517
|
+
"Set a fixed number of replicas to handle peak load",
|
|
518
|
+
"Configure autoscaling with min replicas=1 and scaling based on CPU utilization or request count",
|
|
519
|
+
"Use batch prediction instead",
|
|
520
|
+
"Deploy to Cloud Run instead of Vertex AI"
|
|
521
|
+
],
|
|
522
|
+
"correct": 1,
|
|
523
|
+
"explanation": "Vertex AI Endpoints support autoscaling based on metrics like CPU utilization or traffic. Setting min replicas ensures availability while auto-scaling up for peaks and down during quiet periods optimizes cost."
|
|
524
|
+
},
|
|
525
|
+
{
|
|
526
|
+
"id": 37,
|
|
527
|
+
"domain": "Domain 4: Model Deployment & MLOps",
|
|
528
|
+
"question": "In an MLOps CI/CD pipeline, what should trigger model retraining?",
|
|
529
|
+
"options": [
|
|
530
|
+
"Only manual trigger by data scientists",
|
|
531
|
+
"Data drift detected, scheduled cadence, code changes, or model performance degradation",
|
|
532
|
+
"Only when new features are added",
|
|
533
|
+
"Every time new data arrives regardless of quantity"
|
|
534
|
+
],
|
|
535
|
+
"correct": 1,
|
|
536
|
+
"explanation": "Continuous Training triggers include: data drift or concept drift exceeding thresholds, scheduled retraining (daily/weekly), code changes (new features/model architecture), or performance monitoring alerts showing degradation."
|
|
537
|
+
},
|
|
538
|
+
{
|
|
539
|
+
"id": 38,
|
|
540
|
+
"domain": "Domain 4: Model Deployment & MLOps",
|
|
541
|
+
"question": "What is the purpose of Vertex AI Model Registry?",
|
|
542
|
+
"options": [
|
|
543
|
+
"To store training data",
|
|
544
|
+
"To manage model versions, track lineage, and organize models with metadata for governance",
|
|
545
|
+
"To run inference",
|
|
546
|
+
"To create features"
|
|
547
|
+
],
|
|
548
|
+
"correct": 1,
|
|
549
|
+
"explanation": "Model Registry provides a central repository for model versioning, metadata management, lineage tracking (which data/pipeline produced which model), and governance — essential for production ML."
|
|
550
|
+
},
|
|
551
|
+
{
|
|
552
|
+
"id": 39,
|
|
553
|
+
"domain": "Domain 4: Model Deployment & MLOps",
|
|
554
|
+
"question": "You need to deploy a model for inference at the edge (on-device). Which GCP approach is correct?",
|
|
555
|
+
"options": [
|
|
556
|
+
"Deploy to Vertex AI Endpoint and call from the edge",
|
|
557
|
+
"Export the model, optimize with TFLite or ONNX, deploy via Edge Manager",
|
|
558
|
+
"Use BigQuery ML for edge inference",
|
|
559
|
+
"Use Dataflow for edge processing"
|
|
560
|
+
],
|
|
561
|
+
"correct": 1,
|
|
562
|
+
"explanation": "Edge deployment requires model optimization (TFLite for TensorFlow, ONNX for framework-agnostic) to reduce model size and latency. Vertex AI supports edge deployment with model optimization tools."
|
|
563
|
+
},
|
|
564
|
+
{
|
|
565
|
+
"id": 40,
|
|
566
|
+
"domain": "Domain 4: Model Deployment & MLOps",
|
|
567
|
+
"question": "Which tool in the Vertex AI ecosystem provides experiment tracking with visualization of training curves?",
|
|
568
|
+
"options": [
|
|
569
|
+
"Vertex AI Feature Store",
|
|
570
|
+
"Vertex AI TensorBoard",
|
|
571
|
+
"Vertex AI Matching Engine",
|
|
572
|
+
"Cloud Monitoring"
|
|
573
|
+
],
|
|
574
|
+
"correct": 1,
|
|
575
|
+
"explanation": "Vertex AI TensorBoard is a managed TensorBoard instance that visualizes training metrics (loss curves, accuracy), model graphs, embeddings, and profiling data. It integrates with Vertex AI Experiments."
|
|
576
|
+
},
|
|
577
|
+
{
|
|
578
|
+
"id": 41,
|
|
579
|
+
"domain": "Domain 5: Responsible AI, Security & Exam Strategy",
|
|
580
|
+
"question": "Which Google Cloud service provides model explainability with feature attributions for Vertex AI predictions?",
|
|
581
|
+
"options": [
|
|
582
|
+
"Cloud Audit Logs",
|
|
583
|
+
"Vertex Explainable AI (using Integrated Gradients, SHAP, or sampled Shapley)",
|
|
584
|
+
"Cloud DLP",
|
|
585
|
+
"Security Command Center"
|
|
586
|
+
],
|
|
587
|
+
"correct": 1,
|
|
588
|
+
"explanation": "Vertex Explainable AI provides feature attributions for predictions using methods like Integrated Gradients (for neural networks), sampled Shapley, and XRAI (for image models) — showing which features most influenced each prediction."
|
|
589
|
+
},
|
|
590
|
+
{
|
|
591
|
+
"id": 42,
|
|
592
|
+
"domain": "Domain 5: Responsible AI, Security & Exam Strategy",
|
|
593
|
+
"question": "A loan approval model is observed to have significantly different approval rates across racial groups. Which Responsible AI principle is being violated?",
|
|
594
|
+
"options": [
|
|
595
|
+
"Privacy",
|
|
596
|
+
"Fairness — the model shows demographic bias",
|
|
597
|
+
"Transparency",
|
|
598
|
+
"Accountability"
|
|
599
|
+
],
|
|
600
|
+
"correct": 1,
|
|
601
|
+
"explanation": "Fairness requires that ML systems don't create or reinforce bias against protected groups. Disparate approval rates across racial groups indicate bias that must be investigated using fairness metrics and mitigated."
|
|
602
|
+
},
|
|
603
|
+
{
|
|
604
|
+
"id": 43,
|
|
605
|
+
"domain": "Domain 5: Responsible AI, Security & Exam Strategy",
|
|
606
|
+
"question": "How should you protect Vertex AI training data at rest in Google Cloud Storage?",
|
|
607
|
+
"options": [
|
|
608
|
+
"Rely on default encryption only",
|
|
609
|
+
"Use Customer-Managed Encryption Keys (CMEK) with Cloud KMS for additional control",
|
|
610
|
+
"Encrypt data manually before upload and decrypt in training code",
|
|
611
|
+
"Use a VPN connection"
|
|
612
|
+
],
|
|
613
|
+
"correct": 1,
|
|
614
|
+
"explanation": "GCS encrypts at rest by default (Google-managed keys). CMEK gives you control over encryption keys via Cloud KMS — you can rotate, disable, or revoke keys. This is required for compliance-sensitive ML workloads."
|
|
615
|
+
},
|
|
616
|
+
{
|
|
617
|
+
"id": 44,
|
|
618
|
+
"domain": "Domain 5: Responsible AI, Security & Exam Strategy",
|
|
619
|
+
"question": "How do you restrict which users can deploy models to Vertex AI Endpoints in a production project?",
|
|
620
|
+
"options": [
|
|
621
|
+
"Use service account keys shared across the team",
|
|
622
|
+
"Use IAM roles — grant roles/aiplatform.user for training but only roles/aiplatform.admin or custom roles for deployment",
|
|
623
|
+
"Use IP allowlisting",
|
|
624
|
+
"Restrict by VPC network"
|
|
625
|
+
],
|
|
626
|
+
"correct": 1,
|
|
627
|
+
"explanation": "IAM roles control who can perform what actions. Separation of duties: data scientists get training permissions, while only authorized users/service accounts get deployment permissions. Custom IAM roles allow fine-grained control."
|
|
628
|
+
},
|
|
629
|
+
{
|
|
630
|
+
"id": 45,
|
|
631
|
+
"domain": "Domain 5: Responsible AI, Security & Exam Strategy",
|
|
632
|
+
"question": "What is VPC Service Controls (VPC-SC) used for in ML workloads?",
|
|
633
|
+
"options": [
|
|
634
|
+
"Speeding up network traffic",
|
|
635
|
+
"Creating a security perimeter around GCP services to prevent data exfiltration from Vertex AI, BigQuery, GCS",
|
|
636
|
+
"Managing DNS resolution",
|
|
637
|
+
"Load balancing inference traffic"
|
|
638
|
+
],
|
|
639
|
+
"correct": 1,
|
|
640
|
+
"explanation": "VPC-SC creates a security perimeter (service perimeter) that restricts data movement in/out of specified GCP services. This prevents exfiltration of sensitive ML data from BigQuery, GCS, and Vertex AI."
|
|
641
|
+
},
|
|
642
|
+
{
|
|
643
|
+
"id": 46,
|
|
644
|
+
"domain": "Domain 5: Responsible AI, Security & Exam Strategy",
|
|
645
|
+
"question": "Differential privacy in ML training means:",
|
|
646
|
+
"options": [
|
|
647
|
+
"Encrypting the model weights",
|
|
648
|
+
"Adding calibrated noise during training so the model doesn't memorize or reveal individual training examples",
|
|
649
|
+
"Using private VPCs for training",
|
|
650
|
+
"Restricting access to training logs"
|
|
651
|
+
],
|
|
652
|
+
"correct": 1,
|
|
653
|
+
"explanation": "Differential privacy adds mathematical noise to gradient updates during training, ensuring no individual training record can be reconstructed or inferred from the model — protecting user privacy while maintaining model utility."
|
|
654
|
+
},
|
|
655
|
+
{
|
|
656
|
+
"id": 47,
|
|
657
|
+
"domain": "Domain 5: Responsible AI, Security & Exam Strategy",
|
|
658
|
+
"question": "Federated Learning allows:",
|
|
659
|
+
"options": [
|
|
660
|
+
"Training one large model on a single GPU",
|
|
661
|
+
"Training a shared model across multiple devices/organizations without centralizing raw data",
|
|
662
|
+
"Faster inference on edge devices",
|
|
663
|
+
"Real-time feature serving"
|
|
664
|
+
],
|
|
665
|
+
"correct": 1,
|
|
666
|
+
"explanation": "Federated Learning trains a model across decentralized data sources (devices, hospitals, organizations). Each participant trains locally and shares model updates (not raw data), preserving data privacy."
|
|
667
|
+
},
|
|
668
|
+
{
|
|
669
|
+
"id": 48,
|
|
670
|
+
"domain": "Domain 5: Responsible AI, Security & Exam Strategy",
|
|
671
|
+
"question": "Which tool helps detect PII (Personally Identifiable Information) in text data before using it for ML training?",
|
|
672
|
+
"options": [
|
|
673
|
+
"Vertex AI Feature Store",
|
|
674
|
+
"Cloud Data Loss Prevention (Cloud DLP)",
|
|
675
|
+
"Cloud Armor",
|
|
676
|
+
"Cloud IDS"
|
|
677
|
+
],
|
|
678
|
+
"correct": 1,
|
|
679
|
+
"explanation": "Cloud DLP inspects, classifies, and de-identifies sensitive data (PII, PHI) in text, images, and structured data. Use it to scan training data for PII before feeding it into ML pipelines."
|
|
680
|
+
},
|
|
681
|
+
{
|
|
682
|
+
"id": 49,
|
|
683
|
+
"domain": "Domain 5: Responsible AI, Security & Exam Strategy",
|
|
684
|
+
"question": "During the exam, you see a question about choosing between Dataflow, Dataproc, and Cloud Composer. Which decision framework is correct?",
|
|
685
|
+
"options": [
|
|
686
|
+
"Always choose Dataflow because it's serverless",
|
|
687
|
+
"Dataflow for new batch/streaming pipelines (Apache Beam); Dataproc for existing Spark/Hadoop migrations; Cloud Composer for workflow orchestration (Airflow)",
|
|
688
|
+
"Always choose Dataproc because it's cheaper",
|
|
689
|
+
"Cloud Composer for all data processing"
|
|
690
|
+
],
|
|
691
|
+
"correct": 1,
|
|
692
|
+
"explanation": "Decision framework: Dataflow = serverless Apache Beam for new ETL (batch/streaming). Dataproc = managed Spark/Hadoop for existing workloads. Cloud Composer = managed Airflow for orchestrating multi-step workflows (not data processing itself)."
|
|
693
|
+
},
|
|
694
|
+
{
|
|
695
|
+
"id": 50,
|
|
696
|
+
"domain": "Domain 5: Responsible AI, Security & Exam Strategy",
|
|
697
|
+
"question": "A Vertex AI model is making predictions that seem incorrect but there are no alerts. What should you add to your monitoring setup?",
|
|
190
698
|
"options": [
|
|
191
|
-
"
|
|
192
|
-
"
|
|
193
|
-
"
|
|
194
|
-
"
|
|
699
|
+
"More prediction logs in Cloud Logging only",
|
|
700
|
+
"Vertex AI Model Monitoring with skew/drift detection thresholds and alerting to Cloud Monitoring",
|
|
701
|
+
"Increase the number of endpoint replicas",
|
|
702
|
+
"Add more training data"
|
|
195
703
|
],
|
|
196
704
|
"correct": 1,
|
|
197
|
-
"explanation": "
|
|
705
|
+
"explanation": "Vertex AI Model Monitoring should be configured with: (1) training-serving skew detection, (2) prediction drift detection, (3) threshold configurations per feature, and (4) alerting via Cloud Monitoring for proactive issue detection."
|
|
198
706
|
}
|
|
199
707
|
]
|
|
200
708
|
}
|