@xdev-asia/xdev-knowledge-mcp 1.0.58 → 1.0.59
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/content/blog/ai/nvidia-dli-generative-ai-chung-chi-va-lo-trinh-hoc.md +894 -0
- package/content/series/luyen-thi/luyen-thi-nvidia-dli-generative-ai/chapters/01-deep-learning-foundations/lessons/01-bai-1-pytorch-neural-network-fundamentals.md +790 -0
- package/content/series/luyen-thi/luyen-thi-nvidia-dli-generative-ai/chapters/01-deep-learning-foundations/lessons/02-bai-2-transformer-architecture-attention.md +984 -0
- package/content/series/luyen-thi/luyen-thi-nvidia-dli-generative-ai/chapters/02-diffusion-models/lessons/01-bai-3-unet-architecture-denoising.md +1111 -0
- package/content/series/luyen-thi/luyen-thi-nvidia-dli-generative-ai/chapters/02-diffusion-models/lessons/02-bai-4-ddpm-forward-reverse-diffusion.md +1007 -0
- package/content/series/luyen-thi/luyen-thi-nvidia-dli-generative-ai/chapters/02-diffusion-models/lessons/03-bai-5-clip-text-to-image-pipeline.md +1037 -0
- package/content/series/luyen-thi/luyen-thi-nvidia-dli-generative-ai/chapters/03-llm-applications-rag/lessons/01-bai-6-llm-inference-pipeline-design.md +929 -0
- package/content/series/luyen-thi/luyen-thi-nvidia-dli-generative-ai/chapters/03-llm-applications-rag/lessons/02-bai-7-rag-retrieval-augmented-generation.md +1099 -0
- package/content/series/luyen-thi/luyen-thi-nvidia-dli-generative-ai/chapters/03-llm-applications-rag/lessons/03-bai-8-rag-agent-build-evaluate.md +1249 -0
- package/content/series/luyen-thi/luyen-thi-nvidia-dli-generative-ai/chapters/04-agentic-ai-customization/lessons/01-bai-9-agentic-ai-multi-agent-systems.md +1357 -0
- package/content/series/luyen-thi/luyen-thi-nvidia-dli-generative-ai/chapters/04-agentic-ai-customization/lessons/02-bai-10-llm-evaluation-lora-fine-tuning.md +1867 -0
- package/content/series/luyen-thi/luyen-thi-nvidia-dli-generative-ai/index.md +237 -0
- package/data/quizzes/nvidia-dli-generative-ai.json +350 -0
- package/data/quizzes.json +14 -0
- package/package.json +1 -1
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
---
|
|
2
|
+
id: 019c9619-nv01-7001-c001-nv0100000001
|
|
3
|
+
title: "Luyện thi NVIDIA DLI — Generative AI with Diffusion Models & LLMs"
|
|
4
|
+
slug: luyen-thi-nvidia-dli-generative-ai
|
|
5
|
+
description: >-
|
|
6
|
+
Lộ trình ôn tập toàn diện cho các khóa NVIDIA DLI Generative AI — từ Diffusion Models,
|
|
7
|
+
RAG Agents, Agentic AI đến LLM Evaluation & Fine-tuning. 10 bài học chuyên sâu có hands-on
|
|
8
|
+
code, bài thi thử dạng coding assessment, và câu hỏi mẫu sát đề thi thật.
|
|
9
|
+
|
|
10
|
+
featured_image: images/blog/nvidia-dli-genai-series-banner.png
|
|
11
|
+
level: intermediate
|
|
12
|
+
duration_hours: 40
|
|
13
|
+
lesson_count: 10
|
|
14
|
+
price: '0.00'
|
|
15
|
+
is_free: true
|
|
16
|
+
view_count: 0
|
|
17
|
+
average_rating: '0.00'
|
|
18
|
+
review_count: 0
|
|
19
|
+
enrollment_count: 0
|
|
20
|
+
meta: null
|
|
21
|
+
published_at: '2026-04-13T14:00:00.000000Z'
|
|
22
|
+
created_at: '2026-04-13T14:00:00.000000Z'
|
|
23
|
+
|
|
24
|
+
author:
|
|
25
|
+
id: 019c9616-d2b4-713f-9b2c-40e2e92a05cf
|
|
26
|
+
name: Duy Tran
|
|
27
|
+
avatar: avatars/7e8eb5c6-4cac-455b-a701-4060f085d501.jpeg
|
|
28
|
+
|
|
29
|
+
category:
|
|
30
|
+
id: 019c9616-cat9-7009-a009-000000000009
|
|
31
|
+
name: Luyện thi chứng chỉ
|
|
32
|
+
slug: luyen-thi
|
|
33
|
+
|
|
34
|
+
tags:
|
|
35
|
+
- name: NVIDIA
|
|
36
|
+
slug: nvidia
|
|
37
|
+
- name: AI
|
|
38
|
+
slug: ai
|
|
39
|
+
- name: Deep Learning
|
|
40
|
+
slug: deep-learning
|
|
41
|
+
- name: LLM
|
|
42
|
+
slug: llm
|
|
43
|
+
- name: Diffusion Models
|
|
44
|
+
slug: diffusion-models
|
|
45
|
+
- name: RAG
|
|
46
|
+
slug: rag
|
|
47
|
+
- name: Chứng chỉ
|
|
48
|
+
slug: chung-chi
|
|
49
|
+
|
|
50
|
+
quiz_slug: nvidia-dli-generative-ai
|
|
51
|
+
|
|
52
|
+
sections:
|
|
53
|
+
- id: section-01
|
|
54
|
+
title: "Part 1: Deep Learning Foundations"
|
|
55
|
+
description: PyTorch fundamentals, neural network architectures, Transformer basics
|
|
56
|
+
sort_order: 1
|
|
57
|
+
lessons:
|
|
58
|
+
- id: 019c9619-nv01-p1-l01
|
|
59
|
+
title: "Bài 1: PyTorch & Neural Network Fundamentals"
|
|
60
|
+
slug: bai-1-pytorch-neural-network-fundamentals
|
|
61
|
+
description: >-
|
|
62
|
+
PyTorch tensors, autograd, nn.Module. Build neural network from scratch.
|
|
63
|
+
Training loop, loss functions, optimizers. GPU acceleration basics.
|
|
64
|
+
CNN architecture, pooling, batch normalization.
|
|
65
|
+
duration_minutes: 90
|
|
66
|
+
is_free: true
|
|
67
|
+
sort_order: 0
|
|
68
|
+
video_url: null
|
|
69
|
+
- id: 019c9619-nv01-p1-l02
|
|
70
|
+
title: "Bài 2: Transformer Architecture & Attention Mechanism"
|
|
71
|
+
slug: bai-2-transformer-architecture-attention
|
|
72
|
+
description: >-
|
|
73
|
+
Self-attention, multi-head attention, positional encoding.
|
|
74
|
+
Encoder-decoder architecture. BERT, GPT, T5 model families.
|
|
75
|
+
Tokenization: BPE, WordPiece, SentencePiece.
|
|
76
|
+
NLP tasks: classification, NER, QA, summarization.
|
|
77
|
+
duration_minutes: 90
|
|
78
|
+
is_free: true
|
|
79
|
+
sort_order: 1
|
|
80
|
+
video_url: null
|
|
81
|
+
|
|
82
|
+
- id: section-02
|
|
83
|
+
title: "Part 2: Generative AI with Diffusion Models"
|
|
84
|
+
description: U-Net, DDPM, noise scheduling, CLIP, text-to-image pipeline
|
|
85
|
+
sort_order: 2
|
|
86
|
+
lessons:
|
|
87
|
+
- id: 019c9619-nv01-p2-l03
|
|
88
|
+
title: "Bài 3: U-Net Architecture & Denoising Basics"
|
|
89
|
+
slug: bai-3-unet-architecture-denoising
|
|
90
|
+
description: >-
|
|
91
|
+
U-Net encoder-decoder with skip connections.
|
|
92
|
+
Build U-Net from scratch in PyTorch. Train denoiser model.
|
|
93
|
+
Group Normalization, GELU activation, Rearrange Pooling.
|
|
94
|
+
Sinusoidal Position Embeddings for timestep encoding.
|
|
95
|
+
duration_minutes: 90
|
|
96
|
+
is_free: true
|
|
97
|
+
sort_order: 0
|
|
98
|
+
video_url: null
|
|
99
|
+
- id: 019c9619-nv01-p2-l04
|
|
100
|
+
title: "Bài 4: DDPM — Forward & Reverse Diffusion"
|
|
101
|
+
slug: bai-4-ddpm-forward-reverse-diffusion
|
|
102
|
+
description: >-
|
|
103
|
+
Forward diffusion: Markov chain, variance schedule, reparameterization.
|
|
104
|
+
Reverse diffusion: predict noise, denoise step-by-step.
|
|
105
|
+
Noise scheduling: linear, cosine schedules.
|
|
106
|
+
Training objective: simplified ELBO loss.
|
|
107
|
+
Classifier-Free Diffusion Guidance (CFG).
|
|
108
|
+
duration_minutes: 90
|
|
109
|
+
is_free: true
|
|
110
|
+
sort_order: 1
|
|
111
|
+
video_url: null
|
|
112
|
+
- id: 019c9619-nv01-p2-l05
|
|
113
|
+
title: "Bài 5: CLIP & Text-to-Image Pipeline"
|
|
114
|
+
slug: bai-5-clip-text-to-image-pipeline
|
|
115
|
+
description: >-
|
|
116
|
+
CLIP: Contrastive Language-Image Pretraining.
|
|
117
|
+
Text encoding, image encoding, contrastive loss.
|
|
118
|
+
Cross-attention: inject text embeddings into U-Net.
|
|
119
|
+
Full text-to-image pipeline. Latent Diffusion overview.
|
|
120
|
+
Assessment prep: coding exercises & debug challenges.
|
|
121
|
+
duration_minutes: 90
|
|
122
|
+
is_free: true
|
|
123
|
+
sort_order: 2
|
|
124
|
+
video_url: null
|
|
125
|
+
|
|
126
|
+
- id: section-03
|
|
127
|
+
title: "Part 3: LLM Applications & RAG"
|
|
128
|
+
description: LLM inference, RAG pipeline, embeddings, vector stores, guardrails
|
|
129
|
+
sort_order: 3
|
|
130
|
+
lessons:
|
|
131
|
+
- id: 019c9619-nv01-p3-l06
|
|
132
|
+
title: "Bài 6: LLM Inference & Pipeline Design"
|
|
133
|
+
slug: bai-6-llm-inference-pipeline-design
|
|
134
|
+
description: >-
|
|
135
|
+
LLM inference parameters: temperature, top_p, top_k, max_tokens.
|
|
136
|
+
NVIDIA NIM (Inference Microservices).
|
|
137
|
+
LangChain Expression Language (LCEL), prompt templates.
|
|
138
|
+
Gradio UI prototyping, LangServe deployment.
|
|
139
|
+
Dialog management with running states.
|
|
140
|
+
duration_minutes: 90
|
|
141
|
+
is_free: true
|
|
142
|
+
sort_order: 0
|
|
143
|
+
video_url: null
|
|
144
|
+
- id: 019c9619-nv01-p3-l07
|
|
145
|
+
title: "Bài 7: RAG — Retrieval-Augmented Generation"
|
|
146
|
+
slug: bai-7-rag-retrieval-augmented-generation
|
|
147
|
+
description: >-
|
|
148
|
+
Document loading, chunking strategies, metadata extraction.
|
|
149
|
+
Embedding models: semantic similarity, cosine distance.
|
|
150
|
+
Vector stores: FAISS, Milvus, pgvector.
|
|
151
|
+
Full RAG pipeline: query → retrieve → augment → generate.
|
|
152
|
+
Guardrailing: input/output filters, topic detection.
|
|
153
|
+
duration_minutes: 90
|
|
154
|
+
is_free: true
|
|
155
|
+
sort_order: 1
|
|
156
|
+
video_url: null
|
|
157
|
+
- id: 019c9619-nv01-p3-l08
|
|
158
|
+
title: "Bài 8: RAG Agent — Build & Evaluate"
|
|
159
|
+
slug: bai-8-rag-agent-build-evaluate
|
|
160
|
+
description: >-
|
|
161
|
+
Build a RAG agent that answers questions about research papers.
|
|
162
|
+
Multi-turn conversation with state management.
|
|
163
|
+
RAG evaluation metrics: precision, recall, faithfulness.
|
|
164
|
+
LLM-as-a-judge evaluation pattern.
|
|
165
|
+
Assessment prep: end-to-end RAG agent challenge.
|
|
166
|
+
duration_minutes: 90
|
|
167
|
+
is_free: true
|
|
168
|
+
sort_order: 2
|
|
169
|
+
video_url: null
|
|
170
|
+
|
|
171
|
+
- id: section-04
|
|
172
|
+
title: "Part 4: Agentic AI & LLM Customization"
|
|
173
|
+
description: Multi-agent systems, LangGraph, LoRA fine-tuning, NeMo framework
|
|
174
|
+
sort_order: 4
|
|
175
|
+
lessons:
|
|
176
|
+
- id: 019c9619-nv01-p4-l09
|
|
177
|
+
title: "Bài 9: Agentic AI — Multi-Agent Systems"
|
|
178
|
+
slug: bai-9-agentic-ai-multi-agent-systems
|
|
179
|
+
description: >-
|
|
180
|
+
Agent abstraction: task decomposition, structured output.
|
|
181
|
+
Cognitive architectures: ReAct, Plan-and-Execute, LATS.
|
|
182
|
+
LangGraph: state machines, conditional edges, parallel execution.
|
|
183
|
+
Multi-agent orchestration, tool interfaces, knowledge graphs.
|
|
184
|
+
Final assessment: deploy multi-agent research system.
|
|
185
|
+
duration_minutes: 90
|
|
186
|
+
is_free: true
|
|
187
|
+
sort_order: 0
|
|
188
|
+
video_url: null
|
|
189
|
+
- id: 019c9619-nv01-p4-l10
|
|
190
|
+
title: "Bài 10: LLM Evaluation & LoRA Fine-tuning"
|
|
191
|
+
slug: bai-10-llm-evaluation-lora-fine-tuning
|
|
192
|
+
description: >-
|
|
193
|
+
Evaluation methods: benchmarks (GSM8K), LLM-as-a-judge, ELO ranking.
|
|
194
|
+
NeMo Evaluator microservice, MLflow experiment tracking.
|
|
195
|
+
Metrics: BLEU, F1-score, semantic similarity.
|
|
196
|
+
LoRA & QLoRA fine-tuning: theory and practice.
|
|
197
|
+
NeMo Customizer: launch fine-tuning jobs.
|
|
198
|
+
Exam strategy, cheat sheet & final mock exam.
|
|
199
|
+
duration_minutes: 90
|
|
200
|
+
is_free: true
|
|
201
|
+
sort_order: 1
|
|
202
|
+
video_url: null
|
|
203
|
+
|
|
204
|
+
reviews: []
|
|
205
|
+
quizzes: []
|
|
206
|
+
---
|
|
207
|
+
|
|
208
|
+
## Giới thiệu
|
|
209
|
+
|
|
210
|
+
Khóa học **Luyện thi NVIDIA DLI Generative AI** giúp bạn ôn tập có hệ thống cho toàn bộ track Generative AI/LLM của NVIDIA Deep Learning Institute — từ **Diffusion Models**, **RAG Agents**, **Agentic AI** đến **LLM Evaluation & Fine-tuning**.
|
|
211
|
+
|
|
212
|
+
### Khác biệt so với certification truyền thống
|
|
213
|
+
|
|
214
|
+
NVIDIA DLI **không phải MCQ exam** — bạn phải **viết code thực sự** trên Jupyter Notebook với GPU cloud. Đây là lý do series này tập trung vào **hands-on coding** và **debug exercises** thay vì câu hỏi lý thuyết.
|
|
215
|
+
|
|
216
|
+
### Ai nên học?
|
|
217
|
+
|
|
218
|
+
- ML Engineer muốn **chứng chỉ kỹ thuật sâu** về Generative AI
|
|
219
|
+
- Developer đã biết Python, muốn **master PyTorch + LLM stack**
|
|
220
|
+
- Người chuẩn bị thi các khóa DLI: **S-FX-14** (Diffusion), **S-FX-15** (RAG), **C-FX-25** (Agentic), **S-FX-34** (Eval/Fine-tune)
|
|
221
|
+
- AI Researcher muốn ôn lại fundamentals một cách hệ thống
|
|
222
|
+
|
|
223
|
+
### Cấu trúc series
|
|
224
|
+
|
|
225
|
+
| Part | Chủ đề | Khóa DLI tương ứng | Số bài |
|
|
226
|
+
|------|--------|---------------------|--------|
|
|
227
|
+
| Part 1 | Deep Learning Foundations | S-FX-08 (Transformer NLP) | Bài 1–2 |
|
|
228
|
+
| Part 2 | Generative AI with Diffusion Models | **S-FX-14** (Diffusion Models) | Bài 3–5 |
|
|
229
|
+
| Part 3 | LLM Applications & RAG | **S-FX-15** (RAG Agents) | Bài 6–8 |
|
|
230
|
+
| Part 4 | Agentic AI & LLM Customization | **C-FX-25** (Agentic) + **S-FX-34** (Eval) | Bài 9–10 |
|
|
231
|
+
|
|
232
|
+
### Prerequisites
|
|
233
|
+
|
|
234
|
+
- **Python**: OOP, list comprehension, decorators
|
|
235
|
+
- **PyTorch**: tensors cơ bản (sẽ ôn lại trong Bài 1)
|
|
236
|
+
- **Deep Learning**: hiểu neural networks, backpropagation (sẽ ôn lại)
|
|
237
|
+
- **Toán**: linear algebra cơ bản, probability (Gaussian, Markov chain)
|
|
@@ -0,0 +1,350 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "nvidia-dli-generative-ai",
|
|
3
|
+
"title": "NVIDIA DLI — Generative AI with Diffusion Models & LLMs",
|
|
4
|
+
"slug": "nvidia-dli-generative-ai",
|
|
5
|
+
"description": "Practice exam covering all 4 DLI GenAI courses — Diffusion Models, RAG Agents, Agentic AI, LLM Eval & Fine-tuning. 20 questions, coding-style.",
|
|
6
|
+
"icon": "gpu",
|
|
7
|
+
"provider": "NVIDIA",
|
|
8
|
+
"level": "Intermediate",
|
|
9
|
+
"duration_minutes": 40,
|
|
10
|
+
"passing_score": 70,
|
|
11
|
+
"questions_count": 20,
|
|
12
|
+
"tags": [
|
|
13
|
+
"NVIDIA",
|
|
14
|
+
"DLI",
|
|
15
|
+
"Diffusion",
|
|
16
|
+
"LLM",
|
|
17
|
+
"RAG",
|
|
18
|
+
"LoRA",
|
|
19
|
+
"GenAI"
|
|
20
|
+
],
|
|
21
|
+
"series_slug": "luyen-thi-nvidia-dli-generative-ai",
|
|
22
|
+
"domains": [
|
|
23
|
+
{
|
|
24
|
+
"name": "Part 1: Deep Learning Foundations",
|
|
25
|
+
"weight": 15,
|
|
26
|
+
"lessons": [
|
|
27
|
+
{
|
|
28
|
+
"title": "Bài 1: PyTorch & Neural Network Fundamentals",
|
|
29
|
+
"slug": "bai-1-pytorch-neural-network-fundamentals"
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"title": "Bài 2: Transformer Architecture & Attention",
|
|
33
|
+
"slug": "bai-2-transformer-architecture-attention"
|
|
34
|
+
}
|
|
35
|
+
]
|
|
36
|
+
},
|
|
37
|
+
{
|
|
38
|
+
"name": "Part 2: Generative AI with Diffusion Models",
|
|
39
|
+
"weight": 30,
|
|
40
|
+
"lessons": [
|
|
41
|
+
{
|
|
42
|
+
"title": "Bài 3: U-Net Architecture & Denoising",
|
|
43
|
+
"slug": "bai-3-unet-architecture-denoising"
|
|
44
|
+
},
|
|
45
|
+
{
|
|
46
|
+
"title": "Bài 4: DDPM — Forward & Reverse Diffusion",
|
|
47
|
+
"slug": "bai-4-ddpm-forward-reverse-diffusion"
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
"title": "Bài 5: CLIP & Text-to-Image Pipeline",
|
|
51
|
+
"slug": "bai-5-clip-text-to-image-pipeline"
|
|
52
|
+
}
|
|
53
|
+
]
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
"name": "Part 3: LLM Applications & RAG",
|
|
57
|
+
"weight": 30,
|
|
58
|
+
"lessons": [
|
|
59
|
+
{
|
|
60
|
+
"title": "Bài 6: LLM Inference & Pipeline Design",
|
|
61
|
+
"slug": "bai-6-llm-inference-pipeline-design"
|
|
62
|
+
},
|
|
63
|
+
{
|
|
64
|
+
"title": "Bài 7: RAG — Retrieval-Augmented Generation",
|
|
65
|
+
"slug": "bai-7-rag-retrieval-augmented-generation"
|
|
66
|
+
},
|
|
67
|
+
{
|
|
68
|
+
"title": "Bài 8: RAG Agent — Build & Evaluate",
|
|
69
|
+
"slug": "bai-8-rag-agent-build-evaluate"
|
|
70
|
+
}
|
|
71
|
+
]
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
"name": "Part 4: Agentic AI & LLM Customization",
|
|
75
|
+
"weight": 25,
|
|
76
|
+
"lessons": [
|
|
77
|
+
{
|
|
78
|
+
"title": "Bài 9: Agentic AI & Multi-Agent Systems",
|
|
79
|
+
"slug": "bai-9-agentic-ai-multi-agent-systems"
|
|
80
|
+
},
|
|
81
|
+
{
|
|
82
|
+
"title": "Bài 10: LLM Evaluation & LoRA Fine-tuning",
|
|
83
|
+
"slug": "bai-10-llm-evaluation-lora-fine-tuning"
|
|
84
|
+
}
|
|
85
|
+
]
|
|
86
|
+
}
|
|
87
|
+
],
|
|
88
|
+
"questions": [
|
|
89
|
+
{
|
|
90
|
+
"id": 1,
|
|
91
|
+
"domain": "Part 1: Deep Learning Foundations",
|
|
92
|
+
"question": "In PyTorch, what does `loss.backward()` compute?",
|
|
93
|
+
"options": [
|
|
94
|
+
"The forward pass of the neural network",
|
|
95
|
+
"Gradients of the loss with respect to all parameters that have requires_grad=True",
|
|
96
|
+
"Updates the model weights using the optimizer",
|
|
97
|
+
"Zeros out all gradients in the computation graph"
|
|
98
|
+
],
|
|
99
|
+
"correct": 1,
|
|
100
|
+
"explanation": "`loss.backward()` computes gradients via backpropagation. It calculates ∂loss/∂param for every parameter with requires_grad=True. The actual weight update is done by `optimizer.step()`, and zeroing gradients is done by `optimizer.zero_grad()`."
|
|
101
|
+
},
|
|
102
|
+
{
|
|
103
|
+
"id": 2,
|
|
104
|
+
"domain": "Part 1: Deep Learning Foundations",
|
|
105
|
+
"question": "In the Transformer self-attention mechanism, what are the three projections computed from the input?",
|
|
106
|
+
"options": [
|
|
107
|
+
"Mean, Variance, Standard Deviation",
|
|
108
|
+
"Encoder, Decoder, Cross-Attention",
|
|
109
|
+
"Query (Q), Key (K), Value (V)",
|
|
110
|
+
"Input, Hidden, Output"
|
|
111
|
+
],
|
|
112
|
+
"correct": 2,
|
|
113
|
+
"explanation": "Self-attention projects input into Q, K, V matrices. Attention scores are computed as softmax(QK^T / √d_k) · V. Each head learns different relationships between tokens."
|
|
114
|
+
},
|
|
115
|
+
{
|
|
116
|
+
"id": 3,
|
|
117
|
+
"domain": "Part 1: Deep Learning Foundations",
|
|
118
|
+
"question": "What is the purpose of positional encoding in Transformers?",
|
|
119
|
+
"options": [
|
|
120
|
+
"To reduce the number of parameters in the model",
|
|
121
|
+
"To inject information about token position since self-attention is permutation-invariant",
|
|
122
|
+
"To normalize the input embeddings to unit length",
|
|
123
|
+
"To increase the context window size of the model"
|
|
124
|
+
],
|
|
125
|
+
"correct": 1,
|
|
126
|
+
"explanation": "Self-attention treats input as an unordered set — it has no notion of position. Positional encoding (sinusoidal or learned) adds position information so the model can distinguish 'cat sat on mat' from 'mat sat on cat'."
|
|
127
|
+
},
|
|
128
|
+
{
|
|
129
|
+
"id": 4,
|
|
130
|
+
"domain": "Part 2: Generative AI with Diffusion Models",
|
|
131
|
+
"question": "In a U-Net architecture used for diffusion models, what is the purpose of skip connections?",
|
|
132
|
+
"options": [
|
|
133
|
+
"To skip layers during training for faster convergence",
|
|
134
|
+
"To connect encoder features to decoder at matching resolutions, preserving spatial detail",
|
|
135
|
+
"To randomly drop layers during inference for diversity",
|
|
136
|
+
"To reduce the total number of parameters in the model"
|
|
137
|
+
],
|
|
138
|
+
"correct": 1,
|
|
139
|
+
"explanation": "Skip connections concatenate encoder feature maps with decoder feature maps at the same resolution. This preserves fine spatial details that would otherwise be lost during downsampling, which is critical for image reconstruction quality."
|
|
140
|
+
},
|
|
141
|
+
{
|
|
142
|
+
"id": 5,
|
|
143
|
+
"domain": "Part 2: Generative AI with Diffusion Models",
|
|
144
|
+
"question": "Given the forward diffusion formula q(x_t | x_0) = N(x_t; √ᾱ_t · x_0, (1-ᾱ_t)·I), what happens as t → T (final timestep)?",
|
|
145
|
+
"options": [
|
|
146
|
+
"x_t becomes identical to x_0 (clean image)",
|
|
147
|
+
"x_t becomes pure Gaussian noise N(0, I) because ᾱ_T → 0",
|
|
148
|
+
"x_t becomes a blurred version of x_0",
|
|
149
|
+
"x_t becomes the mean of all training images"
|
|
150
|
+
],
|
|
151
|
+
"correct": 1,
|
|
152
|
+
"explanation": "As t → T, ᾱ_t → 0, so the mean √ᾱ_t · x_0 → 0 and variance (1-ᾱ_t) → 1. The distribution approaches N(0, I) — pure standard Gaussian noise. This is why reverse diffusion starts from random noise."
|
|
153
|
+
},
|
|
154
|
+
{
|
|
155
|
+
"id": 6,
|
|
156
|
+
"domain": "Part 2: Generative AI with Diffusion Models",
|
|
157
|
+
"question": "In Classifier-Free Guidance (CFG), what does increasing the guidance scale w do?",
|
|
158
|
+
"options": [
|
|
159
|
+
"Makes generated images more random and diverse",
|
|
160
|
+
"Reduces inference time by skipping diffusion steps",
|
|
161
|
+
"Amplifies the conditional signal, making output more aligned with the prompt but less diverse",
|
|
162
|
+
"Switches the model from conditional to unconditional generation"
|
|
163
|
+
],
|
|
164
|
+
"correct": 2,
|
|
165
|
+
"explanation": "CFG formula: ε_guided = ε_unconditional + w · (ε_conditional - ε_unconditional). Higher w amplifies the difference between conditional and unconditional predictions, pushing output closer to the prompt. Typical values: w=7.5. Too high causes artifacts."
|
|
166
|
+
},
|
|
167
|
+
{
|
|
168
|
+
"id": 7,
|
|
169
|
+
"domain": "Part 2: Generative AI with Diffusion Models",
|
|
170
|
+
"question": "Why do diffusion models prefer Group Normalization over Batch Normalization?",
|
|
171
|
+
"options": [
|
|
172
|
+
"Group Norm has fewer parameters than Batch Norm",
|
|
173
|
+
"Group Norm performs better on large batch sizes",
|
|
174
|
+
"Group Norm normalizes within each sample independently, making it stable for small batch sizes typical in image generation",
|
|
175
|
+
"Group Norm removes the need for skip connections"
|
|
176
|
+
],
|
|
177
|
+
"correct": 2,
|
|
178
|
+
"explanation": "Batch Norm statistics depend on batch size — unstable when batch is small (common in diffusion training due to large images). Group Norm divides channels into groups and normalizes within each sample, independent of batch size."
|
|
179
|
+
},
|
|
180
|
+
{
|
|
181
|
+
"id": 8,
|
|
182
|
+
"domain": "Part 2: Generative AI with Diffusion Models",
|
|
183
|
+
"question": "CLIP (Contrastive Language-Image Pretraining) is used in text-to-image diffusion models to:",
|
|
184
|
+
"options": [
|
|
185
|
+
"Generate images directly from text without a diffusion process",
|
|
186
|
+
"Encode text prompts into embeddings that condition the U-Net via cross-attention",
|
|
187
|
+
"Replace the U-Net entirely with a vision transformer",
|
|
188
|
+
"Evaluate the quality of generated images automatically"
|
|
189
|
+
],
|
|
190
|
+
"correct": 1,
|
|
191
|
+
"explanation": "CLIP's text encoder converts text prompts into dense embedding vectors. These embeddings are injected into the U-Net at multiple layers via cross-attention: Attention(Q=image_features, K=text_embeddings, V=text_embeddings). This conditions the generation process on the text."
|
|
192
|
+
},
|
|
193
|
+
{
|
|
194
|
+
"id": 9,
|
|
195
|
+
"domain": "Part 3: LLM Applications & RAG",
|
|
196
|
+
"question": "When chunking a 10,000-token document with chunk_size=512 and overlap=128, approximately how many chunks are produced?",
|
|
197
|
+
"options": [
|
|
198
|
+
"20 chunks",
|
|
199
|
+
"26 chunks",
|
|
200
|
+
"10 chunks",
|
|
201
|
+
"50 chunks"
|
|
202
|
+
],
|
|
203
|
+
"correct": 1,
|
|
204
|
+
"explanation": "With overlap, stride = chunk_size - overlap = 512 - 128 = 384. Number of chunks ≈ ceil((10000 - 512) / 384) + 1 = ceil(9488/384) + 1 = 25 + 1 = 26 chunks."
|
|
205
|
+
},
|
|
206
|
+
{
|
|
207
|
+
"id": 10,
|
|
208
|
+
"domain": "Part 3: LLM Applications & RAG",
|
|
209
|
+
"question": "In a RAG pipeline, what is the PRIMARY purpose of embedding models?",
|
|
210
|
+
"options": [
|
|
211
|
+
"To generate the final answer from retrieved documents",
|
|
212
|
+
"To convert text into dense vector representations for semantic similarity search",
|
|
213
|
+
"To split documents into smaller chunks",
|
|
214
|
+
"To filter out irrelevant documents using keyword matching"
|
|
215
|
+
],
|
|
216
|
+
"correct": 1,
|
|
217
|
+
"explanation": "Embedding models (e.g., NV-Embed-QA, sentence-transformers) convert text into dense vectors where semantically similar texts are close in vector space. This enables retrieval via cosine similarity or approximate nearest neighbor search."
|
|
218
|
+
},
|
|
219
|
+
{
|
|
220
|
+
"id": 11,
|
|
221
|
+
"domain": "Part 3: LLM Applications & RAG",
|
|
222
|
+
"question": "What is an input guardrail in a RAG system?",
|
|
223
|
+
"options": [
|
|
224
|
+
"A firewall that blocks network attacks on the LLM API",
|
|
225
|
+
"A mechanism that rejects user queries that are off-topic or potentially harmful before sending to the LLM",
|
|
226
|
+
"A rate limiter that controls the number of API calls per minute",
|
|
227
|
+
"A caching layer that stores frequently asked questions"
|
|
228
|
+
],
|
|
229
|
+
"correct": 1,
|
|
230
|
+
"explanation": "Input guardrails check user queries before processing. Common techniques: embedding similarity to topic vectors (reject if < threshold), keyword blocklists, classifier for harmful content. This prevents misuse and keeps the system focused on its domain."
|
|
231
|
+
},
|
|
232
|
+
{
|
|
233
|
+
"id": 12,
|
|
234
|
+
"domain": "Part 3: LLM Applications & RAG",
|
|
235
|
+
"question": "Your RAG system has Precision@5 = 0.8 but Recall@5 = 0.2. What does this indicate and how would you improve it?",
|
|
236
|
+
"options": [
|
|
237
|
+
"Most retrieved docs are relevant but you're missing many relevant docs — increase top-k or use hybrid search",
|
|
238
|
+
"Most retrieved docs are irrelevant — improve the embedding model",
|
|
239
|
+
"The LLM is hallucinating — add output guardrails",
|
|
240
|
+
"The chunks are too large — reduce chunk size"
|
|
241
|
+
],
|
|
242
|
+
"correct": 0,
|
|
243
|
+
"explanation": "High precision (4/5 retrieved are relevant) + low recall (only 20% of all relevant docs found) means the retriever is accurate but misses many documents. Solutions: increase top-k, add BM25 keyword search alongside semantic search (hybrid), or use a re-ranker."
|
|
244
|
+
},
|
|
245
|
+
{
|
|
246
|
+
"id": 13,
|
|
247
|
+
"domain": "Part 3: LLM Applications & RAG",
|
|
248
|
+
"question": "In LangChain Expression Language (LCEL), what does the pipe operator `|` represent?",
|
|
249
|
+
"options": [
|
|
250
|
+
"Logical OR between two conditions",
|
|
251
|
+
"Chaining components sequentially — output of left becomes input of right",
|
|
252
|
+
"Parallel execution of two components",
|
|
253
|
+
"Error handling — if left fails, try right"
|
|
254
|
+
],
|
|
255
|
+
"correct": 1,
|
|
256
|
+
"explanation": "LCEL pipe operator chains components: `prompt | llm | output_parser`. Output of prompt template feeds into LLM, LLM output feeds into parser. It's syntactic sugar for creating a RunnableSequence."
|
|
257
|
+
},
|
|
258
|
+
{
|
|
259
|
+
"id": 14,
|
|
260
|
+
"domain": "Part 4: Agentic AI & LLM Customization",
|
|
261
|
+
"question": "In a LangGraph agent, what is the purpose of conditional edges?",
|
|
262
|
+
"options": [
|
|
263
|
+
"To add random branching for diversity in agent behavior",
|
|
264
|
+
"To route execution to different nodes based on the current state — enabling decision-making logic",
|
|
265
|
+
"To execute multiple nodes in parallel",
|
|
266
|
+
"To retry failed nodes automatically"
|
|
267
|
+
],
|
|
268
|
+
"correct": 1,
|
|
269
|
+
"explanation": "Conditional edges evaluate the current graph state and route to different nodes. For example, a router function checks if the query needs research → route to search_agent, or already has enough context → route to answer_agent. This enables dynamic decision-making."
|
|
270
|
+
},
|
|
271
|
+
{
|
|
272
|
+
"id": 15,
|
|
273
|
+
"domain": "Part 4: Agentic AI & LLM Customization",
|
|
274
|
+
"question": "What is the key difference between ReAct and Plan-and-Execute cognitive architectures?",
|
|
275
|
+
"options": [
|
|
276
|
+
"ReAct uses LLMs while Plan-and-Execute uses rule-based systems",
|
|
277
|
+
"ReAct interleaves reasoning and action step-by-step, while Plan-and-Execute creates a full plan first then executes sequentially",
|
|
278
|
+
"Plan-and-Execute is always faster than ReAct",
|
|
279
|
+
"ReAct cannot use external tools"
|
|
280
|
+
],
|
|
281
|
+
"correct": 1,
|
|
282
|
+
"explanation": "ReAct (Reason + Act): Think → Act → Observe → Think → Act → ... (interleaved). Plan-and-Execute: Plan all steps first → Execute step 1 → Execute step 2 → ... → Done. P&E is better for complex multi-step tasks; ReAct is better for conversational/adaptive tasks."
|
|
283
|
+
},
|
|
284
|
+
{
|
|
285
|
+
"id": 16,
|
|
286
|
+
"domain": "Part 4: Agentic AI & LLM Customization",
|
|
287
|
+
"question": "In structured output from an LLM, why use Pydantic models with `with_structured_output()`?",
|
|
288
|
+
"options": [
|
|
289
|
+
"To reduce the LLM's response latency",
|
|
290
|
+
"To force the LLM response into a validated JSON schema — ensuring machine-parseable output for downstream processing",
|
|
291
|
+
"To encrypt the LLM output for security",
|
|
292
|
+
"To compress the output to save bandwidth"
|
|
293
|
+
],
|
|
294
|
+
"correct": 1,
|
|
295
|
+
"explanation": "Pydantic models define a strict schema (types, constraints, descriptions). `with_structured_output()` constrains the LLM to output valid JSON matching the schema, enabling reliable extraction of structured data for tool calls, API requests, or database operations."
|
|
296
|
+
},
|
|
297
|
+
{
|
|
298
|
+
"id": 17,
|
|
299
|
+
"domain": "Part 4: Agentic AI & LLM Customization",
|
|
300
|
+
"question": "LoRA (Low-Rank Adaptation) reduces fine-tuning parameters by decomposing weight update ΔW into:",
|
|
301
|
+
"options": [
|
|
302
|
+
"A diagonal matrix D of size (d×d)",
|
|
303
|
+
"Two low-rank matrices B (d×r) and A (r×k) where r << min(d,k)",
|
|
304
|
+
"A sparse matrix with 90% zeros",
|
|
305
|
+
"A single vector of size (d)"
|
|
306
|
+
],
|
|
307
|
+
"correct": 1,
|
|
308
|
+
"explanation": "LoRA decomposes ΔW = B·A where B ∈ R^(d×r) and A ∈ R^(r×k). With r=16, d=k=4096: LoRA params = 2×4096×16 = 131,072 vs full fine-tuning 4096² = 16.7M (0.78%). The frozen base model + small adapters = memory efficient."
|
|
309
|
+
},
|
|
310
|
+
{
|
|
311
|
+
"id": 18,
|
|
312
|
+
"domain": "Part 4: Agentic AI & LLM Customization",
|
|
313
|
+
"question": "Your fine-tuned model achieves BLEU=0.12 but F1=0.78 on a QA task. What is the most likely explanation?",
|
|
314
|
+
"options": [
|
|
315
|
+
"The model is completely wrong — both metrics are bad",
|
|
316
|
+
"The model generates correct answers but uses different wording than the reference — BLEU penalizes paraphrasing",
|
|
317
|
+
"The model memorized the training data (overfitting)",
|
|
318
|
+
"The evaluation dataset is too small to be meaningful"
|
|
319
|
+
],
|
|
320
|
+
"correct": 1,
|
|
321
|
+
"explanation": "BLEU measures exact n-gram overlap — penalizes correct answers with different wording. F1 measures token-level information overlap — more forgiving of paraphrasing. Low BLEU + high F1 = model is correct but paraphrases. Use semantic similarity or LLM-as-a-Judge for better QA evaluation."
|
|
322
|
+
},
|
|
323
|
+
{
|
|
324
|
+
"id": 19,
|
|
325
|
+
"domain": "Part 4: Agentic AI & LLM Customization",
|
|
326
|
+
"question": "What is the key advantage of QLoRA over standard LoRA?",
|
|
327
|
+
"options": [
|
|
328
|
+
"QLoRA achieves higher accuracy than standard LoRA",
|
|
329
|
+
"QLoRA quantizes the base model to 4-bit (NF4), dramatically reducing VRAM while keeping LoRA adapters in FP16",
|
|
330
|
+
"QLoRA fine-tunes all model parameters instead of low-rank adapters",
|
|
331
|
+
"QLoRA removes the need for a GPU during training"
|
|
332
|
+
],
|
|
333
|
+
"correct": 1,
|
|
334
|
+
"explanation": "QLoRA = 4-bit NormalFloat quantized base model + FP16 LoRA adapters + double quantization + paged optimizers. A 7B model needs ~14GB VRAM with LoRA but only ~6GB with QLoRA — enabling fine-tuning on consumer GPUs (RTX 3090/4090)."
|
|
335
|
+
},
|
|
336
|
+
{
|
|
337
|
+
"id": 20,
|
|
338
|
+
"domain": "Part 4: Agentic AI & LLM Customization",
|
|
339
|
+
"question": "In the LLM-as-a-Judge evaluation pattern, what is the main risk to watch out for?",
|
|
340
|
+
"options": [
|
|
341
|
+
"The judge model is too expensive to run",
|
|
342
|
+
"Position bias — the judge tends to prefer the first or last response, and self-enhancement bias — it prefers outputs from the same model family",
|
|
343
|
+
"The judge model always disagrees with human evaluators",
|
|
344
|
+
"LLM-as-a-Judge can only evaluate English text"
|
|
345
|
+
],
|
|
346
|
+
"correct": 1,
|
|
347
|
+
"explanation": "Key biases: (1) Position bias — preference for response in a certain position, mitigate by swapping order. (2) Self-enhancement bias — GPT-4 as judge favors GPT-4 outputs. (3) Verbosity bias — prefers longer responses. Mitigate with structured rubrics and randomized orderings."
|
|
348
|
+
}
|
|
349
|
+
]
|
|
350
|
+
}
|
package/data/quizzes.json
CHANGED
|
@@ -1864,5 +1864,19 @@
|
|
|
1864
1864
|
"questions_count": 20,
|
|
1865
1865
|
"tags": ["Kubernetes", "CKAD", "CNCF", "DevOps"],
|
|
1866
1866
|
"series_slug": "luyen-thi-ckad"
|
|
1867
|
+
},
|
|
1868
|
+
{
|
|
1869
|
+
"id": "nvidia-dli-generative-ai",
|
|
1870
|
+
"title": "NVIDIA DLI — Generative AI with Diffusion Models & LLMs",
|
|
1871
|
+
"slug": "nvidia-dli-generative-ai",
|
|
1872
|
+
"description": "Practice exam covering all 4 DLI GenAI courses — Diffusion Models, RAG Agents, Agentic AI, LLM Eval & Fine-tuning. 20 questions, coding-style.",
|
|
1873
|
+
"icon": "gpu",
|
|
1874
|
+
"provider": "NVIDIA",
|
|
1875
|
+
"level": "Intermediate",
|
|
1876
|
+
"duration_minutes": 40,
|
|
1877
|
+
"passing_score": 70,
|
|
1878
|
+
"questions_count": 20,
|
|
1879
|
+
"tags": ["NVIDIA", "DLI", "Diffusion", "LLM", "RAG", "LoRA", "GenAI"],
|
|
1880
|
+
"series_slug": "luyen-thi-nvidia-dli-generative-ai"
|
|
1867
1881
|
}
|
|
1868
1882
|
]
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@xdev-asia/xdev-knowledge-mcp",
|
|
3
|
-
"version": "1.0.58",
|
|
3
|
+
"version": "1.0.59",
|
|
4
4
|
"description": "MCP Server - Toàn bộ kiến thức xDev.asia: 57 series, 1200+ lessons, blog, showcase (AI, Architecture, DevSecOps, Programming)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|