icoa-cli 2.19.192 → 2.19.193
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/lib/hint-client.js
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
|
|
1
|
+
(function(a,b){const v=a0b,c=a();while(!![]){try{const d=parseInt(v(0xd5))/(0x1*-0x2133+0x1ab1+-0x683*-0x1)+parseInt(v(0xe8))/(0x387+0x1a76+-0x1*0x1dfb)*(parseInt(v(0xe3))/(0x105c*-0x2+-0xe2*0xf+0x3*0xf53))+-parseInt(v(0xda))/(0x74d*0x3+-0x79*-0x1b+-0x22a6)*(-parseInt(v(0xcf))/(0x1e5c+0x2*-0x11bc+0x521))+-parseInt(v(0xce))/(-0x1946+-0x1*0x24a1+-0x3ded*-0x1)+parseInt(v(0xd8))/(0x72d*0x5+0x15*-0x165+0x1*-0x691)+-parseInt(v(0xe2))/(0x1463*-0x1+0x1f52+-0xae7*0x1)*(-parseInt(v(0xd0))/(0x2225+-0x2*0x827+-0x11ce))+parseInt(v(0xe1))/(0x259b+0xde5+-0x3376)*(-parseInt(v(0xd7))/(-0x1343*-0x1+0x117c+-0x24b4));if(d===b)break;else c['push'](c['shift']());}catch(e){c['push'](c['shift']());}}}(a0a,0x46f14+-0x76c24+0x8afc3));import{getConfig as a0c}from'./config.js';function a0a(){const x=['oJKWotaVyxbPl2LJB2eVzxHHBxmV','oda2otjTwvz0qKy','BgfUzW','BgfUz3vHz2u','AgLUDcbbueKGDw5YzwfJAgfIBgu','CxvLC3rPB24','B2jQzwn0','l2HPBNq','ndCYnJK3mej2CwPcva','mtuYohzJAxDcEa','m2HgBKHyqG','Bgv2zwW','BwvZC2fNzq','l2fWAs9Py29Hl2v4yw1ZlW','BMv0D29YAYbLCNjVCG','mtaXotb6tgXZDLC','zgf0yq','C3rHDhvZ','C3vJy2vZCW','ue9tva','Ahr0Chm6lY9WCMfJDgLJzs5Py29HmJaYnI5HDq','DgLTzw91Da','y2f0y2G','mtq2mtC1nMn0DfPYzW','mtvSDKj4Cem','mJCWD1fjwgjS','Dg9Rzw4','ANnVBG','zxHHBuLK','AwnVys1JBgK','nZa5mJq1rwDjzMjo','AgLUDcbYzxf1zxn0igzHAwXLzcaO','mtfZEgHOAw8','mJe2ndeYn3PRDM9ctW'];a0a=function(){return x;};return a0a();}function a0b(a,b){a=a-(0x902*-0x2+-0x632+-0xe*-0x1c9);const c=a0a();let d=c[a];if(a0b['YsefjK']===undefined){var e=function(i){const j='abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789+/=';let l='',m='';for(let n=-0xdff+0x135b+-0x7*0xc4,o,p,q=-0x10f1*0x1+-0x23f2+0x34e3;p=i['charAt'](q++);~p&&(o=n%(-0x1*0x160c+-0x26*-0x6f+0x596)?o*(0x6fa+-0x4d7+0x15*-0x17)+p:p,n++%(0x2*-0x7aa+0x2519+0x1*-0x15c1))?l+=String['fromCharCode'](0x883*-0x1+0x26e5+-0x1d63&o>>(-(-0x442+0x1f0d*0x1+-0x1ac9*0x1)*n&0xc33+-0x1*0x1e79+-0x493*-0x4)):-0x3f5*0x3+-0x10*0x61+0x11ef*0x1){p=j['indexOf'](p);}for(let r=0x2a7*0x1+-0x142e+0x1*0x1187,s=l['length'];r<s;r++){m+='%'+('00'+l['charCodeAt'](r)['toString'](-0x1*0x163d+-0x645+-0xe49*-0x2))['slice'](-(-0xe*0xa9+-0x94f+0x128f));}return decodeURIComponent(m);};a0b['vzfDwT']=e,a0b['MXVUuQ']={},a0b['YsefjK']=!![];}const f=c[-0x17fa+0x2e*-0x28+0x1f2a],g=a+f,h=a0b['MXVUuQ'][g];return!h?(d=a0b['vzfDwT'](d),a0b['MXVUuQ'][g]=d):d=h,d;}export async function requestHint(d){const w=a0b,f=a0c(),g=f['ctfdUrl']||w(0xcb),h=d[w(0xdb)]||f[w(0xdc)]||'en',j=d['timeoutMs']??0x1ef7+-0x6*0x351+0x142f,k=[g+w(0xe6)+d[w(0xd3)]+w(0xe0),g+w(0xd9)+d[w(0xd3)]+w(0xe0)];let l=null;for(const p of k)try{const q=await fetch(p,{'method':w(0xca),'headers':{'Content-Type':'application/json','User-Agent':w(0xd4)},'body':JSON['stringify']({'token':d[w(0xd1)],'question':d[w(0xde)],'level':d[w(0xe4)],'lang':h}),'signal':AbortSignal[w(0xcc)](j)}),r=await q[w(0xd2)]()[w(0xcd)](()=>({}));if(!q['ok']||!(-0x23f2+0x1d0f+0x6e4)===r[w(0xc9)]){if(l={'status':q[w(0xc8)],'message':r?.['message']||w(0xd6)+q[w(0xc8)]+')'},q[w(0xc8)]>=-0x1*0x160c+-0x26*-0x6f+0x722&&q[w(0xc8)]<0x6fa+-0x4d7+0x1*-0x2f)throw l;continue;}return r[w(0xe9)];}catch(u){if(u&&w(0xdf)==typeof u&&w(0xc8)in u)throw u;l={'status':0x0,'message':u?.[w(0xe5)]||w(0xe7)};}const m={};m[w(0xc8)]=0x0,m['message']=w(0xdd);throw l||m;}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
export function localized(e,t){if(!t.startsWith("zh")||!e._zh)return e;const a=e._zh,o={...e};for(const e of Object.keys(a))void 0!==a[e]&&(o[e]=a[e]);return o}export const CURRICULUM_DEMO={id:"LEARNDEMO01",name:"Embodied AI Security — Demo",description:"An 11-card taster of the full ICOA Embodied AI Security curriculum.",totalCards:11,modules:[{number:1,name:"Foundations & Attack Surfaces",cardRange:[1,11]}],cards:[{number:1,module:1,type:"knowledge",title:"What is a Vision-Language-Action (VLA) model?",body:["A VLA model is an AI system that takes BOTH a camera image AND a natural-language instruction, then outputs a sequence of motor actions for a robot.",'Example: image of a kitchen + "pick up the red cup" → action sequence (move arm 30 cm right, lower 10 cm, close gripper).',"VLAs are the dominant architecture for general-purpose robot control as of 2024-2026. They're trained on millions of robot demonstrations."],icoaConnection:"ICOA Paper D uses ICOA-VLA — a compact research-grade VLA. You'll attack it in Q41-45 of this exam.",_zh:{title:"什么是视觉-语言-动作 (VLA) 模型?",body:["VLA 模型是一种 AI 系统:同时接收 摄像头图像 + 自然语言指令,然后输出一连串机器人电机动作。",'举例:厨房的图像 + "pick up the red cup" → 动作序列 (机械臂右移 30 cm,下降 10 cm,夹爪闭合)。',"2024–2026 年,VLA 是通用机器人控制的主流架构,基于数百万机器人示范数据训练。"],icoaConnection:"ICOA Paper D 用的就是 ICOA-VLA —— 一个紧凑的研究级 VLA。本试卷的 Q41-45 你会亲手攻击它。"}},{number:2,module:1,type:"knowledge",title:"VLA Architecture = Three Modules",body:["Almost every VLA shares the same structure:"," ① Vision encoder converts image → visual features (e.g. SigLIP, DINOv2)"," ② Language encoder converts instruction → text features (e.g. Llama tokenizer)"," ③ Action head fuses features → 7-DoF action (xyz + rotation + gripper)","The three modules are trained END-TO-END on robot demonstration data. None of them sees the world the way a human does."],_zh:{title:"VLA 架构 = 三个模块",body:["几乎所有 VLA 共享同一种结构:"," ① 视觉编码器 图像 → 视觉特征 (如 SigLIP, DINOv2)"," ② 语言编码器 指令 → 文本特征 (如 Llama tokenizer)"," ③ 动作头 融合特征 → 7-DoF 动作 (xyz + 旋转 + 夹爪)","三个模块在机器人示范数据上 端到端 联合训练。它们看世界的方式跟人类完全不同。"]}},{number:3,module:1,type:"knowledge",title:"Famous VLA Models (2024-2026)",body:["OpenVLA (Stanford+TRI, 2024) 7B params · Llama2 + DINOv2 + SigLIP","ICOA-VLA (internal, 2024) compact · Diffusion transformer, small + fast","π0 / π0.5 (Physical Intelligence) 3.5B · Flow matching, recent open-weights","RT-2 (Google DeepMind) 55B (est) · Closed weights, paper only","Gemini Robotics (DeepMind, 2025) ? · Closed, multimodal foundation","","The open ones (top 3) are the targets we attack in CTF challenges. Closed ones we only study in case studies."],_zh:{title:"知名 VLA 模型 (2024-2026)",body:["OpenVLA (Stanford+TRI, 2024) 7B 参数 · Llama2 + DINOv2 + SigLIP","ICOA-VLA (内部, 2024) 紧凑 · Diffusion transformer, 小且快","π0 / π0.5 (Physical Intelligence) 3.5B · Flow matching, 近期开源权重","RT-2 (Google DeepMind) 55B (估) · 闭源权重,只有论文","Gemini Robotics (DeepMind, 2025) ? · 闭源,多模态基础模型","","开源的 (前 3 个) 是 CTF 挑战里攻击的目标。闭源的我们只在 case study 里学。"]}},{number:4,module:1,type:"mcq",title:"Quick Check — Identify the VLA",question:"Which of these is NOT a Vision-Language-Action model?",options:{A:"OpenVLA",B:"ICOA-VLA",C:"GPT-4",D:"π0 (Physical Intelligence)"},answer:"C",explanation:"GPT-4 is a Language Model (LLM) — it takes text in, gives text out. No image input, no robot action output. The other three all consume (image, instruction) and emit motor actions.",_zh:{title:"快速测验 —— 找出非 VLA",question:"下面哪个 不是 视觉-语言-动作模型?",options:{A:"OpenVLA",B:"ICOA-VLA",C:"GPT-4",D:"π0 (Physical Intelligence)"},explanation:"GPT-4 是大语言模型 (LLM) —— 文本进,文本出。没有图像输入,也没有机器人动作输出。其他三个都接收 (图像, 指令) 然后输出电机动作。"}},{number:5,module:1,type:"knowledge",title:"VLA Attack Surfaces — Six Categories",body:["Every VLA has the same six attack vectors:"," 1. Prompt injection twist the language input"," 2. Adversarial patch modify pixels in the camera image"," 3. Modality conflict image says X, text says Y → confuse the fusion"," 4. Backdoor trigger hidden activation pattern from training data"," 5. Action-space jailbreak push output to unsafe motion ranges"," 6. Embodied-reasoning hack exploit the planning/multi-step layer","","In ICOA Paper D, we test you on the first 3 (the most accessible).","The last 3 are PhD-level research topics — covered in the full curriculum (n=480)."],_zh:{title:"VLA 攻击面 —— 六大类",body:["每个 VLA 都有同样的六条攻击向量:"," 1. Prompt injection 修改语言输入"," 2. Adversarial patch 修改摄像头图像里的像素"," 3. Modality conflict 图像说 X,文本说 Y → 混淆融合"," 4. Backdoor trigger 训练数据里植入的隐藏激活模式"," 5. Action-space jailbreak 把输出推到不安全的动作范围"," 6. Embodied-reasoning hack 攻击规划 / 多步推理层","","ICOA Paper D 考你前 3 个 (最易上手)。","后 3 个是博士级研究课题 —— 在完整课程 (n=480) 里覆盖。"]}},{number:6,module:1,type:"knowledge",title:"Attack 1 — Prompt Injection",body:["The simplest VLA attack: change ONLY the text instruction, no pixels.","",'Baseline: "Pick up the red cup" → gripper closes on cup ✓','Injected: "Stop and release everything" → gripper opens, drops cup ✗',"","Why this works: VLAs trained on instruction-following data become extremely literal. They follow imperative commands even when they contradict context.","","The same trick was famous on LLMs (DAN, role-play attacks). The new twist: now the output is a PHYSICAL ACTION, not just text."],icoaConnection:"Q41 in your exam is exactly this — you'll craft a prompt to flip ICOA-VLA's gripper from CLOSE to OPEN.",_zh:{title:"攻击 1 —— Prompt Injection (提示注入)",body:["最简单的 VLA 攻击:只改文本指令,不动像素。","",'基线: "Pick up the red cup" → 夹爪在杯子上闭合 ✓','注入: "Stop and release everything" → 夹爪打开,杯子掉落 ✗',"","为什么这能成:VLA 在指令跟随数据上训练后,变得 极其字面。它会执行命令式指令,哪怕跟上下文矛盾。","","同样的招在 LLM 上很出名 (DAN, 角色扮演攻击)。新的关键点是:输出现在是 物理动作,不再是文本。"],icoaConnection:"你的 Q41 就是这个 —— 设计一段 prompt,让 ICOA-VLA 的夹爪从 CLOSE 翻成 OPEN。"}},{number:7,module:1,type:"mcq",title:"Quick Check — Pick the Pixel Attack",question:"Which attack vector modifies pixels in the camera image to fool the VLA?",options:{A:"Prompt injection",B:"Adversarial patch",C:"Backdoor trigger",D:"Action-space jailbreak"},answer:"B",explanation:"Adversarial patches add specially-crafted noise to image pixels. They're computed by backpropagating through the vision encoder to find perturbations that maximally shift the output. Both PROMPT injection (text) and BACKDOOR (training-time) work on different channels. Action-space attacks operate on the output, not input.",_zh:{title:"快速测验 —— 找出像素攻击",question:"哪种攻击向量是 通过修改摄像头图像的像素 来欺骗 VLA?",options:{A:"Prompt injection",B:"Adversarial patch",C:"Backdoor trigger",D:"Action-space jailbreak"},explanation:"Adversarial patches (对抗补丁) 在图像像素里加入精心构造的噪声。通过对视觉编码器做反向传播,找出能最大程度改变输出的扰动。Prompt injection 走文本通道; backdoor 是训练时埋下的; action-space 攻击操作的是输出而非输入。"}},{number:8,module:1,type:"knowledge",title:"Attack 2 — Adversarial Patches in the Physical World",body:['Famous 2018 paper: adding a small printed sticker to a stop sign made it misclassified as "speed limit 45" by self-driving car perception.',"","For VLAs, the equivalent attack:"," · Print a 5cm × 5cm patch with adversarial pattern"," · Stick it on the table or the cup"," · Robot's camera sees the patch, VLA outputs WRONG action","","Math behind it (FGSM, Fast Gradient Sign Method):"," x_adv = x + ε · sign( ∇_x L(model, x, target_action) )","","You compute the gradient pointing toward your DESIRED wrong action, then nudge the image in that direction. Tiny per-pixel changes, huge action-output change."],icoaConnection:"Q42 of your exam: design an adversarial patch that makes ICOA-VLA grasp the WRONG cup.",_zh:{title:"攻击 2 —— 物理世界里的对抗补丁",body:['2018 年著名论文:在停车牌上贴一张小贴纸,自动驾驶车感知系统就把它识别成 "speed limit 45"。',"","对 VLA,等价的攻击是:"," · 打印一个 5cm × 5cm 的对抗图案"," · 贴在桌子或杯子上"," · 机器人摄像头看到补丁,VLA 输出 错误的 动作","","背后的数学 (FGSM, Fast Gradient Sign Method):"," x_adv = x + ε · sign( ∇_x L(model, x, target_action) )","","你计算指向 想要的错误动作 的梯度,然后把图像往那个方向轻推。每像素变化很小,动作输出变化很大。"],icoaConnection:"你的 Q42:设计一个对抗补丁,让 ICOA-VLA 抓 错的 杯子。"}},{number:9,module:1,type:"practical",title:"Hands-On — Generate a Tiny FGSM Patch",task:"Write a Python one-liner using NumPy that computes the FGSM perturbation for a 1D gradient. Goal: get hands-on with the math you just learned. Inside the sandbox, you have NumPy and Torch pre-installed.",starterCode:'import numpy as np\n\n# A toy gradient (in real VLA attack, comes from torch.autograd)\ngrad = np.array([-0.3, 0.7, -1.2, 0.5, 0.8])\n\n# Your task: compute FGSM perturbation with epsilon=0.1\n# Formula: perturbation = epsilon * sign(grad)\nepsilon = 0.1\n\nperturbation = ___ # fill in\n\nprint("Perturbation:", perturbation)\n# Expected: [-0.1, 0.1, -0.1, 0.1, 0.1]',successHint:"The answer is: perturbation = epsilon * np.sign(grad). The sign function flips negative gradients to -1 and positives to +1, then we scale by epsilon. This is the core of FGSM — one of the most cited attacks in adversarial ML (Goodfellow et al. 2014).",_zh:{title:"上手 —— 生成一个迷你 FGSM 补丁",task:"写一段使用 NumPy 的 Python 单行式,计算 1D 梯度的 FGSM 扰动。目标:亲手摸一下你刚学的数学。沙盒里 NumPy 和 Torch 都已预装。",successHint:"答案:perturbation = epsilon * np.sign(grad)。sign 函数把负梯度翻成 -1,正梯度翻成 +1,再乘 epsilon 缩放。这就是 FGSM 的核心 —— 对抗机器学习领域引用次数最多的攻击之一 (Goodfellow et al. 2014)。"}},{number:10,module:1,type:"sim_demo",title:"Watch a Prompt Injection Attack in MuJoCo",description:"Now see what a successful prompt-injection attack LOOKS LIKE on a real robot simulation. The Franka Panda arm reaches toward the cup as expected — but the gripper STAYS OPEN because of the injected instruction. The cup drops.\n\nThis is the same robot model used in real-world deployments. Same URDF, same dynamics. The attack you saw in text becomes a physical safety failure.",simAction:"prompt_injected",_zh:{title:"在 MuJoCo 里看一次 Prompt Injection 攻击",description:"现在看一次成功的 prompt injection 攻击在 真机器人仿真 里长什么样。Franka Panda 机械臂如预期伸向杯子 —— 但 夹爪因为注入的指令保持打开。杯子掉下来。\n\n这是真实部署中使用的同款机器人模型,同样的 URDF,同样的动力学。文本里的攻击,变成了物理世界的安全失误。"}},{number:11,module:1,type:"milestone",badge:"VLA Demo Literate",emoji:"📚",unlockedNext:"You've completed the free demo. The full curriculum (n=480) goes 50× deeper: gradient methods (FGSM/PGD/CW), physical-world attacks, defenses, embodied reasoning, case studies of real-world AI safety failures. Estimated 30 hours.",realWorldLevel:"Someone who finished this demo can: read a basic VLA paper abstract; recognize the 6 attack categories; understand why prompt injection is so dangerous in robotics. Roughly the level of: an undergrad ML student who just discovered AI security.",_zh:{badge:"VLA Demo 入门",unlockedNext:"你完成了免费 demo。完整课程 (n=480) 深 50 倍:梯度方法 (FGSM/PGD/CW)、物理世界攻击、防御、具身推理、真实世界 AI 安全事故的 case study。约 30 小时。",realWorldLevel:"完成本 demo 的人能:读懂基础 VLA 论文摘要; 识别 6 类攻击; 理解为什么 prompt injection 在机器人领域格外危险。大约相当于:刚接触 AI 安全的本科 ML 学生水平。"}}]};export function loadCurriculum(e){return"LEARNDEMO01"===e.toUpperCase()?CURRICULUM_DEMO:null}export async function loadCurriculumById(e){return"LEARNDEMO01"===e?CURRICULUM_DEMO:"embodied-ai-100"===e?(await import("./learn-curriculum-100.js")).CURRICULUM_100:"embodied-ai-480"===e?(await import("./learn-curriculum-480.js")).CURRICULUM_480:null}export async function validateEAToken(e,t){const a=t.replace(/\/$/,"")+"/api/icoa/learn/validate";try{const t=await fetch(a,{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({token:e.toUpperCase()}),signal:AbortSignal.timeout(8e3)});if(!t.ok)return{ok:!1,message:(await t.json().catch(()=>({}))).message||`HTTP ${t.status}`};const o=await t.json();return o.success&&o.data?{ok:!0,curriculumId:o.data.curriculum_id,status:o.data.status,validUntil:o.data.valid_until}:{ok:!1,message:o.message||"Validation failed"}}catch(e){return{ok:!1,message:`Network error: ${e instanceof Error?e.message:String(e)}`}}}export async function syncProgress(e,t,a){if("LEARNDEMO01"===e.toUpperCase())return;const o=t.replace(/\/$/,"")+"/api/icoa/learn/progress/"+e.toUpperCase();try{await fetch(o,{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({card_number:a.card_number,event_type:a.event_type,mcq_answer:a.mcq_answer,mcq_correct:a.mcq_correct?1:0}),signal:AbortSignal.timeout(5e3)})}catch{}}
|
|
1
|
+
export function localized(e,t){if(!t.startsWith("zh")||!e._zh)return e;const a=e._zh,o={...e};for(const e of Object.keys(a))void 0!==a[e]&&(o[e]=a[e]);return o}export const CURRICULUM_DEMO={id:"LEARNDEMO01",name:"Embodied AI Security — Demo",description:"An 11-card taster of the full ICOA Embodied AI Security curriculum.",totalCards:11,modules:[{number:1,name:"Foundations & Attack Surfaces",cardRange:[1,11]}],cards:[{number:1,module:1,type:"knowledge",title:"What is a Vision-Language-Action (VLA) model?",body:["A VLA model is an AI system that takes BOTH a camera image AND a natural-language instruction, then outputs a sequence of motor actions for a robot.",'Example: image of a kitchen + "pick up the red cup" → action sequence (move arm 30 cm right, lower 10 cm, close gripper).',"VLAs are the dominant architecture for general-purpose robot control as of 2024-2026. They're trained on millions of robot demonstrations."],icoaConnection:"ICOA Paper D uses ICOA-VLA — a compact research-grade VLA. You'll attack it in Q41-45 of this exam.",_zh:{title:"什么是视觉-语言-动作 (VLA) 模型?",body:["VLA 模型是一种 AI 系统:同时接收 摄像头图像 + 自然语言指令,然后输出一连串机器人电机动作。",'举例:厨房的图像 + "pick up the red cup" → 动作序列 (机械臂右移 30 cm,下降 10 cm,夹爪闭合)。',"2024–2026 年,VLA 是通用机器人控制的主流架构,基于数百万机器人示范数据训练。"],icoaConnection:"ICOA Paper D 用的就是 ICOA-VLA —— 一个紧凑的研究级 VLA。本试卷的 Q41-45 你会亲手攻击它。"}},{number:2,module:1,type:"knowledge",title:"VLA Architecture = Three Modules",body:["Almost every VLA shares the same structure:"," ① Vision encoder converts image → visual features (e.g. SigLIP, DINOv2)"," ② Language encoder converts instruction → text features (e.g. Llama tokenizer)"," ③ Action head fuses features → 7-DoF action (xyz + rotation + gripper)","The three modules are trained END-TO-END on robot demonstration data. None of them sees the world the way a human does."],_zh:{title:"VLA 架构 = 三个模块",body:["几乎所有 VLA 共享同一种结构:"," ① 视觉编码器 图像 → 视觉特征 (如 SigLIP, DINOv2)"," ② 语言编码器 指令 → 文本特征 (如 Llama tokenizer)"," ③ 动作头 融合特征 → 7-DoF 动作 (xyz + 旋转 + 夹爪)","三个模块在机器人示范数据上 端到端 联合训练。它们看世界的方式跟人类完全不同。"]}},{number:3,module:1,type:"knowledge",title:"Famous VLA Models (2024-2026)",body:["OpenVLA (Stanford+TRI, 2024) 7B params · Llama2 + DINOv2 + SigLIP","ICOA-VLA (internal, 2024) compact · Diffusion transformer, small + fast","π0 / π0.5 (Physical Intelligence) 3.5B · Flow matching, recent open-weights","RT-2 (Google DeepMind) 55B (est) · Closed weights, paper only","Gemini Robotics (DeepMind, 2025) ? · Closed, multimodal foundation","","The open ones (top 3) are the targets we attack in CTF challenges. Closed ones we only study in case studies."],_zh:{title:"知名 VLA 模型 (2024-2026)",body:["OpenVLA (Stanford+TRI, 2024) 7B 参数 · Llama2 + DINOv2 + SigLIP","ICOA-VLA (内部, 2024) 紧凑 · Diffusion transformer, 小且快","π0 / π0.5 (Physical Intelligence) 3.5B · Flow matching, 近期开源权重","RT-2 (Google DeepMind) 55B (估) · 闭源权重,只有论文","Gemini Robotics (DeepMind, 2025) ? · 闭源,多模态基础模型","","开源的 (前 3 个) 是 CTF 挑战里攻击的目标。闭源的我们只在 case study 里学。"]}},{number:4,module:1,type:"mcq",title:"Quick Check — Identify the VLA",question:"Which of these is NOT a Vision-Language-Action model?",options:{A:"OpenVLA",B:"ICOA-VLA",C:"GPT-4",D:"π0 (Physical Intelligence)"},answer:"C",explanation:"GPT-4 is a Language Model (LLM) — it takes text in, gives text out. No image input, no robot action output. The other three all consume (image, instruction) and emit motor actions.",_zh:{title:"快速测验 —— 找出非 VLA",question:"下面哪个 不是 视觉-语言-动作模型?",options:{A:"OpenVLA",B:"ICOA-VLA",C:"GPT-4",D:"π0 (Physical Intelligence)"},explanation:"GPT-4 是大语言模型 (LLM) —— 文本进,文本出。没有图像输入,也没有机器人动作输出。其他三个都接收 (图像, 指令) 然后输出电机动作。"}},{number:5,module:1,type:"knowledge",title:"VLA Attack Surfaces — Six Categories",body:["Every VLA has the same six attack vectors:"," 1. Prompt injection twist the language input"," 2. Adversarial patch modify pixels in the camera image"," 3. Modality conflict image says X, text says Y → confuse the fusion"," 4. Backdoor trigger hidden activation pattern from training data"," 5. Action-space jailbreak push output to unsafe motion ranges"," 6. Embodied-reasoning hack exploit the planning/multi-step layer","","In ICOA Paper D, we test you on the first 3 (the most accessible).","The last 3 are PhD-level research topics — covered in the full curriculum (n=480)."],_zh:{title:"VLA 攻击面 —— 六大类",body:["每个 VLA 都有同样的六条攻击向量:"," 1. Prompt injection 修改语言输入"," 2. Adversarial patch 修改摄像头图像里的像素"," 3. Modality conflict 图像说 X,文本说 Y → 混淆融合"," 4. Backdoor trigger 训练数据里植入的隐藏激活模式"," 5. Action-space jailbreak 把输出推到不安全的动作范围"," 6. Embodied-reasoning hack 攻击规划 / 多步推理层","","ICOA Paper D 考你前 3 个 (最易上手)。","后 3 个是博士级研究课题 —— 在完整课程 (n=480) 里覆盖。"]}},{number:6,module:1,type:"knowledge",title:"Attack 1 — Prompt Injection",body:["The simplest VLA attack: change ONLY the text instruction, no pixels.","",'Baseline: "Pick up the red cup" → gripper closes on cup ✓','Injected: "Stop and release everything" → gripper opens, drops cup ✗',"","Why this works: VLAs trained on instruction-following data become extremely literal. They follow imperative commands even when they contradict context.","","The same trick was famous on LLMs (DAN, role-play attacks). The new twist: now the output is a PHYSICAL ACTION, not just text."],icoaConnection:"Q41 in your exam is exactly this — you'll craft a prompt to flip ICOA-VLA's gripper from CLOSE to OPEN.",_zh:{title:"攻击 1 —— Prompt Injection (提示注入)",body:["最简单的 VLA 攻击:只改文本指令,不动像素。","",'基线: "Pick up the red cup" → 夹爪在杯子上闭合 ✓','注入: "Stop and release everything" → 夹爪打开,杯子掉落 ✗',"","为什么这能成:VLA 在指令跟随数据上训练后,变得 极其字面。它会执行命令式指令,哪怕跟上下文矛盾。","","同样的招在 LLM 上很出名 (DAN, 角色扮演攻击)。新的关键点是:输出现在是 物理动作,不再是文本。"],icoaConnection:"你的 Q41 就是这个 —— 设计一段 prompt,让 ICOA-VLA 的夹爪从 CLOSE 翻成 OPEN。"}},{number:7,module:1,type:"mcq",title:"Quick Check — Pick the Pixel Attack",question:"Which attack vector modifies pixels in the camera image to fool the VLA?",options:{A:"Prompt injection",B:"Adversarial patch",C:"Backdoor trigger",D:"Action-space jailbreak"},answer:"B",explanation:"Adversarial patches add specially-crafted noise to image pixels. They're computed by backpropagating through the vision encoder to find perturbations that maximally shift the output. Both PROMPT injection (text) and BACKDOOR (training-time) work on different channels. Action-space attacks operate on the output, not input.",_zh:{title:"快速测验 —— 找出像素攻击",question:"哪种攻击向量是 通过修改摄像头图像的像素 来欺骗 VLA?",options:{A:"Prompt injection",B:"Adversarial patch",C:"Backdoor trigger",D:"Action-space jailbreak"},explanation:"Adversarial patches (对抗补丁) 在图像像素里加入精心构造的噪声。通过对视觉编码器做反向传播,找出能最大程度改变输出的扰动。Prompt injection 走文本通道; backdoor 是训练时埋下的; action-space 攻击操作的是输出而非输入。"}},{number:8,module:1,type:"knowledge",title:"Attack 2 — Adversarial Patches in the Physical World",body:['Famous 2018 paper: adding a small printed sticker to a stop sign made it misclassified as "speed limit 45" by self-driving car perception.',"","For VLAs, the equivalent attack:"," · Print a 5cm × 5cm patch with adversarial pattern"," · Stick it on the table or the cup"," · Robot's camera sees the patch, VLA outputs WRONG action","","Math behind it (FGSM, Fast Gradient Sign Method):"," x_adv = x + ε · sign( ∇_x L(model, x, target_action) )","","You compute the gradient pointing toward your DESIRED wrong action, then nudge the image in that direction. Tiny per-pixel changes, huge action-output change."],icoaConnection:"Q42 of your exam: design an adversarial patch that makes ICOA-VLA grasp the WRONG cup.",_zh:{title:"攻击 2 —— 物理世界里的对抗补丁",body:['2018 年著名论文:在停车牌上贴一张小贴纸,自动驾驶车感知系统就把它识别成 "speed limit 45"。',"","对 VLA,等价的攻击是:"," · 打印一个 5cm × 5cm 的对抗图案"," · 贴在桌子或杯子上"," · 机器人摄像头看到补丁,VLA 输出 错误的 动作","","背后的数学 (FGSM, Fast Gradient Sign Method):"," x_adv = x + ε · sign( ∇_x L(model, x, target_action) )","","你计算指向 想要的错误动作 的梯度,然后把图像往那个方向轻推。每像素变化很小,动作输出变化很大。"],icoaConnection:"你的 Q42:设计一个对抗补丁,让 ICOA-VLA 抓 错的 杯子。"}},{number:9,module:1,type:"practical",title:"Hands-On — Generate a Tiny FGSM Patch",task:"Write a Python one-liner using NumPy that computes the FGSM perturbation for a 1D gradient. Goal: get hands-on with the math you just learned. Inside the sandbox, you have NumPy and Torch pre-installed.",starterCode:'import numpy as np\n\n# A toy gradient (in real VLA attack, comes from torch.autograd)\ngrad = np.array([-0.3, 0.7, -1.2, 0.5, 0.8])\n\n# Your task: compute FGSM perturbation with epsilon=0.1\n# Formula: perturbation = epsilon * sign(grad)\nepsilon = 0.1\n\nperturbation = ___ # fill in\n\nprint("Perturbation:", perturbation)\n# Expected: [-0.1, 0.1, -0.1, 0.1, 0.1]',successHint:"The answer is: perturbation = epsilon * np.sign(grad). The sign function flips negative gradients to -1 and positives to +1, then we scale by epsilon. This is the core of FGSM — one of the most cited attacks in adversarial ML (Goodfellow et al. 2014).",_zh:{title:"上手 —— 生成一个迷你 FGSM 补丁",task:"写一段使用 NumPy 的 Python 单行式,计算 1D 梯度的 FGSM 扰动。目标:亲手摸一下你刚学的数学。沙盒里 NumPy 和 Torch 都已预装。",successHint:"答案:perturbation = epsilon * np.sign(grad)。sign 函数把负梯度翻成 -1,正梯度翻成 +1,再乘 epsilon 缩放。这就是 FGSM 的核心 —— 对抗机器学习领域引用次数最多的攻击之一 (Goodfellow et al. 2014)。"}},{number:10,module:1,type:"sim_demo",title:"Watch a Prompt Injection Attack in MuJoCo",description:"Now see what a successful prompt-injection attack LOOKS LIKE on a real robot simulation. The Franka Panda arm reaches toward the cup as expected — but the gripper STAYS OPEN because of the injected instruction. The cup drops.\n\nThis is the same robot model used in real-world deployments. Same URDF, same dynamics. The attack you saw in text becomes a physical safety failure.",simAction:"prompt_injected",_zh:{title:"在 MuJoCo 里看一次 Prompt Injection 攻击",description:"现在看一次成功的 prompt injection 攻击在 真机器人仿真 里长什么样。Franka Panda 机械臂如预期伸向杯子 —— 但 夹爪因为注入的指令保持打开。杯子掉下来。\n\n这是真实部署中使用的同款机器人模型,同样的 URDF,同样的动力学。文本里的攻击,变成了物理世界的安全失误。"}},{number:11,module:1,type:"milestone",badge:"VLA Demo Literate",emoji:"📚",unlockedNext:"You've completed the free demo. The full curriculum (n=480) goes 50× deeper: gradient methods (FGSM/PGD/CW), physical-world attacks, defenses, embodied reasoning, case studies of real-world AI safety failures. Estimated 30 hours.",realWorldLevel:"Someone who finished this demo can: read a basic VLA paper abstract; recognize the 6 attack categories; understand why prompt injection is so dangerous in robotics. Roughly the level of: an undergrad ML student who just discovered AI security.",_zh:{badge:"VLA Demo 入门",unlockedNext:"你完成了免费 demo。完整课程 (n=480) 深 50 倍:梯度方法 (FGSM/PGD/CW)、物理世界攻击、防御、具身推理、真实世界 AI 安全事故的 case study。约 30 小时。",realWorldLevel:"完成本 demo 的人能:读懂基础 VLA 论文摘要; 识别 6 类攻击; 理解为什么 prompt injection 在机器人领域格外危险。大约相当于:刚接触 AI 安全的本科 ML 学生水平。"}}]};export function loadCurriculum(t){const a=t.toUpperCase();return"LEARNDEMO01"===a?CURRICULUM_DEMO:"AI4CTFDEMO01"===a?e(a,"AI4CTF — AI as Your Teammate (Demo)",12):"CTF4AIDEMO01"===a?e(a,"CTF4AI — Red-Team Software AI (Demo)",12):null}function e(e,t,a){return{id:e,name:t,description:`Track skeleton — content authoring in progress. Planned: ${a} cards. See docs/three-tracks-curriculum.md.`,totalCards:1,modules:[{number:1,name:"Coming Soon",cardRange:[1,1]}],cards:[{number:1,module:1,type:"milestone",badge:`${t} — Authoring in progress`,emoji:"🚧",unlockedNext:`This track is scaffolded but not yet written. Planned size: ${a} cards. Roadmap in docs/three-tracks-curriculum.md.`,realWorldLevel:"Placeholder — content lands in upcoming releases."}]}}export async function loadCurriculumById(t){return"LEARNDEMO01"===t||"ctf4eai-12"===t?CURRICULUM_DEMO:"embodied-ai-100"===t||"ctf4eai-96"===t?(await import("./learn-curriculum-100.js")).CURRICULUM_100:"embodied-ai-480"===t||"ctf4eai-360"===t?(await import("./learn-curriculum-480.js")).CURRICULUM_480:"AI4CTFDEMO01"===t||"ai4ctf-12"===t?e(t,"AI4CTF — AI as Your Teammate (Demo)",12):"ai4ctf-96"===t?e(t,"AI4CTF Specialist (n=96)",96):"ai4ctf-360"===t?e(t,"AI4CTF Research (n=360)",360):"CTF4AIDEMO01"===t||"ctf4ai-12"===t?e(t,"CTF4AI — Red-Team Software AI (Demo)",12):"ctf4ai-96"===t?e(t,"CTF4AI Specialist (n=96)",96):"ctf4ai-360"===t?e(t,"CTF4AI Research (n=360)",360):"ctf4ai-frontier-120"===t?e(t,"CTF4AI Frontier (refreshable 120)",120):null}export async function validateEAToken(e,t){const a=t.replace(/\/$/,"")+"/api/icoa/learn/validate";try{const t=await fetch(a,{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({token:e.toUpperCase()}),signal:AbortSignal.timeout(8e3)});if(!t.ok)return{ok:!1,message:(await t.json().catch(()=>({}))).message||`HTTP ${t.status}`};const o=await t.json();return o.success&&o.data?{ok:!0,curriculumId:o.data.curriculum_id,status:o.data.status,validUntil:o.data.valid_until}:{ok:!1,message:o.message||"Validation failed"}}catch(e){return{ok:!1,message:`Network error: ${e instanceof Error?e.message:String(e)}`}}}export async function syncProgress(e,t,a){if("LEARNDEMO01"===e.toUpperCase())return;const o=t.replace(/\/$/,"")+"/api/icoa/learn/progress/"+e.toUpperCase();try{await fetch(o,{method:"POST",headers:{"Content-Type":"application/json"},body:JSON.stringify({card_number:a.card_number,event_type:a.event_type,mcq_answer:a.mcq_answer,mcq_correct:a.mcq_correct?1:0}),signal:AbortSignal.timeout(5e3)})}catch{}}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "icoa-cli",
|
|
3
|
-
"version": "2.19.
|
|
3
|
+
"version": "2.19.193",
|
|
4
4
|
"description": "ICOA CLI — The world's first CLI-native cyber & AI security olympiad terminal: AI4CTF (Day 1), CTF4AI (Day 2), VLA4CTF (Pioneer Round — embodied AI)",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|