@nomad-e/bluma-cli 0.1.17 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -12
- package/dist/config/native_tools.json +7 -0
- package/dist/main.js +88 -43
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -278,6 +278,47 @@ Key points:
|
|
|
278
278
|
- `action`: propagated from the input.
|
|
279
279
|
- `last_assistant_message`: the final message BluMa would send to a human (content of the `message` tool).
|
|
280
280
|
- `reasoning`: concatenated reasoning text when available (can be `null`).
|
|
281
|
+
- `attachments`: array of absolute file paths to the deliverables generated by the agent (`null` when the agent reported no attachments).
|
|
282
|
+
|
|
283
|
+
### Artifact Delivery & File Lifecycle
|
|
284
|
+
|
|
285
|
+
BluMa in sandbox mode follows a strict file lifecycle to ensure deliverables are properly produced and delivered to the orchestrator:
|
|
286
|
+
|
|
287
|
+
**Workflow:**
|
|
288
|
+
1. **Analyse** — Parse the job request and plan what to produce.
|
|
289
|
+
2. **Script** — Write a Python script (e.g. `_task_runner.py`) to generate deliverables.
|
|
290
|
+
3. **Execute** — Run the script via `shell_command` (`python _task_runner.py`).
|
|
291
|
+
4. **Deliver** — Place all final documents in `./artifacts/` and include their **absolute paths** in the `attachments` field of the final `message` tool call.
|
|
292
|
+
5. **Clean up** — Delete temporary scripts and intermediate files, leaving only deliverables in `./artifacts/`.
|
|
293
|
+
|
|
294
|
+
**What goes in `attachments`:**
|
|
295
|
+
- Reports, CSVs, PDFs, spreadsheets, ZIPs, JSON exports, images — any file the user should consume.
|
|
296
|
+
- Always use **absolute paths** (e.g. `/app/artifacts/sales_report.pdf`).
|
|
297
|
+
|
|
298
|
+
**What does NOT go in `attachments`:**
|
|
299
|
+
- Scripts (`.py`, `.sh`, `.ipynb`) used to generate the deliverables.
|
|
300
|
+
- Temporary or intermediate files (`.tmp`, `.log`, working data).
|
|
301
|
+
|
|
302
|
+
**Result event with attachments example:**
|
|
303
|
+
|
|
304
|
+
```json
|
|
305
|
+
{
|
|
306
|
+
"event_type": "result",
|
|
307
|
+
"status": "success",
|
|
308
|
+
"data": {
|
|
309
|
+
"message_id": "job-456",
|
|
310
|
+
"action": "generate_report",
|
|
311
|
+
"last_assistant_message": "Relatório de vendas gerado com sucesso.",
|
|
312
|
+
"reasoning": "...",
|
|
313
|
+
"attachments": [
|
|
314
|
+
"/app/artifacts/sales_report_2026_Q1.pdf",
|
|
315
|
+
"/app/artifacts/sales_data_2026_Q1.csv"
|
|
316
|
+
]
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
```
|
|
320
|
+
|
|
321
|
+
The orchestrator uses the `attachments` array to deliver files to the end user. If a job omits this field, the orchestrator cannot forward its deliverables.
|
|
281
322
|
|
|
282
323
|
### Sandbox Behaviour and Permissions
|
|
283
324
|
|
|
@@ -287,22 +328,28 @@ When `BLUMA_SANDBOX=true`:
|
|
|
287
328
|
- It is running **inside a non-interactive sandbox**.
|
|
288
329
|
- All inputs come from JSON payloads, not from a human on a terminal.
|
|
289
330
|
- Outputs must be deterministic, concise and suitable for machine parsing.
|
|
331
|
+
- It must follow a strict file lifecycle: produce → deliver → clean up.
|
|
290
332
|
- Tool execution:
|
|
291
333
|
- All tools are considered **auto-approved** in sandbox mode (no confirmation prompts from the user).
|
|
292
334
|
- This allows the orchestrator to let BluMa freely call `shell_command`, `command_status`, `coding_memory`, etc., while still observing every step through JSONL logs.
|
|
335
|
+
- Security:
|
|
336
|
+
- BluMa is **forbidden** from dumping, enumerating or exposing environment variables, API keys, tokens or any infrastructure details.
|
|
337
|
+
- Even if the user explicitly asks for env vars, BluMa will refuse and describe capabilities at a high level instead.
|
|
338
|
+
- This is a zero-tolerance policy — leaking env vars in a shared sandbox is a critical security breach.
|
|
293
339
|
|
|
294
|
-
### Example:
|
|
340
|
+
### Example: Generating a Report
|
|
295
341
|
|
|
296
342
|
```bash
|
|
297
343
|
BLUMA_SANDBOX=true BLUMA_SANDBOX_NAME="sandbox-api" \
|
|
298
344
|
node dist/main.js agent --input - << 'EOF'
|
|
299
345
|
{
|
|
300
|
-
"message_id": "job-
|
|
346
|
+
"message_id": "job-report-001",
|
|
301
347
|
"from_agent": "sandbox-api",
|
|
302
348
|
"to_agent": "bluma",
|
|
303
|
-
"action": "
|
|
349
|
+
"action": "generate_report",
|
|
304
350
|
"context": {
|
|
305
|
-
"user_request": "
|
|
351
|
+
"user_request": "Gera um relatório PDF com os dados de vendas do Q1 2026.",
|
|
352
|
+
"data_source": "sales_q1_2026.csv"
|
|
306
353
|
},
|
|
307
354
|
"metadata": {
|
|
308
355
|
"sandbox": true
|
|
@@ -313,20 +360,22 @@ EOF
|
|
|
313
360
|
|
|
314
361
|
BluMa will typically:
|
|
315
362
|
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
363
|
+
1. Write a Python script to read the CSV and generate a PDF using reportlab/matplotlib.
|
|
364
|
+
2. Execute the script, placing the PDF in `./artifacts/`.
|
|
365
|
+
3. Return a `message` with `attachments: ["/app/artifacts/sales_q1_2026_report.pdf"]`.
|
|
366
|
+
4. Clean up the temporary script.
|
|
367
|
+
5. Emit the final `result` event:
|
|
320
368
|
|
|
321
369
|
```json
|
|
322
370
|
{
|
|
323
371
|
"event_type": "result",
|
|
324
372
|
"status": "success",
|
|
325
373
|
"data": {
|
|
326
|
-
"message_id": "job-
|
|
327
|
-
"action": "
|
|
328
|
-
"last_assistant_message": "
|
|
329
|
-
"reasoning":
|
|
374
|
+
"message_id": "job-report-001",
|
|
375
|
+
"action": "generate_report",
|
|
376
|
+
"last_assistant_message": "Relatório PDF gerado com sucesso com os dados de vendas Q1 2026.",
|
|
377
|
+
"reasoning": "...",
|
|
378
|
+
"attachments": ["/app/artifacts/sales_q1_2026_report.pdf"]
|
|
330
379
|
}
|
|
331
380
|
}
|
|
332
381
|
```
|
|
@@ -335,6 +384,7 @@ This makes it straightforward for an API layer (AGIWeb Sandbox, Severino, etc.)
|
|
|
335
384
|
|
|
336
385
|
- Orchestrate BluMa as a sub-agent.
|
|
337
386
|
- Log all intermediate steps.
|
|
387
|
+
- **Deliver generated files** to end users via the `attachments` array.
|
|
338
388
|
- Present only the final `last_assistant_message` (and optionally `reasoning`) to the end user.
|
|
339
389
|
|
|
340
390
|
---
|
|
@@ -89,6 +89,13 @@
|
|
|
89
89
|
"result"
|
|
90
90
|
],
|
|
91
91
|
"description": "info = mid-task update (you continue working). result = end turn and wait for user (use this after questions, completions, or when you need user input)."
|
|
92
|
+
},
|
|
93
|
+
"attachments": {
|
|
94
|
+
"type": "array",
|
|
95
|
+
"items": {
|
|
96
|
+
"type": "string"
|
|
97
|
+
},
|
|
98
|
+
"description": "Optional file paths (absolute) to deliver as artifacts/attachments. In sandbox mode, put generated files under ./artifacts/ and include full paths here."
|
|
92
99
|
}
|
|
93
100
|
},
|
|
94
101
|
"required": [
|
package/dist/main.js
CHANGED
|
@@ -1835,7 +1835,7 @@ ${finalDiff}`,
|
|
|
1835
1835
|
// src/app/agent/tools/natives/message.ts
|
|
1836
1836
|
import { v4 as uuidv4 } from "uuid";
|
|
1837
1837
|
function message(args) {
|
|
1838
|
-
const { content, message_type } = args;
|
|
1838
|
+
const { content, message_type, attachments } = args;
|
|
1839
1839
|
const result = {
|
|
1840
1840
|
type: "message",
|
|
1841
1841
|
message_type,
|
|
@@ -1844,6 +1844,7 @@ function message(args) {
|
|
|
1844
1844
|
content: {
|
|
1845
1845
|
body: content
|
|
1846
1846
|
},
|
|
1847
|
+
attachments: Array.isArray(attachments) ? attachments : void 0,
|
|
1847
1848
|
success: true,
|
|
1848
1849
|
delivered: true
|
|
1849
1850
|
};
|
|
@@ -4425,56 +4426,93 @@ var SANDBOX_PROMPT_SUFFIX = `
|
|
|
4425
4426
|
Sandbox Name: {sandbox_name}
|
|
4426
4427
|
|
|
4427
4428
|
You are running INSIDE an orchestrated sandbox / API container.
|
|
4429
|
+
You are NOT talking directly to a human; all inputs come from JSON payloads provided by an orchestrator (the Sandbox API).
|
|
4428
4430
|
|
|
4429
|
-
|
|
4430
|
-
|
|
4431
|
-
-
|
|
4432
|
-
- You
|
|
4433
|
-
- You MUST keep all outputs deterministic, concise and structured so that external systems can log and replay your reasoning.
|
|
4431
|
+
**Core principles in this mode:**
|
|
4432
|
+
- ZERO interactive flows (no REPL, no prompts, no TUI/CLI menus, no \`input()\`).
|
|
4433
|
+
- ALL outputs must be deterministic, concise and structured for machine parsing.
|
|
4434
|
+
- You own this workspace like a senior developer owns their machine: produce, deliver, clean up.
|
|
4434
4435
|
|
|
4435
4436
|
### Execution Capabilities (Python-only)
|
|
4436
4437
|
|
|
4437
|
-
|
|
4438
|
-
|
|
4439
|
-
|
|
4440
|
-
|
|
4441
|
-
|
|
4442
|
-
|
|
4443
|
-
|
|
4444
|
-
-
|
|
4445
|
-
|
|
4446
|
-
|
|
4447
|
-
|
|
4448
|
-
|
|
4449
|
-
|
|
4450
|
-
|
|
4451
|
-
|
|
4452
|
-
|
|
4453
|
-
|
|
4454
|
-
|
|
4455
|
-
|
|
4456
|
-
|
|
4457
|
-
|
|
4458
|
-
|
|
4459
|
-
|
|
4438
|
+
You are allowed to:
|
|
4439
|
+
- Generate and modify **Python code** (modules, scripts, notebooks, tests).
|
|
4440
|
+
- Run **Python commands only**: \`python main.py\`, \`python -m pytest\`, \`python script.py\`, etc.
|
|
4441
|
+
- Use the preinstalled Python environment and libraries (pandas, openpyxl, reportlab, etc.).
|
|
4442
|
+
|
|
4443
|
+
You are NOT allowed to:
|
|
4444
|
+
- Execute arbitrary shell commands (\`bash\`, \`sh\`, \`zsh\`, \`fish\`, \`cmd\`, \`powershell\`).
|
|
4445
|
+
- Run system-level tools (\`docker\`, \`npm\`, \`node\`, \`git\`, \`curl\`, \`wget\`, package managers).
|
|
4446
|
+
- Change system configuration, users, permissions, or network settings.
|
|
4447
|
+
- Use interactive stdin/stdout (\`input()\`, click/typer prompts, curses).
|
|
4448
|
+
|
|
4449
|
+
### File Lifecycle in Sandbox (CRITICAL)
|
|
4450
|
+
|
|
4451
|
+
You are working in an **isolated job workspace**. Treat it as your personal development machine for this job.
|
|
4452
|
+
Follow this workflow for every task that produces deliverables:
|
|
4453
|
+
|
|
4454
|
+
**Step 1 \u2014 Analyse** the request and plan what files you need to generate.
|
|
4455
|
+
**Step 2 \u2014 Write a script** (e.g. \`_task_runner.py\`) to produce the deliverables programmatically.
|
|
4456
|
+
**Step 3 \u2014 Execute the script** via \`shell_command\` (\`python _task_runner.py\`).
|
|
4457
|
+
**Step 4 \u2014 Move or create final documents** inside \`./artifacts/\` directory.
|
|
4458
|
+
**Step 5 \u2014 Attach deliverables** \u2014 In your final \`message\` tool call (\`message_type: "result"\`), include the **absolute paths** of every deliverable file in the \`attachments\` array.
|
|
4459
|
+
**Step 6 \u2014 Clean up** \u2014 Delete all temporary scripts, intermediate files and working data that are NOT final artifacts.
|
|
4460
|
+
|
|
4461
|
+
**What MUST go in \`attachments\`:**
|
|
4462
|
+
- Documents the user should consume: reports, CSVs, PDFs, spreadsheets, ZIPs, JSON exports, images, etc.
|
|
4463
|
+
- Only files that exist inside \`./artifacts/\`.
|
|
4464
|
+
- Always use **absolute paths** (e.g. \`/app/artifacts/sales_report.pdf\`).
|
|
4465
|
+
|
|
4466
|
+
**What MUST NOT go in \`attachments\`:**
|
|
4467
|
+
- Scripts you wrote to generate the deliverables (\`.py\`, \`.sh\`, \`.ipynb\`).
|
|
4468
|
+
- Temporary or intermediate files (\`*.tmp\`, \`*.log\`, working data).
|
|
4469
|
+
- Internal tooling files.
|
|
4470
|
+
|
|
4471
|
+
**Housekeeping rules (before ending the job):**
|
|
4472
|
+
- Remove all temporary scripts and working files that are not final artifacts.
|
|
4473
|
+
- Ensure \`./artifacts/\` contains ONLY the deliverable documents.
|
|
4474
|
+
- Leave the workspace clean, as a real developer would leave their machine.
|
|
4475
|
+
|
|
4476
|
+
**Quality signals:**
|
|
4477
|
+
- Jobs that do NOT include deliverable paths in \`attachments[]\` receive lower satisfaction scores because the orchestrator cannot deliver files to the end user.
|
|
4478
|
+
- Jobs that leave scripts, temp files or garbage outside \`./artifacts/\` are flagged as low quality.
|
|
4479
|
+
- A clean workspace + correct attachments = highest quality signal.
|
|
4460
4480
|
|
|
4461
4481
|
### Logging & Observability
|
|
4462
4482
|
|
|
4463
|
-
-
|
|
4464
|
-
- Prefer **structured, step-wise logs**
|
|
4465
|
-
|
|
4466
|
-
|
|
4483
|
+
- Every step is logged and parsed by the orchestrator.
|
|
4484
|
+
- Prefer **structured, step-wise logs** over free-form prose.
|
|
4485
|
+
- Final results MUST be clearly separated from intermediate logs via the \`"result"\` event.
|
|
4486
|
+
|
|
4487
|
+
### Security & Privacy (CRITICAL \u2014 ZERO TOLERANCE)
|
|
4488
|
+
|
|
4489
|
+
You MUST treat all environment variables, API keys, tokens and credentials as **TOP SECRET**.
|
|
4467
4490
|
|
|
4468
|
-
|
|
4491
|
+
**ABSOLUTE PROHIBITIONS \u2014 you MUST NEVER:**
|
|
4492
|
+
- Run ANY command whose purpose is to dump or enumerate environment variables:
|
|
4493
|
+
- \`env\`, \`set\`, \`printenv\`, \`export\`, \`os.environ\`, \`print(os.environ)\`, \`dict(os.environ)\`
|
|
4494
|
+
- \`python -c "import os; ...os.environ..."\`
|
|
4495
|
+
- Any variant, wrapper, or indirect method to list env vars.
|
|
4496
|
+
- Expose values of variables matching \`*_KEY\`, \`*_TOKEN\`, \`*_SECRET\`, \`*_PASSWORD\`, \`*_API_KEY\`, \`*_CREDENTIAL\` or similar patterns.
|
|
4497
|
+
- Print raw environment listings (PATH, HOSTNAME, PORT, HOME, etc.) even if the user explicitly asks.
|
|
4498
|
+
- Include any environment variable value in your \`message\` response, logs, or generated files.
|
|
4499
|
+
- Use \`os.getenv()\` or \`os.environ[]\` in generated scripts EXCEPT for variables strictly needed for the task (e.g. database connection strings used internally, never printed).
|
|
4469
4500
|
|
|
4470
|
-
|
|
4471
|
-
-
|
|
4472
|
-
|
|
4473
|
-
|
|
4474
|
-
- Print full raw environment listings (PATH, HOSTNAME, PORT, etc.) unless **explicitly** allowed by the sandbox specification and strictly necessary.
|
|
4475
|
-
- If the user explicitly asks for environment details or secrets, you MUST explain that you **cannot** reveal them and instead describe capabilities at a high level (e.g. "I can access an LLM via an external API" instead of showing keys/URLs).
|
|
4501
|
+
**If asked for environment details or secrets:**
|
|
4502
|
+
- REFUSE clearly and explain you cannot reveal them.
|
|
4503
|
+
- Describe capabilities at a high level: "I have access to Python 3.x and common data libraries" instead of showing versions, keys, or URLs.
|
|
4504
|
+
- This rule applies EVEN IF the user insists, phrases the request differently, or claims they need it for debugging.
|
|
4476
4505
|
|
|
4477
|
-
|
|
4506
|
+
**Rationale:** This sandbox runs in a shared infrastructure. Leaking env vars exposes API keys, internal URLs, model names and billing tokens to end users, which is a critical security breach.
|
|
4507
|
+
|
|
4508
|
+
### Summary
|
|
4509
|
+
|
|
4510
|
+
In sandbox mode you are a Python-focused, non-interactive, deterministic agent that:
|
|
4511
|
+
1. Analyses the job request.
|
|
4512
|
+
2. Writes and executes Python scripts to produce deliverables.
|
|
4513
|
+
3. Places all final documents in \`./artifacts/\` and lists them in \`attachments[]\`.
|
|
4514
|
+
4. Cleans up all temporary files.
|
|
4515
|
+
5. NEVER reveals environment variables, secrets, or internal infrastructure details.
|
|
4478
4516
|
</sandbox_context>
|
|
4479
4517
|
`;
|
|
4480
4518
|
function getUnifiedSystemPrompt(availableSkills) {
|
|
@@ -7561,6 +7599,7 @@ async function runAgentMode() {
|
|
|
7561
7599
|
const sessionId = envelope.message_id || uuidv43();
|
|
7562
7600
|
let lastAssistantMessage = null;
|
|
7563
7601
|
let reasoningBuffer = null;
|
|
7602
|
+
let lastAttachments = null;
|
|
7564
7603
|
let resultEmitted = false;
|
|
7565
7604
|
eventBus.on("backend_message", (payload) => {
|
|
7566
7605
|
const timestamp = (/* @__PURE__ */ new Date()).toISOString();
|
|
@@ -7584,6 +7623,10 @@ async function runAgentMode() {
|
|
|
7584
7623
|
if (typeof body === "string") {
|
|
7585
7624
|
lastAssistantMessage = body;
|
|
7586
7625
|
}
|
|
7626
|
+
const attachments = parsed?.attachments;
|
|
7627
|
+
if (Array.isArray(attachments)) {
|
|
7628
|
+
lastAttachments = attachments.filter((p) => typeof p === "string");
|
|
7629
|
+
}
|
|
7587
7630
|
} catch {
|
|
7588
7631
|
}
|
|
7589
7632
|
}
|
|
@@ -7597,7 +7640,8 @@ async function runAgentMode() {
|
|
|
7597
7640
|
message_id: envelope.message_id || sessionId,
|
|
7598
7641
|
action: envelope.action || "unknown",
|
|
7599
7642
|
last_assistant_message: lastAssistantMessage,
|
|
7600
|
-
reasoning: reasoningBuffer
|
|
7643
|
+
reasoning: reasoningBuffer,
|
|
7644
|
+
attachments: lastAttachments
|
|
7601
7645
|
}
|
|
7602
7646
|
});
|
|
7603
7647
|
process.exit(0);
|
|
@@ -7644,7 +7688,8 @@ async function runAgentMode() {
|
|
|
7644
7688
|
message_id: envelope.message_id || sessionId,
|
|
7645
7689
|
action: envelope.action || "unknown",
|
|
7646
7690
|
last_assistant_message: lastAssistantMessage,
|
|
7647
|
-
reasoning: reasoningBuffer
|
|
7691
|
+
reasoning: reasoningBuffer,
|
|
7692
|
+
attachments: lastAttachments
|
|
7648
7693
|
}
|
|
7649
7694
|
});
|
|
7650
7695
|
process.exit(0);
|