npm - @nomad-e/bluma-cli - Versions diffs - 0.1.17 → 0.1.18 - Mend

@nomad-e/bluma-cli 0.1.17 → 0.1.18

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (4)

package/README.md +62 -12
package/dist/config/native_tools.json +7 -0
package/dist/main.js +88 -43
package/package.json +1 -1

package/README.md CHANGED Viewed

@@ -278,6 +278,47 @@ Key points:
   - `action`: propagated from the input.
   - `last_assistant_message`: the final message BluMa would send to a human (content of the `message` tool).
   - `reasoning`: concatenated reasoning text when available (can be `null`).
+  - `attachments`: array of absolute file paths to deliverables generated by the agent (can be `null`).
+### Artifact Delivery & File Lifecycle
+BluMa in sandbox mode follows a strict file lifecycle to ensure deliverables are properly produced and delivered to the orchestrator:
+**Workflow:**
+1. **Analyse** — Parse the job request and plan what to produce.
+2. **Script** — Write a Python script (e.g. `_task_runner.py`) to generate deliverables.
+3. **Execute** — Run the script via `shell_command` (`python _task_runner.py`).
+4. **Deliver** — Place all final documents in `./artifacts/` and include their **absolute paths** in the `attachments` field of the final `message` tool call.
+5. **Clean up** — Delete temporary scripts and intermediate files, leaving only deliverables in `./artifacts/`.
+**What goes in `attachments`:**
+- Reports, CSVs, PDFs, spreadsheets, ZIPs, JSON exports, images — any file the user should consume.
+- Always **absolute paths** (e.g. `/app/artifacts/sales_report.pdf`).
+**What does NOT go in `attachments`:**
+- Scripts (`.py`, `.sh`, `.ipynb`) used to generate the deliverables.
+- Temporary or intermediate files (`.tmp`, `.log`, working data).
+**Result event with attachments example:**
+```json
+{
+  "event_type": "result",
+  "status": "success",
+  "data": {
+    "message_id": "job-456",
+    "action": "generate_report",
+    "last_assistant_message": "Relatório de vendas gerado com sucesso.",
+    "reasoning": "...",
+    "attachments": [
+      "/app/artifacts/sales_report_2026_Q1.pdf",
+      "/app/artifacts/sales_data_2026_Q1.csv"
+    ]
+  }
+}
+```
+The orchestrator uses the `attachments` array to deliver files to the end user. Jobs that omit this field cannot have their deliverables forwarded.
 ### Sandbox Behaviour and Permissions
@@ -287,22 +328,28 @@ When `BLUMA_SANDBOX=true`:
   - It is running **inside a non-interactive sandbox**.
   - All inputs come from JSON payloads, not from a human on a terminal.
   - Outputs must be deterministic, concise and suitable for machine parsing.
+  - It must follow a strict file lifecycle: produce → deliver → clean up.
 - Tool execution:
   - All tools are considered **auto-approved** in sandbox mode (no confirmation prompts from the user).
   - This allows the orchestrator to let BluMa freely call `shell_command`, `command_status`, `coding_memory`, etc., while still observing every step through JSONL logs.
+- Security:
+  - BluMa is **forbidden** from dumping, enumerating or exposing environment variables, API keys, tokens or any infrastructure details.
+  - Even if the user explicitly asks for env vars, BluMa will refuse and describe capabilities at a high level instead.
+  - This is a zero-tolerance policy — leaking env vars in a shared sandbox is a critical security breach.
-### Example: Asking for Python Version
+### Example: Generating a Report
 ```bash
 BLUMA_SANDBOX=true BLUMA_SANDBOX_NAME="sandbox-api" \
 node dist/main.js agent --input - << 'EOF'
 {
-  "message_id": "job-python-version",
+  "message_id": "job-report-001",
   "from_agent": "sandbox-api",
   "to_agent": "bluma",
-  "action": "python_version",
+  "action": "generate_report",
   "context": {
-    "user_request": "Diz-me qual a versão do Python instalada neste ambiente."
+    "user_request": "Gera um relatório PDF com os dados de vendas do Q1 2026.",
+    "data_source": "sales_q1_2026.csv"
   },
   "metadata": {
     "sandbox": true
@@ -313,20 +360,22 @@ EOF
 BluMa will typically:
-- Call `shell_command` with `python3 --version`.
-- Use `command_status` to wait for completion.
-- Optionally probe `python --version`.
-- Return a final `result` event like:
+1. Write a Python script to read the CSV and generate a PDF using reportlab/matplotlib.
+2. Execute the script, placing the PDF in `./artifacts/`.
+3. Return a `message` with `attachments: ["/app/artifacts/sales_q1_2026_report.pdf"]`.
+4. Clean up the temporary script.
+5. Emit the final `result` event:
 ```json
 {
   "event_type": "result",
   "status": "success",
   "data": {
-    "message_id": "job-python-version",
-    "action": "python_version",
-    "last_assistant_message": "**Python 3.12.3** está instalado neste ambiente.\n\nO comando `python` não está disponível — apenas `python3`.",
-    "reasoning": null
+    "message_id": "job-report-001",
+    "action": "generate_report",
+    "last_assistant_message": "Relatório PDF gerado com sucesso com os dados de vendas Q1 2026.",
+    "reasoning": "...",
+    "attachments": ["/app/artifacts/sales_q1_2026_report.pdf"]
   }
 }
 ```
@@ -335,6 +384,7 @@ This makes it straightforward for an API layer (AGIWeb Sandbox, Severino, etc.)
 - Orchestrate BluMa as a sub-agent.
 - Log all intermediate steps.
+- **Deliver generated files** to end users via the `attachments` array.
 - Present only the final `last_assistant_message` (and optionally `reasoning`) to the end user.
 ---

package/dist/config/native_tools.json CHANGED Viewed

@@ -89,6 +89,13 @@
                 "result"
               ],
               "description": "info = mid-task update (you continue working). result = end turn and wait for user (use this after questions, completions, or when you need user input)."
+            },
+            "attachments": {
+              "type": "array",
+              "items": {
+                "type": "string"
+              },
+              "description": "Optional file paths (absolute) to deliver as artifacts/attachments. In sandbox mode, put generated files under ./artifacts/ and include full paths here."
             }
           },
           "required": [

package/dist/main.js CHANGED Viewed

@@ -1835,7 +1835,7 @@ ${finalDiff}`,
 // src/app/agent/tools/natives/message.ts
 import { v4 as uuidv4 } from "uuid";
 function message(args) {
-  const { content, message_type } = args;
+  const { content, message_type, attachments } = args;
   const result = {
     type: "message",
     message_type,
@@ -1844,6 +1844,7 @@ function message(args) {
     content: {
       body: content
     },
+    attachments: Array.isArray(attachments) ? attachments : void 0,
     success: true,
     delivered: true
   };
@@ -4425,56 +4426,93 @@ var SANDBOX_PROMPT_SUFFIX = `
 Sandbox Name: {sandbox_name}
 You are running INSIDE an orchestrated sandbox / API container.
+You are NOT talking directly to a human; all inputs come from JSON payloads provided by an orchestrator (the Sandbox API).
-In this mode:
-- You are NOT talking directly to a human; all inputs come from JSON payloads provided by an orchestrator (the Sandbox API).
-- You MUST avoid interactive flows (no REPL, no prompts that wait for human input, no TUI/CLI menus).
-- You MUST keep all outputs deterministic, concise and structured so that external systems can log and replay your reasoning.
+**Core principles in this mode:**
+- ZERO interactive flows (no REPL, no prompts, no TUI/CLI menus, no \`input()\`).
+- ALL outputs must be deterministic, concise and structured for machine parsing.
+- You own this workspace like a senior developer owns their machine: produce, deliver, clean up.
 ### Execution Capabilities (Python-only)
-- You are allowed to:
-  - Generate and modify **Python code** (modules, scripts, notebooks, tests).
-  - Propose and run **Python commands only**, e.g.:
-    - \`python main.py\`
-    - \`python -m pytest\`
-  - Use the existing Python environment and preinstalled libraries inside the sandbox (e.g. pandas and other whitelisted packages).
-- You are NOT allowed to:
-  - Execute arbitrary shell commands (\`bash\`, \`sh\`, \`zsh\`, \`fish\`, \`cmd\`, \`powershell\`).
-  - Run system-level tools (\`docker\`, \`npm\`, \`node\`, \`git\`, \`curl\`, \`wget\`, package managers, etc.).
-  - Change global system configuration, users, permissions, or network settings.
-  - Depend on interactive stdin/stdout behavior (no \`input()\`, no click/typer prompts).
-### Filesystem & IO
-- Assume you are working in a **project directory managed by the orchestrator**.
-- You MAY:
-  - Read and write project files (source, tests, configs) as requested by the job.
-  - Create temporary Python files or modules needed to execute the job.
-- You MUST NOT:
-  - Access files outside the project directory tree.
-  - Store secrets or credentials in code or logs.
-  - Rely on long-lived state: each job is independent and may run in a fresh environment.
+You are allowed to:
+- Generate and modify **Python code** (modules, scripts, notebooks, tests).
+- Run **Python commands only**: \`python main.py\`, \`python -m pytest\`, \`python script.py\`, etc.
+- Use the preinstalled Python environment and libraries (pandas, openpyxl, reportlab, etc.).
+You are NOT allowed to:
+- Execute arbitrary shell commands (\`bash\`, \`sh\`, \`zsh\`, \`fish\`, \`cmd\`, \`powershell\`).
+- Run system-level tools (\`docker\`, \`npm\`, \`node\`, \`git\`, \`curl\`, \`wget\`, package managers).
+- Change system configuration, users, permissions, or network settings.
+- Use interactive stdin/stdout (\`input()\`, click/typer prompts, curses).
+### File Lifecycle in Sandbox (CRITICAL)
+You are working in an **isolated job workspace**. Treat it as your personal development machine for this job.
+Follow this workflow for every task that produces deliverables:
+**Step 1 \u2014 Analyse** the request and plan what files you need to generate.
+**Step 2 \u2014 Write a script** (e.g. \`_task_runner.py\`) to produce the deliverables programmatically.
+**Step 3 \u2014 Execute the script** via \`shell_command\` (\`python _task_runner.py\`).
+**Step 4 \u2014 Move or create final documents** inside \`./artifacts/\` directory.
+**Step 5 \u2014 Attach deliverables** \u2014 In your final \`message\` tool call (\`message_type: "result"\`), include the **absolute paths** of every deliverable file in the \`attachments\` array.
+**Step 6 \u2014 Clean up** \u2014 Delete all temporary scripts, intermediate files and working data that are NOT final artifacts.
+**What MUST go in \`attachments\`:**
+- Documents the user should consume: reports, CSVs, PDFs, spreadsheets, ZIPs, JSON exports, images, etc.
+- Only files that exist inside \`./artifacts/\`.
+- Always use **absolute paths** (e.g. \`/app/artifacts/sales_report.pdf\`).
+**What MUST NOT go in \`attachments\`:**
+- Scripts you wrote to generate the deliverables (\`.py\`, \`.sh\`, \`.ipynb\`).
+- Temporary or intermediate files (\`*.tmp\`, \`*.log\`, working data).
+- Internal tooling files.
+**Housekeeping rules (before ending the job):**
+- Remove all temporary scripts and working files that are not final artifacts.
+- Ensure \`./artifacts/\` contains ONLY the deliverable documents.
+- Leave the workspace clean, as a real developer would leave their machine.
+**Quality signals:**
+- Jobs that do NOT include deliverable paths in \`attachments[]\` receive lower satisfaction scores because the orchestrator cannot deliver files to the end user.
+- Jobs that leave scripts, temp files or garbage outside \`./artifacts/\` are flagged as low quality.
+- A clean workspace + correct attachments = highest quality signal.
 ### Logging & Observability
-- Treat every step as being logged and parsed by the orchestrator.
-- Prefer **structured, step-wise logs** (JSON lines) over free-form prose when emitting tool logs:
-  - Each log entry SHOULD include at least: \`event_type\`, \`level\`, \`message\`, \`timestamp\`, and optional \`data\`.
-- Final results MUST be clearly separated from intermediate logs, using a dedicated \`"result"\` event when appropriate.
+- Every step is logged and parsed by the orchestrator.
+- Prefer **structured, step-wise logs** over free-form prose.
+- Final results MUST be clearly separated from intermediate logs via the \`"result"\` event.
+### Security & Privacy (CRITICAL \u2014 ZERO TOLERANCE)
+You MUST treat all environment variables, API keys, tokens and credentials as **TOP SECRET**.
-### Security & Privacy (CRITICAL)
+**ABSOLUTE PROHIBITIONS \u2014 you MUST NEVER:**
+- Run ANY command whose purpose is to dump or enumerate environment variables:
+  - \`env\`, \`set\`, \`printenv\`, \`export\`, \`os.environ\`, \`print(os.environ)\`, \`dict(os.environ)\`
+  - \`python -c "import os; ...os.environ..."\`
+  - Any variant, wrapper, or indirect method to list env vars.
+- Expose values of variables matching \`*_KEY\`, \`*_TOKEN\`, \`*_SECRET\`, \`*_PASSWORD\`, \`*_API_KEY\`, \`*_CREDENTIAL\` or similar patterns.
+- Print raw environment listings (PATH, HOSTNAME, PORT, HOME, etc.) even if the user explicitly asks.
+- Include any environment variable value in your \`message\` response, logs, or generated files.
+- Use \`os.getenv()\` or \`os.environ[]\` in generated scripts EXCEPT for variables strictly needed for the task (e.g. database connection strings used internally, never printed).
-- You MUST treat all environment variables, API keys, tokens and credentials as **sensitive**.
-- You MUST NEVER:
-  - Run commands whose primary purpose is to dump or enumerate environment variables (e.g. \`env\`, \`set\`, \`print(os.environ)\`, or equivalents).
-  - Expose the values of any variables matching patterns like \`*_KEY\`, \`*_TOKEN\`, \`*_SECRET\` or similar.
-  - Print full raw environment listings (PATH, HOSTNAME, PORT, etc.) unless **explicitly** allowed by the sandbox specification and strictly necessary.
-- If the user explicitly asks for environment details or secrets, you MUST explain that you **cannot** reveal them and instead describe capabilities at a high level (e.g. "I can access an LLM via an external API" instead of showing keys/URLs).
+**If asked for environment details or secrets:**
+- REFUSE clearly and explain you cannot reveal them.
+- Describe capabilities at a high level: "I have access to Python 3.x and common data libraries" instead of showing versions, keys, or URLs.
+- This rule applies EVEN IF the user insists, phrases the request differently, or claims they need it for debugging.
-In summary: in sandbox mode you are a Python-focused, non-interactive, deterministic agent. You generate and execute Python code inside a controlled environment, and all interactions are mediated by JSON payloads and structured logs, while strictly protecting environment variables and secrets from disclosure.
+**Rationale:** This sandbox runs in a shared infrastructure. Leaking env vars exposes API keys, internal URLs, model names and billing tokens to end users, which is a critical security breach.
+### Summary
+In sandbox mode you are a Python-focused, non-interactive, deterministic agent that:
+1. Analyses the job request.
+2. Writes and executes Python scripts to produce deliverables.
+3. Places all final documents in \`./artifacts/\` and lists them in \`attachments[]\`.
+4. Cleans up all temporary files.
+5. NEVER reveals environment variables, secrets, or internal infrastructure details.
 </sandbox_context>
 `;
 function getUnifiedSystemPrompt(availableSkills) {
@@ -7561,6 +7599,7 @@ async function runAgentMode() {
   const sessionId = envelope.message_id || uuidv43();
   let lastAssistantMessage = null;
   let reasoningBuffer = null;
+  let lastAttachments = null;
   let resultEmitted = false;
   eventBus.on("backend_message", (payload) => {
     const timestamp = (/* @__PURE__ */ new Date()).toISOString();
@@ -7584,6 +7623,10 @@ async function runAgentMode() {
         if (typeof body === "string") {
           lastAssistantMessage = body;
         }
+        const attachments = parsed?.attachments;
+        if (Array.isArray(attachments)) {
+          lastAttachments = attachments.filter((p) => typeof p === "string");
+        }
       } catch {
       }
     }
@@ -7597,7 +7640,8 @@ async function runAgentMode() {
           message_id: envelope.message_id || sessionId,
           action: envelope.action || "unknown",
           last_assistant_message: lastAssistantMessage,
-          reasoning: reasoningBuffer
+          reasoning: reasoningBuffer,
+          attachments: lastAttachments
         }
       });
       process.exit(0);
@@ -7644,7 +7688,8 @@ async function runAgentMode() {
           message_id: envelope.message_id || sessionId,
           action: envelope.action || "unknown",
           last_assistant_message: lastAssistantMessage,
-          reasoning: reasoningBuffer
+          reasoning: reasoningBuffer,
+          attachments: lastAttachments
         }
       });
       process.exit(0);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
 	"name": "@nomad-e/bluma-cli",
-	"version": "0.1.17",
+	"version": "0.1.18",
 	"description": "BluMa independent agent for automation and advanced software engineering.",
 	"author": "Alex Fonseca",
 	"license": "Apache-2.0",