@chaprola/mcp-server 1.2.0 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +37 -0
- package/package.json +2 -2
- package/references/cookbook.md +212 -7
- package/references/gotchas.md +27 -0
package/dist/index.js
CHANGED
|
@@ -665,6 +665,43 @@ server.tool("chaprola_schedule_delete", "Delete a scheduled job by name", {
|
|
|
665
665
|
const res = await authedFetch("/schedule/delete", { name });
|
|
666
666
|
return textResult(res);
|
|
667
667
|
}));
|
|
668
|
+
// --- Record CRUD ---
|
|
669
|
+
server.tool("chaprola_insert_record", "Insert a new record into a data file's merge file (.MRG). The record appears at the end of the file until consolidation.", {
|
|
670
|
+
project: z.string().describe("Project name"),
|
|
671
|
+
file: z.string().describe("Data file name (without extension)"),
|
|
672
|
+
record: z.record(z.string()).describe("Field name → value pairs. Unspecified fields default to blanks."),
|
|
673
|
+
}, async ({ project, file, record }) => withBaaCheck(async () => {
|
|
674
|
+
const { username } = getCredentials();
|
|
675
|
+
const res = await authedFetch("/insert-record", { userid: username, project, file, record });
|
|
676
|
+
return textResult(res);
|
|
677
|
+
}));
|
|
678
|
+
server.tool("chaprola_update_record", "Update fields in a single record matched by a where clause. If no sort-key changes, updates in place; otherwise marks old record ignored and appends to merge file.", {
|
|
679
|
+
project: z.string().describe("Project name"),
|
|
680
|
+
file: z.string().describe("Data file name (without extension)"),
|
|
681
|
+
where: z.record(z.string()).describe("Field name → value pairs to identify exactly one record"),
|
|
682
|
+
set: z.record(z.string()).describe("Field name → new value pairs to update"),
|
|
683
|
+
}, async ({ project, file, where: whereClause, set }) => withBaaCheck(async () => {
|
|
684
|
+
const { username } = getCredentials();
|
|
685
|
+
const res = await authedFetch("/update-record", { userid: username, project, file, where: whereClause, set });
|
|
686
|
+
return textResult(res);
|
|
687
|
+
}));
|
|
688
|
+
server.tool("chaprola_delete_record", "Delete a single record matched by a where clause. Marks the record as ignored (.IGN). Physically removed on consolidation.", {
|
|
689
|
+
project: z.string().describe("Project name"),
|
|
690
|
+
file: z.string().describe("Data file name (without extension)"),
|
|
691
|
+
where: z.record(z.string()).describe("Field name → value pairs to identify exactly one record"),
|
|
692
|
+
}, async ({ project, file, where: whereClause }) => withBaaCheck(async () => {
|
|
693
|
+
const { username } = getCredentials();
|
|
694
|
+
const res = await authedFetch("/delete-record", { userid: username, project, file, where: whereClause });
|
|
695
|
+
return textResult(res);
|
|
696
|
+
}));
|
|
697
|
+
server.tool("chaprola_consolidate", "Merge a .MRG file into its parent .DA, producing a clean sorted data file. Deletes .MRG and .IGN after success. Aborts if .MRG was modified during the operation.", {
|
|
698
|
+
project: z.string().describe("Project name"),
|
|
699
|
+
file: z.string().describe("Data file name (without extension)"),
|
|
700
|
+
}, async ({ project, file }) => withBaaCheck(async () => {
|
|
701
|
+
const { username } = getCredentials();
|
|
702
|
+
const res = await authedFetch("/consolidate", { userid: username, project, file });
|
|
703
|
+
return textResult(res);
|
|
704
|
+
}));
|
|
668
705
|
// --- Start server ---
|
|
669
706
|
async function main() {
|
|
670
707
|
const transport = new StdioServerTransport();
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@chaprola/mcp-server",
|
|
3
|
-
"version": "1.
|
|
4
|
-
"description": "MCP server for Chaprola — agent-first data platform. Gives AI agents
|
|
3
|
+
"version": "1.3.1",
|
|
4
|
+
"description": "MCP server for Chaprola — agent-first data platform. Gives AI agents 46 tools for structured data storage, record CRUD, querying, schema inspection, web search, URL fetching, scheduled jobs, and execution via plain HTTP.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
7
7
|
"bin": {
|
package/references/cookbook.md
CHANGED
|
@@ -136,17 +136,222 @@ Supports: CSV, TSV, JSON, NDJSON, Parquet (zstd/snappy/lz4), Excel (.xlsx/.xls).
|
|
|
136
136
|
AI instructions are optional — omit to import all columns as-is.
|
|
137
137
|
Lambda: 10 GB /tmp, 900s timeout, 500 MB download limit.
|
|
138
138
|
|
|
139
|
-
## HULDRA Optimization
|
|
139
|
+
## HULDRA Optimization — Nonlinear Parameter Fitting
|
|
140
140
|
|
|
141
|
-
|
|
142
|
-
R21–R40 = objectives (your program computes, HULDRA reads after)
|
|
143
|
-
R41–R50 = scratch space
|
|
141
|
+
HULDRA finds the best parameter values for a mathematical model by minimizing the difference between model predictions and observed data. You propose a model, HULDRA finds the coefficients.
|
|
144
142
|
|
|
143
|
+
### How It Works
|
|
144
|
+
|
|
145
|
+
1. You write a VALUE program (normal Chaprola) that reads data, computes predictions using R-variable parameters, and stores the error in an objective R-variable
|
|
146
|
+
2. HULDRA repeatedly runs your program with different parameter values, using gradient descent to minimize the objective
|
|
147
|
+
3. When the objective stops improving, HULDRA returns the optimal parameters
|
|
148
|
+
|
|
149
|
+
### R-Variable Interface
|
|
150
|
+
|
|
151
|
+
| Range | Purpose | Who sets it |
|
|
152
|
+
|-------|---------|-------------|
|
|
153
|
+
| R1–R20 | **Elements** (parameters to optimize) | HULDRA sets these before each VM run |
|
|
154
|
+
| R21–R40 | **Objectives** (error metrics) | Your program computes and stores these |
|
|
155
|
+
| R41–R50 | **Scratch space** | Your program uses these for temp variables |
|
|
156
|
+
|
|
157
|
+
### Complete Example: Fit a Linear Model
|
|
158
|
+
|
|
159
|
+
**Goal:** Find `salary = a × years_exp + b` that best fits employee data.
|
|
160
|
+
|
|
161
|
+
**Step 1: Import data**
|
|
162
|
+
```bash
|
|
163
|
+
POST /import {
|
|
164
|
+
userid, project: "fit", name: "EMP",
|
|
165
|
+
data: [
|
|
166
|
+
{"years_exp": 2, "salary": 55000},
|
|
167
|
+
{"years_exp": 5, "salary": 72000},
|
|
168
|
+
{"years_exp": 8, "salary": 88000},
|
|
169
|
+
{"years_exp": 12, "salary": 105000},
|
|
170
|
+
{"years_exp": 15, "salary": 118000}
|
|
171
|
+
]
|
|
172
|
+
}
|
|
173
|
+
```
|
|
174
|
+
|
|
175
|
+
**Step 2: Write and compile the VALUE program**
|
|
176
|
+
```chaprola
|
|
177
|
+
// VALUE program: salary = R1 * years_exp + R2
|
|
178
|
+
// R1 = slope (per-year raise), R2 = base salary
|
|
179
|
+
// R21 = sum of squared residuals (SSR)
|
|
180
|
+
|
|
181
|
+
DEFINE VARIABLE REC R41
|
|
182
|
+
DEFINE VARIABLE YRS R42
|
|
183
|
+
DEFINE VARIABLE SAL R43
|
|
184
|
+
DEFINE VARIABLE PRED R44
|
|
185
|
+
DEFINE VARIABLE RESID R45
|
|
186
|
+
DEFINE VARIABLE SSR R46
|
|
187
|
+
|
|
188
|
+
LET SSR = 0
|
|
189
|
+
LET REC = 1
|
|
190
|
+
100 SEEK REC
|
|
191
|
+
IF EOF GOTO 200
|
|
192
|
+
GET YRS FROM P.years_exp
|
|
193
|
+
GET SAL FROM P.salary
|
|
194
|
+
LET PRED = R1 * YRS
|
|
195
|
+
LET PRED = PRED + R2
|
|
196
|
+
LET RESID = PRED - SAL
|
|
197
|
+
LET RESID = RESID * RESID
|
|
198
|
+
LET SSR = SSR + RESID
|
|
199
|
+
LET REC = REC + 1
|
|
200
|
+
GOTO 100
|
|
201
|
+
200 LET R21 = SSR
|
|
202
|
+
END
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
Compile with: `primary_format: "EMP"`
|
|
206
|
+
|
|
207
|
+
**Step 3: Run HULDRA**
|
|
145
208
|
```bash
|
|
146
209
|
POST /optimize {
|
|
147
|
-
userid, project
|
|
148
|
-
|
|
149
|
-
|
|
210
|
+
userid, project: "fit",
|
|
211
|
+
program: "SALFIT",
|
|
212
|
+
primary_file: "EMP",
|
|
213
|
+
elements: [
|
|
214
|
+
{index: 1, label: "per_year_raise", start: 5000, min: 0, max: 20000, delta: 10},
|
|
215
|
+
{index: 2, label: "base_salary", start: 40000, min: 0, max: 100000, delta: 100}
|
|
216
|
+
],
|
|
217
|
+
objectives: [
|
|
218
|
+
{index: 1, label: "SSR", goal: 0.0, weight: 1.0}
|
|
219
|
+
],
|
|
150
220
|
max_iterations: 100
|
|
151
221
|
}
|
|
152
222
|
```
|
|
223
|
+
|
|
224
|
+
**Response:**
|
|
225
|
+
```json
|
|
226
|
+
{
|
|
227
|
+
"status": "converged",
|
|
228
|
+
"iterations": 12,
|
|
229
|
+
"elements": [
|
|
230
|
+
{"index": 1, "label": "per_year_raise", "value": 4876.5},
|
|
231
|
+
{"index": 2, "label": "base_salary", "value": 46230.1}
|
|
232
|
+
],
|
|
233
|
+
"objectives": [
|
|
234
|
+
{"index": 1, "label": "SSR", "value": 2841050.3, "goal": 0.0}
|
|
235
|
+
],
|
|
236
|
+
"elapsed_seconds": 0.02
|
|
237
|
+
}
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
**Result:** `salary = $4,877/year × experience + $46,230 base`
|
|
241
|
+
|
|
242
|
+
### Element Parameters Explained
|
|
243
|
+
|
|
244
|
+
| Field | Description | Guidance |
|
|
245
|
+
|-------|-------------|----------|
|
|
246
|
+
| `index` | Maps to R-variable (1 → R1, 2 → R2, ...) | Max 20 elements |
|
|
247
|
+
| `label` | Human-readable name | Returned in results |
|
|
248
|
+
| `start` | Initial guess | Closer to true value = faster convergence |
|
|
249
|
+
| `min`, `max` | Bounds | HULDRA clamps parameters to this range |
|
|
250
|
+
| `delta` | Step size for gradient computation | ~0.1% of expected value range. Too large = inaccurate gradients. Too small = numerical noise |
|
|
251
|
+
|
|
252
|
+
### Choosing Delta Values
|
|
253
|
+
|
|
254
|
+
Delta controls how HULDRA estimates gradients (via central differences). Rules of thumb:
|
|
255
|
+
- **Dollar amounts** (fares, salaries): `delta: 0.01` to `1.0`
|
|
256
|
+
- **Rates/percentages** (per-mile, per-minute): `delta: 0.001` to `0.01`
|
|
257
|
+
- **Counts/integers**: `delta: 0.1` to `1.0`
|
|
258
|
+
- **Time values** (hours, peaks): `delta: 0.05` to `0.5`
|
|
259
|
+
|
|
260
|
+
If optimization doesn't converge, try making delta smaller.
|
|
261
|
+
|
|
262
|
+
### Performance & Limits
|
|
263
|
+
|
|
264
|
+
HULDRA runs your VALUE program **1 + 2 × N_elements** times per iteration (once for evaluation, twice per element for gradient). With `max_iterations: 100`:
|
|
265
|
+
|
|
266
|
+
| Elements | VM runs/iteration | At 100 iterations |
|
|
267
|
+
|----------|-------------------|-------------------|
|
|
268
|
+
| 2 | 5 | 500 |
|
|
269
|
+
| 3 | 7 | 700 |
|
|
270
|
+
| 5 | 11 | 1,100 |
|
|
271
|
+
| 10 | 21 | 2,100 |
|
|
272
|
+
|
|
273
|
+
**Lambda timeout is 900 seconds.** If each VM run takes 0.01s (100 records), you're fine. If each run takes 1s (100K records), 3 elements means 7 runs/iteration × 100 iterations = 700 runs ≈ 700s — cutting it close.
|
|
274
|
+
|
|
275
|
+
**Strategy for large datasets:** Sample first. Query 200–500 representative records into a smaller dataset, optimize against that. The coefficients transfer to the full dataset.
|
|
276
|
+
|
|
277
|
+
```bash
|
|
278
|
+
# Sample 500 records from a large dataset
|
|
279
|
+
POST /query {userid, project, file: "BIGDATA", limit: 500, offset: 100000}
|
|
280
|
+
# Import the sample
|
|
281
|
+
POST /import {userid, project, name: "SAMPLE", data: [...results...]}
|
|
282
|
+
# Optimize against the sample
|
|
283
|
+
POST /optimize {... primary_file: "SAMPLE" ...}
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
### Async Optimization
|
|
287
|
+
|
|
288
|
+
For optimizations that might exceed 30 seconds (API Gateway timeout), use async mode:
|
|
289
|
+
|
|
290
|
+
```bash
|
|
291
|
+
POST /optimize {
|
|
292
|
+
... async_exec: true ...
|
|
293
|
+
}
|
|
294
|
+
# Response: {status: "running", job_id: "20260325_..."}
|
|
295
|
+
|
|
296
|
+
POST /optimize/status {userid, project, job_id: "20260325_..."}
|
|
297
|
+
# Response: {status: "converged", elements: [...], ...}
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
### Multi-Objective Optimization
|
|
301
|
+
|
|
302
|
+
HULDRA can minimize multiple objectives simultaneously with different weights:
|
|
303
|
+
|
|
304
|
+
```bash
|
|
305
|
+
objectives: [
|
|
306
|
+
{index: 1, label: "price_error", goal: 0.0, weight: 1.0},
|
|
307
|
+
{index: 2, label: "volume_error", goal: 0.0, weight: 10.0}
|
|
308
|
+
]
|
|
309
|
+
```
|
|
310
|
+
|
|
311
|
+
Higher weight = more important. HULDRA minimizes `Q = sum(weight × (value - goal)²)`.
|
|
312
|
+
|
|
313
|
+
### Interpreting Results
|
|
314
|
+
|
|
315
|
+
- **`status: "converged"`** — Optimal parameters found. The objective stopped improving.
|
|
316
|
+
- **`status: "timeout"`** — Hit 900s wall clock. Results are the best found so far — often still useful.
|
|
317
|
+
- **`total_objective`** — The raw Q value. Compare across runs, not in absolute terms. Lower = better fit.
|
|
318
|
+
- **`SSR` (objective value)** — Sum of squared residuals. Divide by record count for mean squared error. Take the square root for RMSE in the same units as your data.
|
|
319
|
+
- **`dq_dx` on elements** — Gradient. Values near zero mean the parameter is well-optimized. Large values may indicate the bounds are too tight.
|
|
320
|
+
|
|
321
|
+
### Nonlinear Models
|
|
322
|
+
|
|
323
|
+
HULDRA handles any model you can express in Chaprola, not just linear. Use EXP, LOG, SQRT, POW for curves:
|
|
324
|
+
|
|
325
|
+
```chaprola
|
|
326
|
+
// Exponential decay: value = R1 * exp(-R2 * time) + R3
|
|
327
|
+
DEFINE VARIABLE T R41
|
|
328
|
+
DEFINE VARIABLE OBS R42
|
|
329
|
+
DEFINE VARIABLE PRED R43
|
|
330
|
+
DEFINE VARIABLE ARG R44
|
|
331
|
+
DEFINE VARIABLE SSR R45
|
|
332
|
+
|
|
333
|
+
GET T FROM P.time
|
|
334
|
+
GET OBS FROM P.observed
|
|
335
|
+
LET ARG = R2 * T
|
|
336
|
+
LET ARG = ARG * -1
|
|
337
|
+
LET PRED = EXP ARG
|
|
338
|
+
LET PRED = PRED * R1
|
|
339
|
+
LET PRED = PRED + R3
|
|
340
|
+
LET R46 = PRED - OBS // residual
|
|
341
|
+
LET R46 = R46 * R46
|
|
342
|
+
LET SSR = SSR + R46
|
|
343
|
+
```
|
|
344
|
+
|
|
345
|
+
This fits exponential decay, growth curves, dose-response functions, depreciation models — any formula where you need to find the best-fitting coefficients.
|
|
346
|
+
|
|
347
|
+
### Agent Workflow Summary
|
|
348
|
+
|
|
349
|
+
1. **Inspect** — Call `/format` to see what fields exist
|
|
350
|
+
2. **Sample** — Use `/query` with `limit` to get a manageable subset (200–500 records)
|
|
351
|
+
3. **Import sample** — `/import` the subset as a new small dataset
|
|
352
|
+
4. **Hypothesize** — Propose a model relating the fields
|
|
353
|
+
5. **Write VALUE program** — Loop through records, compute predicted vs actual, accumulate SSR in R21
|
|
354
|
+
6. **Compile** — `/compile` with `primary_format` pointing to the sample
|
|
355
|
+
7. **Optimize** — `/optimize` with elements, objectives, and the sample as primary_file
|
|
356
|
+
8. **Interpret** — Read the converged element values — those are your model coefficients
|
|
357
|
+
9. **Iterate** — If SSR is high, try a different model (add terms, try nonlinear)
|
package/references/gotchas.md
CHANGED
|
@@ -72,6 +72,33 @@ Only one secondary file can be open. CLOSE before opening another. Save any need
|
|
|
72
72
|
### CLOSE flushes writes
|
|
73
73
|
Always CLOSE before END if you wrote to the secondary file. Unflushed writes are lost.
|
|
74
74
|
|
|
75
|
+
## HULDRA Optimization
|
|
76
|
+
|
|
77
|
+
### Use R41–R50 for scratch variables, not R1–R20
|
|
78
|
+
R1–R20 are reserved for HULDRA elements. R21–R40 are reserved for objectives. Your VALUE program's DEFINE VARIABLE declarations must use R41–R50 only.
|
|
79
|
+
```chaprola
|
|
80
|
+
// WRONG: DEFINE VARIABLE counter R1 (HULDRA will overwrite this)
|
|
81
|
+
// RIGHT: DEFINE VARIABLE counter R41
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
### Sample large datasets before optimizing
|
|
85
|
+
HULDRA runs your program `1 + 2 × N_elements` times per iteration. With 3 elements and 100 iterations, that's 700 VM runs. If each run processes 1M records (7+ seconds), total time = 5,000+ seconds — well beyond the 900-second Lambda timeout. Query 200–500 records into a sample dataset and optimize against that.
|
|
86
|
+
|
|
87
|
+
### Delta too large = bad convergence
|
|
88
|
+
If HULDRA doesn't converge or oscillates, reduce `delta`. Start with ~0.1% of the expected parameter range. For dollar amounts, try `delta: 0.01`. For rates, try `delta: 0.001`.
|
|
89
|
+
|
|
90
|
+
### Always initialize SSR to zero
|
|
91
|
+
Your VALUE program accumulates squared residuals across all records. If you forget `LET SSR = 0` before the loop, SSR carries garbage from a previous HULDRA iteration (R-variables persist between runs within an optimization).
|
|
92
|
+
|
|
93
|
+
### Filter bad data in the VALUE program
|
|
94
|
+
Negative fares, zero distances, and other anomalies will corrupt your fit. Add guards:
|
|
95
|
+
```chaprola
|
|
96
|
+
GET FARE FROM P.fare
|
|
97
|
+
IF FARE LE 0 GOTO 300 // skip bad records
|
|
98
|
+
// ... compute residual ...
|
|
99
|
+
300 LET REC = REC + 1
|
|
100
|
+
```
|
|
101
|
+
|
|
75
102
|
## Email
|
|
76
103
|
|
|
77
104
|
### Content moderation on outbound
|