@oriro/orirocli 0.3.9 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/cli.js +8 -943
  2. package/package.json +1 -1
package/dist/cli.js CHANGED
@@ -9930,942 +9930,9 @@ function registerSetupCommand(program2) {
9930
9930
  });
9931
9931
  }
9932
9932
 
9933
- // src/config/session.ts
9934
- init_paths();
9935
- import { readFileSync as readFileSync27, writeFileSync as writeFileSync23, rmSync as rmSync6, chmodSync } from "fs";
9936
- import { join as join35 } from "path";
9937
/**
 * Location of the persisted login session file.
 *
 * The directory comes from `ensureOriroDir()` (defined elsewhere in the bundle;
 * presumably it also creates the directory — confirm there).
 *
 * @returns {string} path of `session.json` inside the ORIRO directory
 */
function sessionPath() {
  const configDir = ensureOriroDir();
  return join35(configDir, "session.json");
}
9940
/**
 * Persist the setup token (plus a `saved_at` timestamp) to the session file.
 *
 * The file is created with mode 0600 so the token is never readable by other
 * users, not even briefly between the write and the chmod. `mode` only applies
 * when the file is created, so a follow-up chmod tightens a pre-existing file.
 *
 * @param {string} setupToken token to store under `setup_token`
 * @returns {void}
 */
function saveSession(setupToken) {
  const p = sessionPath();
  const payload = JSON.stringify({ setup_token: setupToken, saved_at: Date.now() });
  writeFileSync23(p, payload, { encoding: "utf8", mode: 0o600 });
  try {
    chmodSync(p, 0o600);
  } catch {
    // Best effort: chmod can fail or be meaningless on some platforms/filesystems.
  }
}
9948
/**
 * Resolve the setup token used to authorize downloads.
 *
 * Precedence: a non-empty `ORIRO_SETUP_TOKEN` environment variable, then the
 * `setup_token` field of the session file. A missing, unreadable or malformed
 * session file yields `undefined` rather than an error.
 *
 * @returns {string | undefined} the token, or `undefined` when none is available
 */
function readSetupToken() {
  const fromEnv = process.env.ORIRO_SETUP_TOKEN;
  if (fromEnv) return fromEnv;
  try {
    const session = JSON.parse(readFileSync27(sessionPath(), "utf8"));
    const token = session?.setup_token;
    // Only a non-empty string is a usable token; anything else in a hand-edited
    // or corrupted file is treated as "not logged in".
    return typeof token === "string" && token !== "" ? token : undefined;
  } catch {
    return undefined;
  }
}
9957
/**
 * License key used when deriving the weights key.
 *
 * @returns {string} `ORIRO_LICENSE_KEY` when it is set (even to an empty
 *   string), otherwise the built-in `"oriro-local-v1"`
 */
function readLicense() {
  const configured = process.env.ORIRO_LICENSE_KEY;
  if (configured === undefined || configured === null) {
    return "oriro-local-v1";
  }
  return configured;
}
9960
/**
 * Whether a setup token is available (environment or session file).
 *
 * @returns {boolean} true when `readSetupToken()` yields a truthy token
 */
function isLoggedIn() {
  const token = readSetupToken();
  return Boolean(token);
}
9963
/**
 * Remove the session file, if any.
 *
 * Deliberately best effort: a missing file (`force: true`) or any filesystem
 * error is ignored so that logging out never fails.
 *
 * @returns {void}
 */
function clearSession() {
  try {
    const target = sessionPath();
    rmSync6(target, { force: true });
  } catch {
    // Intentionally ignored — see the function comment.
  }
}
9969
-
9970
- // src/commands/login.ts
9971
- init_theme();
9972
/**
 * Register the `login [code]` and `logout` commands on the CLI program.
 *
 * `login --status` reports whether a setup token is present; `login <code>`
 * validates the code (32–64 hex characters) and stores it via `saveSession`;
 * `logout` removes the stored session.
 *
 * Every validation failure returns right after `die(...)`, so an invalid or
 * missing code can never reach `saveSession` even if `die` were to return
 * (the models command in this bundle uses the same `return die(...)` pattern).
 *
 * @param {object} program2 commander-style program (`command`/`description`/`option`/`action`)
 * @returns {void}
 */
function registerLoginCommand(program2) {
  const setupCodePattern = /^[a-f0-9]{32,64}$/i;

  program2
    .command("login [code]")
    .description("authorize model downloads on this machine (paste the code from oriro.app)")
    .option("--status", "show whether this machine is authorized")
    .action((code, opts) => {
      if (opts.status) {
        info(isLoggedIn() ? "this machine is authorized for downloads" : `not authorized \u2014 run ${accent("oriro login <code>")}`);
        return;
      }
      if (!code) {
        return die(`paste your setup code: ${accent("oriro login <code>")} \u2014 get it on oriro.app \u2192 Download \u2192 \u201CConnect this computer\u201D.`);
      }
      if (!setupCodePattern.test(code)) {
        return die("that doesn't look like a valid setup code (expected a 32\u201364 char hex code).");
      }
      saveSession(code);
      heading("Authorized");
      ok(`this machine can now download models \u2014 run ${accent("oriro models pull")}`);
      info(dim("stored locally in ~/.oriro/session.json (0600); run `oriro logout` to remove."));
    });

  program2
    .command("logout")
    .description("remove this machine's download authorization")
    .action(() => {
      clearSession();
      ok("logged out \u2014 download authorization removed from this machine");
    });
}
9992
-
9993
- // src/commands/models.ts
9994
- init_paths();
9995
- import { existsSync as existsSync27, statSync as statSync8 } from "fs";
9996
- import { join as join40 } from "path";
9997
-
9998
- // src/weights/container-stream.ts
9999
- import { createCipheriv, createDecipheriv, randomBytes, createHash } from "crypto";
10000
- import { open, stat } from "fs/promises";
10001
// ORX container layout, as written by packOrxToFile and read by unpackOrxToFile:
//   "ORX1" (4 bytes) | version (1 byte) | header length (u32, big endian)
//   | header JSON | AES-256-GCM ciphertext (payloadLen bytes) | 16-byte GCM tag
var MAGIC = Buffer.from("ORX1", "ascii");
var VERSION = 1;
// Streaming granularity: files are processed in 4 MiB slices, never loaded whole.
var IO_CHUNK = 4 * 1024 * 1024;

/**
 * Encode an unsigned 32-bit integer as 4 big-endian bytes.
 * @param {number} n value in [0, 2^32 - 1]
 * @returns {Buffer}
 */
function u32(n) {
  const b = Buffer.alloc(4);
  b.writeUInt32BE(n, 0);
  return b;
}

/**
 * Build the additional authenticated data bound to both the wrapped key and
 * the payload. Pack and unpack must serialize the same fields in the same order.
 * @param {{modelId: unknown, version: unknown, watermark: unknown, sha256: unknown}} fields
 * @returns {Buffer}
 */
function orxAad(fields) {
  return Buffer.from(
    JSON.stringify({ v: VERSION, modelId: fields.modelId, version: fields.version, watermark: fields.watermark, sha256: fields.sha256 }),
    "utf8"
  );
}

/**
 * Read `fh` from `start` in slices of at most IO_CHUNK bytes, stopping at end
 * of file or after `limit` bytes, and hand each slice to `onChunk`.
 * The slice is a view on a reused buffer: consume it before returning.
 * @param {import("fs/promises").FileHandle} fh
 * @param {number} start byte offset of the first read
 * @param {number} limit maximum number of bytes to read (`Infinity` = to EOF)
 * @param {(chunk: Buffer) => (void | Promise<void>)} onChunk
 * @returns {Promise<number>} number of bytes actually read
 */
async function orxForEachChunk(fh, start, limit, onChunk) {
  const scratch = Buffer.alloc(Math.min(IO_CHUNK, limit));
  let pos = start;
  let remaining = limit;
  let total = 0;
  while (remaining > 0) {
    const { bytesRead } = await fh.read(scratch, 0, Math.min(scratch.length, remaining), pos);
    if (!bytesRead) break;
    await onChunk(scratch.subarray(0, bytesRead));
    pos += bytesRead;
    remaining -= bytesRead;
    total += bytesRead;
  }
  return total;
}

/**
 * Encrypt a GGUF file into an ORX container.
 *
 * A random content key (CEK) encrypts the payload with AES-256-GCM; the CEK is
 * itself wrapped with AES-256-GCM under `opts.kek`. Both operations authenticate
 * the same AAD (version, modelId, model version, watermark, plaintext SHA-256).
 *
 * The source is read twice (hash pass, then encrypt pass). If its length during
 * the encrypt pass differs from the `stat` size stored as `payloadLen`, the
 * container would be unreadable, so this throws instead of writing it out.
 *
 * @param {string} srcGgufPath plaintext model file
 * @param {string} destOrxPath container to create (truncated if it exists)
 * @param {{kek: Buffer, watermark?: string, meta: {modelId?: string, version?: string}}} opts
 * @returns {Promise<object>} the header that was written
 * @throws {Error} when the KEK is not 32 bytes or the source changes while packing
 */
async function packOrxToFile(srcGgufPath, destOrxPath, opts) {
  if (opts.kek.length !== 32) throw new Error("KEK must be 32 bytes (AES-256)");
  const { size } = await stat(srcGgufPath);
  const src = await open(srcGgufPath, "r");
  try {
    const hash = createHash("sha256");
    await orxForEachChunk(src, 0, Infinity, (chunk) => {
      hash.update(chunk);
    });
    const sha256 = hash.digest("hex");

    const cek = randomBytes(32);
    const payloadNonce = randomBytes(12);
    const aad = orxAad({ modelId: opts.meta.modelId, version: opts.meta.version, watermark: opts.watermark, sha256 });

    const wrapNonce = randomBytes(12);
    const wc = createCipheriv("aes-256-gcm", opts.kek, wrapNonce);
    wc.setAAD(aad);
    const wrappedCek = Buffer.concat([wc.update(cek), wc.final()]);
    const wrapTag = wc.getAuthTag();

    const header2 = {
      ...opts.meta,
      cipher: "AES-256-GCM",
      wrapNonce: wrapNonce.toString("base64"),
      wrappedCek: wrappedCek.toString("base64"),
      wrapTag: wrapTag.toString("base64"),
      payloadNonce: payloadNonce.toString("base64"),
      watermark: opts.watermark,
      sha256,
      // GCM ciphertext length == plaintext length
      payloadLen: size
    };
    const headerJson = Buffer.from(JSON.stringify(header2), "utf8");

    const out = await open(destOrxPath, "w");
    try {
      await out.write(Buffer.concat([MAGIC, Buffer.from([VERSION]), u32(headerJson.length), headerJson]));
      const pc = createCipheriv("aes-256-gcm", cek, payloadNonce);
      pc.setAAD(aad);
      const encrypted = await orxForEachChunk(src, 0, Infinity, async (chunk) => {
        const enc = pc.update(chunk);
        if (enc.length) await out.write(enc);
      });
      if (encrypted !== size) {
        throw new Error(`source file changed while packing (expected ${size} bytes, read ${encrypted})`);
      }
      const fin = pc.final();
      await out.write(Buffer.concat([fin, pc.getAuthTag()]));
    } finally {
      await out.close();
    }
    return header2;
  } finally {
    await src.close();
  }
}

/**
 * Decrypt an ORX container back into a GGUF file.
 *
 * Everything read from the container is untrusted until authenticated, so the
 * header length and payload length are checked against the real file size
 * before any buffer is allocated or any offset is computed from them.
 *
 * Plaintext is streamed to `destGgufPath` before the GCM tag and SHA-256 are
 * verified; when this function throws, the caller must discard that file.
 *
 * @param {string} srcOrxPath container to read
 * @param {string} destGgufPath plaintext file to create (truncated if it exists)
 * @param {Buffer} kek 32-byte key-encryption key
 * @returns {Promise<object>} the parsed container header
 * @throws {Error} on a malformed container, wrong key, tampering, or hash mismatch
 */
async function unpackOrxToFile(srcOrxPath, destGgufPath, kek) {
  if (kek.length !== 32) throw new Error("KEK must be 32 bytes (AES-256)");
  const src = await open(srcOrxPath, "r");
  try {
    const { size: fileSize } = await src.stat();
    const pre = Buffer.alloc(9);
    const { bytesRead: pr } = await src.read(pre, 0, 9, 0);
    if (pr < 9 || !pre.subarray(0, 4).equals(MAGIC)) throw new Error("not an ORX container");
    if (pre[4] !== VERSION) throw new Error(`unsupported ORX version ${pre[4]}`);

    const headerLen = pre.readUInt32BE(5);
    // Bound the allocation by what the file can actually contain.
    if (headerLen === 0 || headerLen > fileSize - 9) throw new Error("ORX header is truncated or corrupt");
    const hbuf = Buffer.alloc(headerLen);
    const { bytesRead: hr } = await src.read(hbuf, 0, headerLen, 9);
    let header2 = null;
    if (hr === headerLen) {
      try {
        header2 = JSON.parse(hbuf.toString("utf8"));
      } catch {
        header2 = null;
      }
    }
    if (header2 === null || typeof header2 !== "object") throw new Error("ORX header is truncated or corrupt");

    const cipherStart = 9 + headerLen;
    const cipherLen = header2.payloadLen;
    if (!Number.isSafeInteger(cipherLen) || cipherLen < 0 || cipherStart + cipherLen + 16 > fileSize) {
      throw new Error("ORX payload is truncated or corrupt");
    }

    const aad = orxAad(header2);
    let cek;
    try {
      const wc = createDecipheriv("aes-256-gcm", kek, Buffer.from(header2.wrapNonce, "base64"));
      wc.setAAD(aad);
      wc.setAuthTag(Buffer.from(header2.wrapTag, "base64"));
      cek = Buffer.concat([wc.update(Buffer.from(header2.wrappedCek, "base64")), wc.final()]);
    } catch {
      throw new Error("ORX unlock failed \u2014 wrong device/license, or the container was tampered with");
    }

    const tag = Buffer.alloc(16);
    const { bytesRead: tr } = await src.read(tag, 0, 16, cipherStart + cipherLen);
    if (tr !== 16) throw new Error("ORX payload is truncated or corrupt");

    let pc;
    try {
      pc = createDecipheriv("aes-256-gcm", cek, Buffer.from(header2.payloadNonce, "base64"));
      pc.setAAD(aad);
      pc.setAuthTag(tag);
    } catch {
      throw new Error("ORX payload decryption failed \u2014 container corrupt or tampered");
    }

    const hash = createHash("sha256");
    const out = await open(destGgufPath, "w");
    try {
      const emit = async (plain) => {
        if (plain.length) {
          await out.write(plain);
          hash.update(plain);
        }
      };
      await orxForEachChunk(src, cipherStart, cipherLen, (chunk) => emit(pc.update(chunk)));
      let fin;
      try {
        fin = pc.final();
      } catch {
        throw new Error("ORX payload decryption failed \u2014 container corrupt or tampered");
      }
      await emit(fin);
    } finally {
      await out.close();
    }
    if (hash.digest("hex") !== header2.sha256) {
      throw new Error("ORX integrity check failed \u2014 decrypted weights do not match the manifest");
    }
    return header2;
  } finally {
    await src.close();
  }
}
10149
-
10150
- // src/weights/binding.ts
10151
- init_paths();
10152
- import { randomBytes as randomBytes2, scryptSync, createHash as createHash2 } from "crypto";
10153
- import { existsSync as existsSync23, mkdirSync as mkdirSync18, readFileSync as readFileSync28, writeFileSync as writeFileSync24, chmodSync as chmodSync2 } from "fs";
10154
- import { hostname, userInfo, platform as platform2, arch } from "os";
10155
- import { join as join36 } from "path";
10156
// scrypt cost parameters used by deriveKek: N = 2^15, r = 8, p = 1.
var SCRYPT_N = 2 ** 15;
var SCRYPT_R = 8;
var SCRYPT_P = 1;

/**
 * Directory that holds the local weights, created on demand.
 *
 * @returns {string} `<oriroDir()>/weights`
 */
function weightsDir() {
  const dir = join36(oriroDir(), "weights");
  mkdirSync18(dir, { recursive: true });
  return dir;
}
10164
/**
 * Load, or create on first use, the 32-byte per-install secret stored as hex
 * in `<weightsDir()>/.install`.
 *
 * A missing file, or one that does not decode to exactly 32 bytes, is replaced
 * by a fresh random secret. The file is created with mode 0600 so the secret is
 * never readable by other users; the chmod afterwards tightens a file that
 * already existed, because `mode` only applies on creation.
 *
 * @returns {Buffer} the 32-byte secret
 */
function installSecret() {
  const secretPath = join36(weightsDir(), ".install");
  if (existsSync23(secretPath)) {
    const stored = Buffer.from(readFileSync28(secretPath, "utf8").trim(), "hex");
    if (stored.length === 32) return stored;
  }
  const secret = randomBytes2(32);
  writeFileSync24(secretPath, secret.toString("hex"), { encoding: "utf8", mode: 0o600 });
  try {
    chmodSync2(secretPath, 0o600);
  } catch {
    // Best effort: chmod can fail or be meaningless on some platforms/filesystems.
  }
  return secret;
}
10178
/**
 * Stable identifier for this machine/account pair.
 *
 * @returns {string} hex SHA-256 of `hostname|username|platform|arch|uid`
 */
function deviceFingerprint() {
  const account = userInfo();
  const identity = [hostname(), account.username, platform2(), arch(), String(account.uid)];
  const digest = createHash2("sha256");
  digest.update(identity.join("|"));
  return digest.digest("hex");
}
10182
/**
 * Derive the 32-byte key-encryption key for ORX containers.
 *
 * scrypt input is the install secret followed by the UTF-8 license key; the
 * salt is SHA-256 of `"oriro-orx-v1|" + deviceFingerprint()`.
 *
 * @param {string} licenseKey license key mixed into the key material
 * @returns {Buffer} 32-byte key
 */
function deriveKek(licenseKey) {
  const saltInput = "oriro-orx-v1|" + deviceFingerprint();
  const salt = createHash2("sha256").update(saltInput).digest();
  const keyMaterial = Buffer.concat([installSecret(), Buffer.from(licenseKey, "utf8")]);
  const cost = { N: SCRYPT_N, r: SCRYPT_R, p: SCRYPT_P, maxmem: 128 * 1024 * 1024 };
  return scryptSync(keyMaterial, salt, 32, cost);
}
10187
-
10188
- // src/weights/pull.ts
10189
- init_paths();
10190
- import { createHash as createHash3 } from "crypto";
10191
- import { open as open2, stat as stat2, mkdir, unlink } from "fs/promises";
10192
- import { existsSync as existsSync24 } from "fs";
10193
- import { join as join37 } from "path";
10194
/**
 * Compute the next inclusive byte range to download.
 *
 * @param {number} offset first byte still missing
 * @param {number} total total size in bytes
 * @param {number} chunk maximum bytes per request
 * @returns {{start: number, end: number} | null} the range, or null once `offset` reaches `total`
 */
function nextRange(offset, total, chunk) {
  const finished = offset >= total;
  if (finished) return null;
  const lastByte = Math.min(offset + chunk, total) - 1;
  return { start: offset, end: lastByte };
}
10198
/**
 * Ask the server for the total size of a resource with a one-byte range request.
 *
 * @param {string} url resource to probe
 * @param {AbortSignal} [signal] optional cancellation signal passed to `fetch`
 * @returns {Promise<number>} total size taken from the `Content-Range` header
 * @throws {Error} on an HTTP error, a missing `Content-Range`, or an unusable total
 */
async function probeSize(url, signal) {
  const response = await fetch(url, { headers: { Range: "bytes=0-0" }, signal });
  const contentRange = response.headers.get("content-range");
  try {
    // Drain the (one byte) body; a failure here does not matter.
    await response.arrayBuffer();
  } catch {
    // ignored
  }
  const acceptable = response.ok || response.status === 206;
  if (!acceptable) throw new Error(`size probe HTTP ${response.status}`);
  if (!contentRange) throw new Error("server did not return a byte range \u2014 cannot size the model");
  const [, totalText] = contentRange.split("/");
  const total = Number(totalText);
  if (!Number.isFinite(total) || total <= 1) throw new Error("could not determine model size");
  return total;
}
10208
/**
 * Hash a file with SHA-256, reading it in 4 MiB slices.
 *
 * @param {string} path file to hash
 * @returns {Promise<string>} lowercase hex digest
 */
async function sha256File(path) {
  const digest = createHash3("sha256");
  const handle = await open2(path, "r");
  try {
    const scratch = Buffer.alloc(4 * 1024 * 1024);
    let offset = 0;
    let bytesRead = 0;
    do {
      ({ bytesRead } = await handle.read(scratch, 0, scratch.length, offset));
      if (bytesRead) {
        digest.update(scratch.subarray(0, bytesRead));
        offset += bytesRead;
      }
    } while (bytesRead);
  } finally {
    await handle.close();
  }
  return digest.digest("hex");
}
10225
/**
 * Download one inclusive byte range, with retries and signed-URL refresh.
 *
 * - HTTP 401 triggers `spec.refresh()` (at most 3 times) and the new URL is
 *   stored in `urlRef.url`; refreshes do not count as failed attempts.
 * - Any other failure (non-206 status, short body, network error) is retried
 *   with exponential backoff (500 ms doubling, capped at 4 s) until `retries`
 *   attempts have failed.
 * - Responses that are rejected have their body cancelled so an unread
 *   (possibly multi-gigabyte) body does not keep the connection busy.
 *
 * @param {{url: string}} urlRef mutable holder of the current download URL
 * @param {{start: number, end: number}} range inclusive byte range
 * @param {{signal?: AbortSignal, refresh?: () => Promise<string>}} spec
 * @param {number} retries number of failed attempts after which the last error is thrown
 * @returns {Promise<Buffer>} exactly `end - start + 1` bytes
 * @throws {DOMException} `AbortError` when `spec.signal` is already aborted
 * @throws {Error} the last failure once retries are exhausted
 */
async function fetchRange(urlRef, range, spec, retries) {
  const want = range.end - range.start + 1;
  const discardBody = async (response) => {
    try {
      await response.body?.cancel();
    } catch {
      // Nothing useful to do if the stream cannot be cancelled.
    }
  };
  let attempt = 0;
  let refreshes = 0;
  for (; ; ) {
    if (spec.signal?.aborted) throw new DOMException("Aborted", "AbortError");
    try {
      const r = await fetch(urlRef.url, { headers: { Range: `bytes=${range.start}-${range.end}` }, signal: spec.signal });
      if (r.status === 401 && spec.refresh && refreshes < 3) {
        refreshes++;
        await discardBody(r);
        urlRef.url = await spec.refresh();
        continue;
      }
      if (r.status !== 206) {
        await discardBody(r);
        if (r.status === 200) throw new Error("server ignored Range (200) \u2014 refusing to write a full body at an offset");
        throw new Error(`chunk HTTP ${r.status}`);
      }
      const ab = await r.arrayBuffer();
      if (ab.byteLength !== want) throw new Error(`short chunk (${ab.byteLength}/${want})`);
      return Buffer.from(ab);
    } catch (e) {
      attempt++;
      if (spec.signal?.aborted || attempt >= retries) throw e;
      await new Promise((res) => setTimeout(res, Math.min(500 * 2 ** (attempt - 1), 4e3)));
    }
  }
}
10250
/**
 * Download a model in resumable chunks, verify it, and seal it into an ORX
 * container under `<oriroDir()>/weights/<modelId>.orx`.
 *
 * The download is staged in `weights/staging/<modelId>.gguf.part`; an existing
 * partial file is resumed from its current length (or discarded if it is longer
 * than the expected size). The staged file is deleted after a hash mismatch and
 * after a successful pack.
 *
 * If packing fails, the partially written `.orx` is removed so that a truncated
 * container is never left where `models status` would report it as installed.
 *
 * @param {object} spec
 * @param {string} spec.modelId model identifier, also used for file names
 * @param {string} spec.url initial download URL
 * @param {number} spec.sizeBytes expected size of the plaintext model
 * @param {string} [spec.sha256] expected hex SHA-256; skipped when absent
 * @param {string} spec.licenseKey license key used to derive the KEK
 * @param {number} [spec.chunkBytes] bytes per range request (default 16 MiB)
 * @param {number} [spec.retries] failed attempts allowed per chunk (default 5)
 * @param {string} [spec.version] model version (default "2.4")
 * @param {string} [spec.watermark] watermark stored in the container
 * @param {(done: number, total: number) => void} [spec.onProgress]
 * @returns {Promise<{orxPath: string, modelId: string, bytes: number}>}
 * @throws {Error} on download failure, integrity mismatch, or pack failure
 */
async function pullModelToOrx(spec) {
  const stageDir = join37(oriroDir(), "weights", "staging");
  await mkdir(stageDir, { recursive: true });
  const part = join37(stageDir, `${spec.modelId}.gguf.part`);
  const chunk = spec.chunkBytes ?? 16 * 1024 * 1024;
  const retries = spec.retries ?? 5;
  const urlRef = { url: spec.url };

  let offset = existsSync24(part) ? (await stat2(part)).size : 0;
  if (offset > spec.sizeBytes) {
    // A staged file larger than the model cannot be a valid prefix: start over.
    await unlink(part);
    offset = 0;
  }
  spec.onProgress?.(offset, spec.sizeBytes);

  const fh = await open2(part, offset > 0 ? "r+" : "w");
  try {
    let cursor = offset;
    for (let range = nextRange(cursor, spec.sizeBytes, chunk); range; range = nextRange(cursor, spec.sizeBytes, chunk)) {
      const buf = await fetchRange(urlRef, range, spec, retries);
      await fh.write(buf, 0, buf.length, cursor);
      cursor += buf.length;
      spec.onProgress?.(cursor, spec.sizeBytes);
    }
  } finally {
    await fh.close();
  }

  const actual = await sha256File(part);
  if (spec.sha256 && actual !== spec.sha256) {
    await unlink(part);
    throw new Error(`${spec.modelId}: download failed the integrity check (sha256 mismatch) \u2014 please retry`);
  }

  const orxPath2 = join37(oriroDir(), "weights", `${spec.modelId}.orx`);
  // Derive the key before touching the destination, so a key-derivation
  // failure cannot delete a container that is already installed.
  const kek = deriveKek(spec.licenseKey);
  try {
    await packOrxToFile(part, orxPath2, {
      kek,
      watermark: spec.watermark ?? `orx:${spec.modelId}-v${spec.version ?? "2.4"}:${actual.slice(0, 12)}`,
      meta: { modelId: spec.modelId, version: spec.version ?? "2.4", createdTs: spec.createdTs }
    });
  } catch (err) {
    try {
      await unlink(orxPath2);
    } catch {
      // The container may not have been created at all.
    }
    throw err;
  }
  await unlink(part);
  return { orxPath: orxPath2, modelId: spec.modelId, bytes: spec.sizeBytes };
}
10289
-
10290
- // src/weights/serve.ts
10291
- import { createServer } from "http";
10292
- import { randomBytes as randomBytes4 } from "crypto";
10293
- import { existsSync as existsSync26 } from "fs";
10294
- import { join as join39 } from "path";
10295
-
10296
- // src/weights/secure-load.ts
10297
- init_paths();
10298
- import {
10299
- chmodSync as chmodSync3,
10300
- mkdirSync as mkdirSync19,
10301
- writeFileSync as writeFileSync25,
10302
- existsSync as existsSync25,
10303
- statSync as statSync6,
10304
- openSync as openSync5,
10305
- writeSync as writeSync5,
10306
- closeSync as closeSync5,
10307
- unlinkSync
10308
- } from "fs";
10309
- import { join as join38 } from "path";
10310
- import { randomBytes as randomBytes3 } from "crypto";
10311
/**
 * Directory for short-lived decrypted model files, created on demand and
 * restricted to the owner (0700) on a best-effort basis.
 *
 * @returns {string} `<ensureOriroDir()>/weights/run`
 */
function secureDir() {
  const runDir = join38(ensureOriroDir(), "weights", "run");
  mkdirSync19(runDir, { recursive: true });
  try {
    chmodSync3(runDir, 0o700);
  } catch {
    // Permissions are best effort (not supported everywhere).
  }
  return runDir;
}
10320
/**
 * Overwrite a file with random bytes, then delete it. Never throws.
 *
 * The overwrite is best effort. If it fails (for example the file cannot be
 * opened for writing), the file is still deleted — previously any overwrite
 * error skipped the unlink and left the decrypted file on disk.
 *
 * @param {string} path file to destroy; a missing file is a no-op
 * @returns {void}
 */
function shredAndUnlink(path) {
  try {
    if (!existsSync25(path)) return;
    try {
      const size = statSync6(path).size;
      const fd = openSync5(path, "r+");
      try {
        const noise = randomBytes3(1024 * 1024);
        for (let off = 0; off < size; off += noise.length) {
          writeSync5(fd, noise, 0, Math.min(noise.length, size - off), off);
        }
      } finally {
        closeSync5(fd);
      }
    } catch {
      // Overwrite failed; fall through so the file is at least removed.
    }
    unlinkSync(path);
  } catch {
    // Cleanup runs from exit/signal handlers and must never throw.
  }
}
10337
// Paths of decrypted files that currently exist and must be destroyed on exit.
var live = /* @__PURE__ */ new Set();
// Guards hookExit so process listeners are installed only once.
var exitHooked = false;

/**
 * Install (once) process hooks that shred every live decrypted file on exit
 * and on SIGINT / SIGTERM / SIGHUP.
 *
 * After cleanup, a signal exits with the conventional status 128 + signal
 * number (SIGHUP 129, SIGINT 130, SIGTERM 143) instead of 130 for all three,
 * so supervisors can tell how the process ended.
 *
 * @returns {void}
 */
function hookExit() {
  if (exitHooked) return;
  exitHooked = true;
  const cleanup = () => {
    for (const p of live) shredAndUnlink(p);
    live.clear();
  };
  process.once("exit", cleanup);
  const signalExitCodes = { SIGINT: 130, SIGTERM: 143, SIGHUP: 129 };
  for (const [sig, code] of Object.entries(signalExitCodes)) {
    process.once(sig, () => {
      cleanup();
      process.exit(code);
    });
  }
}
10354
/**
 * Decrypt an ORX container into a randomly named file in the secure run
 * directory and return a handle that destroys it again.
 *
 * The path is registered in `live` before decryption starts so the exit hooks
 * clean it up even if the process dies mid-way; on a decryption error the
 * partial file is shredded immediately and the error is rethrown.
 *
 * @param {string} orxPath2 container to decrypt
 * @param {string} licenseKey license key used to derive the KEK
 * @returns {Promise<{path: string, modelId: string, dispose: () => void}>}
 *   `dispose` is idempotent and shreds the decrypted file
 */
async function decryptToSecureFile(orxPath2, licenseKey) {
  hookExit();
  const runDir = secureDir();
  const path = join38(runDir, `run-${randomBytes3(6).toString("hex")}.gguf`);
  live.add(path);
  const destroy = () => {
    live.delete(path);
    shredAndUnlink(path);
  };

  let header2;
  try {
    header2 = await unpackOrxToFile(orxPath2, path, deriveKek(licenseKey));
  } catch (e) {
    destroy();
    throw e;
  }

  try {
    chmodSync3(path, 0o600);
  } catch {
    // Permissions are best effort (not supported everywhere).
  }

  let disposed = false;
  return {
    path,
    modelId: header2.modelId,
    dispose() {
      if (disposed) return;
      disposed = true;
      destroy();
    }
  };
}
10382
-
10383
- // src/weights/template.ts
10384
// ChatML-style turn delimiters used to build the raw completion prompt.
var IM_START = "<|im_start|>";
var IM_END = "<|im_end|>";
// Generation stops at the end-of-turn marker or the end-of-text token.
var STOP_SEQUENCES = [IM_END, "<|endoftext|>"];
// Built-in system prompt per model id (keys are lowercase model ids).
var SYSTEM = {
  gauss: "You are Gauss, ORIRO's builder intelligence \u2014 you help people build websites, apps, and APIs and ship software. Lead with the deliverable immediately, no preamble. You are part of ORIRO (oriro.ai), the free AI platform. You never mention any other AI model \u2014 you are Gauss, and that is all.",
  avila: "You are Avila, ORIRO's orchestration intelligence. You help users plan, coordinate ORIRO's features, and manage multi-step workflows. You are part of ORIRO (oriro.ai), the free AI platform. You never mention any other AI model \u2014 you are Avila, and that is all."
};

/**
 * Look up the built-in system prompt for a model (case-insensitive).
 *
 * Only own keys of `SYSTEM` match: ids such as "constructor" or "__proto__"
 * used to resolve to inherited Object members and were returned as a "prompt".
 *
 * @param {string} modelId model identifier
 * @returns {string} the system prompt
 * @throws {Error} when the model has no built-in prompt
 */
function systemFor(modelId) {
  const key = modelId.toLowerCase();
  if (!Object.hasOwn(SYSTEM, key) || !SYSTEM[key]) {
    throw new Error(`no system prompt for model "${modelId}"`);
  }
  return SYSTEM[key];
}

/**
 * Render a chat transcript as a raw prompt ending with an open assistant turn.
 *
 * Messages whose role is exactly "system" are dropped: the system turn always
 * comes from `systemOverride` or the model's built-in prompt.
 *
 * @param {string} modelId model whose built-in system prompt is used by default
 * @param {Array<{role: string, content: string}>} messages conversation so far
 * @param {string} [systemOverride] system prompt replacing the built-in one
 * @returns {{prompt: string, stops: string[]}} prompt text and stop sequences
 * @throws {Error} when no override is given and the model has no built-in prompt
 */
function buildPrompt(modelId, messages, systemOverride) {
  const sys = systemOverride ?? systemFor(modelId);
  const turns = [`${IM_START}system\n${sys}${IM_END}\n`];
  for (const m of messages) {
    if (m.role === "system") continue;
    turns.push(`${IM_START}${m.role}\n${m.content}${IM_END}\n`);
  }
  turns.push(`${IM_START}assistant\n`);
  return { prompt: turns.join(""), stops: STOP_SEQUENCES };
}
10411
-
10412
- // src/weights/think-strip.ts
10413
// Delimiters of the model's hidden reasoning block.
var OPEN = "<think>";
var CLOSE = "</think>";

/**
 * Create a streaming filter that hides a leading `<think>…</think>` block.
 *
 * States: "before" (nothing but whitespace or a partial `<think>` seen yet),
 * "inside" (within the block), "after" (pass-through). Text is buffered only
 * while the decision is still open; a block that never closes is dropped.
 *
 * @returns {{push: (chunk: string) => string, end: () => string}}
 *   `push` returns the visible part of a chunk; `end` flushes text that was
 *   held back while still in the "before" state
 */
function thinkFilter() {
  const leadingWhitespace = /^\s+/;
  let state = "before";
  let pending = "";

  return {
    push(chunk) {
      if (state === "after") return chunk;
      pending += chunk;

      if (state === "before") {
        const head = pending.replace(leadingWhitespace, "");
        // Undecided: only whitespace so far, or a prefix of "<think>".
        if (head === "" || OPEN.startsWith(head)) return "";
        if (!head.startsWith(OPEN)) {
          // Ordinary answer: release everything, leading whitespace included.
          state = "after";
          const passthrough = pending;
          pending = "";
          return passthrough;
        }
        state = "inside";
        pending = head.slice(OPEN.length);
      }

      const closeAt = pending.indexOf(CLOSE);
      if (closeAt !== -1) {
        state = "after";
        const visible = pending.slice(closeAt + CLOSE.length).replace(leadingWhitespace, "");
        pending = "";
        return visible;
      }
      // Keep just enough of the tail to recognise a "</think>" split across chunks.
      pending = pending.slice(Math.max(0, pending.length - (CLOSE.length - 1)));
      return "";
    },
    end() {
      const leftover = state === "before" ? pending : "";
      pending = "";
      return leftover;
    }
  };
}
10458
-
10459
- // src/weights/engine.ts
10460
/**
 * Lazily load the optional `node-llama-cpp` dependency.
 *
 * @returns {Promise<object>} the module namespace
 * @throws {Error} a user-facing message when the module cannot be loaded; the
 *   underlying import error is kept as `cause` so a broken install (as opposed
 *   to a missing one) can still be diagnosed
 */
async function importLlama() {
  try {
    return await import("node-llama-cpp");
  } catch (cause) {
    throw new Error(
      "The on-device engine isn't available. The ORIRO app ships it prebuilt; from source, install the optional dependency with: npm i node-llama-cpp",
      { cause }
    );
  }
}
10469
/**
 * Load a GGUF model with node-llama-cpp and expose a minimal chat engine.
 *
 * Resource handling:
 * - if creating the context fails, the already loaded model is disposed;
 * - each chat takes a sequence from the context and releases it, together with
 *   the completion object, in a `finally`, so a failed or aborted generation
 *   does not leak it.
 *   NOTE(review): this assumes the completion does not dispose the sequence
 *   itself and that `LlamaContextSequence.dispose()` returns it to the context —
 *   confirm against the node-llama-cpp version shipped with the app.
 *
 * @param {string} modelPath path of the decrypted GGUF file
 * @param {string} modelId model id used to pick the system prompt
 * @param {number} nCtx context size in tokens
 * @param {string} [mmprojPath] optional multimodal projector file
 * @returns {Promise<{chat: Function, dispose: Function}>}
 *   `chat(messages, onToken, opts)` streams visible text through `onToken` and
 *   resolves to the full visible answer; `dispose()` frees context and model
 */
async function loadEngine(modelPath, modelId, nCtx, mmprojPath) {
  const mod = await importLlama();
  const getLlama = mod.getLlama;
  const LlamaCompletion = mod.LlamaCompletion;
  const llama = await getLlama();
  const model = await llama.loadModel(mmprojPath ? { modelPath, mmproj: mmprojPath } : { modelPath });
  let context;
  try {
    context = await model.createContext({ contextSize: nCtx });
  } catch (e) {
    try {
      await model.dispose();
    } catch {
      // Report the original failure, not the cleanup failure.
    }
    throw e;
  }
  return {
    async chat(messages, onToken, opts) {
      const { prompt, stops } = buildPrompt(modelId, messages, opts?.systemOverride);
      const sequence = context.getSequence();
      const completion = new LlamaCompletion({ contextSequence: sequence });
      const filter = thinkFilter();
      let visible = "";
      const emit = (text) => {
        if (text) {
          visible += text;
          onToken(text);
        }
      };
      try {
        await completion.generateCompletion(prompt, {
          onTextChunk: (t) => emit(filter.push(t)),
          customStopTriggers: stops,
          maxTokens: opts?.maxTokens,
          signal: opts?.signal
        });
        emit(filter.end());
        return visible;
      } finally {
        completion.dispose?.();
        sequence.dispose?.();
      }
    },
    async dispose() {
      await context.dispose();
      await model.dispose();
    }
  };
}
10508
-
10509
- // src/weights/preflight.ts
10510
- import { freemem } from "os";
10511
var MiB = 1024 * 1024;
var GiB = 1024 * MiB;
// Context size used when the caller does not request one.
var DEFAULT_CTX = 8192;
// Smallest context window that is still accepted.
var CTX_FLOOR = 2048;
// Free memory is divided by this factor before budgeting.
var HEADROOM = 1.25;

/**
 * Estimated KV-cache cost of one context token.
 *
 * @param {number} paramsB model size in billions of parameters
 * @returns {number} bytes per token: `max(0.05, paramsB * 0.014)` MiB
 */
function kvBytesPerToken(paramsB) {
  const mibPerToken = Math.max(0.05, paramsB * 0.014);
  return mibPerToken * MiB;
}

/**
 * Decide whether, and with how much context, a model fits in free memory.
 *
 * Budget = `freeBytes / HEADROOM - fp.fileBytes`; the context is the requested
 * size (or DEFAULT_CTX when `requestedCtx` is not positive) capped by what the
 * budget can hold.
 *
 * @param {{paramsB: number, fileBytes: number}} fp model footprint
 * @param {number} requestedCtx desired context in tokens; values <= 0 mean "default"
 * @param {number} [freeBytes] free memory in bytes (defaults to `os.freemem()`)
 * @returns {{decision: "ok" | "reduced" | "refuse", nCtx: number, reason: string}}
 */
function planContext(fp, requestedCtx, freeBytes = freemem()) {
  const asGb = (bytes) => (bytes / GiB).toFixed(1);
  const refuse = (reason) => ({ decision: "refuse", nCtx: 0, reason });

  const weights = fp.fileBytes;
  const budget = freeBytes / HEADROOM - weights;
  if (budget <= 0) {
    return refuse(
      `Not enough free memory: this model needs about ${asGb(weights)} GB resident and only ${asGb(freeBytes)} GB is free. Close some apps and try again.`
    );
  }

  const maxCtxByRam = Math.floor(budget / kvBytesPerToken(fp.paramsB));
  const want = requestedCtx > 0 ? requestedCtx : DEFAULT_CTX;
  const nCtx = Math.max(0, Math.min(want, maxCtxByRam));

  if (nCtx < CTX_FLOOR) {
    return refuse(
      `Not enough free memory for a usable context window (only ${nCtx.toLocaleString()} tokens fit). Close some apps and try again.`
    );
  }
  if (nCtx >= want) {
    return { decision: "ok", nCtx, reason: `Ready \u2014 ${nCtx.toLocaleString()} tokens of context.` };
  }
  return {
    decision: "reduced",
    nCtx,
    reason: `Context set to ${nCtx.toLocaleString()} tokens to fit your free memory (${asGb(freeBytes)} GB). The model still works \u2014 just a shorter memory per chat.`
  };
}
10548
-
10549
- // src/weights/local-runtime.ts
10550
- import { statSync as statSync7 } from "fs";
10551
/**
 * Decrypt a model (and optional multimodal projector), check that it fits in
 * memory, load it, and return a chat handle.
 *
 * The decrypted files are shredded on every exit path: when start-up fails,
 * and when `dispose()` is called — even if disposing the engine itself throws
 * (previously that left the plaintext files behind until process exit).
 *
 * @param {string} orxPath2 encrypted model container
 * @param {string} licenseKey license key used to derive the KEK
 * @param {number} paramsB model size in billions of parameters (memory planning)
 * @param {object} [opts]
 * @param {string} [opts.mmprojOrxPath] encrypted multimodal projector container
 * @param {number} [opts.requestedCtx] desired context size; 0/absent = default
 * @param {{captureConsentedTurn: Function}} [opts.capture] optional hook called
 *   with (modelId, last user message content, answer) after each chat
 * @returns {Promise<{modelId: string, plan: object, chat: Function, dispose: Function}>}
 * @throws {Error} when decryption fails, memory is insufficient, or loading fails
 */
async function startLocalModel(orxPath2, licenseKey, paramsB, opts = {}) {
  const secure = await decryptToSecureFile(orxPath2, licenseKey);
  let mmproj = null;
  const shredDecrypted = () => {
    mmproj?.dispose();
    secure.dispose();
  };
  try {
    if (opts.mmprojOrxPath) mmproj = await decryptToSecureFile(opts.mmprojOrxPath, licenseKey);
    const plan = planContext({ paramsB, fileBytes: statSync7(secure.path).size }, opts.requestedCtx ?? 0);
    if (plan.decision === "refuse") throw new Error(plan.reason);
    const engine = await loadEngine(secure.path, secure.modelId, plan.nCtx, mmproj?.path);
    let disposed = false;
    return {
      modelId: secure.modelId,
      plan,
      async chat(messages, onToken, o) {
        const last = messages[messages.length - 1];
        const answer = await engine.chat(messages, onToken, { signal: o?.signal });
        if (opts.capture && last?.role === "user") {
          opts.capture.captureConsentedTurn(secure.modelId, last.content, answer);
        }
        return answer;
      },
      async dispose() {
        if (disposed) return;
        disposed = true;
        try {
          await engine.dispose();
        } finally {
          shredDecrypted();
        }
      }
    };
  } catch (e) {
    shredDecrypted();
    throw e;
  }
}
10585
-
10586
- // src/weights/serve.ts
10587
- init_paths();
10588
// Model ids the local server can serve, in matching priority order.
var KNOWN_MODELS = ["gauss", "avila"];
// Parameter count (billions) per model, used for memory planning.
var MODEL_PARAMS_B = { gauss: 9, avila: 9 };
// Port the local server listens on unless told otherwise.
var DEFAULT_PORT = 11435;
// Browser origins that receive CORS headers: https://oriro.ai / oriro.app, with optional "www.".
var ORIRO_ORIGIN = /^https:\/\/(www\.)?oriro\.(ai|app)$/;

/**
 * Path of the encrypted container for a model id.
 * @param {string} modelId
 * @returns {string} `<oriroDir()>/weights/<modelId>.orx`
 */
function orxPathFor(modelId) {
  const fileName = `${modelId}.orx`;
  return join39(oriroDir(), "weights", fileName);
}

/**
 * Map a client-supplied model name to a known model id.
 *
 * @param {unknown} raw value of the request's `model` field
 * @returns {string} the first known id contained in the lowercased name,
 *   or "gauss" when none matches (including null/undefined)
 */
function normalizeModel(raw) {
  const wanted = String(raw ?? "").toLowerCase();
  const match = KNOWN_MODELS.find((known) => wanted.includes(known));
  return match ?? "gauss";
}
10600
/**
 * Format a value as one server-sent-events `data:` frame.
 * @param {unknown} obj JSON-serializable payload
 * @returns {string} `data: <json>` followed by a blank line
 */
function sseData(obj) {
  const json = JSON.stringify(obj);
  return `data: ${json}\n\n`;
}
10605
/**
 * Build a streaming chat-completion chunk carrying a piece of content.
 * @param {string} model model id reported to the client
 * @param {string} id completion id shared by all chunks of one response
 * @param {string} content text delta
 * @returns {object} `chat.completion.chunk` object with a null `finish_reason`
 */
function deltaChunk(model, id, content) {
  const choice = { index: 0, delta: { content }, finish_reason: null };
  return { id, object: "chat.completion.chunk", model, choices: [choice] };
}
10608
/**
 * Build the terminating streaming chunk (empty delta, `finish_reason: "stop"`).
 * @param {string} model model id reported to the client
 * @param {string} id completion id shared by all chunks of one response
 * @returns {object} `chat.completion.chunk` object
 */
function finalChunk(model, id) {
  const choice = { index: 0, delta: {}, finish_reason: "stop" };
  return { id, object: "chat.completion.chunk", model, choices: [choice] };
}
10611
/**
 * Read and parse a JSON request body.
 *
 * - Invalid JSON, an empty body, or JSON that is not an object (`null`, a
 *   number, a string) yields `{}`, so callers can read properties safely.
 * - The body is capped at `maxBytes`; a larger body raises an error instead of
 *   being buffered without limit.
 *
 * @param {AsyncIterable<Buffer>} req request stream
 * @param {number} [maxBytes] upper bound on the body size (default 32 MiB)
 * @returns {Promise<object>} the parsed object (or array), or `{}`
 * @throws {Error} when the body exceeds `maxBytes`
 */
async function readJson2(req, maxBytes = 32 * 1024 * 1024) {
  const chunks = [];
  let received = 0;
  for await (const c of req) {
    received += c.length;
    if (received > maxBytes) throw new Error("request body too large");
    chunks.push(c);
  }
  try {
    const parsed = JSON.parse(Buffer.concat(chunks).toString("utf8"));
    return parsed !== null && typeof parsed === "object" ? parsed : {};
  } catch {
    return {};
  }
}
10620
/**
 * Start the local HTTP server and lazily load models on first use.
 *
 * Fixes over the previous behaviour:
 * - a model whose load failed is evicted from the cache, so the next request
 *   retries instead of replaying the cached failure forever;
 * - a listen error (e.g. EADDRINUSE) rejects the returned promise instead of
 *   surfacing as an unhandled 'error' event;
 * - `port` is the port actually bound (relevant when `opts.port` is 0);
 * - `close()` awaits each model's asynchronous `dispose()`.
 *
 * @param {object} opts
 * @param {string} opts.licenseKey license key used to decrypt models
 * @param {number} [opts.port] port to bind (default DEFAULT_PORT)
 * @param {string} [opts.host] interface to bind (default "127.0.0.1")
 * @returns {Promise<{port: number, close: () => Promise<void>}>}
 * @throws {Error} when the server cannot listen
 */
async function startLocalServer(opts) {
  const warm = /* @__PURE__ */ new Map();
  const getModel = (modelId) => {
    let p = warm.get(modelId);
    if (!p) {
      const mmprojPath = orxPathFor(`${modelId}-mmproj`);
      p = startLocalModel(orxPathFor(modelId), opts.licenseKey, MODEL_PARAMS_B[modelId] ?? 9, {
        mmprojOrxPath: existsSync26(mmprojPath) ? mmprojPath : void 0
      });
      warm.set(modelId, p);
      const pending = p;
      // Callers still receive the rejection through `p`; this only evicts it.
      pending.catch(() => {
        if (warm.get(modelId) === pending) warm.delete(modelId);
      });
    }
    return p;
  };
  const server = createServer((req, res) => {
    void handle(req, res, getModel).catch(() => {
      if (!res.headersSent) res.writeHead(500);
      res.end();
    });
  });
  const requestedPort = opts.port ?? DEFAULT_PORT;
  await new Promise((resolve3, reject) => {
    server.once("error", reject);
    server.listen(requestedPort, opts.host ?? "127.0.0.1", () => {
      server.off("error", reject);
      resolve3();
    });
  });
  const bound = server.address();
  const port = bound !== null && typeof bound === "object" ? bound.port : requestedPort;
  return {
    port,
    async close() {
      await new Promise((r) => server.close(() => r()));
      for (const p of warm.values()) {
        try {
          await (await p).dispose();
        } catch {
          // A model that failed to load or to dispose has nothing left to release here.
        }
      }
    }
  };
}
10654
/**
 * Route one HTTP request of the local model server.
 *
 * Routes: `OPTIONS *` (CORS preflight, 204), `/health`, `/v1/models`,
 * `POST /v1/chat/completions`; anything else is 404.
 *
 * Origin policy: a request carrying an `Origin` header that is not an ORIRO
 * origin is rejected with 403. Withholding CORS headers alone only stops a
 * foreign page from reading the response; a "simple" cross-site POST would
 * still have reached the model. Requests without an `Origin` header
 * (CLI tools, native apps) are served as before.
 *
 * @param {import("http").IncomingMessage} req
 * @param {import("http").ServerResponse} res
 * @param {(modelId: string) => Promise<object>} getModel lazy model loader
 * @returns {Promise<void>}
 */
async function handle(req, res, getModel) {
  const origin = req.headers.origin;
  if (origin && !ORIRO_ORIGIN.test(origin)) {
    res.writeHead(403).end();
    return;
  }
  // From here on, a present origin is an allowed one.
  if (origin) {
    res.setHeader("access-control-allow-origin", origin);
    res.setHeader("vary", "origin");
  }
  if (req.method === "OPTIONS") {
    if (origin) {
      res.setHeader("access-control-allow-methods", "GET, POST, OPTIONS");
      res.setHeader("access-control-allow-headers", "content-type, authorization");
      res.setHeader("access-control-allow-private-network", "true");
    }
    res.writeHead(204).end();
    return;
  }
  const url = new URL(req.url ?? "/", "http://localhost");
  if (url.pathname === "/health") {
    res.writeHead(200, { "content-type": "application/json" }).end(JSON.stringify({ status: "ok" }));
    return;
  }
  if (url.pathname === "/v1/models") {
    const data = KNOWN_MODELS.filter((m) => existsSync26(orxPathFor(m))).map((id) => ({ id, object: "model" }));
    res.writeHead(200, { "content-type": "application/json" }).end(JSON.stringify({ object: "list", data }));
    return;
  }
  if (url.pathname === "/v1/chat/completions" && req.method === "POST") {
    await chatCompletions(req, res, getModel);
    return;
  }
  res.writeHead(404).end();
}
10685
/**
 * Serve `POST /v1/chat/completions` as a server-sent-events stream.
 *
 * Replies 503 with a JSON error when the model cannot be loaded; otherwise
 * streams delta chunks, a final chunk and `[DONE]`, or an error frame when
 * generation fails.
 *
 * Generation is tied to the connection: when the client disconnects, the chat
 * is aborted through an AbortSignal instead of running to completion for
 * nobody. The body is read defensively so a JSON `null` cannot crash the handler.
 *
 * @param {import("http").IncomingMessage} req
 * @param {import("http").ServerResponse} res
 * @param {(modelId: string) => Promise<object>} getModel lazy model loader
 * @returns {Promise<void>}
 */
async function chatCompletions(req, res, getModel) {
  const body = await readJson2(req);
  const modelId = normalizeModel(body?.model);
  const messages = Array.isArray(body?.messages) ? body.messages : [];
  const id = `chatcmpl-${randomBytes4(8).toString("hex")}`;
  const messageOf = (e) => (e instanceof Error ? e.message : String(e));
  let model;
  try {
    model = await getModel(modelId);
  } catch (e) {
    res.writeHead(503, { "content-type": "application/json" }).end(JSON.stringify({ error: { message: messageOf(e) } }));
    return;
  }
  const aborter = new AbortController();
  const onClose = () => aborter.abort();
  res.on("close", onClose);
  res.writeHead(200, { "content-type": "text/event-stream", "cache-control": "no-cache", connection: "keep-alive" });
  try {
    await model.chat(
      messages,
      (t) => {
        res.write(sseData(deltaChunk(modelId, id, t)));
      },
      { signal: aborter.signal }
    );
    res.write(sseData(finalChunk(modelId, id)));
    res.write("data: [DONE]\n\n");
  } catch (e) {
    // After a disconnect there is nobody left to tell.
    if (!aborter.signal.aborted) res.write(sseData({ error: { message: messageOf(e) } }));
  } finally {
    // Detach first: 'close' also fires after a normal end().
    res.off("close", onClose);
  }
  res.end();
}
10709
-
10710
- // src/commands/models.ts
10711
- init_theme();
10712
// Models offered by the `models` command: id, display label, size in billions of parameters.
var MODELS = [
  { id: "gauss", label: "Gauss V2.4", paramsB: 9 },
  { id: "avila", label: "Avila V2.4", paramsB: 9 }
];

/**
 * Path of the encrypted container for a model id.
 * @param {string} id model id
 * @returns {string} `<oriroDir()>/weights/<id>.orx`
 */
function orxPath(id) {
  const fileName = `${id}.orx`;
  return join40(oriroDir(), "weights", fileName);
}
10719
/**
 * Format a byte count in decimal gigabytes with two decimals.
 * @param {number} n size in bytes
 * @returns {string} e.g. "5.37 GB"
 */
function gb(n) {
  const gigabytes = n / 1e9;
  return `${gigabytes.toFixed(2)} GB`;
}
10722
/**
 * Infer a model id from a GGUF file name.
 *
 * Only the last path segment is inspected (both `/` and `\` separate
 * segments), case-insensitively and without a trailing `.gguf`.
 *
 * @param {string} p file path or name
 * @returns {string | null} "gauss" or "avila", with a "-mmproj" suffix when the
 *   name contains "mmproj"; null when neither model name appears
 */
function modelIdFromFilename(p) {
  const segments = p.replace(/\\/g, "/").split("/");
  const base = (segments.pop() ?? "").toLowerCase().replace(/\.gguf$/, "");

  let root = null;
  if (base.includes("gauss")) root = "gauss";
  else if (base.includes("avila")) root = "avila";
  if (!root) return null;

  const isProjector = base.includes("mmproj");
  return isProjector ? `${root}-mmproj` : root;
}
10728
/**
 * Fetch the weights manifest for an authorized machine.
 *
 * @param {string} base API base URL (no trailing slash)
 * @param {string} setupToken token sent as `setup_token`
 * @returns {Promise<object>} the manifest's `models` map, or `{}` when absent
 * @throws {Error} on 401/403 (with a hint to run `oriro login`) or any other non-2xx status
 */
async function fetchManifest(base, setupToken) {
  const response = await fetch(`${base}/api/weights/manifest`, {
    method: "POST",
    headers: { "content-type": "application/json" },
    body: JSON.stringify({ setup_token: setupToken })
  });
  const { status } = response;
  if (status === 401 || status === 403) {
    const attestation = status === 403 ? " (device attestation required)" : "";
    throw new Error(`downloads refused (HTTP ${status}) \u2014 run \`oriro login\` to authorize this machine${attestation}.`);
  }
  if (!response.ok) throw new Error(`weights manifest HTTP ${status}`);
  const payload = await response.json();
  return payload.models ?? {};
}
10741
- async function resumeUrl(base, setupToken, modelId) {
10742
- const r = await fetch(`${base}/api/weights/resume`, {
10743
- method: "POST",
10744
- headers: { "content-type": "application/json" },
10745
- body: JSON.stringify({ key: `${modelId}.gguf`, setup_token: setupToken })
10746
- });
10747
- const d = await r.json().catch(() => ({}));
10748
- if (!r.ok || !d.model?.url) throw new Error("link refresh failed");
10749
- return d.model.url;
10750
- }
10751
- function registerModelsCommand(program2) {
10752
- const models = program2.command("models").description("run Gauss & Avila on this machine \u2014 download, serve, status (no Ollama)");
10753
- models.command("status").description("show which models are installed on this machine").action(() => {
10754
- heading("ORIRO models \u2014 on this machine");
10755
- for (const m of MODELS) {
10756
- const p = orxPath(m.id);
10757
- if (existsSync27(p)) ok(`${m.label} \u2014 installed (${gb(statSync8(p).size)}, device-locked)`);
10758
- else info(`${m.label} \u2014 ${dim("not downloaded")}`);
10759
- }
10760
- info(dim(`location: ${join40(oriroDir(), "weights")}`));
10761
- });
10762
- models.command("pull").description("download Gauss then Avila to this machine (one at a time, resumable)").option("--only <id>", "download just one model (gauss|avila)").action(async (opts) => {
10763
- const base = process.env.ORIRO_API_BASE ?? "https://oriro.ai";
10764
- const license = readLicense();
10765
- const setupToken = readSetupToken();
10766
- if (!setupToken) die("not authorized for downloads \u2014 run `oriro login <code>` first (code from oriro.app).");
10767
- const pick = opts.only ? MODELS.filter((m) => m.id === opts.only.toLowerCase()) : [...MODELS];
10768
- if (!pick.length) die(`unknown model "${opts.only}" (use gauss|avila)`);
10769
- let manifest;
10770
- try {
10771
- manifest = await fetchManifest(base, setupToken);
10772
- } catch (e) {
10773
- return die(e.message);
10774
- }
10775
- for (const m of pick) {
10776
- const entry = manifest[m.id];
10777
- if (!entry?.url) return die(`the weights manifest has no entry for ${m.id}`);
10778
- heading(m.label);
10779
- const size = await probeSize(entry.url).catch((e) => die(e.message));
10780
- info(`downloading ${gb(size)} \u2014 resumable, streams straight to disk`);
10781
- let lastPct = -1;
10782
- await pullModelToOrx({
10783
- modelId: m.id,
10784
- url: entry.url,
10785
- sizeBytes: size,
10786
- sha256: entry.sha256 ?? "",
10787
- licenseKey: license,
10788
- createdTs: Date.now(),
10789
- version: "2.4",
10790
- refresh: () => resumeUrl(base, setupToken, m.id),
10791
- onProgress: (done, total) => {
10792
- const pct = Math.floor(done / total * 100);
10793
- if (pct !== lastPct) {
10794
- lastPct = pct;
10795
- process.stdout.write(`\r ${accent(String(pct).padStart(3))}% ${gb(done)} / ${gb(total)} `);
10796
- }
10797
- }
10798
- });
10799
- process.stdout.write("\n");
10800
- ok(`${m.label} ready \u2014 locked to this device`);
10801
- const vision = manifest[`${m.id}-mmproj`];
10802
- if (vision?.url) {
10803
- info(`${m.label} vision projector \u2026`);
10804
- const vsize = await probeSize(vision.url).catch(() => 0);
10805
- if (vsize > 0) {
10806
- await pullModelToOrx({
10807
- modelId: `${m.id}-mmproj`,
10808
- url: vision.url,
10809
- sizeBytes: vsize,
10810
- sha256: vision.sha256 ?? "",
10811
- licenseKey: license,
10812
- createdTs: Date.now(),
10813
- version: "2.4",
10814
- refresh: () => resumeUrl(base, setupToken, `${m.id}-mmproj`)
10815
- });
10816
- ok(`${m.label} vision ready`);
10817
- }
10818
- }
10819
- }
10820
- ok("all set \u2014 run `oriro models serve` to use them locally");
10821
- });
10822
- models.command("import <files...>").description("device-lock GGUFs you downloaded from oriro.app into runnable .orx (local, no login)").action(async (files) => {
10823
- const license = readLicense();
10824
- heading("Import models to this machine");
10825
- let done = 0;
10826
- for (const f of files) {
10827
- if (!existsSync27(f)) {
10828
- info(`skip ${f} \u2014 file not found`);
10829
- continue;
10830
- }
10831
- const id = modelIdFromFilename(f);
10832
- if (!id) {
10833
- info(`skip ${f} \u2014 filename must contain 'gauss' or 'avila'`);
10834
- continue;
10835
- }
10836
- const label = MODELS.find((m) => m.id === id)?.label ?? id;
10837
- info(`device-locking ${label} \u2026`);
10838
- await packOrxToFile(f, orxPath(id), {
10839
- kek: deriveKek(license),
10840
- watermark: `orx:${id}-import`,
10841
- meta: { modelId: id, version: "2.4", createdTs: Date.now() }
10842
- });
10843
- ok(`${label} imported (${gb(statSync8(orxPath(id)).size)}, locked to this device)`);
10844
- done++;
10845
- }
10846
- if (!done) die("nothing imported \u2014 pass the gauss.gguf / avila.gguf you downloaded from oriro.app");
10847
- ok("run `oriro models serve` to use them locally (no Ollama)");
10848
- });
10849
- models.command("serve").description("run the models locally on an OpenAI-compatible endpoint (no Ollama)").option("-p, --port <n>", "port (default 11435)", (v) => parseInt(v, 10)).action(async (opts) => {
10850
- const license = readLicense();
10851
- const installed = MODELS.filter((m) => existsSync27(orxPath(m.id)));
10852
- if (!installed.length) die("no models installed \u2014 run `oriro models pull` first.");
10853
- const server = await startLocalServer({ licenseKey: license, port: opts.port });
10854
- heading("ORIRO local endpoint");
10855
- ok(`serving ${installed.map((m) => m.label).join(" + ")} on http://127.0.0.1:${server.port}`);
10856
- info(dim("OpenAI-compatible: POST /v1/chat/completions \xB7 GET /v1/models \xB7 GET /health"));
10857
- info(dim("oriro.ai / oriro.app reach it directly (CORS built in). Ctrl-C to stop."));
10858
- await new Promise((resolve3) => {
10859
- process.on("SIGINT", () => {
10860
- void server.close().then(resolve3);
10861
- });
10862
- });
10863
- });
10864
- }
10865
-
10866
9933
  // src/commands/import.ts
10867
- import { existsSync as existsSync28, readFileSync as readFileSync29, readdirSync as readdirSync4, statSync as statSync9, cpSync as cpSync2, mkdirSync as mkdirSync20 } from "fs";
10868
- import { join as join41, basename as basename4 } from "path";
9934
+ import { existsSync as existsSync23, readFileSync as readFileSync27, readdirSync as readdirSync4, statSync as statSync6, cpSync as cpSync2, mkdirSync as mkdirSync18 } from "fs";
9935
+ import { join as join35, basename as basename4 } from "path";
10869
9936
  init_mcp_client();
10870
9937
  init_custom();
10871
9938
  init_loader();
@@ -10873,10 +9940,10 @@ init_theme();
10873
9940
  function registerImportCommand(program2) {
10874
9941
  const imp = program2.command("import").description("migrate from another CLI (MCP servers, skills)");
10875
9942
  imp.command("mcp <file>").description("import MCP servers from a Claude-compatible mcp.json (Guardian-vetted)").action((file6) => {
10876
- if (!existsSync28(file6)) die(`no such file: ${file6}`);
9943
+ if (!existsSync23(file6)) die(`no such file: ${file6}`);
10877
9944
  let servers;
10878
9945
  try {
10879
- const j = JSON.parse(readFileSync29(file6, "utf8"));
9946
+ const j = JSON.parse(readFileSync27(file6, "utf8"));
10880
9947
  servers = j.mcpServers ?? j.servers ?? {};
10881
9948
  } catch (e) {
10882
9949
  die(`could not parse ${file6}: ${e instanceof Error ? e.message : String(e)}`);
@@ -10922,14 +9989,14 @@ function registerImportCommand(program2) {
10922
9989
  info(`${imported} imported \xB7 ${blocked2} blocked${imported ? ` \u2014 they connect in-session; see \`oriro connectors custom\`` : ""}`);
10923
9990
  });
10924
9991
  imp.command("skills <dir>").description("import SKILL.md skill folders from another CLI's skills directory").action((dir) => {
10925
- if (!existsSync28(dir) || !statSync9(dir).isDirectory()) die(`no such directory: ${dir}`);
9992
+ if (!existsSync23(dir) || !statSync6(dir).isDirectory()) die(`no such directory: ${dir}`);
10926
9993
  const dest = userSkillsDir();
10927
- mkdirSync20(dest, { recursive: true });
9994
+ mkdirSync18(dest, { recursive: true });
10928
9995
  heading("Import skills");
10929
- const sources = existsSync28(join41(dir, "SKILL.md")) ? [dir] : readdirSync4(dir).map((e) => join41(dir, e)).filter((p) => statSync9(p).isDirectory() && existsSync28(join41(p, "SKILL.md")));
9996
+ const sources = existsSync23(join35(dir, "SKILL.md")) ? [dir] : readdirSync4(dir).map((e) => join35(dir, e)).filter((p) => statSync6(p).isDirectory() && existsSync23(join35(p, "SKILL.md")));
10930
9997
  let n = 0;
10931
9998
  for (const src of sources) {
10932
- cpSync2(src, join41(dest, basename4(src)), { recursive: true });
9999
+ cpSync2(src, join35(dest, basename4(src)), { recursive: true });
10933
10000
  process.stdout.write(` ${fgHex(PALETTE.success, "\u2713")} ${accent(basename4(src))}
10934
10001
  `);
10935
10002
  n++;
@@ -11040,8 +10107,6 @@ registerVoiceCommand(program);
11040
10107
  registerAgentsCommand(program);
11041
10108
  registerConfigCommand(program);
11042
10109
  registerSetupCommand(program);
11043
- registerLoginCommand(program);
11044
- registerModelsCommand(program);
11045
10110
  registerImportCommand(program);
11046
10111
  registerCompletionCommand(program);
11047
10112
  enableHelpOnError(program);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@oriro/orirocli",
3
- "version": "0.3.9",
3
+ "version": "0.3.10",
4
4
  "description": "ORIRO — a free, on-device-friendly terminal AI agent. Built on the Pi agent harness (used as a library).",
5
5
  "type": "module",
6
6
  "bin": {