@jaredboynton/rtinfer 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/rtinferd.js +11 -0
- package/clients/rtinfer-client.mjs +195 -0
- package/clients/rtinfer_client.py +226 -0
- package/package.json +24 -0
- package/src/index.js +52 -0
- package/src/postinstall.js +44 -0
package/bin/rtinferd.js
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
#!/usr/bin/env node
'use strict';

// CLI shim: forwards the command-line arguments to the native rtinferd binary
// and exits with the child's exit code.

const { spawnRaw } = require('../src/index');

/**
 * Report a startup failure on stderr and exit with status 1.
 * @param {unknown} err - the error thrown while resolving the binary, or the
 *   `error` reported by the spawn result.
 */
function failToStart(err) {
  const detail = err && err.message ? `: ${err.message}` : '';
  console.error(`error: rtinferd native runtime failed to start${detail}`);
  process.exit(1);
}

let result;
try {
  // spawnRaw resolves the native binary before spawning and throws when no
  // usable binary exists; catch that so the user sees a one-line error
  // instead of an uncaught-exception stack trace.
  result = spawnRaw(process.argv.slice(2));
} catch (err) {
  failToStart(err);
}
if (result.error) {
  failToStart(result.error);
}
// A null status means the child reported no exit code; treat it as a failure.
process.exit(result.status == null ? 1 : result.status);
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
// Canonical rtinfer/1 HTTP client (JS).
|
|
2
|
+
//
|
|
3
|
+
// This is the SOURCE OF TRUTH for the JS client. Consumers vendor or symlink
|
|
4
|
+
// this file; do not fork it. The matching Python client lives at
|
|
5
|
+
// clients/python/rtinfer_client.py and MUST be edited in lockstep when the
|
|
6
|
+
// wire contract changes.
|
|
7
|
+
//
|
|
8
|
+
// The daemon (rtinferd) serves the `rtinfer/1` loopback contract:
|
|
9
|
+
// POST /v1/infer { contract, tier, system, user, schema?, schema_name?, model? }
|
|
10
|
+
// GET /v1/infer/health -> { contract, ready, provider, tiers }
|
|
11
|
+
//
|
|
12
|
+
// The daemon is the ONLY inference path: there is no live-realtime fallback.
|
|
13
|
+
// When no daemon is reachable, calls reject with DaemonUnreachable so the
|
|
14
|
+
// orchestrator fails loud (non-zero exit) rather than silently producing
|
|
15
|
+
// zero output.
|
|
16
|
+
//
|
|
17
|
+
// Discovery order (deterministic; no repo imports another):
|
|
18
|
+
// 1. $CSE_RTINFER_URL explicit override / tests
|
|
19
|
+
// 2. ~/.cse-rtinfer/endpoint.json rtinferd advertises here on boot (authoritative)
|
|
20
|
+
// 3. http://127.0.0.1:8787 legacy cse-toold cockpit default (transitional)
|
|
21
|
+
// else: unreachable -> fail loud.
|
|
22
|
+
import fs from "node:fs";
|
|
23
|
+
import os from "node:os";
|
|
24
|
+
import path from "node:path";
|
|
25
|
+
|
|
26
|
+
export const RTINFER_CONTRACT = "rtinfer/1";
|
|
27
|
+
const CONTRACT_MAJOR = 1;
|
|
28
|
+
const WELL_KNOWN = path.join(os.homedir(), ".cse-rtinfer", "endpoint.json");
|
|
29
|
+
const LEGACY_COCKPIT_DEFAULT = "http://127.0.0.1:8787";
|
|
30
|
+
// Pool size from $EXPLORE_SEARCH_DAEMON_POOL: a positive integer, default 4.
// A non-numeric value falls back to the default instead of yielding NaN
// (Math.max(1, NaN) is NaN); values below 1 are clamped to 1.
function parsePoolSize(raw) {
  const parsed = Number.parseInt(raw || "4", 10);
  return Number.isNaN(parsed) ? 4 : Math.max(1, parsed);
}
const POOL_SIZE = parsePoolSize(process.env.EXPLORE_SEARCH_DAEMON_POOL);
|
|
31
|
+
const SCORER_MODEL = (process.env.EXPLORE_SEARCH_SCORER_MODEL || "gpt-realtime-2").trim();
|
|
32
|
+
const HEALTH_TIMEOUT_MS = Math.round(
|
|
33
|
+
(parseFloat(process.env.EXPLORE_SEARCH_DAEMON_CONNECT || "") || 0.5) * 1000,
|
|
34
|
+
);
|
|
35
|
+
const REQUEST_TIMEOUT_MS = Math.round(
|
|
36
|
+
(parseFloat(process.env.EXPLORE_SEARCH_DAEMON_REQUEST || "") || 20.0) * 1000,
|
|
37
|
+
);
|
|
38
|
+
// Synthesis (responses_text, gpt-5.x map-reduce) runs ~30s on a full pack and
|
|
39
|
+
// far longer on a map-reduce, so it gets its own higher ceiling. The realtime
|
|
40
|
+
// navigator/scorer tiers keep the tight 20s timeout above.
|
|
41
|
+
const SYNTH_REQUEST_TIMEOUT_MS = Math.round(
|
|
42
|
+
(parseFloat(process.env.EXPLORE_SEARCH_DAEMON_SYNTH_REQUEST || "") || 90.0) * 1000,
|
|
43
|
+
);
|
|
44
|
+
|
|
45
|
+
let _resolved = false;
|
|
46
|
+
let _base = null;
|
|
47
|
+
|
|
48
|
+
export class DaemonUnreachable extends Error {
|
|
49
|
+
constructor(message = "no rtinfer daemon reachable") {
|
|
50
|
+
super(message);
|
|
51
|
+
this.name = "DaemonUnreachable";
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
export function daemonPoolSize() {
|
|
56
|
+
return POOL_SIZE;
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
export function scorerModel() {
|
|
60
|
+
return SCORER_MODEL;
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Accept any rtinfer/<major>.* matching CONTRACT_MAJOR so a minor bump does
|
|
64
|
+
// not dark-fail; a true rtinfer/2 cleanly falls open.
|
|
65
|
+
function contractMajorOk(contract) {
|
|
66
|
+
if (typeof contract !== "string") return false;
|
|
67
|
+
const m = /^rtinfer\/(\d+)/.exec(contract);
|
|
68
|
+
return !!m && parseInt(m[1], 10) === CONTRACT_MAJOR;
|
|
69
|
+
}
|
|
70
|
+
|
|
71
|
+
function envBool(name) {
|
|
72
|
+
return ["1", "true", "yes", "on"].includes((process.env[name] || "").trim().toLowerCase());
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
function healthUrl(base) {
|
|
76
|
+
return `${base.replace(/\/$/, "")}/v1/infer/health`;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
function inferUrl(base) {
|
|
80
|
+
return `${base.replace(/\/$/, "")}/v1/infer`;
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// A daemon is usable only if it answers health with our contract and reports
|
|
84
|
+
// ready (codex auth reachable). `ready:false` means "present but warming" -> we
|
|
85
|
+
// keep probing the next candidate, then the caller retries briefly.
|
|
86
|
+
async function probe(base) {
|
|
87
|
+
try {
|
|
88
|
+
const r = await fetch(healthUrl(base), { signal: AbortSignal.timeout(HEALTH_TIMEOUT_MS) });
|
|
89
|
+
if (!r.ok) return false;
|
|
90
|
+
const d = await r.json();
|
|
91
|
+
return d && contractMajorOk(d.contract) && d.ready === true;
|
|
92
|
+
} catch {
|
|
93
|
+
return false;
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
// Build the ordered list of base URLs to probe: explicit override, then the
// well-known endpoint file, then the legacy cockpit default. Values are
// trimmed (as the Python client's _candidates() does), blank or non-string
// entries are skipped, and a URL that appears twice is only listed once so it
// is not probed twice.
function candidates() {
  const out = [];
  const add = (value) => {
    if (typeof value !== "string") return;
    const url = value.trim();
    if (url && !out.includes(url)) out.push(url);
  };
  const override = process.env.CSE_RTINFER_URL;
  add(override);
  // Strict mode: trust ONLY the explicit override, no well-known / cockpit
  // fallback. Default off keeps the documented discovery order. Mirrors the
  // Python client's CSE_RTINFER_STRICT_URL.
  if (override && envBool("CSE_RTINFER_STRICT_URL")) return out;
  try {
    const d = JSON.parse(fs.readFileSync(WELL_KNOWN, "utf8"));
    if (d && contractMajorOk(d.contract)) add(d.base_url);
  } catch {
    /* no well-known file, or it is not valid JSON */
  }
  add(LEGACY_COCKPIT_DEFAULT);
  return out;
}
|
|
113
|
+
|
|
114
|
+
// Resolve the daemon base URL once per process. Returns null when nothing is
|
|
115
|
+
// reachable (the caller decides whether that is fatal).
|
|
116
|
+
export async function discoverEndpoint({ refresh = false } = {}) {
|
|
117
|
+
if (_resolved && !refresh) return _base;
|
|
118
|
+
for (const base of candidates()) {
|
|
119
|
+
// eslint-disable-next-line no-await-in-loop
|
|
120
|
+
if (await probe(base)) {
|
|
121
|
+
_base = base;
|
|
122
|
+
_resolved = true;
|
|
123
|
+
return _base;
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
_base = null;
|
|
127
|
+
_resolved = true;
|
|
128
|
+
return null;
|
|
129
|
+
}
|
|
130
|
+
|
|
131
|
+
// True when a daemon is reachable. Async (was a sync gate-dir check before).
|
|
132
|
+
export async function daemonEnabled() {
|
|
133
|
+
if (process.env.EXPLORE_SEARCH_DAEMON === "0") return false;
|
|
134
|
+
return (await discoverEndpoint()) != null;
|
|
135
|
+
}
|
|
136
|
+
|
|
137
|
+
// Warm-up = discovery probe; the server pool is already hot, so there is no
|
|
138
|
+
// per-process socket to spawn. Returns the base URL or null.
|
|
139
|
+
export async function warmDaemonPool() {
|
|
140
|
+
return discoverEndpoint();
|
|
141
|
+
}
|
|
142
|
+
|
|
143
|
+
// POST one rtinfer request. Throws DaemonUnreachable when no daemon is
|
|
144
|
+
// reachable; returns null on a per-request error (so a single bad ask degrades
|
|
145
|
+
// without aborting a batch). `tier` selects the model arm.
|
|
146
|
+
export async function postInfer(tier, body) {
  const base = await discoverEndpoint();
  if (!base) throw new DaemonUnreachable();
  // Synthesis gets the longer ceiling; every other tier keeps the tight one.
  const timeoutMs = tier === "responses_text" ? SYNTH_REQUEST_TIMEOUT_MS : REQUEST_TIMEOUT_MS;
  let resp;
  try {
    resp = await fetch(inferUrl(base), {
      method: "POST",
      headers: { "content-type": "application/json" },
      body: JSON.stringify({ contract: RTINFER_CONTRACT, tier, ...body }),
      signal: AbortSignal.timeout(timeoutMs),
    });
  } catch (e) {
    // Transport failure after a successful health probe: the daemon went away
    // mid-run. Drop the cached endpoint so a later call re-probes instead of
    // reusing the dead base (the Python client invalidates the same way),
    // then fail loud with the original error attached as `cause`.
    _resolved = false;
    _base = null;
    const detail = e && e.message ? e.message : String(e);
    const err = new DaemonUnreachable(`rtinfer request failed: ${detail}`);
    err.cause = e;
    throw err;
  }
  let json = null;
  try {
    json = await resp.json();
  } catch {
    // Unreadable or non-JSON body: a per-request error, not unreachability.
    return null;
  }
  if (!resp.ok || !json || json.ok !== true) return null;
  // The text tier returns a string; the structured tiers return an object.
  return tier === "responses_text" ? json.text : json.object;
}
|
|
172
|
+
|
|
173
|
+
// One structured realtime ask (navigator / scorer). Returns the parsed object
|
|
174
|
+
// or null on a per-request error.
|
|
175
|
+
export async function daemonAsk(namespace, req, { model = SCORER_MODEL } = {}) {
|
|
176
|
+
return postInfer("realtime_structured", {
|
|
177
|
+
system: req.system,
|
|
178
|
+
user: req.user,
|
|
179
|
+
schema: req.schema,
|
|
180
|
+
schema_name: req.schemaName || req.schema_name || "result",
|
|
181
|
+
model,
|
|
182
|
+
reasoning_effort: req.reasoningEffort,
|
|
183
|
+
});
|
|
184
|
+
}
|
|
185
|
+
|
|
186
|
+
// Batch of structured realtime asks. True parallelism comes from the server
|
|
187
|
+
// pool's semaphore; the client just fans out concurrent fetches. Per-request
|
|
188
|
+
// errors surface as null elements; total unreachability throws.
|
|
189
|
+
export async function daemonAskBatch(namespace, requests, { model = SCORER_MODEL } = {}) {
|
|
190
|
+
// Probe once up front so an unreachable daemon throws before fan-out instead
|
|
191
|
+
// of N times in parallel.
|
|
192
|
+
const base = await discoverEndpoint();
|
|
193
|
+
if (!base) throw new DaemonUnreachable();
|
|
194
|
+
return Promise.all(requests.map((req) => daemonAsk(namespace, req, { model })));
|
|
195
|
+
}
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Canonical rtinfer/1 HTTP client (Python).
|
|
3
|
+
|
|
4
|
+
SOURCE OF TRUTH for the Python client. Consumers vendor or symlink this file;
|
|
5
|
+
do not fork it. The matching JS client lives at clients/js/rtinfer-client.mjs
|
|
6
|
+
and MUST be edited in lockstep when the wire contract changes.
|
|
7
|
+
|
|
8
|
+
The daemon (rtinferd) serves the `rtinfer/1` loopback contract:
|
|
9
|
+
POST /v1/infer {contract, tier, system, user, schema?, schema_name?, model?}
|
|
10
|
+
GET /v1/infer/health -> {contract, ready, provider, tiers}
|
|
11
|
+
|
|
12
|
+
This is a *preferred* path for borrowing the shared daemon, never a required
|
|
13
|
+
one when used as a fallback layer: any unreachability, timeout, or non-OK
|
|
14
|
+
envelope returns ``(None, None)`` so callers can fall through to their own
|
|
15
|
+
inference path.
|
|
16
|
+
|
|
17
|
+
Discovery order (matches clients/js/rtinfer-client.mjs):
|
|
18
|
+
1. $CSE_RTINFER_URL explicit override / tests
|
|
19
|
+
2. ~/.cse-rtinfer/endpoint.json rtinferd advertises here on boot (authoritative)
|
|
20
|
+
3. http://127.0.0.1:8787 legacy cse-toold cockpit default (transitional)
|
|
21
|
+
|
|
22
|
+
The health gate accepts any rtinfer/1.x (major-1 match), so a minor bump does
|
|
23
|
+
not dark-fail; a true rtinfer/2 cleanly falls open.
|
|
24
|
+
|
|
25
|
+
Stdlib only: urllib + json.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
from __future__ import annotations
|
|
29
|
+
|
|
30
|
+
import json
|
|
31
|
+
import os
|
|
32
|
+
import re
|
|
33
|
+
import sys
|
|
34
|
+
import time
|
|
35
|
+
import urllib.error
|
|
36
|
+
import urllib.request
|
|
37
|
+
from pathlib import Path
|
|
38
|
+
from typing import Any
|
|
39
|
+
|
|
40
|
+
CONTRACT = "rtinfer/1"
|
|
41
|
+
_CONTRACT_MAJOR = 1
|
|
42
|
+
_LEGACY_COCKPIT_DEFAULT = "http://127.0.0.1:8787"
|
|
43
|
+
_WELL_KNOWN = Path.home() / ".cse-rtinfer" / "endpoint.json"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _contract_major_ok(contract: Any) -> bool:
|
|
47
|
+
"""True when ``contract`` is rtinfer/<major>.* matching _CONTRACT_MAJOR."""
|
|
48
|
+
if not isinstance(contract, str):
|
|
49
|
+
return False
|
|
50
|
+
m = re.match(r"^rtinfer/(\d+)", contract)
|
|
51
|
+
return bool(m) and int(m.group(1)) == _CONTRACT_MAJOR
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def _debug_log(msg: str) -> None:
|
|
55
|
+
if (os.environ.get("UNIFABLE_DEBUG") or os.environ.get("DEBUG") or "").strip():
|
|
56
|
+
try:
|
|
57
|
+
sys.stderr.write(f"[rtinfer] {msg}\n")
|
|
58
|
+
except OSError:
|
|
59
|
+
pass
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _env_float(name: str, default: float) -> float:
|
|
63
|
+
try:
|
|
64
|
+
return float(os.environ.get(name) or default)
|
|
65
|
+
except (TypeError, ValueError):
|
|
66
|
+
return default
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
HEALTH_TIMEOUT = _env_float("CSE_RTINFER_HEALTH_TIMEOUT", 0.5)
|
|
70
|
+
REQUEST_TIMEOUT = _env_float("CSE_RTINFER_REQUEST_TIMEOUT", 95.0)
|
|
71
|
+
# Re-discovery is cheap but not free; cache the resolved base for this process.
|
|
72
|
+
_DISCOVERY_TTL = _env_float("CSE_RTINFER_DISCOVERY_TTL", 30.0)
|
|
73
|
+
|
|
74
|
+
_resolved_at = 0.0
|
|
75
|
+
_resolved_base: str | None = None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _env_bool(name: str) -> bool:
|
|
79
|
+
return (os.environ.get(name) or "").strip().lower() in ("1", "true", "yes", "on")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def _candidates() -> list[str]:
|
|
83
|
+
out: list[str] = []
|
|
84
|
+
override = os.environ.get("CSE_RTINFER_URL")
|
|
85
|
+
if override:
|
|
86
|
+
out.append(override.strip())
|
|
87
|
+
# Strict mode: trust ONLY the explicit override, no well-known / cockpit
|
|
88
|
+
# fallback. Default off keeps the documented discovery order.
|
|
89
|
+
if override and _env_bool("CSE_RTINFER_STRICT_URL"):
|
|
90
|
+
return out
|
|
91
|
+
try:
|
|
92
|
+
data = json.loads(_WELL_KNOWN.read_text("utf-8"))
|
|
93
|
+
if isinstance(data, dict) and _contract_major_ok(data.get("contract")) and data.get("base_url"):
|
|
94
|
+
out.append(str(data["base_url"]).strip())
|
|
95
|
+
except (OSError, ValueError):
|
|
96
|
+
pass
|
|
97
|
+
out.append(_LEGACY_COCKPIT_DEFAULT)
|
|
98
|
+
return out
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _health_ok(base: str) -> bool:
    """Return True when ``base`` answers the health route with a compatible
    contract and ``ready: true``.

    Any transport, protocol, or decode failure counts as "not usable" and
    returns False so discovery can move on to the next candidate.
    """
    import http.client  # local import: HTTPException is not an OSError subclass

    url = base.rstrip("/") + "/v1/infer/health"
    try:
        with urllib.request.urlopen(url, timeout=HEALTH_TIMEOUT) as resp:  # noqa: S310 (loopback only)
            if resp.status != 200:
                return False
            data = json.loads(resp.read().decode("utf-8"))
    except (urllib.error.URLError, http.client.HTTPException, OSError, ValueError, TimeoutError):
        # http.client.HTTPException covers a listener that is not speaking
        # HTTP (e.g. BadStatusLine) or a truncated body (IncompleteRead);
        # urllib lets those propagate unwrapped.
        return False
    if not isinstance(data, dict):
        return False
    if not _contract_major_ok(data.get("contract")):
        # Only log when the server advertised some other contract string.
        if data.get("contract"):
            _debug_log(f"contract mismatch at {base}: {data.get('contract')} (want rtinfer/{_CONTRACT_MAJOR}.x)")
        return False
    return data.get("ready") is True
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def discover(refresh: bool = False) -> str | None:
|
|
120
|
+
"""Resolve a ready rtinfer base URL, or None. Cached for _DISCOVERY_TTL."""
|
|
121
|
+
global _resolved_at, _resolved_base
|
|
122
|
+
now = time.monotonic()
|
|
123
|
+
if not refresh and _resolved_base is not None and (now - _resolved_at) < _DISCOVERY_TTL:
|
|
124
|
+
return _resolved_base
|
|
125
|
+
for base in _candidates():
|
|
126
|
+
if _health_ok(base):
|
|
127
|
+
_resolved_base = base.rstrip("/")
|
|
128
|
+
_resolved_at = now
|
|
129
|
+
return _resolved_base
|
|
130
|
+
_resolved_base = None
|
|
131
|
+
_resolved_at = now
|
|
132
|
+
return None
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _post_infer(base: str, body: dict[str, Any], timeout: float) -> dict[str, Any] | None:
    """POST one request body to ``base``'s /v1/infer and return the decoded
    envelope when it is a dict with ``ok`` set to True, otherwise None.

    A transport, protocol, or decode failure also invalidates the cached
    discovery result so the next call re-discovers.
    """
    import http.client  # local import: HTTPException is not an OSError subclass

    req = urllib.request.Request(
        base + "/v1/infer",
        data=json.dumps(body).encode("utf-8"),
        headers={"content-type": "application/json"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:  # noqa: S310 (loopback only)
            data = json.loads(resp.read().decode("utf-8"))
    except (urllib.error.URLError, http.client.HTTPException, OSError, ValueError, TimeoutError):
        # Daemon went away mid-run: invalidate so the next call re-discovers.
        # http.client.HTTPException (BadStatusLine, IncompleteRead, ...) is
        # caught as well because urllib lets it propagate unwrapped, which
        # would otherwise break the "return None to signal fallback" contract.
        _invalidate()
        return None
    if not isinstance(data, dict) or data.get("ok") is not True:
        return None
    return data


def ask_structured(
    system: str,
    user: str,
    schema: dict[str, Any],
    *,
    schema_name: str = "result",
    model: str | None = None,
    timeout: float = REQUEST_TIMEOUT,
) -> tuple[dict[str, Any] | None, dict[str, int] | None]:
    """One structured ask over the shared daemon's realtime tier. Returns
    ``(object, usage)`` on success, ``(None, None)`` to signal fallback.

    ``usage`` is always None: the loopback endpoint does not surface token
    counts, and the borrow path is off the correctness/measurement path."""
    base = discover()
    if base is None:
        return None, None
    body: dict[str, Any] = {
        "contract": CONTRACT,
        "tier": "realtime_structured",
        "system": system,
        "user": user,
        "schema": schema,
        "schema_name": schema_name,
    }
    if model:
        body["model"] = model
    data = _post_infer(base, body, timeout)
    if data is None:
        return None, None
    obj = data.get("object")
    if not isinstance(obj, dict):
        return None, None
    return obj, None


def ask_text(
    system: str,
    user: str,
    *,
    model: str | None = None,
    timeout: float = REQUEST_TIMEOUT,
) -> str | None:
    """One freeform-text ask over the shared daemon's responses_text tier.
    Returns the assembled text on success, ``None`` to signal fallback."""
    base = discover()
    if base is None:
        return None
    body: dict[str, Any] = {
        "contract": CONTRACT,
        "tier": "responses_text",
        "system": system,
        "user": user,
    }
    if model:
        body["model"] = model
    data = _post_infer(base, body, timeout)
    if data is None:
        return None
    text = data.get("text")
    return text if isinstance(text, str) else None
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _invalidate() -> None:
|
|
224
|
+
global _resolved_at, _resolved_base
|
|
225
|
+
_resolved_base = None
|
|
226
|
+
_resolved_at = 0.0
|
package/package.json
ADDED
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@jaredboynton/rtinfer",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Always-on loopback rtinfer/1 inference daemon (gpt-realtime + codex/responses over Codex OAuth). Shared by cse-tools and unifable.",
|
|
5
|
+
"license": "MIT",
|
|
6
|
+
"repository": "https://github.com/jaredboynton/rtinfer",
|
|
7
|
+
"engines": { "node": ">=18.0.0" },
|
|
8
|
+
"bin": {
|
|
9
|
+
"rtinferd": "bin/rtinferd.js"
|
|
10
|
+
},
|
|
11
|
+
"main": "clients/rtinfer-client.mjs",
|
|
12
|
+
"files": [
|
|
13
|
+
"bin/",
|
|
14
|
+
"src/",
|
|
15
|
+
"clients/"
|
|
16
|
+
],
|
|
17
|
+
"scripts": {
|
|
18
|
+
"postinstall": "node src/postinstall.js"
|
|
19
|
+
},
|
|
20
|
+
"optionalDependencies": {
|
|
21
|
+
"@jaredboynton/rtinfer-darwin-arm64": "0.1.0",
|
|
22
|
+
"@jaredboynton/rtinfer-linux-arm64": "0.1.0"
|
|
23
|
+
}
|
|
24
|
+
}
|
package/src/index.js
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// Resolve the native rtinferd binary from the platform package (or a vendored
|
|
4
|
+
// fallback) and exec it. Mirrors the cse-tools js-wrapper pattern.
|
|
5
|
+
|
|
6
|
+
const path = require('path');
|
|
7
|
+
const fs = require('fs');
|
|
8
|
+
const child_process = require('child_process');
|
|
9
|
+
|
|
10
|
+
function platformPackageName() {
|
|
11
|
+
return `@jaredboynton/rtinfer-${process.platform}-${process.arch}`;
|
|
12
|
+
}
|
|
13
|
+
|
|
14
|
+
function nativeBinaryName(platform) {
|
|
15
|
+
return platform === 'win32' ? 'rtinferd.exe' : 'rtinferd';
|
|
16
|
+
}
|
|
17
|
+
|
|
18
|
+
/**
 * Locate the native rtinferd binary for the current platform.
 *
 * Looks in the installed platform package first (supported platforms only),
 * then in the vendored `vendor/` directory next to `src/`.
 *
 * @returns {string} absolute path to an executable binary
 * @throws {Error} 'native runtime is not executable' when a binary exists in
 *   either location but fails the execute-permission check, or
 *   'native runtime is unavailable for this platform: <key>' when none exists.
 */
function resolveCoreBinary() {
  const platformKey = `${process.platform}-${process.arch}`;
  const supportedPlatforms = new Set(['darwin-arm64', 'linux-arm64']);
  const binaryName = nativeBinaryName(process.platform);

  if (supportedPlatforms.has(platformKey)) {
    try {
      const pkgJson = require.resolve(`${platformPackageName()}/package.json`);
      const candidate = path.join(path.dirname(pkgJson), 'bin', binaryName);
      if (fs.existsSync(candidate)) {
        fs.accessSync(candidate, fs.constants.X_OK);
        return candidate;
      }
    } catch (e) {
      // A missing platform package falls through to the vendored binary; only
      // a present-but-not-executable binary is reported.
      if (e && e.code === 'EACCES') {
        throw new Error('native runtime is not executable', { cause: e });
      }
    }
  }

  const vendorCandidate = path.resolve(__dirname, '..', 'vendor', binaryName);
  if (fs.existsSync(vendorCandidate)) {
    try {
      fs.accessSync(vendorCandidate, fs.constants.X_OK);
    } catch (e) {
      // Report the same error as the platform-package path instead of the raw
      // errno failure; any other error is rethrown unchanged.
      if (e && e.code === 'EACCES') {
        throw new Error('native runtime is not executable', { cause: e });
      }
      throw e;
    }
    return vendorCandidate;
  }

  throw new Error(`native runtime is unavailable for this platform: ${platformKey}`);
}
|
|
44
|
+
|
|
45
|
+
function spawnRaw(args, options) {
|
|
46
|
+
return child_process.spawnSync(resolveCoreBinary(), args || [], {
|
|
47
|
+
stdio: 'inherit',
|
|
48
|
+
...(options || {}),
|
|
49
|
+
});
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
module.exports = { resolveCoreBinary, spawnRaw };
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
// postinstall: install + load the always-on LaunchAgent so rtinferd is
|
|
4
|
+
// running immediately after `npm i -g @jaredboynton/rtinfer`. Best-effort:
|
|
5
|
+
// a failure here (no binary for this platform, CI sandbox, non-macOS) must
|
|
6
|
+
// NOT fail the install. The daemon can always be started manually with
|
|
7
|
+
// `rtinferd install` or `rtinferd serve`.
|
|
8
|
+
|
|
9
|
+
const path = require('path');
|
|
10
|
+
const fs = require('fs');
|
|
11
|
+
const { spawnRaw } = require('./index');
|
|
12
|
+
|
|
13
|
+
if (process.env.RTINFER_SKIP_POSTINSTALL === '1') {
|
|
14
|
+
process.exit(0);
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
// Pin the LaunchAgent to the STABLE npm global bin shim (this very wrapper),
|
|
18
|
+
// not the versioned native binary, so in-daemon self-update is a no-op for the
|
|
19
|
+
// plist: npm rewrites the shim in place and launchd respawns it unchanged.
|
|
20
|
+
function npmGlobalShim() {
|
|
21
|
+
// npm exposes the install prefix to lifecycle scripts; the global bin shim
|
|
22
|
+
// lives at <prefix>/bin/rtinferd on POSIX.
|
|
23
|
+
const prefix = process.env.npm_config_prefix || process.env.PREFIX;
|
|
24
|
+
if (prefix) {
|
|
25
|
+
const shim = path.join(prefix, 'bin', 'rtinferd');
|
|
26
|
+
if (fs.existsSync(shim)) return shim;
|
|
27
|
+
}
|
|
28
|
+
return null;
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
try {
|
|
32
|
+
const shim = npmGlobalShim();
|
|
33
|
+
const env = shim ? { ...process.env, RTINFER_LAUNCH_BIN: shim } : process.env;
|
|
34
|
+
const result = spawnRaw(['install'], { env });
|
|
35
|
+
if (result.error || (result.status != null && result.status !== 0)) {
|
|
36
|
+
console.error('[rtinfer] postinstall: daemon not installed automatically; run `rtinferd install` to enable the always-on service.');
|
|
37
|
+
}
|
|
38
|
+
} catch (e) {
|
|
39
|
+
console.error(`[rtinfer] postinstall skipped: ${e.message}`);
|
|
40
|
+
console.error('[rtinfer] run `rtinferd install` (macOS) or `rtinferd serve` to start the daemon.');
|
|
41
|
+
}
|
|
42
|
+
// Always exit 0 so npm install never fails on the daemon side-effect.
|
|
43
|
+
process.exit(0);
|
|
44
|
+
|