npm - modelstat - Versions diffs - 0.0.36 → 0.0.38 - Mend

modelstat 0.0.36 → 0.0.38

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Files changed (3) hide show

package/dist/cli.mjs CHANGED Viewed

@@ -44362,7 +44362,7 @@ var COGNITION_SYSTEM_PROMPT, MAX_COGNITION_TAGS_PER_FIELD, MAX_COGNITION_TAG_CHA
 var init_cognition = __esm({
   "../../packages/companion-core/src/pipeline/cognition.ts"() {
     "use strict";
-    COGNITION_SYSTEM_PROMPT = 'You read a one-sentence summary of an AI-coding work session and identify the user\'s emotional state and meta-cognitive state. Output JSON only \u2014 first character of reply is `{`. Schema: {"emotions":[],"meta":[]}. emotions: \u2264 3 short lowercase mood tags such as frustrated, curious, excited, focused, calm, confused, anxious, satisfied, proud, bored, energised. meta: \u2264 3 short lowercase tags about cognitive mode, such as debugging, exploring, planning, designing, learning, deciding, reviewing, refactoring, investigating, documenting. Each tag \u2264 24 chars, single word or hyphenated, no punctuation. Only emit a tag if the summary gives clear evidence \u2014 return [] for either field when unsure. Do not invent emotions the user did not display. No prose, no markdown.';
+    COGNITION_SYSTEM_PROMPT = 'You read a one-sentence summary of an AI-coding work session and identify the user\'s emotional state and meta-cognitive state. Output JSON only \u2014 first character of reply is `{`. Schema: {"emotions":[],"meta":[]}. emotions: \u2264 3 short lowercase MOOD tags \u2014 how the user FEELS \u2014 such as frustrated, curious, excited, calm, confused, anxious, satisfied, proud, bored, energised, overwhelmed, confident. meta: \u2264 3 short lowercase MENTAL-MODE tags \u2014 HOW the user is THINKING, never what they are doing. Valid examples: focused, scattered, in-flow, deliberate, hurried, stuck, open, exploratory, methodical, distracted. DO NOT emit ACTIVITY verbs (debugging, refactoring, designing, reviewing, deploying, planning, documenting, implementing) under meta \u2014 those describe the WORK, not the MIND. If the only candidate tag would be an activity verb, return [] for meta instead. Each tag \u2264 24 chars, single word or hyphenated, no punctuation. Only emit a tag if the summary gives clear evidence \u2014 return [] for either field when unsure. Do not invent emotions or mental modes the user did not display. No prose, no markdown.';
     MAX_COGNITION_TAGS_PER_FIELD = 3;
     MAX_COGNITION_TAG_CHARS = 24;
     COGNITION_MAX_TOKENS = 80;
@@ -44839,6 +44839,14 @@ function ollamaSummarize(cfg = defaultOllamaConfig()) {
       body: JSON.stringify({
         model: cfg.chatModel,
         stream: false,
+        // Disable reasoning. qwen3 (the default summariser family) is a
+        // thinking model: with `think` on it spends the entire
+        // `num_predict` budget on a <think> block and returns EMPTY
+        // content, so the summariser saw "" and the whole pipeline
+        // crash-looped at preflight. We only want the final terse
+        // abstract, never the chain-of-thought. Ollama ignores this
+        // field for non-thinking models, so it's safe across families.
+        think: false,
         options: {
           temperature: SUMMARISER_TEMPERATURE,
           num_predict: Math.min(maxTokens, SUMMARISER_MAX_TOKENS)
@@ -44868,6 +44876,10 @@ function ollamaCognize(cfg = defaultOllamaConfig()) {
           model: cfg.chatModel,
           stream: false,
           format: "json",
+          // Same reason as the summariser: no thinking budget, just the
+          // JSON cognition tags. Thinking models otherwise emit a long
+          // <think> block and return empty content.
+          think: false,
           options: {
             temperature: COGNITION_TEMPERATURE,
             num_predict: COGNITION_MAX_TOKENS
@@ -45331,12 +45343,12 @@ async function getAdapters() {
       `[modelstat] ollama up at ${ollamaCfg.baseUrl} \u2014 using ${ollamaCfg.chatModel} for summarisation`
     );
     adapters = {
-      // BGE-small via transformers.js — same model the server uses
-      // via fastembed, so segment vectors land in the same 384-dim
-      // space as leaf-description vectors and cosine similarity is
-      // directly meaningful. We do NOT use ollamaEmbed here because
-      // Ollama's library doesn't host bge-small (404 on pull) and
-      // shipping MiniLM-via-Ollama vs BGE-small-server would break
+      // BGE-small via transformers.js — same model the server uses,
+      // so segment vectors land in the same 384-dim space as
+      // leaf-description vectors and cosine similarity is directly
+      // meaningful. We do NOT use ollamaEmbed here because Ollama's
+      // library doesn't host bge-small (404 on pull) and shipping
+      // MiniLM via Ollama vs BGE-small server-side would break
       // cross-source similarity.
       embed: createTransformersJsEmbedder(),
       summarize: ollamaSummarize(ollamaCfg),
@@ -45506,7 +45518,7 @@ var init_scan = __esm({
     init_pipeline2();
     init_config2();
     init_api();
-    AGENT_VERSION = "agent-0.0.36";
+    AGENT_VERSION = "agent-0.0.38";
     BATCH_MAX_EVENTS = 2e3;
   }
 });
@@ -47598,7 +47610,7 @@ var init_daemon = __esm({
     init_config2();
     init_lock();
     init_scan();
-    AGENT_VERSION2 = "agent-0.0.36";
+    AGENT_VERSION2 = "agent-0.0.38";
     HEARTBEAT_INTERVAL_MS = 1e4;
     SCAN_INTERVAL_MS = 5 * 60 * 1e3;
     status = {
@@ -47806,6 +47818,11 @@ ${programArgs}
   <key>EnvironmentVariables</key>
   <dict>
     <key>PATH</key><string>/usr/local/bin:/opt/homebrew/bin:/usr/bin:/bin</string>
+    <!-- Heap headroom for the startup scan of a large transcript backlog.
+         Node's default old-space ceiling (~4 GB) OOM-crashed the daemon on
+         big histories; raise it well below typical RAM. Inherited by the
+         tray-spawned 'modelstat start' child too (NODE_OPTIONS propagates). -->
+    <key>NODE_OPTIONS</key><string>--max-old-space-size=8192</string>
   </dict>
   <key>WorkingDirectory</key><string>${home()}</string>
 </dict>
@@ -47869,6 +47886,10 @@ Wants=network-online.target
 [Service]
 Type=simple
+# Heap headroom for the startup scan of a large transcript backlog \u2014
+# Node's default ~4 GB old-space ceiling OOM-crashed the daemon on big
+# histories.
+Environment=NODE_OPTIONS=--max-old-space-size=8192
 ExecStart=${nodeBinary()} ${cliPath} start
 Restart=always
 RestartSec=10
@@ -48023,7 +48044,7 @@ function tryOpenBrowser(url) {
     return false;
   }
 }
-var AGENT_VERSION3 = "agent-0.0.36";
+var AGENT_VERSION3 = "agent-0.0.38";
 function osFamily() {
   const p = platform4();
   if (p === "darwin") return "macos";
@@ -48340,7 +48361,7 @@ async function cmdWatch() {
 }
 async function cmdStart(rest) {
   if (!state.bearer || !state.deviceId) {
-    console.error("not paired yet. Run `modelstat connect` first.");
+    console.error("not paired yet. Run `modelstat` first.");
     process.exit(1);
   }
   const force = rest.includes("--force") || rest.includes("-f");
@@ -48352,7 +48373,7 @@ async function cmdStop() {
     uninstallService();
     console.log("\u2713 service stopped and uninstalled");
     console.log(`  Your device pairing is still in ${state.storePath}`);
-    console.log("  Run `modelstat connect` again to re-enable.");
+    console.log("  Run `modelstat` again to re-enable.");
   } catch (err) {
     console.error(`\u2717 ${err.message}`);
     process.exit(1);
@@ -48466,7 +48487,7 @@ async function cmdJobs(args) {
       process.stdout.write(`${JSON.stringify({ paired: false, reason: "no_claim_code" })}
 `);
     } else {
-      console.log("no claim code on record \u2014 run `modelstat connect` first");
+      console.log("no claim code on record \u2014 run `modelstat` first");
     }
     return;
   }