nothumanallowed 16.0.12 → 16.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "nothumanallowed",
3
- "version": "16.0.12",
3
+ "version": "16.0.13",
4
4
  "description": "NotHumanAllowed — 38 AI agents, 80 tools, Studio (visual agentic workflows). Email, calendar, browser automation, screen capture, canvas, cron/heartbeat, Alexandria E2E messaging, GitHub, Notion, Slack, voice chat, free AI (Liara), 28 languages. Zero-dependency CLI.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/constants.mjs CHANGED
@@ -5,7 +5,7 @@ import { fileURLToPath } from 'url';
5
5
  const __filename = fileURLToPath(import.meta.url);
6
6
  const __dirname = path.dirname(__filename);
7
7
 
8
- export const VERSION = '16.0.12';
8
+ export const VERSION = '16.0.13';
9
9
  export const BASE_URL = 'https://nothumanallowed.com/cli';
10
10
  export const API_BASE = 'https://nothumanallowed.com/api/v1';
11
11
 
@@ -229,43 +229,77 @@ export function register(router) {
229
229
  content: (h.content || '').replace(/!\[Screenshot\]\(data:image\/[^)]+\)/g, '[Screenshot taken]'),
230
230
  })).filter(m => m.content);
231
231
 
232
- // ── Rolling summary (Fix 2) ──
233
- // After a threshold of turns we generate (or reuse) a compact summary of
234
- // everything OLDER than the recent window, persisted in the conversation
235
- // object. The recent window stays as raw messages[]. This is the
236
- // "memory like ChatGPT/Claude" pattern.
237
- const RECENT = 12;
232
+ // ── Rolling summary (Fix 2) — TOKEN-based threshold ──
233
+ // Industry pattern (Claude context compaction, ChatGPT memory): trigger
234
+ // summary when the OLDER messages would consume more than a budget,
235
+ // measured in tokens (~chars/4). Provider-aware budget:
236
+ // - anthropic / openai / gemini → 24k tokens raw before summary
237
+ // - nha (Liara/Qwen 32B 32k ctx) → 8k tokens raw before summary
238
+ // - others → 8k as safe default
239
+ // Plus per-turn cap of MAX_RECENT turns so latency stays bounded.
240
+ const provider = config.llm?.provider || (config.llm?.apiKey ? 'anthropic' : 'nha');
241
+ const TOKEN_BUDGET_BY_PROVIDER = {
242
+ anthropic: 24000, openai: 24000, gemini: 24000,
243
+ nha: 8000, deepseek: 16000, grok: 16000, mistral: 16000, cohere: 8000,
244
+ };
245
+ const tokenBudget = TOKEN_BUDGET_BY_PROVIDER[provider] || 8000;
246
+ const MAX_RECENT_TURNS = 30; // hard cap (latency safeguard)
247
+ const approxTokens = (s) => Math.ceil((s || '').length / 4);
248
+
238
249
  let conversationSummary = '';
239
250
  let recentHistory = rawHistory;
240
- if (rawHistory.length > RECENT) {
241
- recentHistory = rawHistory.slice(-RECENT);
242
- const older = rawHistory.slice(0, -RECENT);
243
- // Try to reuse a cached summary from the conversation, regenerate if
244
- // the older slice grew beyond what the cached summary covered.
245
- let cachedConv = null;
246
- if (body.conversationId) {
247
- try { cachedConv = loadConversation(body.conversationId); } catch {}
251
+ if (rawHistory.length > 0) {
252
+ // Walk backwards accumulating tokens until we exceed the budget OR
253
+ // hit MAX_RECENT_TURNS. Everything BEFORE that index goes into summary.
254
+ let recentTokens = 0;
255
+ let splitIdx = 0;
256
+ for (let i = rawHistory.length - 1; i >= 0; i--) {
257
+ const t = approxTokens(rawHistory[i].content);
258
+ if (recentTokens + t > tokenBudget) { splitIdx = i + 1; break; }
259
+ if (rawHistory.length - i > MAX_RECENT_TURNS) { splitIdx = i + 1; break; }
260
+ recentTokens += t;
261
+ splitIdx = i;
248
262
  }
249
- const cached = cachedConv?.rollingSummary;
250
- if (cached && cached.coveredTurns === older.length) {
251
- conversationSummary = cached.text;
252
- } else {
253
- // Generate a fresh summary via the same LLM. Compact, factual.
254
- const summaryInput = older.map(m =>
255
- `${m.role === 'user' ? 'Utente' : 'Assistente'}: ${m.content.slice(0, 600)}`
256
- ).join('\n\n');
257
- try {
258
- conversationSummary = await callLLM(
259
- config,
260
- 'Sei un sintetizzatore di conversazione. Riassumi in italiano in 200-400 token TUTTI i fatti, decisioni, preferenze utente, dati specifici (date, ID, nomi, numeri) emersi nella conversazione. Niente abbellimenti, solo informazione utile per ricostruire il contesto.',
261
- summaryInput,
262
- { max_tokens: 600, temperature: 0.2 },
263
- );
264
- if (cachedConv) {
265
- cachedConv.rollingSummary = { text: conversationSummary, coveredTurns: older.length, at: new Date().toISOString() };
266
- try { saveConversation(cachedConv); } catch {}
267
- }
268
- } catch { /* if summary fails, just skip it — recent history is enough */ }
263
+ recentHistory = rawHistory.slice(splitIdx);
264
+ const older = rawHistory.slice(0, splitIdx);
265
+
266
+ if (older.length > 0) {
267
+ // Reuse cached summary when the older slice hasn't grown.
268
+ let cachedConv = null;
269
+ if (body.conversationId) {
270
+ try { cachedConv = loadConversation(body.conversationId); } catch {}
271
+ }
272
+ const cached = cachedConv?.rollingSummary;
273
+ if (cached && cached.coveredTurns === older.length) {
274
+ conversationSummary = cached.text;
275
+ } else {
276
+ // Build summary input in user language. Trim individual turns to
277
+ // 1200 chars each (older context loses fine-grained details).
278
+ const summaryInput = older.map(m =>
279
+ `${m.role === 'user' ? 'User' : 'Assistant'}: ${m.content.slice(0, 1200)}`
280
+ ).join('\n\n');
281
+ const langLabel = userLang === 'it' ? 'in italiano' : `in ${userLang}`;
282
+ try {
283
+ conversationSummary = await callLLM(
284
+ config,
285
+ `You are a conversation summarizer. Summarize ${langLabel} in 200-500 tokens ALL facts, decisions, user preferences, specific data (dates, IDs, names, numbers, file paths, URLs) that emerged. No fluff, only information useful to reconstruct context. Preserve the language the user spoke in.`,
286
+ summaryInput,
287
+ { max_tokens: 700, temperature: 0.2 },
288
+ );
289
+ // Meta-compress: if the previous cached summary exists AND together
290
+ // with new content the result would balloon, replace fully with the
291
+ // new compact one (we just generated it from full older slice).
292
+ if (cachedConv) {
293
+ cachedConv.rollingSummary = {
294
+ text: conversationSummary,
295
+ coveredTurns: older.length,
296
+ coveredTokens: older.reduce((a, m) => a + approxTokens(m.content), 0),
297
+ at: new Date().toISOString(),
298
+ };
299
+ try { saveConversation(cachedConv); } catch {}
300
+ }
301
+ } catch { /* if summary fails, just skip it — recent history is enough */ }
302
+ }
269
303
  }
270
304
  }
271
305
 
@@ -278,9 +312,11 @@ export function register(router) {
278
312
  // ── User memory (Fix 3) — persistent across conversations + channels ──
279
313
  // Loaded from ~/.nha/user-memory.md, prepended to the system prompt.
280
314
  try {
281
- const { buildMemoryPrefix } = await import('../../services/user-memory.mjs');
315
+ const { buildMemoryPrefix, autoLearnFromTurn } = await import('../../services/user-memory.mjs');
282
316
  const memPrefix = buildMemoryPrefix();
283
317
  if (memPrefix) effectiveSystemPrompt = `${memPrefix}${effectiveSystemPrompt || ''}`;
318
+ // Auto-learn — fire and forget, doesn't block the response.
319
+ autoLearnFromTurn(msg, config).catch(() => null);
284
320
  } catch {}
285
321
 
286
322
  // The final user message — keep the per-turn language tag close to the
@@ -32,6 +32,24 @@ export function register(router) {
32
32
  sendJSON(res, 200, { ok: true, version: VERSION, ts: Date.now() });
33
33
  });
34
34
 
35
// GET /api/audit/query — query the cross-channel audit log.
// Optional query params: tool, channel, since (ms timestamp), limit.
//
// Numeric params are validated before they reach queryAuditLog:
//   - `since`: ignored unless it parses as an integer.
//   - `limit`: defaults to 100 when missing, non-numeric, or <= 0, and is
//     capped at 10000 so a client cannot request an unbounded response.
// Any failure (including a failed dynamic import) is reported as HTTP 500
// with the error message.
router.get('/api/audit/query', async (req, res) => {
  const DEFAULT_LIMIT = 100;
  const MAX_LIMIT = 10000;
  try {
    const { queryAuditLog } = await import('../../services/message-responder.mjs');
    // req.url is path + query only, so a dummy base is needed to parse it.
    const { searchParams } = new URL(req.url, 'http://localhost');

    // Parse an integer query param; undefined when absent or not a number.
    const intParam = (name) => {
      const parsed = Number.parseInt(searchParams.get(name) ?? '', 10);
      return Number.isNaN(parsed) ? undefined : parsed;
    };

    const requestedLimit = intParam('limit');
    const limit = requestedLimit !== undefined && requestedLimit > 0
      ? Math.min(requestedLimit, MAX_LIMIT)
      : DEFAULT_LIMIT;

    const entries = queryAuditLog({
      tool: searchParams.get('tool') || undefined,
      channel: searchParams.get('channel') || undefined,
      since: intParam('since'),
      limit,
    });
    sendJSON(res, 200, { entries });
  } catch (e) {
    sendJSON(res, 500, { error: e.message });
  }
});
52
+
35
53
  // GET /api/version/check
36
54
  //
37
55
  // Returns three version signals so the UI can distinguish three states:
@@ -495,12 +495,23 @@ export async function callGemini(apiKey, model, systemPrompt, userMessage, _stre
495
495
  return data.candidates?.[0]?.content?.parts?.[0]?.text || '';
496
496
  }
497
497
 
498
/**
 * OpenAI-compatible history mapper — used by DeepSeek/Grok/Mistral/Cohere.
 *
 * Converts `opts.history` into an array of `{ role, content }` chat messages.
 * Entries that are falsy or lack a role/content are skipped. Every role other
 * than 'assistant' is sent as 'user'.
 *
 * Content handling:
 *   - strings / primitives are stringified as-is;
 *   - arrays of content parts are flattened to text: string parts and the
 *     `text` field of object parts are joined with newlines, other parts
 *     (e.g. images) are dropped, instead of being sent as "[object Object]";
 *   - entries whose content flattens to an empty string are omitted.
 *
 * @param {{ history?: Array<{ role: string, content: unknown }> }} [opts]
 * @returns {Array<{ role: 'assistant' | 'user', content: string }>} empty
 *   array when `opts.history` is not an array.
 */
function _openaiHistory(opts) {
  if (!Array.isArray(opts?.history)) return [];

  const partToText = (part) => {
    if (typeof part === 'string') return part;
    return typeof part?.text === 'string' ? part.text : '';
  };
  const contentToText = (content) => (
    Array.isArray(content)
      ? content.map(partToText).filter(Boolean).join('\n')
      : String(content)
  );

  return opts.history
    .filter((m) => m && m.role && m.content)
    .map((m) => ({
      role: m.role === 'assistant' ? 'assistant' : 'user',
      content: contentToText(m.content),
    }))
    // An empty array (or image-only parts) flattens to '' — don't send it.
    .filter((m) => m.content);
}
507
+
498
508
  export async function callDeepSeek(apiKey, model, systemPrompt, userMessage, stream = false, opts = {}) {
499
509
  const body = {
500
510
  model: model || 'deepseek-chat',
501
511
  max_tokens: opts.max_tokens || 8192,
502
512
  messages: [
503
513
  { role: 'system', content: systemPrompt },
514
+ ..._openaiHistory(opts),
504
515
  { role: 'user', content: userMessage },
505
516
  ],
506
517
  stream,
@@ -529,6 +540,7 @@ export async function callGrok(apiKey, model, systemPrompt, userMessage, stream
529
540
  max_tokens: opts.max_tokens || 8192,
530
541
  messages: [
531
542
  { role: 'system', content: systemPrompt },
543
+ ..._openaiHistory(opts),
532
544
  { role: 'user', content: userMessage },
533
545
  ],
534
546
  stream,
@@ -557,6 +569,7 @@ export async function callMistral(apiKey, model, systemPrompt, userMessage, stre
557
569
  max_tokens: opts.max_tokens || 8192,
558
570
  messages: [
559
571
  { role: 'system', content: systemPrompt },
572
+ ..._openaiHistory(opts),
560
573
  { role: 'user', content: userMessage },
561
574
  ],
562
575
  stream,
@@ -580,10 +593,18 @@ export async function callMistral(apiKey, model, systemPrompt, userMessage, stre
580
593
  }
581
594
 
582
595
  export async function callCohere(apiKey, model, systemPrompt, userMessage, _stream = false, opts = {}) {
596
+ // Cohere uses a 'chat_history' array with role: USER/CHATBOT (uppercase).
597
+ const cohereHistory = Array.isArray(opts.history)
598
+ ? opts.history.filter(m => m && m.role && m.content).map(m => ({
599
+ role: m.role === 'assistant' ? 'CHATBOT' : 'USER',
600
+ message: String(m.content),
601
+ }))
602
+ : [];
583
603
  const body = {
584
604
  model: model || 'command-r-plus',
585
605
  max_tokens: opts.max_tokens || 8192,
586
606
  preamble: systemPrompt,
607
+ chat_history: cohereHistory,
587
608
  message: userMessage,
588
609
  };
589
610
  if (opts.temperature !== undefined) body.temperature = opts.temperature;
@@ -23,12 +23,37 @@ import { VERSION } from '../constants.mjs';
23
23
  // (telegram, discord, chat web, AWF agents). Lets the user ask "what have you
24
24
  // done today?" from any surface and get a consistent answer.
25
25
  const _GLOBAL_AUDIT_FILE = path.join(os.homedir(), '.nha', 'audit-log.jsonl');
26
+ const _AUDIT_MAX_LINES = 10000; // rotate at 10k lines (~1MB JSONL)
27
+ const _AUDIT_ARCHIVE_PREFIX = 'audit-log-';
28
+
29
/**
 * Trim the global audit log once it has grown past the rotation threshold.
 *
 * Keeps the most recent `_AUDIT_MAX_LINES` non-empty lines in the live file
 * and moves everything older into a timestamped archive file
 * (`<_AUDIT_ARCHIVE_PREFIX><ISO timestamp>.jsonl`) in the same directory.
 *
 * Best-effort: any filesystem error is swallowed so that audit maintenance
 * can never break the caller. Returns nothing.
 *
 * NOTE(review): lines appended by another writer between the read and the
 * rename below would be lost — confirm whether several processes can append
 * to this file concurrently.
 */
function _rotateAuditIfNeeded() {
  let tmpFile = null;
  try {
    if (!fs.existsSync(_GLOBAL_AUDIT_FILE)) return;
    // Cheap gate: only pay for reading/counting lines once the file is
    // bigger than ~1.5MB.
    if (fs.statSync(_GLOBAL_AUDIT_FILE).size < 1_500_000) return;

    const lines = fs.readFileSync(_GLOBAL_AUDIT_FILE, 'utf-8').split('\n').filter(Boolean);
    if (lines.length <= _AUDIT_MAX_LINES) return;

    // Everything before the newest _AUDIT_MAX_LINES lines is archived.
    const splitAt = lines.length - _AUDIT_MAX_LINES;
    const archived = lines.slice(0, splitAt);
    const tail = lines.slice(splitAt);

    const ts = new Date().toISOString().replace(/[:.]/g, '-');
    const archiveFile = path.join(path.dirname(_GLOBAL_AUDIT_FILE), `${_AUDIT_ARCHIVE_PREFIX}${ts}.jsonl`);
    fs.writeFileSync(archiveFile, archived.join('\n') + '\n');

    // Write the trimmed log to a sibling temp file and rename it over the
    // live file, so a crash mid-write cannot leave a truncated audit log.
    tmpFile = `${_GLOBAL_AUDIT_FILE}.${process.pid}.tmp`;
    fs.writeFileSync(tmpFile, tail.join('\n') + '\n');
    fs.renameSync(tmpFile, _GLOBAL_AUDIT_FILE);
    tmpFile = null;
  } catch {
    // Rotation is optional housekeeping; just make sure no temp file lingers.
    if (tmpFile) {
      try { fs.unlinkSync(tmpFile); } catch {}
    }
  }
}
47
+
26
48
  function _appendGlobalAudit(entry) {
27
49
  try {
28
50
  fs.mkdirSync(path.dirname(_GLOBAL_AUDIT_FILE), { recursive: true });
29
51
  fs.appendFileSync(_GLOBAL_AUDIT_FILE, JSON.stringify(entry) + '\n');
52
+ // Rotate occasionally (cheap stat-check; full scan only if size > 1.5MB).
53
+ if (Math.random() < 0.01) _rotateAuditIfNeeded();
30
54
  } catch {}
31
55
  }
56
+
32
57
  function _readGlobalAudit(limitTail = 100) {
33
58
  try {
34
59
  if (!fs.existsSync(_GLOBAL_AUDIT_FILE)) return [];
@@ -40,6 +65,20 @@ function _readGlobalAudit(limitTail = 100) {
40
65
  } catch { return []; }
41
66
  }
42
67
 
68
/**
 * Query the audit log with filters. Exported for the HTTP /api/audit/query
 * endpoint. Supports filtering by tool, channel, since timestamp.
 *
 * Only the last 10000 entries of the log are considered. Filters that are
 * falsy are ignored. Matching entries keep their log order and the newest
 * `limit` of them are returned.
 *
 * @param {object} [filters]
 * @param {string} [filters.tool] - keep only entries whose `tool` equals this.
 * @param {string} [filters.channel] - keep only entries whose `channel` equals this.
 * @param {number} [filters.since] - drop entries whose `ts` is lower than this.
 * @param {number} [filters.limit=100] - maximum number of entries returned.
 *   Non-numeric values fall back to 100; values <= 0 (after truncation to an
 *   integer) yield an empty array.
 * @returns {Array<object>} matching entries, oldest first.
 */
export function queryAuditLog({ tool, channel, since, limit = 100 } = {}) {
  // slice(-0) and slice(-NaN) both mean "from index 0", and a negative limit
  // would slice from the head — so the limit has to be normalized first.
  const parsedLimit = typeof limit === 'number' ? limit : Number.parseInt(limit, 10);
  const maxEntries = Number.isNaN(parsedLimit) ? 100 : Math.trunc(parsedLimit);
  if (maxEntries <= 0) return [];

  const matches = _readGlobalAudit(10000).filter((e) => {
    // Optional chaining: a null/primitive entry must not throw.
    if (tool && e?.tool !== tool) return false;
    if (channel && e?.channel !== channel) return false;
    if (since && e?.ts < since) return false;
    return true;
  });
  return matches.slice(-maxEntries);
}
81
+
43
82
  // ── Agent Routing (keyword-based, zero LLM calls) ───────────────────────────
44
83
 
45
84
  const ROUTING_TABLE = [
@@ -1200,6 +1239,18 @@ class TelegramResponder {
1200
1239
  const auditNote = this._renderAuditForPrompt(chatId);
1201
1240
  if (auditNote) enrichedMessage = auditNote + enrichedMessage;
1202
1241
 
1242
+ // ── User memory (Fix 3+D v16.0.13) — cross-channel persistent context.
1243
+ // Same memory file that's used by the chat web UI. The user can
1244
+ // `nha memory add "I prefer concise answers"` once and EVERY channel
1245
+ // honors it.
1246
+ try {
1247
+ const { buildMemoryPrefix, autoLearnFromTurn } = await import('./user-memory.mjs');
1248
+ const memPrefix = buildMemoryPrefix();
1249
+ if (memPrefix) enrichedMessage = memPrefix + enrichedMessage;
1250
+ // Auto-learn — fire and forget, doesn't block the response.
1251
+ autoLearnFromTurn(cleanText, this.config).catch(() => null);
1252
+ } catch {}
1253
+
1203
1254
  if (TOOL_AGENTS.has(agent)) {
1204
1255
  const result = await callAgentWithTools(this.config, agent, enrichedMessage, detectedLang, preHistory);
1205
1256
  responseText = result.text;
@@ -1987,7 +2038,7 @@ class TelegramResponder {
1987
2038
  // Clear the pending state so we don't double-delete on next yes.
1988
2039
  delete this._lastContextByChatId[chatId].pendingDeleteEvents;
1989
2040
  delete this._lastContextByChatId[chatId].lastCalendarEvents;
1990
- try { (await import('./telegram-context.mjs')).saveTelegramContext(this._lastContextByChatId); } catch {}
2041
+ try { saveTelegramContext(this._lastContextByChatId); } catch {}
1991
2042
 
1992
2043
  const subject = eligible.length === 1 ? `"${eligible[0].summary}"` : `${eligible.length} appuntamenti`;
1993
2044
  const lines = [`Ho cancellato ${subject}.`];
@@ -2194,7 +2245,7 @@ class TelegramResponder {
2194
2245
  lastCalendarListAt: Date.now(),
2195
2246
  lastCalendarSource: { tool: toolName, args },
2196
2247
  };
2197
- try { (await import('./telegram-context.mjs')).saveTelegramContext(this._lastContextByChatId); } catch {}
2248
+ try { saveTelegramContext(this._lastContextByChatId); } catch {}
2198
2249
  }
2199
2250
  return { action: actionKey, success: true, message: String(out) };
2200
2251
  } catch (e) { return { action: actionKey, success: false, message: `Errore: ${e.message}` }; }
@@ -2786,7 +2837,26 @@ class DiscordResponder {
2786
2837
  // Tool-capable agents use the full tool execution loop
2787
2838
  const TOOL_AGENTS = new Set(['herald', 'hermes', 'edi', 'jarvis', 'flux', 'echo', 'mercury', 'pipe', 'navi', 'link', 'prometheus', 'tempest']);
2788
2839
  const callFn = TOOL_AGENTS.has(agent) ? callAgentWithTools : callAgent;
2789
- const response = await callFn(this.config, agent, cleanText);
2840
+ // Cross-channel user memory + audit log + auto-learn (v16.0.13)
2841
+ let discordMsg = cleanText;
2842
+ try {
2843
+ const { buildMemoryPrefix, autoLearnFromTurn } = await import('./user-memory.mjs');
2844
+ const memPrefix = buildMemoryPrefix();
2845
+ if (memPrefix) discordMsg = memPrefix + discordMsg;
2846
+ autoLearnFromTurn(cleanText, this.config).catch(() => null);
2847
+ } catch {}
2848
+ try {
2849
+ const auditNote = _readGlobalAudit(15);
2850
+ if (auditNote.length > 0) {
2851
+ const lines = auditNote.slice(-10).map(e => {
2852
+ const t = new Date(e.ts).toLocaleString('it-IT', { day: '2-digit', month: 'short', hour: '2-digit', minute: '2-digit' });
2853
+ const st = e.success === false ? '✗' : '✓';
2854
+ return `- ${t} · ${e.tool} ${st} · ${e.summary || ''}`;
2855
+ }).join('\n');
2856
+ discordMsg = `[AZIONI RECENTI da altri canali]\n${lines}\n[FINE]\n\n${discordMsg}`;
2857
+ }
2858
+ } catch {}
2859
+ const response = await callFn(this.config, agent, discordMsg);
2790
2860
 
2791
2861
  // Discord message limit is 2000 chars
2792
2862
  const truncated = response.length > 1900
@@ -78,3 +78,51 @@ export function buildMemoryPrefix() {
78
78
  }
79
79
  return `[USER MEMORY — persistent across all conversations]\n${raw}\n[END USER MEMORY]\n\n`;
80
80
  }
81
+
82
/**
 * Auto-extract a memorable fact from a user turn and append it to memory.
 *
 * Flow:
 *   1. Cheap pre-filter: messages shorter than 8 chars, non-strings, or
 *      messages without a likely signal ("ricord…", "remember that",
 *      "preferisco", "mi chiamo", "lavoro come", "ho un appuntamento…",
 *      "uso sempre", …) return null without calling the LLM.
 *   2. The LLM is asked for strict JSON `{"memorable", "fact"}`.
 *   3. The fact is trimmed to 140 chars, roughly de-duplicated against the
 *      existing memory, and stored with an "(auto) " prefix.
 *
 * Never throws: any failure (LLM error, unparsable reply, storage error)
 * resolves to null.
 *
 * NOTE(review): messages mentioning "password" / "api key" pass the
 * pre-filter and are forwarded to the LLM; only the prompt (not code) asks
 * the model to keep secrets out of the stored fact.
 *
 * @param {string} userText
 * @param {object} config — NHA config (needs llm provider)
 * @returns {Promise<string|null>} the learned fact (without the "(auto) "
 *   prefix) if one was stored, else null
 */
export async function autoLearnFromTurn(userText, config) {
  if (!userText || typeof userText !== 'string' || userText.length < 8) return null;

  // Cheap pre-filter — only call the LLM if the text plausibly contains a fact.
  // Stems are word PREFIXES ("ricord[aiy]" must hit "ricordami", "appuntament"
  // must hit "appuntamento"), so they must not be followed by \b; whole
  // phrases keep the trailing word boundary.
  const stems = [
    String.raw`ricord[aiy]`,
    String.raw`memorizz[aiy]`,
    String.raw`ho\s+un\s+appuntament`,
    String.raw`il\s+mio\s+(?:telefon|indirizz)`,
  ];
  const phrases = [
    String.raw`salv[aiy]\s+che`,
    String.raw`tieni\s+a\s+mente`,
    String.raw`prefer(?:isco|isci)`,
    String.raw`mi\s+chiamo`,
    String.raw`sono\s+(?:un|una)`,
    String.raw`lavoro\s+(?:come|presso|in)`,
    String.raw`abito\s+(?:a|in)`,
    String.raw`vivo\s+(?:a|in)`,
    String.raw`uso\s+sempre`,
    'preferenza',
    'impostazione',
    'deadline',
    'scadenza',
    String.raw`ho\s+un\s+incontro`,
    String.raw`il\s+mio\s+(?:nome|email)`,
    String.raw`api\s+key`,
    'password',
    String.raw`remember\s+that`,
    String.raw`please\s+remember`,
    String.raw`note\s+that`,
    String.raw`my\s+name\s+is`,
    String.raw`i\s+work\s+as`,
    String.raw`i\s+live\s+in`,
    String.raw`i\s+prefer`,
    String.raw`i\s+use\s+always`,
  ];
  const trigger = new RegExp(
    String.raw`\b(?:(?:${stems.join('|')})|(?:${phrases.join('|')})\b)`,
    'i',
  );
  if (!trigger.test(userText)) return null;

  try {
    const { callLLM } = await import('./llm.mjs');
    const systemPrompt =
      'You are a memory extractor. Read the user message and decide if there is ONE durable fact, preference, or piece of personal context worth remembering across future conversations. ' +
      'Return STRICT JSON: {"memorable": true|false, "fact": "concise fact in the user language, max 140 chars"} or {"memorable": false}. ' +
      'Memorable: name, role, location, language preference, communication style, recurring contacts, long-term projects, API keys/IDs (only id, NOT secrets), tools they use, hard preferences. ' +
      'NOT memorable: greetings, transient questions, one-off tasks, weather, news, anything that expires within a day. ' +
      'NEVER fabricate facts that the user did not explicitly state.';
    const raw = await callLLM(config, systemPrompt, userText, { max_tokens: 150, temperature: 0.1 });
    if (typeof raw !== 'string') return null;

    // The model may wrap the JSON in prose; grab the outermost {...} span.
    const jsonSpan = raw.match(/\{[\s\S]*\}/);
    if (!jsonSpan) return null;
    const parsed = JSON.parse(jsonSpan[0]);
    if (!parsed.memorable || !parsed.fact || typeof parsed.fact !== 'string') return null;

    const fact = parsed.fact.trim().slice(0, 140);
    if (!fact) return null;

    // Deduplicate: skip if a near-identical fact is already in memory.
    // Very rough: if the first 30 chars of the new fact appear in memory
    // already, skip. Avoids an (expensive) LLM-driven dedup loop.
    const existing = loadUserMemory().toLowerCase();
    const factLow = fact.toLowerCase();
    if (factLow.length > 20 && existing.includes(factLow.slice(0, Math.min(30, factLow.length)))) {
      return null;
    }

    addUserMemory(`(auto) ${fact}`);
    return fact;
  } catch {
    // Auto-learning is fire-and-forget; callers rely on it never rejecting.
    return null;
  }
}