ai-lens 0.8.110 → 0.8.111

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.commithash CHANGED
@@ -1 +1 @@
1
- a16390a
1
+ c982dc2
package/CHANGELOG.md CHANGED
@@ -2,6 +2,11 @@
2
2
 
3
3
  History of changes to the `ai-lens` CLI package on npm. New entries go on top. Format: `## X.Y.Z — YYYY-MM-DD`, followed by user-facing bullets.
4
4
 
5
+ ## 0.8.111 — 2026-06-23
6
+ - fix(status): case-fold project-filter-mismatch buckets on Windows
7
+ - fix(client): tolerate Cursor Windows mojibake paths in project_filter
8
+ - fix(status): project hooks "absent" and benign capture drops are info, not errors
9
+
5
10
  ## 0.8.110 — 2026-06-23
6
11
  - feat: `init` now sends a status report to the server right after it finishes, so the server has your current config (including which `projects` are tracked) and confirmation that hooks fire — no need to run `ai-lens status` by hand. The diagnostic runs quietly: just one summary line on screen.
7
12
 
package/README.md CHANGED
@@ -222,6 +222,12 @@ When MCP is enabled during `npx ai-lens init`, these tools become available insi
222
222
  | `get_chain_analysis` | AI-generated chain analysis: tasks, problems, tool errors, unanswered questions |
223
223
  | `request_analysis` | Manually trigger analysis for a specific session chain |
224
224
  | `get_token_usage` | Token usage statistics grouped by model (input/output/cache tokens) |
225
+ | `get_cost` | Dual-track cost breakdown (billed / estimated / subscription, do not sum) + by-model for a period; scope company/team/developer/bot |
226
+ | `get_cost_ranking` | Ranked cost by team or by people for a period |
227
+ | `get_cost_attribution` | Attribution health of factual Cursor spend — unattributed money and the people behind it |
228
+ | `get_cost_trend` | Company dual-track cost time-series over the last N buckets |
229
+ | `get_analysis_spend` | AI-Lens's own LLM (analysis pipeline) spend — daily series, total, delta, by source |
230
+ | `manage_subscription_fee` | Manage the manual per-developer × tool monthly seat fee (the subscription lane) |
225
231
  | `knowhow_search` | Search the team knowledge base built from session analyses |
226
232
  | `knowhow_update` | Add or update a knowledge base entry |
227
233
  | `export_developer_tips` | Export personalized tips as a Markdown document |
package/cli/status.js CHANGED
@@ -9,6 +9,8 @@ import { TLS_TRUST_CODES, tlsCodeOf, tlsVerdictSummary, issuerName } from '../cl
9
9
 
10
10
  import { getVersionInfo, readLensConfig, detectInstalledTools, getCursorToolConfig, getClaudeCodeToolConfig, getCodexToolConfig, analyzeToolHooks, checkHooksDisabled, verifyCodexHookTrust, CAPTURE_PATH, TOOL_CONFIGS, isClaudeProjectDirCommand, analyzeClaudeLocalOverlay, extractProjectDirRelPath, globalClaudeHooksActive, CONHOST_HEADLESS_PREFIX_RE } from './hooks.js';
11
11
  import { DATA_DIR, PENDING_DIR, SENDING_DIR, SESSION_PATHS_DIR, LOG_PATH, CAPTURE_LOG_PATH, LAST_STATUS_REPORT_PATH, getGitIdentity, getMonitoredProjects } from '../client/config.js';
12
+ import { recoverMojibake } from '../client/mojibake-fix.js';
13
+ import { isProjectMonitored } from '../client/capture.js';
12
14
  import { isLockStale } from '../client/sender.js';
13
15
  import { initLogger, info, success, warn, error, heading, blank, detail, setScreenQuiet } from './logger.js';
14
16
  import { scanNestedProjects, summarizeNestedProjects } from './scan.js';
@@ -610,7 +612,7 @@ function checkConfig() {
610
612
  };
611
613
  }
612
614
 
613
- function checkHooks(tool) {
615
+ export function checkHooks(tool) {
614
616
  const analysis = analyzeToolHooks(tool);
615
617
  const statusMap = {
616
618
  fresh: { ok: null, label: 'no config file' },
@@ -633,6 +635,21 @@ function checkHooks(tool) {
633
635
  }
634
636
  } catch { /* file doesn't exist or isn't valid */ }
635
637
 
638
+ // Project-scope hooks are opt-in (init --project-hooks). A project config that
639
+ // simply carries no AI Lens hooks ("absent") is informational, NOT a failure —
640
+ // it's the default for almost every repo with a committed .claude/.cursor config,
641
+ // and capture is handled by the global hooks. A genuine no-capture state (nothing
642
+ // installed in any scope) is still surfaced by the "Hook mode: none" check and the
643
+ // global tool lines, so this never hides a real problem.
644
+ const isProjectScope = tool.dirPath && dirname(tool.dirPath) !== homedir();
645
+ if (analysis.status === 'absent' && isProjectScope) {
646
+ return {
647
+ ok: null,
648
+ summary: 'no project hooks (opt-in)',
649
+ detail: `${detail}\n\nNo AI Lens hooks in this project's config — informational only. Project hooks are opt-in (npx -y ai-lens init --project-hooks); capture runs via the global hooks when installed.`,
650
+ };
651
+ }
652
+
636
653
  // Check if hooks are globally disabled (e.g. disableAllHooks in settings.local.json)
637
654
  const disabled = checkHooksDisabled(tool);
638
655
  if (disabled.length > 0) {
@@ -852,14 +869,14 @@ function checkSenderLog() {
852
869
  };
853
870
  }
854
871
 
855
- function checkCaptureLog() {
856
- if (!existsSync(CAPTURE_LOG_PATH)) {
872
+ export function checkCaptureLog(logPath = CAPTURE_LOG_PATH) {
873
+ if (!existsSync(logPath)) {
857
874
  return { ok: true, summary: 'no drops logged', detail: 'Capture log does not exist (no events dropped)' };
858
875
  }
859
876
 
860
877
  let lines;
861
878
  try {
862
- lines = readFileSync(CAPTURE_LOG_PATH, 'utf-8').split(/\r?\n/).filter(Boolean);
879
+ lines = readFileSync(logPath, 'utf-8').split(/\r?\n/).filter(Boolean);
863
880
  } catch (err) {
864
881
  return { ok: false, summary: `error reading log: ${err.message}`, detail: `Error: ${err.message}` };
865
882
  }
@@ -869,10 +886,19 @@ function checkCaptureLog() {
869
886
  // code distribution per category so it reaches the server via client_reports
870
887
  // — the count alone can't tell EMFILE from EACCES from ENOENT. The raw
871
888
  // error.message strings stay local (may contain paths); only the code travels.
889
+ // Severity of capture-log entries:
890
+ // - `reason` entries are expected gate DROPS (project_filter / no_email /
891
+ // duplicate / …) — never a problem, they reflect your own config.
892
+ // - `msg` entries are capture-side errors. Split them: SOFT transient read
893
+ // errors (transcript-offset-*: a re-read/offset race, not a broken setup)
894
+ // surface as informational; HARD errors (queue/spawn/capture failures) mean
895
+ // capture is actually failing and stay a hard ✗.
896
+ const SOFT_ERROR_MSGS = new Set(['transcript-offset-error', 'transcript-offset-commit-failed']);
872
897
  const counts = {};
873
898
  const codesByCategory = {};
874
899
  let lastTs = null;
875
- let hasErrors = false;
900
+ let hasHardError = false;
901
+ let hasSoftError = false;
876
902
  for (const line of lines) {
877
903
  try {
878
904
  const entry = JSON.parse(line);
@@ -883,7 +909,10 @@ function checkCaptureLog() {
883
909
  codesByCategory[category][entry.code] = (codesByCategory[category][entry.code] || 0) + 1;
884
910
  }
885
911
  lastTs = entry.ts;
886
- if (entry.msg) hasErrors = true;
912
+ if (entry.msg) {
913
+ if (SOFT_ERROR_MSGS.has(entry.msg)) hasSoftError = true;
914
+ else hasHardError = true;
915
+ }
887
916
  } catch { /* non-JSON line */ }
888
917
  }
889
918
 
@@ -906,13 +935,79 @@ function checkCaptureLog() {
906
935
  const codeBlock = codeLines.length ? `\n\nError codes by category:\n${codeLines.join('\n')}` : '';
907
936
  const last10 = lines.slice(-10);
908
937
 
938
+ // Hard error → ✗; only soft/transient errors → informational '-'; pure drops → ✓.
909
939
  return {
910
- ok: !hasErrors,
940
+ ok: hasHardError ? false : (hasSoftError ? null : true),
911
941
  summary,
912
- detail: `Log: ${CAPTURE_LOG_PATH}\nTotal: ${total}${codeBlock}\n\nLast 10 entries:\n${last10.join('\n')}`,
942
+ detail: `Log: ${logPath}\nTotal: ${total}${codeBlock}\n\nLast 10 entries:\n${last10.join('\n')}`,
913
943
  };
914
944
  }
915
945
 
946
// Render a captured (Cursor URI-style, forward-slash) path as a native path for
// the fix hint — `/c:/a/b` → `C:\a\b` on Windows; unchanged elsewhere.
// Non-string input is returned as-is.
function toDisplayPath(p) {
  if (process.platform !== 'win32' || typeof p !== 'string') return p;
  // Drop the URI-style leading slash in front of a drive letter, then flip separators.
  const native = p.replace(/^\/([a-zA-Z]:)/, '$1').replace(/\//g, '\\');
  // Normalise a lower-case drive letter to upper case.
  return native.replace(/^([a-z]):/, (_m, drive) => `${drive.toUpperCase()}:`);
}

// Minimum number of project_filter drops a single path needs before it is
// reported — guards against flagging a handful of incidental drops.
const PROJECT_FILTER_MISMATCH_MIN = 5;

/**
 * Surface project_filter drops where the user's ACTUAL working project isn't in
 * the `projects` filter — real events silently dropped. The plain "Capture drops"
 * counter hides this.
 *
 * Paths are mojibake-recovered before the coverage check / display so Cursor
 * Windows mojibake paths aren't flagged once the capture-side match covers them.
 * Guards against false positives: only entries logged under the CURRENT live
 * filter are counted (this drops the status self-test's synthetic drops and
 * stale-config entries), the self-test session/paths are excluded, and a path
 * needs at least PROJECT_FILTER_MISMATCH_MIN drops to be reported.
 *
 * @param {string} [logPath] - capture log to scan (JSON lines).
 * @param {string[]|null} [monitored] - the live `projects` filter; falsy means
 *   no filter is configured.
 * @returns {{ok: (boolean|null), summary: string, detail: string}} `ok: true`
 *   when nothing is mismatched (or nothing can be checked), `ok: null` when at
 *   least one uncovered path reached the threshold.
 */
export function checkProjectFilterMismatch(logPath = CAPTURE_LOG_PATH, monitored = getMonitoredProjects()) {
  if (!monitored) return { ok: true, summary: 'no filter configured (capturing all)', detail: 'projects filter is unset — nothing to mismatch.' };
  if (!existsSync(logPath)) return { ok: true, summary: 'no drops logged', detail: 'Capture log does not exist.' };

  let lines;
  try {
    lines = readFileSync(logPath, 'utf-8').split(/\r?\n/).filter(Boolean);
  } catch (err) {
    return { ok: true, summary: 'log unreadable', detail: `Error: ${err.message}` };
  }

  const liveSig = JSON.stringify([...monitored].sort());
  const byPath = new Map(); // case-normalized key -> { display, count }
  for (const line of lines) {
    let e;
    try { e = JSON.parse(line); } catch { continue; }
    // A line can be valid JSON without being an entry object (`null`, a number,
    // a string). `null` in particular would throw on the property reads below
    // and take the whole status run down with it.
    if (e === null || typeof e !== 'object') continue;
    if (e.reason !== 'project_filter' || typeof e.project_path !== 'string' || !e.project_path) continue;
    // Skip the status self-test's own synthetic drops.
    if (typeof e.session_id === 'string' && e.session_id.startsWith('status-check-')) continue;
    if (e.project_path.includes('.ai-lens-status-check')) continue;
    // Only entries logged under the CURRENT filter — drops stale-config and the
    // self-test's `AI_LENS_PROJECTS=…status-check-nonexistent` runs.
    const sig = Array.isArray(e.monitored) ? JSON.stringify([...e.monitored].sort()) : null;
    if (sig !== liveSig) continue;
    // If the live filter already covers it (raw OR mojibake-recovered), the
    // capture-side fix will keep it — not a mismatch.
    if (isProjectMonitored(e.project_path, [], monitored)) continue;
    const display = toDisplayPath(recoverMojibake(e.project_path));
    // Key case-insensitively on Windows so the same project logged with
    // differing case doesn't split across buckets and dodge the threshold.
    const key = process.platform === 'win32' ? display.toLowerCase() : display;
    const bucket = byPath.get(key);
    if (bucket) bucket.count++;
    else byPath.set(key, { display, count: 1 });
  }

  const offenders = [...byPath.values()]
    .filter((o) => o.count >= PROJECT_FILTER_MISMATCH_MIN)
    .sort((a, b) => b.count - a.count);
  if (offenders.length === 0) return { ok: true, summary: 'filter matches captured paths', detail: 'No project_filter drops outside the configured filter.' };

  // Report the most-dropped path; the rest are only counted.
  const { display: topPath, count: topCount } = offenders[0];
  const filterStr = monitored.join(', ');
  const detailLines = [
    `⚠️ Фильтр projects не покрывает твой рабочий путь — события отбрасываются (project_filter).`,
    ` Рабочий путь: ${topPath}`,
    ` В фильтре: ${filterStr}`,
    ` Почини: org-разработчику — /sync + /setup; иначе добавь путь в фильтр:`,
    ` npx -y ai-lens@latest init --yes --projects "${topPath}"`,
    ` (если этот проект исключён намеренно — игнорируй)`,
  ];
  if (offenders.length > 1) detailLines.push(` Ещё путей вне фильтра: ${offenders.length - 1}`);
  return { ok: null, summary: `⚠ ${topCount} событий отброшено — рабочий путь не в фильтре projects`, detail: detailLines.join('\n') };
}
1010
+
916
1011
  function checkRealActivity() {
917
1012
  try {
918
1013
  if (!existsSync(SESSION_PATHS_DIR)) {
@@ -1624,6 +1719,7 @@ export default async function status({ report = false, quiet = false } = {}) {
1624
1719
 
1625
1720
  // 10. Capture drops
1626
1721
  printLine('Capture drops', checkCaptureLog());
1722
+ printLine('Project filter', checkProjectFilterMismatch());
1627
1723
 
1628
1724
  // 11. Real activity
1629
1725
  const realActivityResult = checkRealActivity();
package/client/capture.js CHANGED
@@ -41,6 +41,16 @@ try {
41
41
  redactObject = mod.redactObject ?? ((o) => o);
42
42
  } catch { /* redact.js not installed yet — skip redaction, server will handle it */ }
43
43
 
44
+ // Soft import — mojibake-fix.js may not exist on older client installs.
45
+ // recoverMojibake repairs Cursor 3.2.x+ Windows mojibake (CP1251/CP1252 double
46
+ // UTF-8) for the project_filter MATCH decision only; the raw payload is never
47
+ // mutated (the server stays the canonical recoverer on ingest).
48
+ let recoverMojibake = (s) => s;
49
+ try {
50
+ const mod = await import('./mojibake-fix.js');
51
+ recoverMojibake = mod.recoverMojibake ?? ((s) => s);
52
+ } catch { /* mojibake-fix.js not installed yet — skip recovery, server recovers on ingest */ }
53
+
44
54
  const __dirname = dirname(fileURLToPath(import.meta.url));
45
55
 
46
56
  /**
@@ -964,7 +974,7 @@ const CURSOR_TYPE_MAP = {
964
974
  // to forward slashes before comparing. On Windows the filesystem is
965
975
  // case-insensitive, so we also lowercase both sides to prevent mismatches
966
976
  // when realpathSync returns a different case than the configured path.
967
- function pathContains(parent, child) {
977
+ export function pathContains(parent, child) {
968
978
  if (!parent || !child || typeof parent !== 'string' || typeof child !== 'string') return false;
969
979
  const norm = s => {
970
980
  let n = s.replace(/\\/g, '/').replace(/\/$/, '');
@@ -977,6 +987,36 @@ function pathContains(parent, child) {
977
987
  return c === p || c.startsWith(p + '/');
978
988
  }
979
989
 
990
/**
 * Decide whether an event's project is covered by the monitored filter.
 *
 * Tolerates the mojibake `project_path` Cursor 3.2.x+ emits on Windows
 * (CP1251/CP1252 double-UTF-8): each candidate is matched BOTH as-is and via
 * recoverMojibake(). The recovered form is used ONLY here for the match — the
 * caller's values are never mutated.
 *
 * @param {string} projectPath - the event's primary project path.
 * @param {string[]} [workspaceRoots] - extra roots to try (Cursor multi-root
 *   workspaces); anything that is not an array is ignored.
 * @param {string[]|null} monitored - configured filter; falsy means no filter.
 * @returns {boolean} true (= keep) when no filter is configured, when no
 *   project_path is present ("can't filter → pass through"), or when any
 *   candidate path is contained in a monitored path.
 */
export function isProjectMonitored(projectPath, workspaceRoots, monitored) {
  if (!monitored) return true;
  if (!projectPath) return true;

  // realpathSync usually returns its input unchanged (or throws and we fall
  // back to the raw value), so the raw and resolved forms are mostly the same
  // string. Track what was already expanded so each distinct path goes through
  // recoverMojibake / pathContains once — this runs on every captured event.
  const seen = new Set();
  const candidates = new Set();
  const add = (value) => {
    if (typeof value !== 'string' || !value || seen.has(value)) return;
    seen.add(value);
    candidates.add(value);
    candidates.add(recoverMojibake(value));
  };

  const roots = Array.isArray(workspaceRoots) ? workspaceRoots : [];
  for (const raw of [projectPath, ...roots]) {
    let resolved = raw;
    try { resolved = realpathSync(raw); } catch { /* unresolvable (e.g. a mojibake path) — keep the raw value */ }
    add(resolved);
    add(raw); // raw too: realpathSync fails on a mojibake path
  }

  return [...candidates].some((c) => monitored.some((p) => pathContains(p, c)));
}
1019
+
980
1020
  function pickWorkspaceRoot(roots) {
981
1021
  if (!Array.isArray(roots) || roots.length === 0) return null;
982
1022
  const valid = roots.filter(r => typeof r === 'string' && r.length > 0);
@@ -987,7 +1027,8 @@ function pickWorkspaceRoot(roots) {
987
1027
  const match = valid.find(root => {
988
1028
  let resolved = root;
989
1029
  try { resolved = realpathSync(root); } catch {}
990
- return monitored.some(p => pathContains(p, resolved));
1030
+ // Match raw OR mojibake-recovered (Cursor Windows non-ASCII paths).
1031
+ return monitored.some(p => pathContains(p, resolved) || pathContains(p, recoverMojibake(resolved)));
991
1032
  });
992
1033
  if (match) return match;
993
1034
  }
@@ -1527,16 +1568,12 @@ async function main() {
1527
1568
  // If the primary is filtered out, drop the entire batch (the per-call events
1528
1569
  // share the same project_path).
1529
1570
  const monitored = getMonitoredProjects();
1530
- let projectPath = primary.project_path;
1531
- try { projectPath = realpathSync(projectPath); } catch {}
1532
- if (monitored && projectPath && !monitored.some(p => pathContains(p, projectPath))) {
1533
- // Fallback: for Cursor multi-root workspaces, check if any raw workspace_roots entry matches
1534
- const roots = Array.isArray(event.workspace_roots) ? event.workspace_roots : [];
1535
- const resolvedRoots = roots.map(r => { try { return realpathSync(r); } catch { return r; } });
1536
- if (!resolvedRoots.some(root => monitored.some(p => pathContains(p, root)))) {
1537
- logDrop('project_filter', { type: primary.type, source: primary.source, session_id: primary.session_id, project_path: primary.project_path, monitored });
1538
- process.exit(0);
1539
- }
1571
+ // isProjectMonitored matches the primary project_path AND any Cursor
1572
+ // workspace_roots, each as-is and mojibake-recovered (Cursor 3.2.x+ Windows
1573
+ // CP1251/CP1252 paths). It returns true when no filter / no project_path.
1574
+ if (!isProjectMonitored(primary.project_path, event.workspace_roots, monitored)) {
1575
+ logDrop('project_filter', { type: primary.type, source: primary.source, session_id: primary.session_id, project_path: primary.project_path, monitored });
1576
+ process.exit(0);
1540
1577
  }
1541
1578
 
1542
1579
  // Resolve identity: git first, then fall back to event payload (e.g. Cursor's user_email)
@@ -0,0 +1,192 @@
1
+ /**
2
+ * Mojibake recovery for "double UTF-8" corruption from Cursor 3.2.x+ on Windows.
3
+ *
4
+ * CLIENT copy. This is a verbatim port of `server/utils/mojibake-fix.js` so the
5
+ * client-side project_filter can recover a mojibake `project_path` for the MATCH
6
+ * DECISION ONLY (the raw payload is shipped unchanged — the server stays the
7
+ * canonical recoverer on ingest). The client bundle (~/.ai-lens/client/) cannot
8
+ * import from server/, and the server image is esbuild-bundled without client/,
9
+ * so the two copies are kept identical by a parity test
10
+ * (test/client/mojibake-fix-parity.test.js) rather than a shared import.
11
+ *
12
+ * Two patterns observed in production:
13
+ * - Pattern B (CP1252 -> UTF-8): UTF-8 bytes (D0 95 = "Е") read as
14
+ * Windows-1252 ("Ð•") and re-encoded as UTF-8. English/Western
15
+ * Windows locales.
16
+ * - Pattern A (CP1251 -> UTF-8): same, but read as Windows-1251
17
+ * (D0 9A = "К" -> "Рљ"). Russian Windows locales.
18
+ *
19
+ * Recovery: encode each char back to its codepage byte, then strict-decode as
20
+ * UTF-8. Conservative -- the strict pass only accepts the recovered string if it
21
+ * has strictly fewer mojibake marker chars than the input and at least one plain Cyrillic letter.
22
+ */
23
+
24
+ // CP1251 byte -> Unicode code point. -1 = undefined byte.
25
+ const CP1251_FROM_BYTE = (() => {
26
+ const t = new Array(256);
27
+ for (let i = 0; i < 0x80; i++) t[i] = i;
28
+ const upper = [
29
+ 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
30
+ 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
31
+ 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
32
+ -1, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
33
+ 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
34
+ 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
35
+ 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
36
+ 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
37
+ ];
38
+ for (let i = 0; i < upper.length; i++) t[0x80 + i] = upper[i];
39
+ for (let i = 0; i < 64; i++) t[0xC0 + i] = 0x0410 + i;
40
+ return t;
41
+ })();
42
+
43
+ // CP1252 byte -> Unicode. 0x80-0x9F have a few unused slots; rest is identity.
44
+ const CP1252_FROM_BYTE = (() => {
45
+ const t = new Array(256);
46
+ for (let i = 0; i < 256; i++) t[i] = i;
47
+ const upper = [
48
+ 0x20AC, -1, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
49
+ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, -1, 0x017D, -1,
50
+ -1, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
51
+ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, -1, 0x017E, 0x0178,
52
+ ];
53
+ for (let i = 0; i < upper.length; i++) if (upper[i] !== -1) t[0x80 + i] = upper[i];
54
+ return t;
55
+ })();
56
+
57
+ // Reverse maps: Unicode code point -> byte.
58
+ function buildReverse(forward) {
59
+ const m = new Map();
60
+ for (let b = 0; b < 256; b++) if (forward[b] !== -1) m.set(forward[b], b);
61
+ return m;
62
+ }
63
+ const CP1251_TO_BYTE = buildReverse(CP1251_FROM_BYTE);
64
+ const CP1252_TO_BYTE = buildReverse(CP1252_FROM_BYTE);
65
+
66
+ function encodeWith(str, reverseMap) {
67
+ const bytes = Buffer.alloc(str.length);
68
+ for (let i = 0; i < str.length; i++) {
69
+ const code = str.charCodeAt(i);
70
+ const b = reverseMap.get(code);
71
+ if (b === undefined) return null;
72
+ bytes[i] = b;
73
+ }
74
+ return bytes;
75
+ }
76
+
77
+ // Lossy variant: substitutes 0x3F ('?') for unmappable chars. Used by the
78
+ // loose-decode fallback when strict encode fails because Cursor inserted
79
+ // a stray non-codepage char (e.g., '–' EM-DASH from autocorrect).
80
+ function encodeWithLossy(str, reverseMap) {
81
+ const bytes = Buffer.alloc(str.length);
82
+ for (let i = 0; i < str.length; i++) {
83
+ const code = str.charCodeAt(i);
84
+ const b = reverseMap.get(code);
85
+ bytes[i] = b !== undefined ? b : 0x3F;
86
+ }
87
+ return bytes;
88
+ }
89
+
90
+ function strictUtf8Decode(buf) {
91
+ try {
92
+ return new TextDecoder('utf-8', { fatal: true }).decode(buf);
93
+ } catch {
94
+ return null;
95
+ }
96
+ }
97
+
98
+ function looseUtf8Decode(buf) {
99
+ // Non-fatal: invalid byte sequences become U+FFFD replacement chars.
100
+ return new TextDecoder('utf-8').decode(buf);
101
+ }
102
+
103
+ function countReplacements(str) {
104
+ let n = 0;
105
+ for (let i = 0; i < str.length; i++) if (str.charCodeAt(i) === 0xFFFD) n++;
106
+ return n;
107
+ }
108
+
109
+ const PLAIN_CYRILLIC_RE = /[А-яЁё]/g;
110
+ // Mojibake marker chars that real Russian/English text never contains:
111
+ // - Latin1 supplement (À-ÿ) — Pattern B leaves these littered throughout.
112
+ // - Non-Russian Cyrillic supplement (Ѐ-Џ, ѐ-џ, Ґ-ӿ) — Pattern A's signature.
113
+ const NOISE_RE = /[À-ÿЀ-Џѐ-џҐ-ӿ]/g;
114
+ function countMatches(re, str) {
115
+ const m = str.match(re);
116
+ return m ? m.length : 0;
117
+ }
118
+
119
+ // Marker A: any latin1-supplement letter (À-ÿ) -- these almost never
120
+ // appear in our event data outside of Pattern-B mojibake.
121
+ // Marker B: standard Russian Cyrillic (А-яЁё) immediately
122
+ // followed by a non-Russian Cyrillic supplement (Ѐ-Џѐ-џҐ-ӿ).
123
+ // Plain Russian text never produces such adjacency, but Pattern-A mojibake
124
+ // does (e.g., "Рљ" = mojibake of "К").
125
+ const MOJIBAKE_HINT_RE = /[À-ÿ]|[А-яЁё][Ѐ-Џѐ-џҐ-ӿ]/;
126
+
127
+ /**
128
+ * Detect double-UTF-8 mojibake (CP1252 or CP1251 flavour) and recover the
129
+ * original Cyrillic text. Returns the input unchanged if no improvement.
130
+ */
131
+ export function recoverMojibake(str) {
132
+ if (typeof str !== 'string' || str.length < 2) return str;
133
+ if (!MOJIBAKE_HINT_RE.test(str)) return str;
134
+
135
+ const baseNoise = countMatches(NOISE_RE, str);
136
+ if (baseNoise === 0) return str;
137
+
138
+ for (const reverseMap of [CP1252_TO_BYTE, CP1251_TO_BYTE]) {
139
+ const bytes = encodeWith(str, reverseMap);
140
+ if (!bytes) continue;
141
+ const decoded = strictUtf8Decode(bytes);
142
+ if (decoded == null) continue;
143
+ // Accept if the round-trip strictly drops noise marker chars AND yields
144
+ // some real Russian content. Pure-mojibeka strings collapse 50/50
145
+ // markers→letters; partially mojibeked strings still see a strict drop.
146
+ const newNoise = countMatches(NOISE_RE, decoded);
147
+ const newCyrillic = countMatches(PLAIN_CYRILLIC_RE, decoded);
148
+ if (newNoise < baseNoise && newCyrillic > 0) return decoded;
149
+ }
150
+
151
+ // Loose-decode fallback. Triggers only when strict failed AND the input
152
+ // is densely mojibeked (>=5 marker chars). Cursor 3.2.x on Windows can
153
+ // drop the second byte of a 2-byte UTF-8 sequence (e.g., capslock + space
154
+ // shortcut), leaving "Р " instead of "Ри" — strict UTF-8 rejects, but
155
+ // non-fatal decode recovers the rest with a U+FFFD where the byte was lost.
156
+ // Tight thresholds keep clean Cyrillic, Ukrainian, and partially-corrupted
157
+ // text untouched: they all fail the >=10-cyrillic / <=5%-replacements
158
+ // / <=10%-residual-noise gate.
159
+ if (baseNoise >= 5) {
160
+ for (const reverseMap of [CP1252_TO_BYTE, CP1251_TO_BYTE]) {
161
+ const bytes = encodeWithLossy(str, reverseMap);
162
+ const decoded = looseUtf8Decode(bytes);
163
+ const newNoise = countMatches(NOISE_RE, decoded);
164
+ const newCyrillic = countMatches(PLAIN_CYRILLIC_RE, decoded);
165
+ const replacements = countReplacements(decoded);
166
+ if (newCyrillic >= 10
167
+ && newNoise <= baseNoise * 0.1
168
+ && replacements * 20 <= newCyrillic) {
169
+ return decoded;
170
+ }
171
+ }
172
+ }
173
+
174
+ return str;
175
+ }
176
+
177
+ /**
178
+ * Recursively walk an object and recover mojibake in every leaf string.
179
+ * Mirrors `redactObject` (server/utils/redact.js): depth-guarded, returns a
180
+ * new object/array; primitives and non-objects pass through.
181
+ */
182
+ export function walkAndFix(value, depth = 0) {
183
+ if (typeof value === 'string') return recoverMojibake(value);
184
+ if (depth >= 100) return value;
185
+ if (Array.isArray(value)) return value.map(v => walkAndFix(v, depth + 1));
186
+ if (value && typeof value === 'object') {
187
+ const out = {};
188
+ for (const [k, v] of Object.entries(value)) out[k] = walkAndFix(v, depth + 1);
189
+ return out;
190
+ }
191
+ return value;
192
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "ai-lens",
3
- "version": "0.8.110",
3
+ "version": "0.8.111",
4
4
  "type": "module",
5
5
  "description": "Centralized session analytics for AI coding tools",
6
6
  "bin": {