agentboss 0.1.1 → 0.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,7 +25,7 @@
25
25
  } catch (e) {}
26
26
  })();
27
27
  </script>
28
- <script type="module" crossorigin src="/assets/index-CsVml4AS.js"></script>
28
+ <script type="module" crossorigin src="/assets/index-CT8rBVfX.js"></script>
29
29
  <link rel="stylesheet" crossorigin href="/assets/index-C1wFD_Vo.css">
30
30
  </head>
31
31
  <body>
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentboss",
3
- "version": "0.1.1",
3
+ "version": "0.1.3",
4
4
  "description": "AI Agent collaboration analytics - become your AI agent's boss, not its babysitter",
5
5
  "main": "server/index.js",
6
6
  "bin": {
@@ -1,107 +1,111 @@
1
- /**
2
- * H2 — Judgement & Decision.
3
- *
4
- * Captures whether the human pushes back when the AI is wrong instead
5
- * of rubber-stamping output.
6
- * • challenge — challenge / questioning rate
7
- * • override — override-the-AI rate (band metric)
8
- * • accept_rate — passive-acceptance rate (ideal band 60-85%)
9
- *
10
- * See spec §4.2.
11
- *
12
- * @author Felix
13
- */
14
-
15
- 'use strict';
16
-
17
- const {
18
- fetchMessages,
19
- userMessages,
20
- matchesAny,
21
- CHALLENGE_PATTERNS,
22
- OVERRIDE_PATTERNS,
23
- ACCEPT_PATTERNS,
24
- } = require('../text-signals');
25
- const { explainIndicator, rollupDimension, scoreToLevel, H2 } = require('../thresholds-v2');
26
- const { makeEvidence } = require('../evidence-builder');
27
- const { mergeIndicator, dimensionSource } = require('./llm-merge');
28
-
29
- function analyze(db, session, difficulty = 2, llmCell = null) {
30
- const messages = fetchMessages(db, session.id);
31
- const users = userMessages(messages);
32
- const haveText = users.some((m) => m.text && m.text.length > 0);
33
-
34
- let challenge = null;
35
- let override = null;
36
- let accept = null;
37
- let challengeHits = 0;
38
- let overrideHits = 0;
39
- let overrideEligible = 0;
40
- let acceptHits = 0;
41
-
42
- if (haveText && users.length > 0) {
43
- challengeHits = users.filter((m) => matchesAny(m.text, CHALLENGE_PATTERNS)).length;
44
- challenge = challengeHits / users.length;
45
-
46
- // Override rate: user messages directly after an assistant reply
47
- // that explicitly override its choice.
48
- for (let i = 0; i < messages.length; i++) {
49
- const m = messages[i];
50
- if (m.role !== 'user') continue;
51
- const prev = i > 0 ? messages[i - 1] : null;
52
- if (!prev || prev.role !== 'assistant') continue;
53
- overrideEligible++;
54
- if (m.text && matchesAny(m.text, OVERRIDE_PATTERNS)) overrideHits++;
55
- }
56
- override = overrideEligible > 0 ? overrideHits / overrideEligible : null;
57
-
58
- // Accept rate: short pure-affirmation messages.
59
- acceptHits = users.filter((m) => {
60
- const t = (m.text || '').trim();
61
- if (!t || t.length > 30) return false;
62
- return matchesAny(t, ACCEPT_PATTERNS);
63
- }).length;
64
- accept = acceptHits / users.length;
65
- }
66
-
67
- const challengeE = explainIndicator(H2.challenge, challenge, difficulty);
68
- const overrideE = explainIndicator(H2.override, override, difficulty);
69
- const acceptE = explainIndicator(H2.accept_rate, accept, difficulty);
70
-
71
- const rule = {
72
- challenge: { score: challengeE.score, level: challengeE.level },
73
- override: { score: overrideE.score, level: overrideE.level },
74
- accept_rate: { score: acceptE.score, level: acceptE.level },
75
- };
76
- const cell = llmCell || {};
77
- const m = {
78
- challenge: mergeIndicator(cell.challenge, rule.challenge.score, rule.challenge.level),
79
- override: mergeIndicator(cell.override, rule.override.score, rule.override.level),
80
- accept_rate: mergeIndicator(cell.accept_rate, rule.accept_rate.score, rule.accept_rate.level),
81
- };
82
-
83
- const subScores = { challenge: m.challenge.score, override: m.override.score, accept_rate: m.accept_rate.score };
84
- const subLevels = { challenge: m.challenge.level, override: m.override.level, accept_rate: m.accept_rate.level };
85
-
86
- const subEvidence = {
87
- challenge: makeEvidence({ key: 'challenge', label: '合理质疑率', what: m.challenge.evidence || (haveText ? `规则版:质疑关键词命中 ${challengeHits}/${users.length}。` : '无文本,无法识别。'), expl: challengeE, unit: '%', difficulty }),
88
- override: makeEvidence({ key: 'override', label: '推翻率', what: m.override.evidence || (haveText ? `规则版:推翻关键词命中 ${overrideHits}/${overrideEligible}。` : '无文本,无法识别。'), expl: overrideE, unit: '%', difficulty }),
89
- accept_rate: makeEvidence({ key: 'accept_rate', label: '顺从/采纳判断', what: m.accept_rate.evidence || (haveText ? `规则版(兜底):纯肯定短消息占比 ${acceptHits}/${users.length}。LLM 开启后改为判断"采纳前是否有判断"。` : '无文本,无法识别。'), expl: acceptE, unit: '%', difficulty }),
90
- };
91
-
92
- const score = rollupDimension('H2', subScores);
93
- const level = scoreToLevel(score);
94
- const judgeSource = dimensionSource([m.challenge.source, m.override.source, m.accept_rate.source]);
95
-
96
- return {
97
- subScores,
98
- subLevels,
99
- subEvidence,
100
- raw: { challenge, override, accept_rate: accept, difficulty, haveText, challengeHits, overrideHits, overrideEligible, acceptHits },
101
- score,
102
- level,
103
- judgeSource,
104
- };
105
- }
106
-
107
- module.exports = { analyze };
1
+ /**
2
+ * H2 — Judgement & Decision.
3
+ *
4
+ * Captures whether the human pushes back when the AI is wrong instead
5
+ * of rubber-stamping output.
6
+ * • challenge — challenge / questioning rate
7
+ * • override — override-the-AI rate (band metric)
8
+ * • accept_rate — passive-acceptance rate (ideal band 60-85%)
9
+ *
10
+ * See spec §4.2.
11
+ *
12
+ * @author Felix
13
+ */
14
+
15
+ 'use strict';
16
+
17
+ const {
18
+ fetchMessages,
19
+ userMessages,
20
+ matchesAny,
21
+ CHALLENGE_PATTERNS,
22
+ OVERRIDE_PATTERNS,
23
+ ACCEPT_PATTERNS,
24
+ } = require('../text-signals');
25
+ const { explainIndicator, rollupDimension, scoreToLevel, H2 } = require('../thresholds-v2');
26
+ const { makeEvidence } = require('../evidence-builder');
27
+ const { mergeIndicator, dimensionSource } = require('./llm-merge');
28
+
29
/**
 * Score the H2 (Judgement & Decision) dimension for one session.
 *
 * Three rule-based rates are computed from the session's messages:
 *   - challenge   : user messages matching CHALLENGE_PATTERNS / all user messages
 *   - override    : user messages that directly follow an assistant message and
 *                   match OVERRIDE_PATTERNS / all user messages that directly
 *                   follow an assistant message (null when there are none)
 *   - accept_rate : user messages whose trimmed text is at most 30 characters
 *                   and matches ACCEPT_PATTERNS / all user messages
 * All three stay null when no user message carries any text. Each rate is
 * passed to explainIndicator, and the resulting score/level is combined with
 * the matching entry of `llmCell` through mergeIndicator.
 *
 * @param {*} db  handle forwarded to fetchMessages
 * @param {{id: *}} session  only `session.id` is read here
 * @param {number} [difficulty=2]  forwarded to explainIndicator and makeEvidence
 * @param {?Object} [llmCell=null]  optional LLM cells keyed by
 *   `challenge`, `override` and `accept_rate`
 * @returns {{subScores: Object, subLevels: Object, subEvidence: Object,
 *   raw: Object, score: *, level: *, judgeSource: *}}
 */
function analyze(db, session, difficulty = 2, llmCell = null) {
  const messages = fetchMessages(db, session.id);
  const users = userMessages(messages);
  const haveText = users.some((msg) => msg.text && msg.text.length > 0);

  let challenge = null;
  let override = null;
  let accept = null;
  let challengeHits = 0;
  let overrideHits = 0;
  let overrideEligible = 0;
  let acceptHits = 0;

  // `haveText` can only be true when `users` is non-empty, so the divisions
  // by users.length below never divide by zero.
  if (haveText) {
    // A message without text cannot match. Guard it here the same way the
    // override and accept passes do, rather than relying on matchesAny to
    // tolerate a missing text.
    challengeHits = users.filter(
      (msg) => Boolean(msg.text) && matchesAny(msg.text, CHALLENGE_PATTERNS)
    ).length;
    challenge = challengeHits / users.length;

    // Override rate: user messages directly after an assistant reply
    // that explicitly override its choice. Index 0 has no predecessor.
    for (let i = 1; i < messages.length; i++) {
      const current = messages[i];
      if (current.role !== 'user') continue;
      if (messages[i - 1]?.role !== 'assistant') continue;
      overrideEligible++;
      if (current.text && matchesAny(current.text, OVERRIDE_PATTERNS)) overrideHits++;
    }
    override = overrideEligible > 0 ? overrideHits / overrideEligible : null;

    // Accept rate: short pure-affirmation messages.
    acceptHits = users.filter((msg) => {
      const trimmed = (msg.text || '').trim();
      if (!trimmed || trimmed.length > 30) return false;
      return matchesAny(trimmed, ACCEPT_PATTERNS);
    }).length;
    accept = acceptHits / users.length;
  }

  const challengeE = explainIndicator(H2.challenge, challenge, difficulty);
  const overrideE = explainIndicator(H2.override, override, difficulty);
  const acceptE = explainIndicator(H2.accept_rate, accept, difficulty);

  // Combine each rule result with its LLM cell, when one was supplied.
  const cell = llmCell || {};
  const merged = {
    challenge: mergeIndicator(cell.challenge, challengeE.score, challengeE.level),
    override: mergeIndicator(cell.override, overrideE.score, overrideE.level),
    accept_rate: mergeIndicator(cell.accept_rate, acceptE.score, acceptE.level),
  };

  const subScores = {
    challenge: merged.challenge.score,
    override: merged.override.score,
    accept_rate: merged.accept_rate.score,
  };
  const subLevels = {
    challenge: merged.challenge.level,
    override: merged.override.level,
    accept_rate: merged.accept_rate.level,
  };

  // Evidence text: the merged evidence wins; otherwise a rule-based summary,
  // or a "no text" notice when nothing could be analysed.
  const noTextNote = '无文本,无法识别。';
  const subEvidence = {
    challenge: makeEvidence({
      key: 'challenge',
      label: '合理质疑率',
      what: merged.challenge.evidence
        || (haveText ? `规则版:质疑关键词命中 ${challengeHits}/${users.length}。` : noTextNote),
      expl: challengeE,
      unit: '%',
      difficulty,
    }),
    override: makeEvidence({
      key: 'override',
      label: '推翻率',
      what: merged.override.evidence
        || (haveText ? `规则版:推翻关键词命中 ${overrideHits}/${overrideEligible}。` : noTextNote),
      expl: overrideE,
      unit: '%',
      difficulty,
    }),
    accept_rate: makeEvidence({
      key: 'accept_rate',
      label: '顺从/采纳判断',
      what: merged.accept_rate.evidence
        || (haveText ? `规则版(兜底):纯肯定短消息占比 ${acceptHits}/${users.length}。LLM 开启后改为判断"采纳前是否有判断"。` : noTextNote),
      expl: acceptE,
      unit: '%',
      difficulty,
    }),
  };

  // Record on each evidence entry which source its indicator came from.
  subEvidence.challenge.source = merged.challenge.source;
  subEvidence.override.source = merged.override.source;
  subEvidence.accept_rate.source = merged.accept_rate.source;

  const score = rollupDimension('H2', subScores);
  const level = scoreToLevel(score);
  const judgeSource = dimensionSource([
    merged.challenge.source,
    merged.override.source,
    merged.accept_rate.source,
  ]);

  return {
    subScores,
    subLevels,
    subEvidence,
    raw: {
      challenge,
      override,
      accept_rate: accept,
      difficulty,
      haveText,
      challengeHits,
      overrideHits,
      overrideEligible,
      acceptHits,
    },
    score,
    level,
    judgeSource,
  };
}
110
+
111
+ module.exports = { analyze };
@@ -1,57 +1,59 @@
1
- // server/analysis/dimensions/llm-merge.js
2
- 'use strict';
3
-
4
- const { LEVEL_SCORE, scoreToLevel } = require('../thresholds-v2');
5
-
6
- /** Minimum LLM self-reported confidence to trust over the rule fallback. */
7
- const CONF_THRESHOLD = 0.5;
8
-
9
- /**
10
- * Merge one LLM-judged indicator cell with the rule-derived score.
11
- *
12
- * The LLM may report either a granular 0–100 `score` (preferred — gives
13
- * non-blocky sub-scores) or, for back-compat with older cached payloads, a
14
- * discrete `level` (1–4). With a score we derive the level band via
15
- * scoreToLevel; with only a level we fall back to the level's centred score.
16
- *
17
- * @param {{score:?number, level:?number, confidence:?number, evidence:?string}|null|undefined} cell
18
- * @param {number|null} ruleScore centred score from explainIndicator (fallback)
19
- * @param {number|null} ruleLevel
20
- * @returns {{score:number|null, level:number|null, source:'llm'|'rules', evidence:string|null}}
21
- */
22
- function mergeIndicator(cell, ruleScore, ruleLevel) {
23
- if (cell && typeof cell.confidence === 'number' && cell.confidence >= CONF_THRESHOLD) {
24
- // Preferred: granular 0–100 score (kept to one decimal).
25
- if (typeof cell.score === 'number' && Number.isFinite(cell.score)
26
- && cell.score >= 0 && cell.score <= 100) {
27
- const score = Math.round(cell.score * 10) / 10;
28
- return { score, level: scoreToLevel(score), source: 'llm', evidence: cell.evidence || null };
29
- }
30
- // Back-compat: discrete level → centred score.
31
- if (Number.isInteger(cell.level) && cell.level >= 1 && cell.level <= 4) {
32
- return { score: LEVEL_SCORE[cell.level], level: cell.level, source: 'llm', evidence: cell.evidence || null };
33
- }
34
- }
35
- return {
36
- score: ruleScore ?? null,
37
- level: ruleLevel ?? null,
38
- source: 'rules',
39
- evidence: null,
40
- };
41
- }
42
-
43
- /**
44
- * Aggregate per-indicator sources into a dimension-level source label.
45
- * @param {Array<'llm'|'rules'|null>} sources
46
- * @returns {'llm'|'rules'|'mixed'|null}
47
- */
48
- function dimensionSource(sources) {
49
- const used = sources.filter(Boolean);
50
- if (!used.length) return null;
51
- const hasLlm = used.includes('llm');
52
- const hasRules = used.includes('rules');
53
- if (hasLlm && hasRules) return 'mixed';
54
- return hasLlm ? 'llm' : 'rules';
55
- }
56
-
57
- module.exports = { mergeIndicator, dimensionSource, CONF_THRESHOLD };
1
+ // server/analysis/dimensions/llm-merge.js
2
+ 'use strict';
3
+
4
+ const { LEVEL_SCORE, scoreToLevel } = require('../thresholds-v2');
5
+
6
+ /** Minimum LLM self-reported confidence to trust over the rule fallback. */
7
+ const CONF_THRESHOLD = 0.5;
8
+
9
/**
 * Pick between an LLM-judged indicator cell and the rule-derived result.
 *
 * The LLM cell is used only when its `confidence` is a number of at least
 * CONF_THRESHOLD and it carries a usable value:
 *   1. a finite `score` within 0–100 (rounded to one decimal; the level is
 *      then derived with scoreToLevel), or otherwise
 *   2. an integer `level` within 1–4 (the score is then LEVEL_SCORE[level]).
 * In every other case the rule values are returned unchanged.
 *
 * @param {{score:?number, level:?number, confidence:?number, evidence:?string}|null|undefined} cell
 * @param {number|null} ruleScore rule-based score used as the fallback
 * @param {number|null} ruleLevel rule-based level used as the fallback
 * @returns {{score:number|null, level:number|null, source:'llm'|'rules'|null, evidence:string|null}}
 *   `source` is null when the fallback is taken and no rule score was given.
 */
function mergeIndicator(cell, ruleScore, ruleLevel) {
  // Rule-derived result. Without a rule score the indicator counts as
  // "not assessed", so it is not labelled 'rules'.
  const fallback = {
    score: ruleScore ?? null,
    level: ruleLevel ?? null,
    source: ruleScore == null ? null : 'rules',
    evidence: null,
  };

  const trusted = Boolean(cell)
    && typeof cell.confidence === 'number'
    && cell.confidence >= CONF_THRESHOLD;
  if (!trusted) {
    return fallback;
  }

  // Preferred: granular 0–100 score, kept to one decimal.
  const llmScore = cell.score;
  if (Number.isFinite(llmScore) && llmScore >= 0 && llmScore <= 100) {
    const rounded = Math.round(llmScore * 10) / 10;
    return {
      score: rounded,
      level: scoreToLevel(rounded),
      source: 'llm',
      evidence: cell.evidence || null,
    };
  }

  // Back-compat: a discrete level maps to its table score.
  const llmLevel = cell.level;
  if (Number.isInteger(llmLevel) && llmLevel >= 1 && llmLevel <= 4) {
    return {
      score: LEVEL_SCORE[llmLevel],
      level: llmLevel,
      source: 'llm',
      evidence: cell.evidence || null,
    };
  }

  return fallback;
}
44
+
45
/**
 * Collapse the per-indicator source labels into one label for the dimension.
 *
 * Falsy entries are ignored. With nothing left the result is null; with both
 * 'llm' and 'rules' present it is 'mixed'; with 'llm' present it is 'llm';
 * in every remaining case it is 'rules'.
 *
 * @param {Array<'llm'|'rules'|null>} sources
 * @returns {'llm'|'rules'|'mixed'|null}
 */
function dimensionSource(sources) {
  const labels = new Set(sources.filter((label) => Boolean(label)));
  if (labels.size === 0) {
    return null;
  }
  if (!labels.has('llm')) {
    return 'rules';
  }
  return labels.has('rules') ? 'mixed' : 'llm';
}
58
+
59
+ module.exports = { mergeIndicator, dimensionSource, CONF_THRESHOLD };