@nookplot/mcp 0.4.108 → 0.4.110
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +293 -293
- package/SKILL.md +145 -145
- package/dist/auth.d.ts +5 -112
- package/dist/auth.d.ts.map +1 -1
- package/dist/auth.js +53 -294
- package/dist/auth.js.map +1 -1
- package/dist/gateway.d.ts.map +1 -1
- package/dist/gateway.js +1 -5
- package/dist/gateway.js.map +1 -1
- package/dist/index.d.ts +1 -12
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +51 -613
- package/dist/index.js.map +1 -1
- package/dist/server.js +81 -81
- package/dist/setup.js +7 -7
- package/dist/tools/cognitiveWorkspace.d.ts.map +1 -1
- package/dist/tools/cognitiveWorkspace.js +0 -30
- package/dist/tools/cognitiveWorkspace.js.map +1 -1
- package/dist/tools/ecosystem.d.ts.map +1 -1
- package/dist/tools/ecosystem.js +5 -1
- package/dist/tools/ecosystem.js.map +1 -1
- package/dist/tools/forgePresets.d.ts +2 -7
- package/dist/tools/forgePresets.d.ts.map +1 -1
- package/dist/tools/forgePresets.js +3 -133
- package/dist/tools/forgePresets.js.map +1 -1
- package/dist/tools/index.d.ts.map +1 -1
- package/dist/tools/index.js +2 -4
- package/dist/tools/index.js.map +1 -1
- package/dist/tools/knowledgeGraph.js +1 -1
- package/dist/tools/knowledgeGraph.js.map +1 -1
- package/dist/tools/memory.d.ts.map +1 -1
- package/dist/tools/memory.js +33 -0
- package/dist/tools/memory.js.map +1 -1
- package/dist/tools/miningPipeline.d.ts +2 -6
- package/dist/tools/miningPipeline.d.ts.map +1 -1
- package/dist/tools/miningPipeline.js +3 -392
- package/dist/tools/miningPipeline.js.map +1 -1
- package/dist/tools/onchain.js +6 -6
- package/dist/tools/onchain.js.map +1 -1
- package/dist/tools/papers.d.ts.map +1 -1
- package/dist/tools/papers.js +0 -16
- package/dist/tools/papers.js.map +1 -1
- package/dist/tools/read.d.ts.map +1 -1
- package/dist/tools/read.js +6 -27
- package/dist/tools/read.js.map +1 -1
- package/dist/tools/reasoningWork.js +60 -60
- package/dist/tools/swarms.d.ts.map +1 -1
- package/dist/tools/swarms.js +1 -21
- package/dist/tools/swarms.js.map +1 -1
- package/dist/tools/write.d.ts.map +1 -1
- package/dist/tools/write.js +42 -0
- package/dist/tools/write.js.map +1 -1
- package/package.json +96 -96
- package/skills/learn/SKILL.md +70 -70
- package/skills/mine/SKILL.md +85 -85
- package/skills/nookplot/SKILL.md +222 -222
- package/skills/social/SKILL.md +84 -84
- package/dist/profileName.d.ts +0 -65
- package/dist/profileName.d.ts.map +0 -1
- package/dist/profileName.js +0 -114
- package/dist/profileName.js.map +0 -1
- package/dist/syncSessions.d.ts +0 -84
- package/dist/syncSessions.d.ts.map +0 -1
- package/dist/syncSessions.js +0 -260
- package/dist/syncSessions.js.map +0 -1
- package/dist/syncSessionsExtractor.d.ts +0 -123
- package/dist/syncSessionsExtractor.d.ts.map +0 -1
- package/dist/syncSessionsExtractor.js +0 -362
- package/dist/syncSessionsExtractor.js.map +0 -1
- package/dist/syncSessionsState.d.ts +0 -89
- package/dist/syncSessionsState.d.ts.map +0 -1
- package/dist/syncSessionsState.js +0 -145
- package/dist/syncSessionsState.js.map +0 -1
- package/dist/tools/rlmMining.d.ts +0 -36
- package/dist/tools/rlmMining.d.ts.map +0 -1
- package/dist/tools/rlmMining.js +0 -388
- package/dist/tools/rlmMining.js.map +0 -1
- package/skills/hermes/nookplot/DESCRIPTION.md +0 -59
- package/skills/hermes/nookplot/daemon/SKILL.md +0 -103
- package/skills/hermes/nookplot/learn/SKILL.md +0 -131
- package/skills/hermes/nookplot/mine/SKILL.md +0 -111
- package/skills/hermes/nookplot/social/SKILL.md +0 -104
- package/skills/hermes/nookplot/sync/SKILL.md +0 -110
|
@@ -1,123 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Phase 2b — heuristic extractor for Hermes session files.
|
|
3
|
-
*
|
|
4
|
-
* Given a parsed Hermes session JSON (shape observed in
|
|
5
|
-
* `~/.hermes/sessions/session_*.json`), produces a list of
|
|
6
|
-
* capture-queue-ready items without calling an LLM. The LLM path is
|
|
7
|
-
* intentionally deferred — the plan suggests using the user's configured
|
|
8
|
-
* provider, but:
|
|
9
|
-
*
|
|
10
|
-
* - Shipping an extractor with zero external deps means `sync-sessions`
|
|
11
|
-
* just works the moment it's installed, no BYOK setup required.
|
|
12
|
-
* - Heuristic misses are caught by the gateway's quality gate +
|
|
13
|
-
* ContentScanner, so low-value captures don't pollute the KG.
|
|
14
|
-
* - The code below is structured around a pure function
|
|
15
|
-
* `extractFromSession(sessionJson) -> ExtractedItem[]`, so the LLM
|
|
16
|
-
* strategy can slot in later behind the same interface.
|
|
17
|
-
*
|
|
18
|
-
* Heuristic rules (conservative on purpose — we'd rather under-capture
|
|
19
|
-
* than spam the review queue):
|
|
20
|
-
*
|
|
21
|
-
* - A FINDING is extracted when a session has ≥2 tool-call invocations
|
|
22
|
-
* followed by an assistant text turn. The final assistant text is
|
|
23
|
-
* the body; the first user message is the title / taskSummary.
|
|
24
|
-
* - A REASONING TRACE is extracted when a session has ≥2 assistant
|
|
25
|
-
* text turns interleaved with tool calls. Each text turn becomes a
|
|
26
|
-
* step; the last becomes the conclusion.
|
|
27
|
-
* - Body must be ≥200 chars for findings, ≥50 for conclusions.
|
|
28
|
-
* Shorter syntheses aren't worth queuing.
|
|
29
|
-
* - Tool-call outputs are NEVER used as the body — only the assistant's
|
|
30
|
-
* own text. This is the Phase 2d §6 mitigation against session
|
|
31
|
-
* transcript poisoning.
|
|
32
|
-
*
|
|
33
|
-
* @module syncSessionsExtractor
|
|
34
|
-
*/
|
|
35
|
-
/**
 * A single entry of a Hermes session's `messages[]` array.
 *
 * Only `role` is mandatory; the remaining fields are optional and depend
 * on the kind of turn.
 */
export interface HermesMessage {
    /** Who produced this turn. */
    role: "user" | "assistant" | "tool" | "system";
    /** Text of the turn; may be missing or `null`. */
    content?: string | null;
    /** Tool invocations attached to an assistant turn. */
    tool_calls?: {
        id?: string;
        type?: string;
        function?: {
            name?: string;
            arguments?: string;
        };
    }[];
    /** On `tool` turns: refers back to the assistant's call. */
    tool_call_id?: string;
    /** On `tool` turns: the tool that produced the result. */
    name?: string;
}
|
|
53
|
-
/**
 * The slice of a Hermes session file this module works with.
 * Every field is optional.
 */
export interface HermesSession {
    session_id?: string;
    session_start?: string;
    last_updated?: string;
    /** Model identifier recorded for the session, if any. */
    model?: string;
    /** Conversation turns, in order. */
    messages?: HermesMessage[];
}
|
|
61
|
-
/**
 * One extracted capture. `kind` discriminates the two variants and
 * `payload` mirrors the `payload` field the capture-queue endpoint
 * expects; `hash` is a content hash used for local dedup.
 */
export type ExtractedItem =
    | {
        kind: "finding";
        hash: string;
        payload: {
            title: string;
            body: string;
            sources?: string[];
            domain?: string;
            tags?: string[];
        };
    }
    | {
        kind: "reasoning";
        hash: string;
        payload: {
            taskSummary: string;
            steps: {
                step: string;
                rationale?: string;
            }[];
            conclusion: string;
            modelUsed?: string;
        };
    };
|
|
89
|
-
/**
 * Extract a FINDING from a session that researched and then synthesized.
 *
 * The session must satisfy all of the following, otherwise `null` is
 * returned (the common case — most sessions are one-shot Q&A):
 *   - at least 3 messages and ≥2 tool-call invocations (fewer is a
 *     trivial lookup, not a finding);
 *   - the final non-empty assistant text is ≥200 chars;
 *   - there is a non-empty user prompt to use as the title.
 *
 * @param session Parsed Hermes session.
 * @returns A `kind: "finding"` item, or `null` when the session does not match.
 */
export declare function extractFindingHeuristic(session: HermesSession): ExtractedItem | null;
|
|
101
|
-
/**
 * Extract a REASONING TRACE from a session with multi-step thinking.
 *
 * Preconditions:
 *   - ≥2 non-empty assistant text turns (steps + conclusion);
 *   - ≥2 tool-call invocations OR total message count ≥ 5 (so a
 *     pure-text chain-of-thought still qualifies if it had structure);
 *   - a non-empty user prompt, used as the task summary;
 *   - conclusion length ≥50 chars.
 *
 * The `steps` array is built from every assistant text turn EXCEPT the
 * last one; the last becomes the `conclusion`. With only one text turn
 * the function bails — single-step "reasoning" is just a finding.
 *
 * @param session Parsed Hermes session.
 * @returns A `kind: "reasoning"` item, or `null` when the session does not match.
 */
export declare function extractReasoningHeuristic(session: HermesSession): ExtractedItem | null;
|
|
115
|
-
/**
 * Main extraction entry point. Runs both heuristic extractors and
 * returns every item that matched (finding first, then reasoning trace).
 *
 * A single session may legitimately yield both: a research session that
 * pivoted midway has a synthesis (the finding) and a process worth
 * remembering (the reasoning trace). An empty array means nothing matched.
 *
 * @param session Parsed Hermes session.
 * @returns Zero, one, or two extracted items.
 */
export declare function extractFromSession(session: HermesSession): ExtractedItem[];
|
|
123
|
-
//# sourceMappingURL=syncSessionsExtractor.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"syncSessionsExtractor.d.ts","sourceRoot":"","sources":["../src/syncSessionsExtractor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AASH,4DAA4D;AAC5D,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,MAAM,GAAG,QAAQ,CAAC;IAC/C,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,qDAAqD;IACrD,UAAU,CAAC,EAAE,KAAK,CAAC;QACjB,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,QAAQ,CAAC,EAAE;YAAE,IAAI,CAAC,EAAE,MAAM,CAAC;YAAC,SAAS,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC;KAClD,CAAC,CAAC;IACH,uEAAuE;IACvE,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,mEAAmE;IACnE,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,yDAAyD;AACzD,MAAM,WAAW,aAAa;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,aAAa,EAAE,CAAC;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;;;GAIG;AACH,MAAM,MAAM,aAAa,GACrB;IACE,IAAI,EAAE,SAAS,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE;QACP,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;QACnB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;KACjB,CAAC;CACH,GACD;IACE,IAAI,EAAE,WAAW,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE;QACP,WAAW,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,KAAK,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,SAAS,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;QACnD,UAAU,EAAE,MAAM,CAAC;QACnB,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC;CACH,CAAC;AAyNN;;;;;;;;;;GAUG;AACH,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,aAAa,GAAG,aAAa,GAAG,IAAI,CA0CpF;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,aAAa,GAAG,aAAa,GAAG,IAAI,CAyCtF;AAED;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,aAAa,GAAG,aAAa,EAAE,CAO1E"}
|
|
@@ -1,362 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Phase 2b — heuristic extractor for Hermes session files.
|
|
3
|
-
*
|
|
4
|
-
* Given a parsed Hermes session JSON (shape observed in
|
|
5
|
-
* `~/.hermes/sessions/session_*.json`), produces a list of
|
|
6
|
-
* capture-queue-ready items without calling an LLM. The LLM path is
|
|
7
|
-
* intentionally deferred — the plan suggests using the user's configured
|
|
8
|
-
* provider, but:
|
|
9
|
-
*
|
|
10
|
-
* - Shipping an extractor with zero external deps means `sync-sessions`
|
|
11
|
-
* just works the moment it's installed, no BYOK setup required.
|
|
12
|
-
* - Heuristic misses are caught by the gateway's quality gate +
|
|
13
|
-
* ContentScanner, so low-value captures don't pollute the KG.
|
|
14
|
-
* - The code below is structured around a pure function
|
|
15
|
-
* `extractFromSession(sessionJson) -> ExtractedItem[]`, so the LLM
|
|
16
|
-
* strategy can slot in later behind the same interface.
|
|
17
|
-
*
|
|
18
|
-
* Heuristic rules (conservative on purpose — we'd rather under-capture
|
|
19
|
-
* than spam the review queue):
|
|
20
|
-
*
|
|
21
|
-
* - A FINDING is extracted when a session has ≥2 tool-call invocations
|
|
22
|
-
* followed by an assistant text turn. The final assistant text is
|
|
23
|
-
* the body; the first user message is the title / taskSummary.
|
|
24
|
-
* - A REASONING TRACE is extracted when a session has ≥2 assistant
|
|
25
|
-
* text turns interleaved with tool calls. Each text turn becomes a
|
|
26
|
-
* step; the last becomes the conclusion.
|
|
27
|
-
* - Body must be ≥200 chars for findings, ≥50 for conclusions.
|
|
28
|
-
* Shorter syntheses aren't worth queuing.
|
|
29
|
-
* - Tool-call outputs are NEVER used as the body — only the assistant's
|
|
30
|
-
* own text. This is the Phase 2d §6 mitigation against session
|
|
31
|
-
* transcript poisoning.
|
|
32
|
-
*
|
|
33
|
-
* @module syncSessionsExtractor
|
|
34
|
-
*/
|
|
35
|
-
import { createHash } from "node:crypto";
|
|
36
|
-
// ---------------------------------------------------------------------------
|
|
37
|
-
// Helpers
|
|
38
|
-
// ---------------------------------------------------------------------------
|
|
39
|
-
/**
 * Canonical form of a string for hashing. Applied in this order:
 *   1. Unicode NFKD normalization (so precomposed and decomposed
 *      spellings such as "Café" produce the same text),
 *   2. trim,
 *   3. lowercase,
 *   4. collapse every run of whitespace to a single space.
 *
 * This is meant to mirror the gateway's
 * `captureQueueService.ts::canonicalizeForHash` so that a locally
 * computed hash equals the server-side one; keep the two in sync.
 * NOTE(review): the gateway implementation is not visible here — confirm
 * the steps still match before changing either side.
 *
 * @param {string} s Raw text.
 * @returns {string} Canonicalized text.
 */
function normalize(s) {
    const decomposed = s.normalize("NFKD");
    const lowered = decomposed.trim().toLowerCase();
    return lowered.replace(/\s+/g, " ");
}
|
|
53
|
-
/**
 * SHA-256 hex digest identifying an extracted item.
 *
 * The digest covers, in order: the item kind, a NUL separator, and the
 * canonical JSON of the payload. Canonical JSON sorts object keys, so two
 * extractor strategies that emit the same fields in a different order
 * still produce the same hash.
 *
 * @param {string} kind Item kind (e.g. "finding" or "reasoning").
 * @param {unknown} payload Payload to fingerprint.
 * @returns {string} Lowercase hex digest.
 */
function hashPayload(kind, payload) {
    return createHash("sha256")
        .update(kind)
        .update("\0")
        .update(canonicalJson(payload))
        .digest("hex");
}
|
|
69
|
-
/**
 * Deterministic JSON serialization used as hash input.
 *
 * - Strings are passed through `normalize` before being quoted.
 * - Arrays keep their element order.
 * - Object keys are sorted at every depth, so key order never affects
 *   the output.
 * - Everything else (numbers, booleans, null) uses `JSON.stringify`.
 *
 * @param {unknown} value Value to serialize.
 * @returns {string} Canonical JSON text.
 */
function canonicalJson(value) {
    if (typeof value === "string") {
        return JSON.stringify(normalize(value));
    }
    if (Array.isArray(value)) {
        const items = value.map((item) => canonicalJson(item));
        return "[" + items.join(",") + "]";
    }
    if (value && typeof value === "object") {
        const sortedKeys = Object.keys(value).sort();
        const members = sortedKeys.map((key) => JSON.stringify(key) + ":" + canonicalJson(value[key]));
        return "{" + members.join(",") + "}";
    }
    return JSON.stringify(value);
}
|
|
88
|
-
/**
 * Find the first user message that has non-blank string content and
 * return it trimmed. It serves as the natural title / task summary.
 *
 * @param {Array<{role: string, content?: string|null}>} messages Session messages, in order.
 * @returns {string|null} Trimmed prompt text, or `null` if no user turn has text.
 */
function firstUserPrompt(messages) {
    for (const msg of messages) {
        if (msg.role !== "user" || typeof msg.content !== "string") {
            continue;
        }
        const text = msg.content.trim();
        if (text.length > 0) {
            return text;
        }
    }
    return null;
}
|
|
101
|
-
/**
 * Collect the trimmed text of every assistant turn that has non-blank
 * string content, preserving message order.
 *
 * @param {Array<{role: string, content?: string|null}>} messages Session messages.
 * @returns {string[]} Trimmed assistant texts (possibly empty).
 */
function assistantTextTurns(messages) {
    const texts = [];
    for (const msg of messages) {
        if (msg.role !== "assistant" || typeof msg.content !== "string") {
            continue;
        }
        const text = msg.content.trim();
        if (text.length > 0) {
            texts.push(text);
        }
    }
    return texts;
}
|
|
111
|
-
/**
 * Total number of tool-call invocations in the session: the sum of
 * `tool_calls.length` over assistant turns. `tool_calls` on any other
 * role is ignored.
 *
 * @param {Array<{role: string, tool_calls?: unknown[]}>} messages Session messages.
 * @returns {number} Invocation count.
 */
function toolCallCount(messages) {
    let total = 0;
    for (const msg of messages) {
        const isAssistantWithCalls = msg.role === "assistant" && Array.isArray(msg.tool_calls);
        total += isAssistantWithCalls ? msg.tool_calls.length : 0;
    }
    return total;
}
|
|
121
|
-
/**
 * Reduce a tool name to a safe tag fragment.
 *
 * Tool names come from the session JSON (`tool_calls[].function.name`),
 * i.e. from whichever MCP server the agent talked to, so they are
 * untrusted: a hostile server could name a tool `<script>…` or embed
 * SQL-looking text. Those strings end up in the capture row's `tags[]`
 * and in any UI that renders them.
 *
 * Only ASCII letters, digits, `_`, `:`, `.` and `-` survive (names are
 * often of the form "server:tool"); everything else is removed. The
 * result is capped at 64 characters so an oversized name cannot bloat
 * the row.
 *
 * @param {string} name Raw tool name.
 * @returns {string} Sanitized name; may be empty.
 */
function sanitizeToolName(name) {
    const allowedOnly = name.replace(/[^a-zA-Z0-9_:.\-]/g, "");
    return allowedOnly.slice(0, 64);
}
|
|
140
|
-
/**
 * Distinct, sanitized names of the tools invoked by assistant turns, in
 * first-seen order. Used for domain inference and tagging.
 *
 * Every name goes through `sanitizeToolName` before it is stored, so the
 * result never contains raw, untrusted text; names that sanitize to an
 * empty string are dropped.
 *
 * @param {Array<{role: string, tool_calls?: Array<{function?: {name?: string}}>}>} messages Session messages.
 * @returns {string[]} Unique sanitized tool names.
 */
function distinctToolNames(messages) {
    const unique = new Set();
    for (const msg of messages) {
        if (msg.role !== "assistant" || !Array.isArray(msg.tool_calls)) {
            continue;
        }
        for (const call of msg.tool_calls) {
            const rawName = call.function?.name;
            if (typeof rawName !== "string" || rawName.length === 0) {
                continue;
            }
            const safeName = sanitizeToolName(rawName);
            if (safeName.length > 0) {
                unique.add(safeName);
            }
        }
    }
    return [...unique];
}
|
|
159
|
-
/**
 * Guess a coarse domain from the tools a session used. Deliberately
 * conservative: a domain is returned only when the evidence points one
 * way; mixed research + code evidence yields `undefined`.
 *
 * Matching is case-insensitive:
 *   - research    — a name contains "web_search", "browser", "arxiv",
 *                   "scholar" or "search", and no code tool was used;
 *   - nookplot    — a name starts with "nookplot_", with neither research
 *                   nor code tools present;
 *   - engineering — a name contains "read_file", "edit", "bash" or
 *                   "grep", and no research tool was used.
 *
 * @param {string[]} toolNames Tool names used in the session.
 * @returns {string|undefined} "research", "nookplot", "engineering", or `undefined`.
 */
function inferDomain(toolNames) {
    const researchHints = ["web_search", "browser", "arxiv", "scholar", "search"];
    const codeHints = ["read_file", "edit", "bash", "grep"];
    const lowered = toolNames.map((name) => name.toLowerCase());
    const mentionsAny = (hints) => lowered.some((name) => hints.some((hint) => name.includes(hint)));
    const usedResearch = mentionsAny(researchHints);
    const usedCode = mentionsAny(codeHints);
    const usedNookplot = lowered.some((name) => name.startsWith("nookplot_"));
    if (usedResearch) {
        // Research mixed with code is ambiguous.
        return usedCode ? undefined : "research";
    }
    if (usedCode) {
        return "engineering";
    }
    return usedNookplot ? "nookplot" : undefined;
}
|
|
177
|
-
/**
 * Pull http(s) URLs out of assistant-written text.
 *
 * Only the assistant's own synthesis is scanned, never tool output: a
 * URL the assistant chose to write down counts as a citation, whereas
 * tool output is not trusted as a source.
 *
 * A URL runs until whitespace, `)` or `]`. Trailing punctuation
 * (`. , ; : ! ? ) ]`) is stripped, URLs longer than 500 characters are
 * discarded, and duplicates are removed while keeping first-seen order.
 *
 * @param {string} text Assistant text to scan.
 * @returns {string[]} Unique cleaned URLs (possibly empty).
 */
function extractSourcesFromAssistantText(text) {
    const matches = text.match(/https?:\/\/[^\s)\]]+/g);
    if (!matches) {
        return [];
    }
    const unique = new Set();
    for (const raw of matches) {
        const url = raw.replace(/[.,;:!?)\]]+$/, "");
        if (url.length <= 500) {
            unique.add(url);
        }
    }
    return [...unique];
}
|
|
193
|
-
/**
 * Short, human-readable title from a longer string.
 *
 * Whitespace runs are collapsed to single spaces and the text is
 * trimmed. If the result fits in `max` characters it is returned as is.
 * Otherwise it is cut at `max`, backed up to the last space when that
 * space lies beyond index 30 (so the title does not end mid-word), and
 * an ellipsis ("…") is appended.
 *
 * A hard cut never leaves half of a UTF-16 surrogate pair behind: if the
 * cut would separate a high surrogate from its low surrogate (e.g. in
 * the middle of an emoji), the orphaned high surrogate is dropped too.
 *
 * @param {string} text Source text, typically the user's prompt.
 * @param {number} [max=70] Maximum length before the ellipsis is added.
 * @returns {string} The title, with a trailing "…" when truncated.
 */
function truncateTitle(text, max = 70) {
    const trimmed = text.replace(/\s+/g, " ").trim();
    if (trimmed.length <= max) {
        return trimmed;
    }
    let cut = trimmed.slice(0, max);
    // Do not split a surrogate pair: a lone high surrogate is invalid
    // UTF-16 and renders as a replacement character.
    const lastUnit = cut.charCodeAt(cut.length - 1);
    const nextUnit = trimmed.charCodeAt(max);
    const splitsPair = lastUnit >= 0xd800 && lastUnit <= 0xdbff &&
        nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
    if (splitsPair) {
        cut = cut.slice(0, -1);
    }
    const lastSpace = cut.lastIndexOf(" ");
    return (lastSpace > 30 ? cut.slice(0, lastSpace) : cut) + "…";
}
|
|
205
|
-
// ---------------------------------------------------------------------------
|
|
206
|
-
// Extractors
|
|
207
|
-
// ---------------------------------------------------------------------------
|
|
208
|
-
/** Shortest final assistant text (in chars) that is accepted as a finding body. */
const MIN_FINDING_BODY = 200;
/** Shortest conclusion (in chars) accepted for a reasoning trace. */
const MIN_REASONING_CONCLUSION = 50;
/**
 * Longest body / conclusion kept before truncation (audit fix B8).
 *
 * According to the original audit note, the gateway rejects oversized
 * payloads (a 16 KB JSON ceiling on the route is cited). With no cap, a
 * session with a very long synthesis would fail validation, still be
 * marked processed, and the capture would vanish silently. 9,500 chars
 * is meant to leave headroom for the JSON envelope, title, tags and
 * metadata under that ceiling.
 * NOTE(review): the gateway limits are not visible here — confirm the
 * figures before changing these values.
 */
const MAX_FINDING_BODY = 9500;
const MAX_REASONING_CONCLUSION = 9500;
/** Longest single reasoning step kept before truncation. */
const MAX_REASONING_STEP = 800;
|
|
228
|
-
/**
 * Shorten `text` to about `max` characters and append a marker recording
 * the original length.
 *
 * Text that already fits is returned unchanged. Otherwise the text is
 * cut at `max`; if a space is found within the last 50 characters of the
 * cut, the cut backs up to that space so the kept text does not end
 * mid-word. With no such space the text is hard-cut — better than losing
 * the capture entirely. The marker is appended after the kept text, so
 * the returned string is slightly longer than `max`.
 *
 * Edge cases handled:
 *   - a hard cut never separates a UTF-16 surrogate pair (the orphaned
 *     high surrogate is dropped);
 *   - for `max` below 50, a missing space (`lastIndexOf` returning -1) or
 *     a space at index 0 is not treated as a word boundary, so no
 *     character is silently dropped and the kept text is never empty.
 *
 * @param {string} text Text to shorten.
 * @param {number} max Maximum number of characters kept from `text`.
 * @returns {string} `text` itself, or the kept prefix plus the marker.
 */
function truncateWithEllipsis(text, max) {
    if (text.length <= max) {
        return text;
    }
    let cut = text.slice(0, max);
    // Do not split a surrogate pair: a lone high surrogate is invalid UTF-16.
    const lastUnit = cut.charCodeAt(cut.length - 1);
    const nextUnit = text.charCodeAt(max);
    const splitsPair = lastUnit >= 0xd800 && lastUnit <= 0xdbff &&
        nextUnit >= 0xdc00 && nextUnit <= 0xdfff;
    if (splitsPair) {
        cut = cut.slice(0, -1);
    }
    const lastWordBoundary = cut.lastIndexOf(" ");
    // `> 0` keeps -1 (no space) and 0 (leading space) from counting as a
    // boundary when `max - 50` is negative.
    const useBoundary = lastWordBoundary > 0 && lastWordBoundary > max - 50;
    const kept = useBoundary ? cut.slice(0, lastWordBoundary) : cut;
    return kept + "\n\n[truncated — original was " + text.length + " chars]";
}
|
|
241
|
-
/**
 * Extract a FINDING from a session that researched and then synthesized.
 *
 * Returns `null` unless all of these hold (most sessions are one-shot
 * Q&A and do not qualify):
 *   - at least 3 messages (user + assistant tool turn + assistant text);
 *   - ≥2 tool-call invocations — fewer is a trivial lookup;
 *   - the last non-empty assistant text is ≥ MIN_FINDING_BODY chars;
 *   - a non-empty user prompt exists to serve as the title.
 *
 * The body is the final assistant text, capped at MAX_FINDING_BODY
 * (audit fix B8) so an overly long synthesis is truncated rather than
 * rejected by the gateway. Source URLs are taken from the untruncated
 * text so links that appear only in the tail are kept.
 *
 * @param {object} session Parsed Hermes session.
 * @returns {object|null} A `kind: "finding"` item, or `null`.
 */
export function extractFindingHeuristic(session) {
    const messages = session.messages ?? [];
    if (messages.length < 3) {
        return null;
    }
    if (toolCallCount(messages) < 2) {
        return null;
    }
    const assistantTexts = assistantTextTurns(messages);
    if (assistantTexts.length === 0) {
        return null;
    }
    // The last assistant text is taken as the synthesis: the agent calls
    // tools until it has what it needs, then answers once.
    const synthesis = assistantTexts[assistantTexts.length - 1];
    if (synthesis.length < MIN_FINDING_BODY) {
        return null;
    }
    const userPrompt = firstUserPrompt(messages);
    if (!userPrompt) {
        return null;
    }
    const usedTools = distinctToolNames(messages);
    const domain = inferDomain(usedTools);
    const sources = extractSourcesFromAssistantText(synthesis);
    const payload = {
        title: truncateTitle(userPrompt),
        body: truncateWithEllipsis(synthesis, MAX_FINDING_BODY),
    };
    // Optional fields are added only when present, in a fixed order.
    if (sources.length > 0) {
        payload.sources = sources;
    }
    if (domain) {
        payload.domain = domain;
    }
    // Tag with (at most five of) the tools used so readers can filter by them.
    payload.tags = ["hermes-session"].concat(usedTools.slice(0, 5).map((tool) => `tool:${tool}`));
    return {
        kind: "finding",
        hash: hashPayload("finding", payload),
        payload,
    };
}
|
|
293
|
-
/**
 * Extract a REASONING TRACE from a session with multi-step thinking.
 *
 * Returns `null` unless all of these hold:
 *   - at least 3 messages;
 *   - ≥2 non-empty assistant text turns (steps + conclusion) — a single
 *     text turn is just a finding;
 *   - ≥2 tool-call invocations OR ≥5 messages in total, so a pure-text
 *     chain-of-thought still qualifies if it had structure;
 *   - a non-empty user prompt, used as the task summary;
 *   - the last assistant text is ≥ MIN_REASONING_CONCLUSION chars.
 *
 * Every assistant text turn except the last becomes a step (each capped
 * at MAX_REASONING_STEP chars); the last becomes the conclusion, capped
 * at MAX_REASONING_CONCLUSION (audit fix B8).
 *
 * NOTE(review): the number of steps is not capped, so a session with
 * many assistant text turns can still produce a large payload — confirm
 * against the gateway's payload-size limit.
 *
 * @param {object} session Parsed Hermes session.
 * @returns {object|null} A `kind: "reasoning"` item, or `null`.
 */
export function extractReasoningHeuristic(session) {
    const messages = session.messages ?? [];
    if (messages.length < 3) {
        return null;
    }
    const assistantTexts = assistantTextTurns(messages);
    if (assistantTexts.length < 2) {
        return null;
    }
    const hasStructure = toolCallCount(messages) >= 2 || messages.length >= 5;
    if (!hasStructure) {
        return null;
    }
    const userPrompt = firstUserPrompt(messages);
    if (!userPrompt) {
        return null;
    }
    const finalText = assistantTexts[assistantTexts.length - 1];
    if (finalText.length < MIN_REASONING_CONCLUSION) {
        return null;
    }
    const conclusion = truncateWithEllipsis(finalText, MAX_REASONING_CONCLUSION);
    const steps = [];
    for (const text of assistantTexts.slice(0, -1)) {
        // An over-long "step" is really another finding; keep it short.
        if (text.length > MAX_REASONING_STEP) {
            steps.push({ step: truncateWithEllipsis(text, MAX_REASONING_STEP) });
        }
        else {
            steps.push({ step: text });
        }
    }
    const payload = {
        taskSummary: truncateTitle(userPrompt, 150),
        steps,
        conclusion,
    };
    if (session.model) {
        payload.modelUsed = session.model;
    }
    return {
        kind: "reasoning",
        hash: hashPayload("reasoning", payload),
        payload,
    };
}
|
|
345
|
-
/**
 * Main extraction entry point. Runs the finding extractor and then the
 * reasoning extractor, and returns every item that matched, in that order.
 *
 * One session may yield both: a research session that pivoted midway has
 * a synthesis (the finding) and a process worth remembering (the
 * reasoning trace). An empty array means nothing matched.
 *
 * @param {object} session Parsed Hermes session.
 * @returns {object[]} Zero, one, or two extracted items.
 */
export function extractFromSession(session) {
    const candidates = [
        extractFindingHeuristic(session),
        extractReasoningHeuristic(session),
    ];
    return candidates.filter(Boolean);
}
|
|
362
|
-
//# sourceMappingURL=syncSessionsExtractor.js.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"syncSessionsExtractor.js","sourceRoot":"","sources":["../src/syncSessionsExtractor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AA4DzC,8EAA8E;AAC9E,WAAW;AACX,8EAA8E;AAE9E;;;;;;;;;;GAUG;AACH,SAAS,SAAS,CAAC,CAAS;IAC1B,OAAO,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AACvE,CAAC;AAED;;;;;GAKG;AACH,SAAS,WAAW,CAAC,IAA6B,EAAE,OAAgB;IAClE,MAAM,CAAC,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;IAC/B,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACf,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACf,qEAAqE;IACrE,qEAAqE;IACrE,mCAAmC;IACnC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC;IACjC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AACzB,CAAC;AAED;;;;GAIG;AACH,SAAS,aAAa,CAAC,KAAc;IACnC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,KAAK,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;IACnD,CAAC;IACD,IAAI,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QACvC,MAAM,GAAG,GAAG,KAAgC,CAAC;QAC7C,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACrC,OAAO,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;IAC3F,CAAC;IACD,sEAAsE;IACtE,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;IACvE,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;AAC/B,CAAC;AAED;;;;GAIG;AACH,SAAS,eAAe,CAAC,QAAyB;IAChD,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,KAAK,QAAQ,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtF,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QAC1B,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,iEAAiE;AACjE,SAAS,kBAAkB,CAAC,QAAyB;IACnD,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,OAAO,CAAC,CAAC,OAAO,KAAK,QAAQ,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YAC3
F,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,kEAAkE;AAClE,SAAS,aAAa,CAAC,QAAyB;IAC9C,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC;YAC1D,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC;QAC3B,CAAC;IACH,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,SAAS,gBAAgB,CAAC,IAAY;IACpC,OAAO,IAAI,CAAC,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAC7D,CAAC;AAED,+DAA+D;AAC/D,SAAS,iBAAiB,CAAC,QAAyB;IAClD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC;YAC1D,KAAK,MAAM,EAAE,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC;gBAC9B,MAAM,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,IAAI,CAAC;gBAC5B,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC1C,wDAAwD;oBACxD,2CAA2C;oBAC3C,MAAM,KAAK,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;oBAClC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;wBAAE,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBACxC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,SAAmB;IACtC,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;IACpD,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CACnC,CAAC,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CACrF,CAAC;IACF,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IACjE,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAEtG,IAAI,WAAW,IAAI,CAAC,OAAO;QAAE,OAAO,UAAU,CAAC;IAC/C,IAAI,WAAW,IAAI,CAAC,WAAW,IAAI,CAAC,OAAO;QAAE,OAAO,UAAU,CAAC;IAC/D,IAAI,OAAO,IAAI,CAAC,WAAW;
QAAE,OAAO,aAAa,CAAC;IAClD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;;;GAKG;AACH,SAAS,+BAA+B,CAAC,IAAY;IACnD,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;IACjD,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,+DAA+D;IAC/D,MAAM,OAAO,GAAG,IAAI;SACjB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC;SAC1C,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,GAAG,CAAC,CAAC;IAClC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;AACtC,CAAC;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,IAAY,EAAE,GAAG,GAAG,EAAE;IAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACjD,IAAI,OAAO,CAAC,MAAM,IAAI,GAAG;QAAE,OAAO,OAAO,CAAC;IAC1C,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAClC,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACvC,OAAO,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;AAChE,CAAC;AAED,8EAA8E;AAC9E,cAAc;AACd,8EAA8E;AAE9E,8DAA8D;AAC9D,MAAM,gBAAgB,GAAG,GAAG,CAAC;AAC7B,uDAAuD;AACvD,MAAM,wBAAwB,GAAG,EAAE,CAAC;AACpC;;;;;;;;;;;;GAYG;AACH,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAC/B,MAAM,wBAAwB,GAAG,KAAK,CAAC;AACvC,MAAM,kBAAkB,GAAG,GAAG,CAAC,CAAC,yCAAyC;AAEzE;;;;GAIG;AACH,SAAS,oBAAoB,CAAC,IAAY,EAAE,GAAW;IACrD,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC;IACpC,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC/B,MAAM,gBAAgB,GAAG,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,gBAAgB,GAAG,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,gBAAgB,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IAClF,OAAO,MAAM,GAAG,gCAAgC,GAAG,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC;AAC7E,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,uBAAuB,CAAC,OAAsB;IAC5D,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;IACxC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,yCAAyC;IAE/E,MAAM,OAAO,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACxC,IAAI,OAAO,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAE7B,MAAM,SAAS,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IAC/C,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAExC,uEAAuE;IAC
vE,8DAA8D;IAC9D,wBAAwB;IACxB,MAAM,OAAO,GAAG,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAChD,IAAI,OAAO,CAAC,MAAM,GAAG,gBAAgB;QAAE,OAAO,IAAI,CAAC;IACnD,kEAAkE;IAClE,oEAAoE;IACpE,uEAAuE;IACvE,2DAA2D;IAC3D,MAAM,IAAI,GAAG,oBAAoB,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;IAE7D,MAAM,MAAM,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;IACzC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,MAAM,SAAS,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;IACtC,MAAM,OAAO,GAAG,+BAA+B,CAAC,OAAO,CAAC,CAAC;IAEzD,MAAM,OAAO,GAA2D;QACtE,KAAK,EAAE,aAAa,CAAC,MAAM,CAAC;QAC5B,IAAI;QACJ,GAAG,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC1C,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7B,mEAAmE;QACnE,IAAI,EAAE,CAAC,gBAAgB,EAAE,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;KAC3E,CAAC;IAEF,OAAO;QACL,IAAI,EAAE,SAAS;QACf,IAAI,EAAE,WAAW,CAAC,SAAS,EAAE,OAAO,CAAC;QACrC,OAAO;KACR,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,yBAAyB,CAAC,OAAsB;IAC9D,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;IACxC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAErC,MAAM,SAAS,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IAC/C,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAEtC,MAAM,OAAO,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACxC,IAAI,OAAO,GAAG,CAAC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAEpD,MAAM,MAAM,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;IACzC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,MAAM,aAAa,GAAG,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACtD,IAAI,aAAa,CAAC,MAAM,GAAG,wBAAwB;QAAE,OAAO,IAAI,CAAC;IACjE,oEAAoE;IACpE,iEAAiE;IACjE,MAAM,UAAU,GAAG,oBAAoB,CAAC,aAAa,EAAE,wBAAwB,CAAC,CAAC;IAEjF,MAAM,SAAS,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACzC,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACrC,oEAAoE;QACpE,+CAA+C;QAC/C,IAAI,EACF,IAAI,CAAC,MAAM,GAAG,kBAAkB;YAC9B,CAAC,CAAC,oBAAoB,CAAC,IAAI,EAAE,kBAAkB,CAAC;YAChD,CAAC,CAAC,IAAI;KACX,CAAC,CAAC,CAAC;IAEJ,MAAM,OAA
O,GAA6D;QACxE,WAAW,EAAE,aAAa,CAAC,MAAM,EAAE,GAAG,CAAC;QACvC,KAAK;QACL,UAAU;QACV,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACvD,CAAC;IAEF,OAAO;QACL,IAAI,EAAE,WAAW;QACjB,IAAI,EAAE,WAAW,CAAC,WAAW,EAAE,OAAO,CAAC;QACvC,OAAO;KACR,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAsB;IACvD,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,MAAM,OAAO,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;IACjD,IAAI,OAAO;QAAE,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC/B,MAAM,SAAS,GAAG,yBAAyB,CAAC,OAAO,CAAC,CAAC;IACrD,IAAI,SAAS;QAAE,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACnC,OAAO,GAAG,CAAC;AACb,CAAC"}
|
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Phase 2b — session post-processor state tracking.
|
|
3
|
-
*
|
|
4
|
-
* Tracks which Hermes session files we've already processed, plus the
|
|
5
|
-
* SHA-256 hashes of the items we've extracted from each. Lives at
|
|
6
|
-
* `~/.nookplot/processed_sessions.json` with atomic tmp+rename writes so
|
|
7
|
-
* concurrent sync runs (if the user opts into a launchd timer AND invokes
|
|
8
|
-
* the CLI at the same time) can't corrupt the file.
|
|
9
|
-
*
|
|
10
|
-
* Two layers of dedup:
|
|
11
|
-
*
|
|
12
|
-
* 1. Session-level — if `sessions[session_id]` exists, skip the whole
|
|
13
|
-
* file on the next run. Cheapest check.
|
|
14
|
-
* 2. Item-level — within a session, each extracted item's content hash
|
|
15
|
-
* is recorded. The gateway's own `UNIQUE (agent_address, kind,
|
|
16
|
-
* content_hash)` UUID-index makes duplicate POSTs idempotent
|
|
17
|
-
* server-side anyway, but tracking locally lets `--force` do the
|
|
18
|
-
* right thing (re-extract, but skip items already captured).
|
|
19
|
-
*
|
|
20
|
-
* @module syncSessionsState
|
|
21
|
-
*/
|
|
22
|
-
/** One item that was extracted + captured from a Hermes session. */
|
|
23
|
-
export interface ProcessedItem {
|
|
24
|
-
/** SHA-256 hex of the canonicalized item body — matches what the gateway hashes. */
|
|
25
|
-
hash: string;
|
|
26
|
-
/** 'finding' or 'reasoning' — mirrors the capture queue kind. */
|
|
27
|
-
kind: "finding" | "reasoning";
|
|
28
|
-
/** Queue row id returned by the gateway (UUID). Present only for successful captures. */
|
|
29
|
-
captureId?: string;
|
|
30
|
-
/** If the POST failed, the reason. Present only for failures. */
|
|
31
|
-
error?: string;
|
|
32
|
-
}
|
|
33
|
-
/** State for one Hermes session we've seen. */
|
|
34
|
-
export interface ProcessedSession {
|
|
35
|
-
/** ISO-8601 timestamp we finished processing this session. */
|
|
36
|
-
processedAt: string;
|
|
37
|
-
/** Items extracted + posted from this session. */
|
|
38
|
-
items: ProcessedItem[];
|
|
39
|
-
}
|
|
40
|
-
/** Top-level state file shape. */
|
|
41
|
-
export interface SyncState {
|
|
42
|
-
/** Bump when breaking the file schema so we can migrate old installs. */
|
|
43
|
-
version: 1;
|
|
44
|
-
/** session_id → processing record */
|
|
45
|
-
sessions: Record<string, ProcessedSession>;
|
|
46
|
-
}
|
|
47
|
-
/**
|
|
48
|
-
* Default location of the state file. Exposed so tests can override; in
|
|
49
|
-
* production we always use `~/.nookplot/processed_sessions.json` which
|
|
50
|
-
* sits next to `~/.nookplot/credentials.json` that the rest of the CLI
|
|
51
|
-
* uses — same directory, same permission model.
|
|
52
|
-
*/
|
|
53
|
-
export declare function defaultStatePath(): string;
|
|
54
|
-
/**
|
|
55
|
-
* Load the state file, or return a blank state if it doesn't exist / is
|
|
56
|
-
* corrupted. Corruption fallback is deliberate: if someone hand-edits the
|
|
57
|
-
* file into garbage, we'd rather re-process everything than crash the
|
|
58
|
-
* installer. Re-processing is safe because the gateway's dedup guard
|
|
59
|
-
* blocks exact duplicates.
|
|
60
|
-
*/
|
|
61
|
-
export declare function loadState(path?: string): SyncState;
|
|
62
|
-
/**
|
|
63
|
-
* Atomic write: serialize → write to `${path}.tmp` → rename over the real
|
|
64
|
-
* file. `rename` is atomic on POSIX + NTFS, so a reader can never see a
|
|
65
|
-
* half-written file. No fsync here — if the process crashes between write
|
|
66
|
-
* and rename, the old file is untouched, and worst case we re-process a
|
|
67
|
-
* session (which the gateway dedup will catch).
|
|
68
|
-
*/
|
|
69
|
-
export declare function saveState(state: SyncState, path?: string): void;
|
|
70
|
-
/**
|
|
71
|
-
* Has this session already been processed? If so, the caller should skip
|
|
72
|
-
* it on the next run (unless `--force` was passed, in which case the
|
|
73
|
-
* caller explicitly bypasses this check).
|
|
74
|
-
*/
|
|
75
|
-
export declare function isSessionProcessed(state: SyncState, sessionId: string): boolean;
|
|
76
|
-
/**
|
|
77
|
-
* Has this specific item already been captured? Used inside `--force` re-runs
|
|
78
|
-
* so we don't hit the gateway with POSTs that we *know* will be deduped.
|
|
79
|
-
* Saves a round-trip + avoids inflating the per-agent rate-limit counter.
|
|
80
|
-
*/
|
|
81
|
-
export declare function isItemAlreadyCaptured(state: SyncState, sessionId: string, hash: string): boolean;
|
|
82
|
-
/**
|
|
83
|
-
* Record the result of processing one session. Overwrites any prior
|
|
84
|
-
* record — if a `--force` re-run captures more items than the first pass
|
|
85
|
-
* (e.g., the extractor heuristic was loosened), the merged record reflects
|
|
86
|
-
* everything captured across both passes.
|
|
87
|
-
*/
|
|
88
|
-
export declare function markSessionProcessed(state: SyncState, sessionId: string, items: ProcessedItem[]): SyncState;
|
|
89
|
-
//# sourceMappingURL=syncSessionsState.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"syncSessionsState.d.ts","sourceRoot":"","sources":["../src/syncSessionsState.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAUH,oEAAoE;AACpE,MAAM,WAAW,aAAa;IAC5B,oFAAoF;IACpF,IAAI,EAAE,MAAM,CAAC;IACb,iEAAiE;IACjE,IAAI,EAAE,SAAS,GAAG,WAAW,CAAC;IAC9B,yFAAyF;IACzF,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,+CAA+C;AAC/C,MAAM,WAAW,gBAAgB;IAC/B,8DAA8D;IAC9D,WAAW,EAAE,MAAM,CAAC;IACpB,kDAAkD;IAClD,KAAK,EAAE,aAAa,EAAE,CAAC;CACxB;AAED,kCAAkC;AAClC,MAAM,WAAW,SAAS;IACxB,yEAAyE;IACzE,OAAO,EAAE,CAAC,CAAC;IACX,qCAAqC;IACrC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;CAC5C;AAMD;;;;;GAKG;AACH,wBAAgB,gBAAgB,IAAI,MAAM,CAEzC;AAMD;;;;;;GAMG;AACH,wBAAgB,SAAS,CAAC,IAAI,GAAE,MAA2B,GAAG,SAAS,CAsBtE;AAED;;;;;;GAMG;AACH,wBAAgB,SAAS,CAAC,KAAK,EAAE,SAAS,EAAE,IAAI,GAAE,MAA2B,GAAG,IAAI,CAMnF;AAMD;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAE/E;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,SAAS,EAChB,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,GACX,OAAO,CAIT;AAED;;;;;GAKG;AACH,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,SAAS,EAChB,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,aAAa,EAAE,GACrB,SAAS,CA8BX"}
|