@nookplot/mcp 0.4.113 → 0.4.114

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/dist/auth.d.ts +112 -5
  2. package/dist/auth.d.ts.map +1 -1
  3. package/dist/auth.js +294 -53
  4. package/dist/auth.js.map +1 -1
  5. package/dist/gateway.d.ts.map +1 -1
  6. package/dist/gateway.js +5 -1
  7. package/dist/gateway.js.map +1 -1
  8. package/dist/index.d.ts +12 -1
  9. package/dist/index.d.ts.map +1 -1
  10. package/dist/index.js +615 -18
  11. package/dist/index.js.map +1 -1
  12. package/dist/profileName.d.ts +65 -0
  13. package/dist/profileName.d.ts.map +1 -0
  14. package/dist/profileName.js +114 -0
  15. package/dist/profileName.js.map +1 -0
  16. package/dist/syncSessions.d.ts +84 -0
  17. package/dist/syncSessions.d.ts.map +1 -0
  18. package/dist/syncSessions.js +260 -0
  19. package/dist/syncSessions.js.map +1 -0
  20. package/dist/syncSessionsExtractor.d.ts +123 -0
  21. package/dist/syncSessionsExtractor.d.ts.map +1 -0
  22. package/dist/syncSessionsExtractor.js +362 -0
  23. package/dist/syncSessionsExtractor.js.map +1 -0
  24. package/dist/syncSessionsState.d.ts +89 -0
  25. package/dist/syncSessionsState.d.ts.map +1 -0
  26. package/dist/syncSessionsState.js +145 -0
  27. package/dist/syncSessionsState.js.map +1 -0
  28. package/dist/tools/cognitiveWorkspace.d.ts.map +1 -1
  29. package/dist/tools/cognitiveWorkspace.js +30 -0
  30. package/dist/tools/cognitiveWorkspace.js.map +1 -1
  31. package/dist/tools/ecosystem.d.ts.map +1 -1
  32. package/dist/tools/ecosystem.js +1 -5
  33. package/dist/tools/ecosystem.js.map +1 -1
  34. package/dist/tools/forgePresets.d.ts +7 -2
  35. package/dist/tools/forgePresets.d.ts.map +1 -1
  36. package/dist/tools/forgePresets.js +133 -3
  37. package/dist/tools/forgePresets.js.map +1 -1
  38. package/dist/tools/knowledgeGraph.js +1 -1
  39. package/dist/tools/knowledgeGraph.js.map +1 -1
  40. package/dist/tools/memory.d.ts.map +1 -1
  41. package/dist/tools/memory.js +0 -33
  42. package/dist/tools/memory.js.map +1 -1
  43. package/dist/tools/miningPipeline.d.ts +6 -2
  44. package/dist/tools/miningPipeline.d.ts.map +1 -1
  45. package/dist/tools/miningPipeline.js +392 -3
  46. package/dist/tools/miningPipeline.js.map +1 -1
  47. package/dist/tools/onchain.d.ts.map +1 -1
  48. package/dist/tools/onchain.js +132 -18
  49. package/dist/tools/onchain.js.map +1 -1
  50. package/dist/tools/papers.d.ts.map +1 -1
  51. package/dist/tools/papers.js +16 -0
  52. package/dist/tools/papers.js.map +1 -1
  53. package/dist/tools/read.d.ts.map +1 -1
  54. package/dist/tools/read.js +27 -6
  55. package/dist/tools/read.js.map +1 -1
  56. package/dist/tools/swarms.d.ts.map +1 -1
  57. package/dist/tools/swarms.js +21 -1
  58. package/dist/tools/swarms.js.map +1 -1
  59. package/package.json +1 -1
  60. package/skills/hermes/nookplot/DESCRIPTION.md +59 -0
  61. package/skills/hermes/nookplot/daemon/SKILL.md +103 -0
  62. package/skills/hermes/nookplot/learn/SKILL.md +131 -0
  63. package/skills/hermes/nookplot/mine/SKILL.md +111 -0
  64. package/skills/hermes/nookplot/social/SKILL.md +104 -0
  65. package/skills/hermes/nookplot/sync/SKILL.md +110 -0
@@ -0,0 +1,123 @@
1
+ /**
2
+ * Phase 2b — heuristic extractor for Hermes session files.
3
+ *
4
+ * Given a parsed Hermes session JSON (shape observed in
5
+ * `~/.hermes/sessions/session_*.json`), produces a list of
6
+ * capture-queue-ready items without calling an LLM. The LLM path is
7
+ * intentionally deferred — the plan suggests using the user's configured
8
+ * provider, but:
9
+ *
10
+ * - Shipping an extractor with zero external deps means `sync-sessions`
11
+ * just works the moment it's installed, no BYOK setup required.
12
+ * - Heuristic misses are caught by the gateway's quality gate +
13
+ * ContentScanner, so low-value captures don't pollute the KG.
14
+ * - The code below is structured around a pure function
15
+ * `extractFromSession(sessionJson) -> ExtractedItem[]`, so the LLM
16
+ * strategy can slot in later behind the same interface.
17
+ *
18
+ * Heuristic rules (conservative on purpose — we'd rather under-capture
19
+ * than spam the review queue):
20
+ *
21
+ * - A FINDING is extracted when a session has ≥2 tool-call invocations
22
+ * followed by an assistant text turn. The final assistant text is
23
+ * the body; the first user message is the title / taskSummary.
24
+ * - A REASONING TRACE is extracted when a session has ≥2 assistant
25
+ * text turns interleaved with tool calls. Each text turn becomes a
26
+ * step; the last becomes the conclusion.
27
+ * - Body must be ≥200 chars for findings, ≥50 for conclusions.
28
+ * Shorter syntheses aren't worth queuing.
29
+ * - Tool-call outputs are NEVER used as the body — only the assistant's
30
+ * own text. This is the Phase 2d §6 mitigation against session
31
+ * transcript poisoning.
32
+ *
33
+ * @module syncSessionsExtractor
34
+ */
35
+ /** One turn from a Hermes session `messages[]` array. Only `role` is required; every other field is optional. */
36
+ export interface HermesMessage {
37
+ role: "user" | "assistant" | "tool" | "system";
38
+ content?: string | null;
39
+ /** Present on assistant turns that invoked tools; one array entry per tool invocation. */
40
+ tool_calls?: Array<{
41
+ id?: string;
42
+ type?: string;
43
+ function?: {
44
+ name?: string;
45
+ arguments?: string;
46
+ };
47
+ }>;
48
+ /** Present on `tool` turns, referring back to the assistant's call (presumably the matching `tool_calls[].id` — TODO confirm). */
49
+ tool_call_id?: string;
50
+ /** Present on `tool` turns — the tool that produced the result. */
51
+ name?: string;
52
+ }
53
+ /** The subset of a Hermes session file we care about. Every field is optional, so consumers must tolerate partially populated files. */
54
+ export interface HermesSession {
55
+ session_id?: string;
56
+ model?: string;
57
+ messages?: HermesMessage[];
58
+ session_start?: string;
59
+ last_updated?: string;
60
+ }
61
+ /**
62
+ * What we extract: a union discriminated by `kind`. Each `payload` mirrors
63
+ * the field the capture-queue endpoint expects; `hash` is a hex content
64
+ * hash of the item that we use for local dedup.
65
+ */
66
+ export type ExtractedItem = {
67
+ kind: "finding";
68
+ hash: string;
69
+ payload: {
70
+ title: string;
71
+ body: string;
72
+ sources?: string[];
73
+ domain?: string;
74
+ tags?: string[];
75
+ };
76
+ } | {
77
+ kind: "reasoning";
78
+ hash: string;
79
+ payload: {
80
+ taskSummary: string;
81
+ steps: Array<{
82
+ step: string;
83
+ rationale?: string;
84
+ }>;
85
+ conclusion: string;
86
+ modelUsed?: string;
87
+ };
88
+ };
89
+ /**
90
+ * Extract a FINDING from a session that researched + synthesized.
91
+ *
92
+ * Preconditions checked inside:
93
+ * - ≥3 messages and ≥2 tool-call invocations (otherwise it's a trivial lookup, not a finding)
94
+ * - Final non-empty assistant text is ≥200 chars (it becomes the body)
95
+ * - There's a non-empty user prompt to use as the title
96
+ *
97
+ * Returns `null` if the session doesn't pattern-match. That's the common
98
+ * case — most sessions are one-shot Q&A, not research.
99
+ */
100
+ export declare function extractFindingHeuristic(session: HermesSession): ExtractedItem | null;
101
+ /**
102
+ * Extract a REASONING TRACE from a session with multi-step thinking.
103
+ *
104
+ * Preconditions:
105
+ * - ≥2 non-empty assistant text turns (steps + conclusion)
106
+ * - ≥2 tool-call invocations OR total message count ≥ 5 (so a
107
+ * pure-text chain-of-thought still qualifies if it had structure)
108
+ * - Conclusion (the last assistant text turn) length ≥50 chars
109
+ *
110
+ * The `steps` array is built from every assistant text turn EXCEPT the
111
+ * last one. The last becomes the `conclusion`. If there's only one text
112
+ * turn, we bail and return `null` — single-step "reasoning" is just a finding.
113
+ */
114
+ export declare function extractReasoningHeuristic(session: HermesSession): ExtractedItem | null;
115
+ /**
116
+ * Main extraction entry point. Runs both extractors and returns every
117
+ * item that matched (finding first, then reasoning trace); the array is
118
+ * empty when neither matched. A single session may yield both — research
119
+ * sessions that pivoted midway
120
+ * have both a synthesis (the finding) and a process worth remembering
121
+ * (the reasoning trace). The gateway dedup guards prevent actual dupes.
122
+ */
122
+ export declare function extractFromSession(session: HermesSession): ExtractedItem[];
123
+ //# sourceMappingURL=syncSessionsExtractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"syncSessionsExtractor.d.ts","sourceRoot":"","sources":["../src/syncSessionsExtractor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AASH,4DAA4D;AAC5D,MAAM,WAAW,aAAa;IAC5B,IAAI,EAAE,MAAM,GAAG,WAAW,GAAG,MAAM,GAAG,QAAQ,CAAC;IAC/C,OAAO,CAAC,EAAE,MAAM,GAAG,IAAI,CAAC;IACxB,qDAAqD;IACrD,UAAU,CAAC,EAAE,KAAK,CAAC;QACjB,EAAE,CAAC,EAAE,MAAM,CAAC;QACZ,IAAI,CAAC,EAAE,MAAM,CAAC;QACd,QAAQ,CAAC,EAAE;YAAE,IAAI,CAAC,EAAE,MAAM,CAAC;YAAC,SAAS,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC;KAClD,CAAC,CAAC;IACH,uEAAuE;IACvE,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB,mEAAmE;IACnE,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED,yDAAyD;AACzD,MAAM,WAAW,aAAa;IAC5B,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,CAAC,EAAE,aAAa,EAAE,CAAC;IAC3B,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,YAAY,CAAC,EAAE,MAAM,CAAC;CACvB;AAED;;;;GAIG;AACH,MAAM,MAAM,aAAa,GACrB;IACE,IAAI,EAAE,SAAS,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE;QACP,KAAK,EAAE,MAAM,CAAC;QACd,IAAI,EAAE,MAAM,CAAC;QACb,OAAO,CAAC,EAAE,MAAM,EAAE,CAAC;QACnB,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;KACjB,CAAC;CACH,GACD;IACE,IAAI,EAAE,WAAW,CAAC;IAClB,IAAI,EAAE,MAAM,CAAC;IACb,OAAO,EAAE;QACP,WAAW,EAAE,MAAM,CAAC;QACpB,KAAK,EAAE,KAAK,CAAC;YAAE,IAAI,EAAE,MAAM,CAAC;YAAC,SAAS,CAAC,EAAE,MAAM,CAAA;SAAE,CAAC,CAAC;QACnD,UAAU,EAAE,MAAM,CAAC;QACnB,SAAS,CAAC,EAAE,MAAM,CAAC;KACpB,CAAC;CACH,CAAC;AAyNN;;;;;;;;;;GAUG;AACH,wBAAgB,uBAAuB,CAAC,OAAO,EAAE,aAAa,GAAG,aAAa,GAAG,IAAI,CA0CpF;AAED;;;;;;;;;;;;GAYG;AACH,wBAAgB,yBAAyB,CAAC,OAAO,EAAE,aAAa,GAAG,aAAa,GAAG,IAAI,CAyCtF;AAED;;;;;;GAMG;AACH,wBAAgB,kBAAkB,CAAC,OAAO,EAAE,aAAa,GAAG,aAAa,EAAE,CAO1E"}
@@ -0,0 +1,362 @@
1
+ /**
2
+ * Phase 2b — heuristic extractor for Hermes session files.
3
+ *
4
+ * Given a parsed Hermes session JSON (shape observed in
5
+ * `~/.hermes/sessions/session_*.json`), produces a list of
6
+ * capture-queue-ready items without calling an LLM. The LLM path is
7
+ * intentionally deferred — the plan suggests using the user's configured
8
+ * provider, but:
9
+ *
10
+ * - Shipping an extractor with zero external deps means `sync-sessions`
11
+ * just works the moment it's installed, no BYOK setup required.
12
+ * - Heuristic misses are caught by the gateway's quality gate +
13
+ * ContentScanner, so low-value captures don't pollute the KG.
14
+ * - The code below is structured around a pure function
15
+ * `extractFromSession(sessionJson) -> ExtractedItem[]`, so the LLM
16
+ * strategy can slot in later behind the same interface.
17
+ *
18
+ * Heuristic rules (conservative on purpose — we'd rather under-capture
19
+ * than spam the review queue):
20
+ *
21
+ * - A FINDING is extracted when a session has ≥2 tool-call invocations
22
+ * followed by an assistant text turn. The final assistant text is
23
+ * the body; the first user message is the title / taskSummary.
24
+ * - A REASONING TRACE is extracted when a session has ≥2 assistant
25
+ * text turns interleaved with tool calls. Each text turn becomes a
26
+ * step; the last becomes the conclusion.
27
+ * - Body must be ≥200 chars for findings, ≥50 for conclusions.
28
+ * Shorter syntheses aren't worth queuing.
29
+ * - Tool-call outputs are NEVER used as the body — only the assistant's
30
+ * own text. This is the Phase 2d §6 mitigation against session
31
+ * transcript poisoning.
32
+ *
33
+ * @module syncSessionsExtractor
34
+ */
35
+ import { createHash } from "node:crypto";
36
+ // ---------------------------------------------------------------------------
37
+ // Helpers
38
+ // ---------------------------------------------------------------------------
39
/**
 * Produce the canonical form of a string used for content hashing.
 *
 * Applied in order: Unicode NFKD normalization, trim, lowercase, then
 * every run of whitespace collapsed to a single space.
 *
 * Per the original author's note this must stay in lock-step with the
 * gateway's `captureQueueService.ts::canonicalizeForHash`, so that a hash
 * computed locally equals the one the server computes. Do not change one
 * side without the other.
 *
 * NFKD folds precomposed/decomposed and compatibility forms together
 * (e.g. "é" vs "e" + combining accent). It does NOT remove zero-width
 * characters such as U+200B, so it is not a defence against those.
 *
 * @param s Raw text.
 * @returns The canonicalized text.
 */
function normalize(s) {
    const decomposed = s.normalize("NFKD");
    const lowered = decomposed.trim().toLowerCase();
    return lowered.replace(/\s+/g, " ");
}
53
/**
 * Content hash for an extracted item.
 *
 * Feeds SHA-256 with, in order: the item kind, a NUL separator, and the
 * canonical (key-sorted, string-normalized) JSON of the payload, then
 * returns the digest as hex. The original author's note states this is
 * meant to match the gateway's algorithm so the value can be compared
 * with the local dedup map and the server's UNIQUE constraint as-is.
 *
 * @param kind Item kind ("finding" or "reasoning").
 * @param payload The payload object to fingerprint.
 * @returns Hex-encoded SHA-256 digest.
 */
function hashPayload(kind, payload) {
    // Keys are sorted inside canonicalJson, so field order chosen by a
    // particular extractor strategy never affects the digest.
    return createHash("sha256")
        .update(kind)
        .update("\0")
        .update(canonicalJson(payload))
        .digest("hex");
}
69
/**
 * Deterministic JSON serialization.
 *
 * Object keys are sorted at every depth so two payloads with identical
 * content but different key order serialize (and therefore hash)
 * identically. String values are passed through `normalize` before being
 * quoted; object keys are quoted as-is.
 *
 * `undefined` members follow `JSON.stringify` semantics: an object
 * property whose value is `undefined` is omitted, and an `undefined`
 * array element is written as `null`. Without this, such members would
 * be emitted as the bare text `undefined` (or an empty array slot),
 * which is not valid JSON and differs from what is actually sent when
 * the payload is serialized for the request.
 *
 * @param value Any JSON-compatible value.
 * @returns The canonical JSON text. For a top-level `undefined` the
 *   result is `undefined`, exactly as `JSON.stringify` behaves.
 */
function canonicalJson(value) {
    if (Array.isArray(value)) {
        const items = value.map((item) => (item === undefined ? "null" : canonicalJson(item)));
        return `[${items.join(",")}]`;
    }
    if (value && typeof value === "object") {
        const obj = value;
        const members = Object.keys(obj)
            .sort()
            .filter((key) => obj[key] !== undefined)
            .map((key) => `${JSON.stringify(key)}:${canonicalJson(obj[key])}`);
        return `{${members.join(",")}}`;
    }
    // Strings get normalized the same way the server does before hashing.
    if (typeof value === "string")
        return JSON.stringify(normalize(value));
    return JSON.stringify(value);
}
88
/**
 * Locate the first user turn that carries non-whitespace text and return
 * that text trimmed. Used as the natural title / task summary.
 *
 * @param messages Session messages in chronological order.
 * @returns The trimmed prompt, or `null` when no user turn has text.
 */
function firstUserPrompt(messages) {
    const firstWithText = messages.find((msg) => msg.role === "user" &&
        typeof msg.content === "string" &&
        msg.content.trim().length > 0);
    return firstWithText ? firstWithText.content.trim() : null;
}
101
/**
 * Collect the trimmed text of every assistant turn whose content is a
 * string with at least one non-whitespace character, preserving order.
 * Turns from other roles (including tool outputs) are ignored.
 *
 * @param messages Session messages in chronological order.
 * @returns Trimmed assistant texts; empty when there are none.
 */
function assistantTextTurns(messages) {
    return messages
        .filter((msg) => msg.role === "assistant" && typeof msg.content === "string")
        .map((msg) => msg.content.trim())
        .filter((text) => text.length > 0);
}
111
/**
 * Total number of tool invocations in the session: the summed length of
 * `tool_calls` over every assistant turn that has such an array.
 *
 * @param messages Session messages.
 * @returns The invocation count (0 when no assistant turn called a tool).
 */
function toolCallCount(messages) {
    return messages.reduce((total, msg) => {
        const invokedTools = msg.role === "assistant" && Array.isArray(msg.tool_calls);
        return invokedTools ? total + msg.tool_calls.length : total;
    }, 0);
}
121
/**
 * Reduce a tool name to a safe tag fragment.
 *
 * Tool names originate from `tool_calls[].function.name` in the session
 * JSON, i.e. from whichever MCP server the agent talked to, so they are
 * untrusted: a hostile server could supply markup or SQL-looking text
 * that would otherwise travel into the `tags[]` of a capture and into
 * any UI that renders tags unescaped.
 *
 * Only ASCII letters, digits, `_`, `-`, `.` and `:` survive (the colon
 * keeps the common "server:tool" form intact); every other character is
 * dropped. The result is then limited to 64 characters so an oversized
 * name cannot bloat the row.
 *
 * @param name Raw tool name.
 * @returns The filtered name, at most 64 chars; may be empty.
 */
function sanitizeToolName(name) {
    const maxLength = 64;
    const allowedOnly = name.replace(/[^a-zA-Z0-9_:.\-]/g, "");
    return allowedOnly.slice(0, maxLength);
}
140
/**
 * Unique tool names invoked by assistant turns, in first-seen order.
 * Feeds domain inference and the `tool:*` tags.
 *
 * Each name is passed through `sanitizeToolName` before it is recorded,
 * so nothing unsanitized can leave this function; names that are not
 * strings, are empty, or sanitize down to nothing are skipped.
 *
 * @param messages Session messages.
 * @returns De-duplicated, sanitized tool names.
 */
function distinctToolNames(messages) {
    const unique = new Set();
    for (const msg of messages) {
        if (msg.role !== "assistant" || !Array.isArray(msg.tool_calls)) {
            continue;
        }
        for (const call of msg.tool_calls) {
            const rawName = call.function?.name;
            if (typeof rawName !== "string" || rawName.length === 0) {
                continue;
            }
            const safeName = sanitizeToolName(rawName);
            if (safeName.length > 0) {
                unique.add(safeName);
            }
        }
    }
    return [...unique];
}
159
/**
 * Guess a coarse domain from the tool names a session used.
 *
 * Matching is case-insensitive and substring-based for the research and
 * code keyword lists, and prefix-based (`nookplot_`) for Nookplot tools.
 * A domain is returned only when the signal is unmixed:
 *   - research keywords and no code keywords  -> "research"
 *   - code keywords and no research keywords  -> "engineering"
 *   - only `nookplot_*` tools                 -> "nookplot"
 * Anything else yields `undefined`, leaving the decision downstream.
 *
 * @param toolNames Tool names (already sanitized by the caller).
 * @returns The inferred domain, or `undefined` when ambiguous/unknown.
 */
function inferDomain(toolNames) {
    const researchHints = ["web_search", "browser", "arxiv", "scholar", "search"];
    const codeHints = ["read_file", "edit", "bash", "grep"];
    const lowered = toolNames.map((name) => name.toLowerCase());
    const mentionsAny = (hints) => lowered.some((name) => hints.some((hint) => name.includes(hint)));
    const usesResearch = mentionsAny(researchHints);
    const usesCode = mentionsAny(codeHints);
    const usesNookplot = lowered.some((name) => name.startsWith("nookplot_"));
    if (usesResearch) {
        // Research mixed with code is ambiguous.
        return usesCode ? undefined : "research";
    }
    if (usesCode) {
        return "engineering";
    }
    return usesNookplot ? "nookplot" : undefined;
}
177
/**
 * Pull http(s) URLs out of the assistant's own synthesis text.
 *
 * Callers pass assistant text only — tool outputs are deliberately not
 * treated as citeable sources (the Phase 2d §6 transcript-poisoning
 * mitigation) — but a URL the assistant itself wrote down counts as a
 * legitimate citation.
 *
 * A match runs until whitespace, `)` or `]`. Trailing punctuation swept
 * up by the match is stripped, URLs longer than 500 chars after that
 * are discarded, and duplicates are removed while keeping first-seen
 * order.
 *
 * @param text Assistant-authored text.
 * @returns Unique URLs; empty when none are present.
 */
function extractSourcesFromAssistantText(text) {
    const unique = new Set();
    for (const rawUrl of text.match(/https?:\/\/[^\s)\]]+/g) ?? []) {
        const url = rawUrl.replace(/[.,;:!?)\]]+$/, "");
        if (url.length <= 500) {
            unique.add(url);
        }
    }
    return [...unique];
}
193
/**
 * Short, human-readable title from a longer string.
 *
 * Whitespace runs are collapsed to single spaces and the text trimmed.
 * If the result fits within `max` characters it is returned unchanged.
 * Otherwise it is shortened and "…" is appended, and the returned string
 * INCLUDING the ellipsis is at most `max` characters (for `max` ≥ 1).
 * Previously the ellipsis was appended after cutting at `max`, so titles
 * came out one character over the limit.
 *
 * The cut prefers the last space, provided that space lies beyond
 * position 30, so the title ends on a whole word; otherwise it is a hard
 * cut. A cut is never left on a dangling high surrogate, so an emoji or
 * other astral character is not split in half.
 *
 * @param text Source text (typically the user's first prompt).
 * @param max Maximum length of the returned title; defaults to 70.
 * @returns The collapsed text, shortened with a trailing "…" if needed.
 */
function truncateTitle(text, max = 70) {
    const collapsed = text.replace(/\s+/g, " ").trim();
    if (collapsed.length <= max)
        return collapsed;
    const ellipsis = "…";
    // Reserve room for the ellipsis so the final string honours `max`.
    const budget = Math.max(0, max - ellipsis.length);
    let cut = collapsed.slice(0, budget);
    // Drop a trailing lone high surrogate left by slicing mid-pair.
    if (/[\uD800-\uDBFF]$/.test(cut))
        cut = cut.slice(0, -1);
    const lastSpace = cut.lastIndexOf(" ");
    const head = lastSpace > 30 ? cut.slice(0, lastSpace) : cut;
    return head + ellipsis;
}
205
+ // ---------------------------------------------------------------------------
206
+ // Extractors
207
+ // ---------------------------------------------------------------------------
208
+ /** Minimum finding body length, compared against `String.length` — matches the gateway quality gate per the original author. */
209
+ const MIN_FINDING_BODY = 200;
210
+ /** Minimum conclusion length (`String.length`) for a reasoning trace. */
211
+ const MIN_REASONING_CONCLUSION = 50;
212
+ /**
213
+ * Maximum body length before we truncate (audit fix B8). The gateway's
214
+ * `validateFindingPayload` rejects anything over `MAX_STRING_FIELD * 10`
215
+ * (100_000 chars) — but the route ALSO rejects JSON-payload > 16 KB, and
216
+ * the service-side `MAX_PAYLOAD_BYTES` enforces `2 * MAX_STRING_FIELD`
217
+ * bytes. Leaving the body uncapped meant that a Hermes session with a
218
+ * 50 KB synthesis would silently fail validation, the session would get
219
+ * marked processed, and the capture would vanish.
220
+ * NOTE(review): these caps count `String.length` units while the gateway limits above are quoted in bytes/KB; non-ASCII text encodes to more bytes than units — confirm the headroom still holds.
221
+ * 9_500 gives headroom for the JSON envelope + title + tags + metadata
222
+ * inside the 16 KB route ceiling, while still preserving substantive
223
+ * syntheses. Anything larger is almost certainly bot-generated padding.
224
+ */
225
+ const MAX_FINDING_BODY = 9_500;
226
+ const MAX_REASONING_CONCLUSION = 9_500;
227
+ const MAX_REASONING_STEP = 800; // pre-existing cap at the per-step level
228
/**
 * Cap `text` at `max` characters, marking the cut.
 *
 * Text that already fits is returned untouched. Otherwise the text is
 * shortened and the marker "\n\n[truncated — original was N chars]" is
 * appended, where N is the original length. The returned string,
 * marker INCLUDED, never exceeds `max`. Previously the marker was added
 * after cutting at `max`, so the result overshot the cap by ~40 chars
 * and an input just over the cap came back longer than it went in.
 *
 * The cut lands on the last space when one exists within the final 50
 * characters of the available room, so the text does not end mid-word;
 * otherwise it is a hard cut. A cut is never left on a dangling high
 * surrogate. If `max` is too small to hold the marker at all, the text
 * is simply hard-cut to `max` with no marker.
 *
 * @param text Text to cap.
 * @param max Maximum length of the returned string.
 * @returns `text` itself, or a shortened copy ending in the marker.
 */
function truncateWithEllipsis(text, max) {
    if (text.length <= max)
        return text;
    const marker = "\n\n[truncated — original was " + text.length + " chars]";
    // Room left for real content once the marker is accounted for.
    const budget = max - marker.length;
    if (budget <= 0)
        return text.slice(0, Math.max(0, max));
    let cut = text.slice(0, budget);
    // Drop a trailing lone high surrogate left by slicing mid-pair.
    if (/[\uD800-\uDBFF]$/.test(cut))
        cut = cut.slice(0, -1);
    const lastWordBoundary = cut.lastIndexOf(" ");
    const head = lastWordBoundary > budget - 50 ? cut.slice(0, lastWordBoundary) : cut;
    return head + marker;
}
241
/**
 * Try to turn a research-style session into a FINDING item.
 *
 * The session qualifies only if all of these hold:
 *   - it has at least 3 messages and at least 2 tool-call invocations
 *     (fewer means a trivial lookup, not a finding);
 *   - its last non-empty assistant text is at least MIN_FINDING_BODY
 *     characters long;
 *   - a non-empty user prompt exists to serve as the title.
 *
 * The body is that last assistant text, capped via
 * `truncateWithEllipsis` at MAX_FINDING_BODY so an oversized synthesis
 * is not rejected by the gateway's payload limit (audit fix B8). Sources
 * are harvested from the untruncated text so URLs in the tail survive.
 * Tags are "hermes-session" plus `tool:<name>` for up to five distinct
 * tools. `sources` and `domain` are only present when non-empty.
 *
 * @param session Parsed Hermes session.
 * @returns The finding item, or `null` when the session does not match
 *   (the common case — most sessions are one-shot Q&A).
 */
export function extractFindingHeuristic(session) {
    const messages = session.messages ?? [];
    // Smallest qualifying shape: user + assistant(tool) + assistant(text).
    if (messages.length < 3 || toolCallCount(messages) < 2) {
        return null;
    }
    // Hermes's pattern is tool calls until it has what it needs, then one
    // user-facing answer — so the final assistant text is the synthesis.
    const synthesis = assistantTextTurns(messages).pop();
    if (synthesis === undefined || synthesis.length < MIN_FINDING_BODY) {
        return null;
    }
    const prompt = firstUserPrompt(messages);
    if (!prompt) {
        return null;
    }
    const toolNames = distinctToolNames(messages);
    const toolTags = toolNames.slice(0, 5).map((toolName) => `tool:${toolName}`);
    const domain = inferDomain(toolNames);
    // Deliberately run on the raw text, not the truncated body.
    const sources = extractSourcesFromAssistantText(synthesis);
    const payload = {
        title: truncateTitle(prompt),
        body: truncateWithEllipsis(synthesis, MAX_FINDING_BODY),
        ...(sources.length === 0 ? {} : { sources }),
        ...(domain ? { domain } : {}),
        // Tool tags let discoverers filter by tech stack.
        tags: ["hermes-session", ...toolTags],
    };
    return { kind: "finding", hash: hashPayload("finding", payload), payload };
}
293
/**
 * Try to turn a multi-step session into a REASONING TRACE item.
 *
 * The session qualifies only if all of these hold:
 *   - it has at least 3 messages;
 *   - it has at least 2 non-empty assistant text turns;
 *   - it shows structure: at least 2 tool-call invocations, OR at least
 *     5 messages in total (so a pure-text chain of thought can qualify);
 *   - a non-empty user prompt exists;
 *   - the last assistant text is at least MIN_REASONING_CONCLUSION
 *     characters long.
 *
 * Every assistant text turn except the last becomes a step, each capped
 * at MAX_REASONING_STEP; the last becomes the conclusion, capped at
 * MAX_REASONING_CONCLUSION (audit fix B8, so long traces are not dropped
 * by the gateway's payload-size gate). `taskSummary` is the user prompt
 * shortened to 150 chars. `modelUsed` is set only when the session
 * names a model.
 *
 * @param session Parsed Hermes session.
 * @returns The reasoning item, or `null` when the session does not match.
 */
export function extractReasoningHeuristic(session) {
    const messages = session.messages ?? [];
    if (messages.length < 3) {
        return null;
    }
    const textTurns = assistantTextTurns(messages);
    if (textTurns.length < 2) {
        return null;
    }
    const hasStructure = toolCallCount(messages) >= 2 || messages.length >= 5;
    if (!hasStructure) {
        return null;
    }
    const prompt = firstUserPrompt(messages);
    if (!prompt) {
        return null;
    }
    const finalTurn = textTurns[textTurns.length - 1];
    if (finalTurn.length < MIN_REASONING_CONCLUSION) {
        return null;
    }
    // A step of 2000+ chars is not a step, it's another finding dressed
    // up — so each one is tightened. truncateWithEllipsis returns short
    // text unchanged, so no separate length check is needed here.
    const steps = textTurns
        .slice(0, -1)
        .map((turn) => ({ step: truncateWithEllipsis(turn, MAX_REASONING_STEP) }));
    const payload = {
        taskSummary: truncateTitle(prompt, 150),
        steps,
        conclusion: truncateWithEllipsis(finalTurn, MAX_REASONING_CONCLUSION),
        ...(session.model ? { modelUsed: session.model } : {}),
    };
    return { kind: "reasoning", hash: hashPayload("reasoning", payload), payload };
}
345
/**
 * Entry point: run both heuristics over one session and return whatever
 * matched — the finding first, then the reasoning trace.
 *
 * One session may legitimately produce both: a research session that
 * pivoted midway has a synthesis (the finding) and a process worth
 * remembering (the trace). Per the original author's note, gateway-side
 * dedup guards against real duplicates.
 *
 * @param session Parsed Hermes session.
 * @returns Zero, one or two extracted items.
 */
export function extractFromSession(session) {
    const candidates = [
        extractFindingHeuristic(session),
        extractReasoningHeuristic(session),
    ];
    return candidates.filter(Boolean);
}
362
+ //# sourceMappingURL=syncSessionsExtractor.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"syncSessionsExtractor.js","sourceRoot":"","sources":["../src/syncSessionsExtractor.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAiCG;AAEH,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AA4DzC,8EAA8E;AAC9E,WAAW;AACX,8EAA8E;AAE9E;;;;;;;;;;GAUG;AACH,SAAS,SAAS,CAAC,CAAS;IAC1B,OAAO,CAAC,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,WAAW,EAAE,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;AACvE,CAAC;AAED;;;;;GAKG;AACH,SAAS,WAAW,CAAC,IAA6B,EAAE,OAAgB;IAClE,MAAM,CAAC,GAAG,UAAU,CAAC,QAAQ,CAAC,CAAC;IAC/B,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACf,CAAC,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;IACf,qEAAqE;IACrE,qEAAqE;IACrE,mCAAmC;IACnC,CAAC,CAAC,MAAM,CAAC,aAAa,CAAC,OAAO,CAAC,CAAC,CAAC;IACjC,OAAO,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC,CAAC;AACzB,CAAC;AAED;;;;GAIG;AACH,SAAS,aAAa,CAAC,KAAc;IACnC,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,IAAI,KAAK,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;IACnD,CAAC;IACD,IAAI,KAAK,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QACvC,MAAM,GAAG,GAAG,KAAgC,CAAC;QAC7C,MAAM,IAAI,GAAG,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;QACrC,OAAO,IAAI,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,IAAI,CAAC,SAAS,CAAC,CAAC,CAAC,IAAI,aAAa,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,CAAC;IAC3F,CAAC;IACD,sEAAsE;IACtE,IAAI,OAAO,KAAK,KAAK,QAAQ;QAAE,OAAO,IAAI,CAAC,SAAS,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC,CAAC;IACvE,OAAO,IAAI,CAAC,SAAS,CAAC,KAAK,CAAC,CAAC;AAC/B,CAAC;AAED;;;;GAIG;AACH,SAAS,eAAe,CAAC,QAAyB;IAChD,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,IAAI,KAAK,MAAM,IAAI,OAAO,CAAC,CAAC,OAAO,KAAK,QAAQ,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YACtF,OAAO,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC;QAC1B,CAAC;IACH,CAAC;IACD,OAAO,IAAI,CAAC;AACd,CAAC;AAED,iEAAiE;AACjE,SAAS,kBAAkB,CAAC,QAAyB;IACnD,MAAM,GAAG,GAAa,EAAE,CAAC;IACzB,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,OAAO,CAAC,CAAC,OAAO,KAAK,QAAQ,IAAI,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;YA
C3F,GAAG,CAAC,IAAI,CAAC,CAAC,CAAC,OAAO,CAAC,IAAI,EAAE,CAAC,CAAC;QAC7B,CAAC;IACH,CAAC;IACD,OAAO,GAAG,CAAC;AACb,CAAC;AAED,kEAAkE;AAClE,SAAS,aAAa,CAAC,QAAyB;IAC9C,IAAI,CAAC,GAAG,CAAC,CAAC;IACV,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC;YAC1D,CAAC,IAAI,CAAC,CAAC,UAAU,CAAC,MAAM,CAAC;QAC3B,CAAC;IACH,CAAC;IACD,OAAO,CAAC,CAAC;AACX,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,SAAS,gBAAgB,CAAC,IAAY;IACpC,OAAO,IAAI,CAAC,OAAO,CAAC,oBAAoB,EAAE,EAAE,CAAC,CAAC,KAAK,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;AAC7D,CAAC;AAED,+DAA+D;AAC/D,SAAS,iBAAiB,CAAC,QAAyB;IAClD,MAAM,IAAI,GAAG,IAAI,GAAG,EAAU,CAAC;IAC/B,KAAK,MAAM,CAAC,IAAI,QAAQ,EAAE,CAAC;QACzB,IAAI,CAAC,CAAC,IAAI,KAAK,WAAW,IAAI,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,UAAU,CAAC,EAAE,CAAC;YAC1D,KAAK,MAAM,EAAE,IAAI,CAAC,CAAC,UAAU,EAAE,CAAC;gBAC9B,MAAM,CAAC,GAAG,EAAE,CAAC,QAAQ,EAAE,IAAI,CAAC;gBAC5B,IAAI,OAAO,CAAC,KAAK,QAAQ,IAAI,CAAC,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;oBAC1C,wDAAwD;oBACxD,2CAA2C;oBAC3C,MAAM,KAAK,GAAG,gBAAgB,CAAC,CAAC,CAAC,CAAC;oBAClC,IAAI,KAAK,CAAC,MAAM,GAAG,CAAC;wBAAE,IAAI,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC;gBACxC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IACD,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,CAAC,CAAC;AAC1B,CAAC;AAED;;;;GAIG;AACH,SAAS,WAAW,CAAC,SAAmB;IACtC,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,WAAW,EAAE,CAAC,CAAC;IACpD,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CACnC,CAAC,YAAY,EAAE,SAAS,EAAE,OAAO,EAAE,SAAS,EAAE,QAAQ,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CACrF,CAAC;IACF,MAAM,WAAW,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,WAAW,CAAC,CAAC,CAAC;IACjE,MAAM,OAAO,GAAG,KAAK,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,WAAW,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,CAAC,CAAC,IAAI,CAAC,CAAC,EAAE,EAAE,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IAEtG,IAAI,WAAW,IAAI,CAAC,OAAO;QAAE,OAAO,UAAU,CAAC;IAC/C,IAAI,WAAW,IAAI,CAAC,WAAW,IAAI,CAAC,OAAO;QAAE,OAAO,UAAU,CAAC;IAC/D,IAAI,OAAO,IAAI,CAAC,WAA
W;QAAE,OAAO,aAAa,CAAC;IAClD,OAAO,SAAS,CAAC;AACnB,CAAC;AAED;;;;;GAKG;AACH,SAAS,+BAA+B,CAAC,IAAY;IACnD,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,uBAAuB,CAAC,CAAC;IACjD,IAAI,CAAC,IAAI;QAAE,OAAO,EAAE,CAAC;IACrB,+DAA+D;IAC/D,MAAM,OAAO,GAAG,IAAI;SACjB,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,eAAe,EAAE,EAAE,CAAC,CAAC;SAC1C,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,MAAM,IAAI,GAAG,CAAC,CAAC;IAClC,OAAO,KAAK,CAAC,IAAI,CAAC,IAAI,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC;AACtC,CAAC;AAED;;;GAGG;AACH,SAAS,aAAa,CAAC,IAAY,EAAE,GAAG,GAAG,EAAE;IAC3C,MAAM,OAAO,GAAG,IAAI,CAAC,OAAO,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC,IAAI,EAAE,CAAC;IACjD,IAAI,OAAO,CAAC,MAAM,IAAI,GAAG;QAAE,OAAO,OAAO,CAAC;IAC1C,MAAM,GAAG,GAAG,OAAO,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAClC,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IACvC,OAAO,CAAC,SAAS,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,SAAS,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC;AAChE,CAAC;AAED,8EAA8E;AAC9E,cAAc;AACd,8EAA8E;AAE9E,8DAA8D;AAC9D,MAAM,gBAAgB,GAAG,GAAG,CAAC;AAC7B,uDAAuD;AACvD,MAAM,wBAAwB,GAAG,EAAE,CAAC;AACpC;;;;;;;;;;;;GAYG;AACH,MAAM,gBAAgB,GAAG,KAAK,CAAC;AAC/B,MAAM,wBAAwB,GAAG,KAAK,CAAC;AACvC,MAAM,kBAAkB,GAAG,GAAG,CAAC,CAAC,yCAAyC;AAEzE;;;;GAIG;AACH,SAAS,oBAAoB,CAAC,IAAY,EAAE,GAAW;IACrD,IAAI,IAAI,CAAC,MAAM,IAAI,GAAG;QAAE,OAAO,IAAI,CAAC;IACpC,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,CAAC;IAC/B,MAAM,gBAAgB,GAAG,GAAG,CAAC,WAAW,CAAC,GAAG,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,gBAAgB,GAAG,GAAG,GAAG,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,KAAK,CAAC,CAAC,EAAE,gBAAgB,CAAC,CAAC,CAAC,CAAC,GAAG,CAAC;IAClF,OAAO,MAAM,GAAG,gCAAgC,GAAG,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC;AAC7E,CAAC;AAED;;;;;;;;;;GAUG;AACH,MAAM,UAAU,uBAAuB,CAAC,OAAsB;IAC5D,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;IACxC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC,CAAC,yCAAyC;IAE/E,MAAM,OAAO,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACxC,IAAI,OAAO,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAE7B,MAAM,SAAS,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IAC/C,IAAI,SAAS,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,IAAI,CAAC;IAExC,uEAAuE;I
ACvE,8DAA8D;IAC9D,wBAAwB;IACxB,MAAM,OAAO,GAAG,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IAChD,IAAI,OAAO,CAAC,MAAM,GAAG,gBAAgB;QAAE,OAAO,IAAI,CAAC;IACnD,kEAAkE;IAClE,oEAAoE;IACpE,uEAAuE;IACvE,2DAA2D;IAC3D,MAAM,IAAI,GAAG,oBAAoB,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC;IAE7D,MAAM,MAAM,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;IACzC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,MAAM,SAAS,GAAG,iBAAiB,CAAC,QAAQ,CAAC,CAAC;IAC9C,MAAM,MAAM,GAAG,WAAW,CAAC,SAAS,CAAC,CAAC;IACtC,MAAM,OAAO,GAAG,+BAA+B,CAAC,OAAO,CAAC,CAAC;IAEzD,MAAM,OAAO,GAA2D;QACtE,KAAK,EAAE,aAAa,CAAC,MAAM,CAAC;QAC5B,IAAI;QACJ,GAAG,CAAC,OAAO,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC1C,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,MAAM,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;QAC7B,mEAAmE;QACnE,IAAI,EAAE,CAAC,gBAAgB,EAAE,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC;KAC3E,CAAC;IAEF,OAAO;QACL,IAAI,EAAE,SAAS;QACf,IAAI,EAAE,WAAW,CAAC,SAAS,EAAE,OAAO,CAAC;QACrC,OAAO;KACR,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,UAAU,yBAAyB,CAAC,OAAsB;IAC9D,MAAM,QAAQ,GAAG,OAAO,CAAC,QAAQ,IAAI,EAAE,CAAC;IACxC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAErC,MAAM,SAAS,GAAG,kBAAkB,CAAC,QAAQ,CAAC,CAAC;IAC/C,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAEtC,MAAM,OAAO,GAAG,aAAa,CAAC,QAAQ,CAAC,CAAC;IACxC,IAAI,OAAO,GAAG,CAAC,IAAI,QAAQ,CAAC,MAAM,GAAG,CAAC;QAAE,OAAO,IAAI,CAAC;IAEpD,MAAM,MAAM,GAAG,eAAe,CAAC,QAAQ,CAAC,CAAC;IACzC,IAAI,CAAC,MAAM;QAAE,OAAO,IAAI,CAAC;IAEzB,MAAM,aAAa,GAAG,SAAS,CAAC,SAAS,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC;IACtD,IAAI,aAAa,CAAC,MAAM,GAAG,wBAAwB;QAAE,OAAO,IAAI,CAAC;IACjE,oEAAoE;IACpE,iEAAiE;IACjE,MAAM,UAAU,GAAG,oBAAoB,CAAC,aAAa,EAAE,wBAAwB,CAAC,CAAC;IAEjF,MAAM,SAAS,GAAG,SAAS,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,CAAC;IACzC,MAAM,KAAK,GAAG,SAAS,CAAC,GAAG,CAAC,CAAC,IAAI,EAAE,EAAE,CAAC,CAAC;QACrC,oEAAoE;QACpE,+CAA+C;QAC/C,IAAI,EACF,IAAI,CAAC,MAAM,GAAG,kBAAkB;YAC9B,CAAC,CAAC,oBAAoB,CAAC,IAAI,EAAE,kBAAkB,CAAC;YAChD,CAAC,CAAC,IAAI;KACX,CAAC,CAAC,CAAC;IAEJ,MAAM,O
AAO,GAA6D;QACxE,WAAW,EAAE,aAAa,CAAC,MAAM,EAAE,GAAG,CAAC;QACvC,KAAK;QACL,UAAU;QACV,GAAG,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,CAAC,EAAE,SAAS,EAAE,OAAO,CAAC,KAAK,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;KACvD,CAAC;IAEF,OAAO;QACL,IAAI,EAAE,WAAW;QACjB,IAAI,EAAE,WAAW,CAAC,WAAW,EAAE,OAAO,CAAC;QACvC,OAAO;KACR,CAAC;AACJ,CAAC;AAED;;;;;;GAMG;AACH,MAAM,UAAU,kBAAkB,CAAC,OAAsB;IACvD,MAAM,GAAG,GAAoB,EAAE,CAAC;IAChC,MAAM,OAAO,GAAG,uBAAuB,CAAC,OAAO,CAAC,CAAC;IACjD,IAAI,OAAO;QAAE,GAAG,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;IAC/B,MAAM,SAAS,GAAG,yBAAyB,CAAC,OAAO,CAAC,CAAC;IACrD,IAAI,SAAS;QAAE,GAAG,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IACnC,OAAO,GAAG,CAAC;AACb,CAAC"}
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Phase 2b — session post-processor state tracking.
3
+ *
4
+ * Tracks which Hermes session files we've already processed, plus the
5
+ * SHA-256 hashes of the items we've extracted from each. Lives at
6
+ * `~/.nookplot/processed_sessions.json` with atomic tmp+rename writes so
7
+ * concurrent sync runs (if the user opts into a launchd timer AND invokes
8
+ * the CLI at the same time) can't corrupt the file.
9
+ *
10
+ * Two layers of dedup:
11
+ *
12
+ * 1. Session-level — if `sessions[session_id]` exists, skip the whole
13
+ * file on the next run. Cheapest check.
14
+ * 2. Item-level — within a session, each extracted item's content hash
15
+ * is recorded. The gateway's own `UNIQUE (agent_address, kind,
16
+ * content_hash)` unique index makes duplicate POSTs idempotent
17
+ * server-side anyway, but tracking locally lets `--force` do the
18
+ * right thing (re-extract, but skip items already captured).
19
+ *
20
+ * @module syncSessionsState
21
+ */
22
+ /** One item that was extracted + captured from a Hermes session. */
23
+ export interface ProcessedItem {
24
+ /** SHA-256 hex of the canonicalized item body — matches what the gateway hashes. */
25
+ hash: string;
26
+ /** 'finding' or 'reasoning' — mirrors the capture queue kind. */
27
+ kind: "finding" | "reasoning";
28
+ /** Queue row id returned by the gateway (UUID). Present only for successful captures. */
29
+ captureId?: string;
30
+ /** If the POST failed, the reason. Present only for failures. */
31
+ error?: string;
32
+ }
33
+ /** State for one Hermes session we've seen. */
34
+ export interface ProcessedSession {
35
+ /** ISO-8601 timestamp of when we finished processing this session. */
36
+ processedAt: string;
37
+ /** Items extracted + posted from this session. */
38
+ items: ProcessedItem[];
39
+ }
40
+ /** Top-level state file shape. */
41
+ export interface SyncState {
42
+ /** Bump when breaking the file schema so we can migrate old installs. */
43
+ version: 1;
44
+ /** session_id → processing record */
45
+ sessions: Record<string, ProcessedSession>;
46
+ }
47
+ /**
48
+ * Default location of the state file. Exposed so tests can override; in
49
+ * production we always use `~/.nookplot/processed_sessions.json` which
50
+ * sits next to `~/.nookplot/credentials.json` that the rest of the CLI
51
+ * uses — same directory, same permission model.
52
+ */
53
+ export declare function defaultStatePath(): string;
54
+ /**
55
+ * Load the state file, or return a blank state if it doesn't exist / is
56
+ * corrupted. Corruption fallback is deliberate: if someone hand-edits the
57
+ * file into garbage, we'd rather re-process everything than crash the
58
+ * installer. Re-processing is safe because the gateway's dedup guard
59
+ * blocks exact duplicates.
60
+ */
61
+ export declare function loadState(path?: string): SyncState;
62
+ /**
63
+ * Atomic write: serialize → write to `${path}.tmp` → rename over the real
64
+ * file. `rename` is atomic on POSIX + NTFS, so a reader can never see a
65
+ * half-written file. No fsync here — if the process crashes between write
66
+ * and rename, the old file is untouched, and worst case we re-process a
67
+ * session (which the gateway dedup will catch).
68
+ */
69
+ export declare function saveState(state: SyncState, path?: string): void;
70
+ /**
71
+ * Has this session already been processed? If so, the caller should skip
72
+ * it on the next run (unless `--force` was passed, in which case the
73
+ * caller explicitly bypasses this check).
74
+ */
75
+ export declare function isSessionProcessed(state: SyncState, sessionId: string): boolean;
76
+ /**
77
+ * Has this specific item already been captured? Used inside `--force` re-runs
78
+ * so we don't hit the gateway with POSTs that we *know* will be deduped.
79
+ * Saves a round-trip + avoids inflating the per-agent rate-limit counter.
80
+ */
81
+ export declare function isItemAlreadyCaptured(state: SyncState, sessionId: string, hash: string): boolean;
82
+ /**
83
+ * Record the result of processing one session. Any prior record for the
84
+ * session is replaced by a merged one — if a `--force` re-run captures more
85
+ * items than the first pass (e.g., the extractor heuristic was loosened),
86
+ * the merged record reflects everything captured across both passes.
87
+ */
88
+ export declare function markSessionProcessed(state: SyncState, sessionId: string, items: ProcessedItem[]): SyncState;
89
+ //# sourceMappingURL=syncSessionsState.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"syncSessionsState.d.ts","sourceRoot":"","sources":["../src/syncSessionsState.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;GAoBG;AAUH,oEAAoE;AACpE,MAAM,WAAW,aAAa;IAC5B,oFAAoF;IACpF,IAAI,EAAE,MAAM,CAAC;IACb,iEAAiE;IACjE,IAAI,EAAE,SAAS,GAAG,WAAW,CAAC;IAC9B,yFAAyF;IACzF,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,+CAA+C;AAC/C,MAAM,WAAW,gBAAgB;IAC/B,8DAA8D;IAC9D,WAAW,EAAE,MAAM,CAAC;IACpB,kDAAkD;IAClD,KAAK,EAAE,aAAa,EAAE,CAAC;CACxB;AAED,kCAAkC;AAClC,MAAM,WAAW,SAAS;IACxB,yEAAyE;IACzE,OAAO,EAAE,CAAC,CAAC;IACX,qCAAqC;IACrC,QAAQ,EAAE,MAAM,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;CAC5C;AAMD;;;;;GAKG;AACH,wBAAgB,gBAAgB,IAAI,MAAM,CAEzC;AAMD;;;;;;GAMG;AACH,wBAAgB,SAAS,CAAC,IAAI,GAAE,MAA2B,GAAG,SAAS,CAsBtE;AAED;;;;;;GAMG;AACH,wBAAgB,SAAS,CAAC,KAAK,EAAE,SAAS,EAAE,IAAI,GAAE,MAA2B,GAAG,IAAI,CAMnF;AAMD;;;;GAIG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,SAAS,EAAE,SAAS,EAAE,MAAM,GAAG,OAAO,CAE/E;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CACnC,KAAK,EAAE,SAAS,EAChB,SAAS,EAAE,MAAM,EACjB,IAAI,EAAE,MAAM,GACX,OAAO,CAIT;AAED;;;;;GAKG;AACH,wBAAgB,oBAAoB,CAClC,KAAK,EAAE,SAAS,EAChB,SAAS,EAAE,MAAM,EACjB,KAAK,EAAE,aAAa,EAAE,GACrB,SAAS,CA8BX"}