pi-session-search 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Sam Foy
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,84 @@
1
+ # pi-session-search
2
+
3
+ Index, summarize, and search past [pi](https://github.com/badlogic/pi-mono) coding sessions. Provides semantic search across your entire session history — both active and archived sessions.
4
+
5
+ ## Features
6
+
7
+ - **Semantic search** — Find past sessions by topic, not just keywords (`session_search`)
8
+ - **Browse & filter** — List sessions by project, date range, archive status (`session_list`)
9
+ - **Read conversations** — View the full conversation from any past session (`session_read`)
10
+ - **Auto-indexing** — Parses JSONL session files on startup, tracks changes incrementally
11
+ - **Archive support** — Indexes both `~/.pi/agent/sessions/` and `~/.pi/agent/sessions-archive/`
12
+ - **Multiple embedders** — OpenAI, AWS Bedrock, or local Ollama
13
+
14
+ ## Install
15
+
16
+ ```bash
17
+ pi install pi-session-search
18
+ ```
19
+
20
+ Or add to `~/.pi/agent/settings.json`:
21
+
22
+ ```json
23
+ {
24
+ "packages": ["npm:pi-session-search"]
25
+ }
26
+ ```
27
+
28
+ ## Setup
29
+
30
+ Run `/session-search-setup` in pi to configure the embedding provider:
31
+
32
+ - **OpenAI** — Uses `text-embedding-3-small` (needs `OPENAI_API_KEY`)
33
+ - **Bedrock** — Uses Titan Embeddings v2 (needs AWS credentials)
34
+ - **Ollama** — Uses `nomic-embed-text` (needs local Ollama running)
35
+
36
+ Config is stored at `~/.pi/session-search/config.json`.
37
+
38
+ ## Usage
39
+
40
+ ### Semantic search
41
+ ```
42
+ session_search(query="how did we debug the Lambda timeout")
43
+ session_search(query="CI pipeline configuration", limit=5)
44
+ ```
45
+
46
+ ### Browse sessions
47
+ ```
48
+ session_list(project="Rosie", after="2026-03-01")
49
+ session_list(archived=true, limit=20)
50
+ ```
51
+
52
+ ### Read a session
53
+ ```
54
+ session_read(session="<file-path-or-uuid>")
55
+ session_read(session="<id>", offset=50, limit=50)
56
+ ```
57
+
58
+ ## Commands
59
+
60
+ | Command | Description |
61
+ |---------|-------------|
62
+ | `/session-search-setup` | Configure embedding provider |
63
+ | `/session-reindex` | Force full re-index of all sessions |
64
+
65
+ ## How It Works
66
+
67
+ 1. On startup, discovers all `.jsonl` session files in `~/.pi/agent/sessions/` (and `~/.pi/agent/sessions-archive/` if it exists)
68
+ 2. Parses each session to extract: user messages, assistant text, tool calls, files modified, models used, compaction summaries
69
+ 3. Generates a summary and embedding for each session
70
+ 4. Stores the index at `~/.pi/session-search/index/`
71
+ 5. On subsequent startups, only re-indexes new or changed sessions
72
+ 6. Re-syncs in the background every 5 minutes to pick up new sessions
73
+
74
+ You can also configure extra session/archive directories during setup if you store sessions in non-default locations.
75
+
76
+ ## Environment Variables
77
+
78
+ | Variable | Description |
79
+ |----------|-------------|
80
+ | `OPENAI_API_KEY` | Required for OpenAI embedder |
81
+
82
+ ## License
83
+
84
+ MIT
package/package.json ADDED
@@ -0,0 +1,43 @@
1
+ {
2
+ "name": "pi-session-search",
3
+ "version": "0.1.0",
4
+ "description": "Index, summarize, and search past pi sessions. Covers both active and archived sessions, enabling semantic search and introspection over your coding history.",
5
+ "keywords": [
6
+ "pi-package",
7
+ "extension",
8
+ "session",
9
+ "search",
10
+ "semantic-search",
11
+ "embeddings"
12
+ ],
13
+ "files": [
14
+ "src",
15
+ "skills",
16
+ "README.md",
17
+ "LICENSE"
18
+ ],
19
+ "license": "MIT",
20
+ "repository": {
21
+ "type": "git",
22
+ "url": "https://github.com/samfoy/pi-session-search"
23
+ },
24
+ "pi": {
25
+ "extensions": [
26
+ "./src/index.ts"
27
+ ],
28
+ "skills": [
29
+ "./skills"
30
+ ]
31
+ },
32
+ "peerDependencies": {
33
+ "@mariozechner/pi-coding-agent": "*",
34
+ "@sinclair/typebox": "*"
35
+ },
36
+ "optionalDependencies": {
37
+ "@aws-sdk/client-bedrock-runtime": "^3.700.0",
38
+ "@aws-sdk/credential-providers": "^3.700.0"
39
+ },
40
+ "devDependencies": {
41
+ "@types/node": "^25.5.0"
42
+ }
43
+ }
@@ -0,0 +1,68 @@
1
+ ---
2
+ name: session-history
3
+ description: Search, browse, and read past pi coding sessions. Use when the user asks about previous work, past decisions, what was done before, or wants to find a specific session. Covers both active and archived sessions.
4
+ ---
5
+
6
+ # Session History
7
+
8
+ Search, browse, and introspect on past pi coding sessions — including archived ones.
9
+
10
+ ## Available Tools
11
+
12
+ This skill provides three tools:
13
+
14
+ ### session_search
15
+ Semantic search across all indexed sessions. Use for finding sessions by topic, technology, or intent.
16
+
17
+ ```
18
+ session_search(query="refactoring the auth module")
19
+ session_search(query="Lambda timeout debugging", limit=5)
20
+ session_search(query="setting up CI pipeline for Nessie")
21
+ ```
22
+
23
+ ### session_list
24
+ Browse sessions with filters. Good for time-based queries or project-specific browsing.
25
+
26
+ ```
27
+ session_list(project="Rosie") # Sessions in the Rosie project
28
+ session_list(after="2026-03-01", limit=10) # Recent sessions
29
+ session_list(archived=true, limit=20) # Archived sessions only
30
+ session_list(project="pi-slack-bot", after="2026-03-10")
31
+ ```
32
+
33
+ ### session_read
34
+ Read the full conversation from a specific session. Use the file path or UUID from search/list results.
35
+
36
+ ```
37
+ session_read(session="~/.pi/agent/sessions/--workplace-samfp-Rosie--/2026-03-10T21-36-44.jsonl")
38
+ session_read(session="124c2fe2-820c-4d63-8899-eb8d48007d39")
39
+ session_read(session="...", offset=50, limit=50) # Pagination for long sessions
40
+ session_read(session="...", include_tools=true) # Include tool call results
41
+ ```
42
+
43
+ ## Workflow
44
+
45
+ 1. **Find sessions**: Use `session_search` for semantic queries or `session_list` for browsing
46
+ 2. **Read details**: Use `session_read` with the file path from results to see the full conversation
47
+ 3. **Extract context**: Use information from past sessions to inform current work
48
+
49
+ ## Setup
50
+
51
+ If not yet configured, run `/session-search-setup` to choose an embedding provider (OpenAI, Bedrock, or Ollama).
52
+
53
+ To force a full re-index, run `/session-reindex`.
54
+
55
+ ## What Gets Indexed
56
+
57
+ - All active sessions from `~/.pi/agent/sessions/`
58
+ - All archived sessions from `~/.pi/agent/sessions-archive/`
59
+ - User messages, assistant responses, tool usage patterns
60
+ - Compaction summaries (condensed session context)
61
+ - Files read/modified, models used, project directories
62
+
63
+ ## Tips
64
+
65
+ - Session search is best for "when did we...", "how did we handle...", "what approach did we use for..." queries
66
+ - Session list is best for "show me recent sessions", "what did we work on in project X" queries
67
+ - For very long sessions, use `session_read` with pagination (`offset`/`limit`)
68
+ - Set `include_tools=true` on `session_read` when you need to see the actual tool outputs (verbose)
package/src/config.ts ADDED
@@ -0,0 +1,54 @@
1
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
import { homedir } from "node:os";
import { dirname, join } from "node:path";
import type { EmbedderConfig } from "./embedder";
4
+
5
+ // ─── Types ───────────────────────────────────────────────────────────
6
+
7
/**
 * Fully-resolved runtime configuration: loadConfig() has already
 * defaulted the optional directory lists to empty arrays.
 */
export interface Config {
  /** Extra session directories to scan (in addition to default) */
  extraSessionDirs: string[];
  /** Extra archive directories to scan (in addition to default) */
  extraArchiveDirs: string[];
  /** Embedder configuration */
  embedder: EmbedderConfig;
}
15
+
16
/**
 * Shape of config.json as stored on disk. Mirrors Config, except the
 * directory lists are optional so hand-edited files may omit them;
 * loadConfig() substitutes empty arrays for missing lists.
 */
export interface ConfigFile {
  /** Extra session directories to scan; omitted means none. */
  extraSessionDirs?: string[];
  /** Extra archive directories to scan; omitted means none. */
  extraArchiveDirs?: string[];
  /** Embedder configuration (required). */
  embedder: EmbedderConfig;
}
21
+
22
+ // ─── Paths ───────────────────────────────────────────────────────────
23
+
24
+ const CONFIG_DIR = join(process.env.HOME || "~", ".pi", "session-search");
25
+ const CONFIG_FILE = join(CONFIG_DIR, "config.json");
26
+ const INDEX_DIR = join(CONFIG_DIR, "index");
27
+
28
+ export function getConfigPath(): string {
29
+ return CONFIG_FILE;
30
+ }
31
+
32
+ export function getIndexDir(): string {
33
+ return INDEX_DIR;
34
+ }
35
+
36
+ // ─── Load / Save ─────────────────────────────────────────────────────
37
+
38
+ export function loadConfig(): Config | null {
39
+ if (!existsSync(CONFIG_FILE)) return null;
40
+
41
+ const raw = readFileSync(CONFIG_FILE, "utf8");
42
+ const file = JSON.parse(raw) as ConfigFile;
43
+
44
+ return {
45
+ extraSessionDirs: file.extraSessionDirs ?? [],
46
+ extraArchiveDirs: file.extraArchiveDirs ?? [],
47
+ embedder: file.embedder,
48
+ };
49
+ }
50
+
51
+ export function saveConfig(file: ConfigFile): void {
52
+ mkdirSync(dirname(CONFIG_FILE), { recursive: true });
53
+ writeFileSync(CONFIG_FILE, JSON.stringify(file, null, 2), "utf8");
54
+ }
package/src/embedder.ts ADDED
@@ -0,0 +1,251 @@
1
+ /**
2
+ * Embedding interface + factory. Reuses the same provider patterns as
3
+ * pi-knowledge-search but lives in this package to avoid a hard dependency.
4
+ */
5
+
6
/** A provider-agnostic text-embedding backend. */
export interface Embedder {
  /** Embed a single text into a vector; throws on failure. */
  embed(text: string, signal?: AbortSignal): Promise<number[]>;
  /**
   * Embed many texts. The result aligns index-for-index with the
   * input; an entry may be null when that text could not be embedded
   * (provider-dependent — see each implementation).
   */
  embedBatch(
    texts: string[],
    signal?: AbortSignal
  ): Promise<(number[] | null)[]>;
}
13
+
14
/**
 * User-facing embedder settings. Any omitted optional field falls back
 * to the per-provider DEFAULTS applied in createEmbedder().
 */
export interface EmbedderConfig {
  /** Which provider implementation to use. */
  type: "openai" | "bedrock" | "ollama";
  // OpenAI
  /** API key; when unset, $OPENAI_API_KEY is used instead. */
  apiKey?: string;
  /** Model identifier — used by all providers, not just OpenAI. */
  model?: string;
  // Bedrock
  /** AWS shared-credentials profile name (fromIni). */
  profile?: string;
  /** AWS region for the Bedrock runtime client. */
  region?: string;
  // Ollama
  /** Base URL of the Ollama server (trailing slash is stripped). */
  url?: string;
  // Shared
  /** Requested output dimensionality (sent to OpenAI and Bedrock only). */
  dimensions?: number;
}
27
+
28
/**
 * Per-provider fallback values, merged *under* the user's config in
 * createEmbedder() (explicit config keys win). No `dimensions` default
 * for ollama: the Ollama request body never sends dimensions, so the
 * model's native size is used.
 */
const DEFAULTS: Record<string, Partial<EmbedderConfig>> = {
  openai: { model: "text-embedding-3-small", dimensions: 512 },
  bedrock: {
    model: "amazon.titan-embed-text-v2:0",
    region: "us-east-1",
    profile: "default",
    dimensions: 512,
  },
  ollama: { model: "nomic-embed-text", url: "http://localhost:11434" },
};
38
+
39
+ export function createEmbedder(config: EmbedderConfig): Embedder {
40
+ const defaults = DEFAULTS[config.type] ?? {};
41
+ const merged = { ...defaults, ...config };
42
+
43
+ switch (merged.type) {
44
+ case "openai":
45
+ return new OpenAIEmbedder(
46
+ merged.apiKey || process.env.OPENAI_API_KEY || "",
47
+ merged.model!,
48
+ merged.dimensions!
49
+ );
50
+ case "bedrock":
51
+ return new BedrockEmbedder(
52
+ merged.profile!,
53
+ merged.region!,
54
+ merged.model!,
55
+ merged.dimensions!
56
+ );
57
+ case "ollama":
58
+ return new OllamaEmbedder(merged.url!, merged.model!);
59
+ default:
60
+ throw new Error(`Unknown embedder type: ${merged.type}`);
61
+ }
62
+ }
63
+
64
+ // ─── Helpers ─────────────────────────────────────────────────────────
65
+
66
+ function truncate(text: string, maxChars = 12000): string {
67
+ return text.length > maxChars ? text.slice(0, maxChars) : text;
68
+ }
69
+
70
+ async function parallelMap<T, R>(
71
+ items: T[],
72
+ fn: (item: T) => Promise<R>,
73
+ concurrency: number,
74
+ signal?: AbortSignal
75
+ ): Promise<R[]> {
76
+ const results: R[] = new Array(items.length);
77
+ let cursor = 0;
78
+ const worker = async () => {
79
+ while (cursor < items.length) {
80
+ if (signal?.aborted) throw new Error("Aborted");
81
+ const idx = cursor++;
82
+ results[idx] = await fn(items[idx]);
83
+ }
84
+ };
85
+ await Promise.all(
86
+ Array.from({ length: Math.min(concurrency, items.length) }, () => worker())
87
+ );
88
+ return results;
89
+ }
90
+
91
+ // ─── OpenAI ──────────────────────────────────────────────────────────
92
+
93
+ class OpenAIEmbedder implements Embedder {
94
+ constructor(
95
+ private apiKey: string,
96
+ private model: string,
97
+ private dimensions: number
98
+ ) {}
99
+
100
+ async embed(text: string, signal?: AbortSignal): Promise<number[]> {
101
+ const [result] = await this.embedBatch([text], signal);
102
+ if (!result) throw new Error("Embedding failed");
103
+ return result;
104
+ }
105
+
106
+ async embedBatch(
107
+ texts: string[],
108
+ signal?: AbortSignal
109
+ ): Promise<(number[] | null)[]> {
110
+ const BATCH = 100;
111
+ const results: (number[] | null)[] = new Array(texts.length).fill(null);
112
+
113
+ for (let i = 0; i < texts.length; i += BATCH) {
114
+ if (signal?.aborted) throw new Error("Aborted");
115
+ const batch = texts.slice(i, i + BATCH).map((t) => truncate(t));
116
+
117
+ const res = await fetch("https://api.openai.com/v1/embeddings", {
118
+ method: "POST",
119
+ headers: {
120
+ Authorization: `Bearer ${this.apiKey}`,
121
+ "Content-Type": "application/json",
122
+ },
123
+ body: JSON.stringify({
124
+ input: batch,
125
+ model: this.model,
126
+ dimensions: this.dimensions,
127
+ }),
128
+ signal,
129
+ });
130
+
131
+ if (!res.ok) {
132
+ const body = await res.text();
133
+ throw new Error(`OpenAI ${res.status}: ${body.slice(0, 200)}`);
134
+ }
135
+
136
+ const json = (await res.json()) as {
137
+ data: { embedding: number[]; index: number }[];
138
+ };
139
+ for (const item of json.data) {
140
+ results[i + item.index] = item.embedding;
141
+ }
142
+ }
143
+ return results;
144
+ }
145
+ }
146
+
147
+ // ─── Bedrock ─────────────────────────────────────────────────────────
148
+
149
+ class BedrockEmbedder implements Embedder {
150
+ private clientPromise: Promise<any>;
151
+
152
+ constructor(
153
+ profile: string,
154
+ region: string,
155
+ private model: string,
156
+ private dimensions: number
157
+ ) {
158
+ this.clientPromise = (async () => {
159
+ const { BedrockRuntimeClient } = await import(
160
+ "@aws-sdk/client-bedrock-runtime"
161
+ );
162
+ const { fromIni } = await import("@aws-sdk/credential-providers");
163
+ return new BedrockRuntimeClient({
164
+ region,
165
+ credentials: fromIni({ profile }),
166
+ });
167
+ })();
168
+ }
169
+
170
+ async embed(text: string, signal?: AbortSignal): Promise<number[]> {
171
+ const [result] = await this.embedBatch([text], signal);
172
+ if (!result) throw new Error("Embedding failed");
173
+ return result;
174
+ }
175
+
176
+ async embedBatch(
177
+ texts: string[],
178
+ signal?: AbortSignal
179
+ ): Promise<(number[] | null)[]> {
180
+ const client = await this.clientPromise;
181
+ return parallelMap(
182
+ texts,
183
+ async (text) => {
184
+ const { InvokeModelCommand } = await import(
185
+ "@aws-sdk/client-bedrock-runtime"
186
+ );
187
+ const body = JSON.stringify({
188
+ inputText: truncate(text),
189
+ dimensions: this.dimensions,
190
+ normalize: true,
191
+ });
192
+ const cmd = new InvokeModelCommand({
193
+ modelId: this.model,
194
+ contentType: "application/json",
195
+ accept: "application/json",
196
+ body: new TextEncoder().encode(body),
197
+ });
198
+ const res = await client.send(cmd);
199
+ const parsed = JSON.parse(new TextDecoder().decode(res.body));
200
+ if (!parsed.embedding) throw new Error("No embedding in response");
201
+ return parsed.embedding;
202
+ },
203
+ 10,
204
+ signal
205
+ );
206
+ }
207
+ }
208
+
209
+ // ─── Ollama ──────────────────────────────────────────────────────────
210
+
211
+ class OllamaEmbedder implements Embedder {
212
+ constructor(
213
+ private url: string,
214
+ private model: string
215
+ ) {
216
+ this.url = url.replace(/\/$/, "");
217
+ }
218
+
219
+ async embed(text: string, signal?: AbortSignal): Promise<number[]> {
220
+ const res = await fetch(`${this.url}/api/embed`, {
221
+ method: "POST",
222
+ headers: { "Content-Type": "application/json" },
223
+ body: JSON.stringify({ model: this.model, input: truncate(text) }),
224
+ signal,
225
+ });
226
+ if (!res.ok) {
227
+ const body = await res.text();
228
+ throw new Error(`Ollama ${res.status}: ${body.slice(0, 200)}`);
229
+ }
230
+ const json = (await res.json()) as { embeddings: number[][] };
231
+ return json.embeddings[0];
232
+ }
233
+
234
+ async embedBatch(
235
+ texts: string[],
236
+ signal?: AbortSignal
237
+ ): Promise<(number[] | null)[]> {
238
+ return parallelMap(
239
+ texts,
240
+ async (text) => {
241
+ try {
242
+ return await this.embed(text, signal);
243
+ } catch {
244
+ return null;
245
+ }
246
+ },
247
+ 4,
248
+ signal
249
+ );
250
+ }
251
+ }