pdf-brain 1.3.0 → 2.0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/src/types.test.ts CHANGED
@@ -28,12 +28,16 @@ describe("Unified Search Types", () => {
28
28
  describe("DocumentSearchResult", () => {
29
29
  test("should create valid document search result", () => {
30
30
  const result = new DocumentSearchResult({
31
+ chunkId: "doc-123-0",
31
32
  docId: "doc-123",
32
33
  title: "Test Document",
33
34
  page: 1,
34
35
  chunkIndex: 0,
35
36
  content: "Test content",
36
37
  score: 0.95,
38
+ rawScore: 0.95,
39
+ scoreType: "cosine_similarity",
40
+ vectorScore: 0.95,
37
41
  matchType: "vector",
38
42
  entityType: "document",
39
43
  });
@@ -45,12 +49,16 @@ describe("Unified Search Types", () => {
45
49
 
46
50
  test("should support optional expanded content", () => {
47
51
  const result = new DocumentSearchResult({
52
+ chunkId: "doc-123-0",
48
53
  docId: "doc-123",
49
54
  title: "Test Document",
50
55
  page: 1,
51
56
  chunkIndex: 0,
52
57
  content: "Test content",
53
58
  score: 0.95,
59
+ rawScore: 0.95,
60
+ scoreType: "cosine_similarity",
61
+ vectorScore: 0.95,
54
62
  matchType: "vector",
55
63
  entityType: "document",
56
64
  expandedContent: "Expanded test content",
@@ -69,6 +77,8 @@ describe("Unified Search Types", () => {
69
77
  prefLabel: "Machine Learning",
70
78
  definition: "A subset of artificial intelligence...",
71
79
  score: 0.88,
80
+ rawScore: 0.88,
81
+ scoreType: "cosine_similarity",
72
82
  entityType: "concept",
73
83
  });
74
84
 
@@ -82,12 +92,16 @@ describe("Unified Search Types", () => {
82
92
  describe("UnifiedSearchResult", () => {
83
93
  test("should accept DocumentSearchResult", () => {
84
94
  const docResult: UnifiedSearchResult = new DocumentSearchResult({
95
+ chunkId: "doc-123-0",
85
96
  docId: "doc-123",
86
97
  title: "Test Document",
87
98
  page: 1,
88
99
  chunkIndex: 0,
89
100
  content: "Test content",
90
101
  score: 0.95,
102
+ rawScore: 0.95,
103
+ scoreType: "cosine_similarity",
104
+ vectorScore: 0.95,
91
105
  matchType: "vector",
92
106
  entityType: "document",
93
107
  });
@@ -101,6 +115,8 @@ describe("Unified Search Types", () => {
101
115
  prefLabel: "Machine Learning",
102
116
  definition: "A subset of artificial intelligence...",
103
117
  score: 0.88,
118
+ rawScore: 0.88,
119
+ scoreType: "cosine_similarity",
104
120
  entityType: "concept",
105
121
  });
106
122
 
@@ -110,12 +126,16 @@ describe("Unified Search Types", () => {
110
126
  test("should discriminate by entityType", () => {
111
127
  const results: UnifiedSearchResult[] = [
112
128
  new DocumentSearchResult({
129
+ chunkId: "doc-123-0",
113
130
  docId: "doc-123",
114
131
  title: "Test Document",
115
132
  page: 1,
116
133
  chunkIndex: 0,
117
134
  content: "Test content",
118
135
  score: 0.95,
136
+ rawScore: 0.95,
137
+ scoreType: "cosine_similarity",
138
+ vectorScore: 0.95,
119
139
  matchType: "vector",
120
140
  entityType: "document",
121
141
  }),
@@ -124,6 +144,8 @@ describe("Unified Search Types", () => {
124
144
  prefLabel: "Machine Learning",
125
145
  definition: "A subset of artificial intelligence...",
126
146
  score: 0.88,
147
+ rawScore: 0.88,
148
+ scoreType: "cosine_similarity",
127
149
  entityType: "concept",
128
150
  }),
129
151
  ];
package/src/types.ts CHANGED
@@ -52,12 +52,22 @@ export type EntityType = "document" | "concept";
52
52
  * @deprecated Use DocumentSearchResult for unified search. Kept for backwards compatibility.
53
53
  */
54
54
  export class SearchResult extends Schema.Class<SearchResult>("SearchResult")({
55
+ chunkId: Schema.String,
55
56
  docId: Schema.String,
56
57
  title: Schema.String,
57
58
  page: Schema.Number,
58
59
  chunkIndex: Schema.Number,
59
60
  content: Schema.String,
61
+ /** Normalized score in 0..1 for ranking across match types */
60
62
  score: Schema.Number,
63
+ /** Raw score from the underlying engine (e.g. cosine similarity, FTS rank) */
64
+ rawScore: Schema.Number,
65
+ /** What rawScore represents (do not assume one score meaning across engines) */
66
+ scoreType: Schema.Literal("cosine_similarity", "fts_rank", "hybrid"),
67
+ /** Optional component score for vector results */
68
+ vectorScore: Schema.optional(Schema.Number),
69
+ /** Optional component score for FTS results (raw FTS rank; often negative, more negative = better) */
70
+ ftsRank: Schema.optional(Schema.Number),
61
71
  matchType: Schema.Literal("vector", "fts", "hybrid"),
62
72
  /** Expanded context around the match (only populated when expandChars > 0) */
63
73
  expandedContent: Schema.optional(Schema.String),
@@ -65,7 +75,47 @@ export class SearchResult extends Schema.Class<SearchResult>("SearchResult")({
65
75
  expandedRange: Schema.optional(
66
76
  Schema.Struct({ start: Schema.Number, end: Schema.Number })
67
77
  ),
68
- }) {}
78
+ }) {
79
+ /**
80
+ * Backwards-compatible constructor:
81
+ * Older callers used `SearchResult` without chunkId/rawScore/scoreType.
82
+ *
83
+ * This type is deprecated in favor of `DocumentSearchResult`, but we keep
84
+ * legacy input working so downstream code doesn't explode.
85
+ */
86
+ constructor(props: any) {
87
+ const docId = props?.docId;
88
+ const page = props?.page;
89
+ const chunkIndex = props?.chunkIndex;
90
+
91
+ const matchType: "vector" | "fts" | "hybrid" = props?.matchType;
92
+ const score: number = props?.score;
93
+
94
+ const chunkId =
95
+ props?.chunkId ?? `legacy:${String(docId)}:${String(page)}:${String(chunkIndex)}`;
96
+
97
+ const rawScore = props?.rawScore ?? score;
98
+
99
+ const scoreType =
100
+ props?.scoreType ??
101
+ (matchType === "fts"
102
+ ? "fts_rank"
103
+ : matchType === "hybrid"
104
+ ? "hybrid"
105
+ : "cosine_similarity");
106
+
107
+ const vectorScore =
108
+ props?.vectorScore ?? (matchType === "vector" ? score : undefined);
109
+
110
+ super({
111
+ ...props,
112
+ chunkId,
113
+ rawScore,
114
+ scoreType,
115
+ vectorScore,
116
+ });
117
+ }
118
+ }
69
119
 
70
120
  /**
71
121
  * Document search result with entity type discriminator
@@ -73,12 +123,22 @@ export class SearchResult extends Schema.Class<SearchResult>("SearchResult")({
73
123
  export class DocumentSearchResult extends Schema.Class<DocumentSearchResult>(
74
124
  "DocumentSearchResult"
75
125
  )({
126
+ chunkId: Schema.String,
76
127
  docId: Schema.String,
77
128
  title: Schema.String,
78
129
  page: Schema.Number,
79
130
  chunkIndex: Schema.Number,
80
131
  content: Schema.String,
132
+ /** Normalized score in 0..1 for ranking across match types */
81
133
  score: Schema.Number,
134
+ /** Raw score from the underlying engine (e.g. cosine similarity, FTS rank) */
135
+ rawScore: Schema.Number,
136
+ /** What rawScore represents (do not assume one score meaning across engines) */
137
+ scoreType: Schema.Literal("cosine_similarity", "fts_rank", "hybrid"),
138
+ /** Optional component score for vector results */
139
+ vectorScore: Schema.optional(Schema.Number),
140
+ /** Optional component score for FTS results (raw FTS rank; often negative, more negative = better) */
141
+ ftsRank: Schema.optional(Schema.Number),
82
142
  matchType: Schema.Literal("vector", "fts", "hybrid"),
83
143
  entityType: Schema.Literal("document"),
84
144
  /** Expanded context around the match (only populated when expandChars > 0) */
@@ -98,7 +158,11 @@ export class ConceptSearchResult extends Schema.Class<ConceptSearchResult>(
98
158
  conceptId: Schema.String,
99
159
  prefLabel: Schema.String,
100
160
  definition: Schema.String,
161
+ /** Normalized score in 0..1 */
101
162
  score: Schema.Number,
163
+ /** Raw score from the underlying engine (cosine similarity) */
164
+ rawScore: Schema.Number,
165
+ scoreType: Schema.Literal("cosine_similarity"),
102
166
  entityType: Schema.Literal("concept"),
103
167
  }) {}
104
168
 
@@ -166,6 +230,9 @@ export class Config extends Schema.Class<Config>("Config")({
166
230
  host: Schema.String,
167
231
  autoInstall: Schema.Boolean,
168
232
  }),
233
+ gateway: Schema.optionalWith(Schema.Struct({
234
+ apiKey: Schema.optional(Schema.String),
235
+ }), { default: () => ({}) }),
169
236
  }) {
170
237
  /**
171
238
  * Default configuration: Ollama for all providers
@@ -187,7 +254,15 @@ export class Config extends Schema.Class<Config>("Config")({
187
254
  host: "http://localhost:11434",
188
255
  autoInstall: true,
189
256
  },
257
+ gateway: {},
190
258
  });
259
+
260
+ /**
261
+ * Resolve the gateway API key: config takes precedence over env var.
262
+ */
263
+ get gatewayApiKey(): string | undefined {
264
+ return this.gateway.apiKey ?? process.env.AI_GATEWAY_API_KEY;
265
+ }
191
266
  }
192
267
 
193
268
  // ============================================================================
@@ -224,7 +299,7 @@ export function loadConfig(): Config {
224
299
 
225
300
  /**
226
301
  * Save config to $PDF_LIBRARY_PATH/config.json.
227
- * API keys are never stored - they come from env vars (AI_GATEWAY_API_KEY).
302
+ * API keys can be stored in config or read from env var AI_GATEWAY_API_KEY.
228
303
  */
229
304
  export function saveConfig(config: Config): void {
230
305
  const libraryPath =
@@ -268,6 +343,11 @@ export class AddOptions extends Schema.Class<AddOptions>("AddOptions")({
268
343
  metadata: Schema.optional(
269
344
  Schema.Record({ key: Schema.String, value: Schema.Unknown })
270
345
  ),
346
+ /**
347
+ * Internal/advanced: preserve original `addedAt` on re-add/rechunk workflows.
348
+ * CLI does not expose this directly.
349
+ */
350
+ addedAt: Schema.optional(Schema.Date),
271
351
  }) {}
272
352
 
273
353
  // ============================================================================
package/src/updater.ts CHANGED
@@ -9,8 +9,9 @@
9
9
 
10
10
  import { existsSync, mkdirSync, readFileSync, writeFileSync, renameSync, unlinkSync, chmodSync } from "fs";
11
11
  import { join } from "path";
12
+ import { logInfo } from "./logger.js";
12
13
 
13
- const REPO = "joelhooks/pdf-library";
14
+ const REPO = "joelhooks/pdf-brain";
14
15
  const STATE_DIR = join(process.env.HOME || "~", ".pdf-brain");
15
16
  const STATE_FILE = join(STATE_DIR, "update-check.json");
16
17
  const CHECK_INTERVAL_MS = 24 * 60 * 60 * 1000; // 1 day
@@ -112,6 +113,10 @@ async function downloadAndReplace(version: string): Promise<boolean> {
112
113
  * Current invocation keeps running the old code — new version takes effect next run.
113
114
  */
114
115
  export function backgroundUpdateCheck(currentVersion: string): void {
116
+ // Agent-first default: disable background updates unless explicitly enabled.
117
+ // Background network calls + non-deterministic stderr output are a footgun for tool callers.
118
+ if (process.env.PDF_BRAIN_BACKGROUND_UPDATE !== "1") return;
119
+
115
120
  // Don't auto-update in dev mode
116
121
  if (currentVersion.includes("compiled") || currentVersion === "0.0.0") return;
117
122
 
@@ -135,8 +140,8 @@ export function backgroundUpdateCheck(currentVersion: string): void {
135
140
  if (ok) {
136
141
  newState.lastAutoUpdate = now;
137
142
  newState.latestVersion = latest;
138
- // Brief note so they know why behavior might change
139
- console.error(`\x1b[2mUpdated pdf-brain v${currentVersion} v${latest}\x1b[0m`);
143
+ // Brief note so they know why behavior might change (stderr only).
144
+ logInfo(`Updated pdf-brain v${currentVersion} -> v${latest}`);
140
145
  }
141
146
  }
142
147