latticesql 1.16.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -58,8 +58,10 @@ __export(index_exports, {
58
58
  attachBlob: () => attachBlob,
59
59
  autoFtsColumns: () => autoFtsColumns,
60
60
  autoUpdate: () => autoUpdate,
61
+ classifyLinks: () => classifyLinks,
61
62
  configDir: () => configDir,
62
63
  contentHash: () => contentHash,
64
+ crawlUrl: () => crawlUrl,
63
65
  createReadOnlyHeader: () => createReadOnlyHeader,
64
66
  createSQLiteStateStore: () => createSQLiteStateStore,
65
67
  decrypt: () => decrypt,
@@ -68,11 +70,15 @@ __export(index_exports, {
68
70
  deleteToken: () => deleteToken,
69
71
  deriveCanonicalContexts: () => deriveCanonicalContexts,
70
72
  deriveKey: () => deriveKey,
73
+ describeImage: () => describeImage,
74
+ describePdf: () => describePdf,
71
75
  encrypt: () => encrypt,
76
+ enrichKnowledge: () => enrichKnowledge,
72
77
  ensureFtsIndex: () => ensureFtsIndex,
73
78
  ensureLatticeRoot: () => ensureLatticeRoot,
74
79
  entityFileNames: () => entityFileNames,
75
80
  estimateTokens: () => estimateTokens,
81
+ extractObjects: () => extractObjects,
76
82
  findLatticeRoot: () => findLatticeRoot,
77
83
  fixSchemaConflicts: () => fixSchemaConflicts,
78
84
  frontmatter: () => frontmatter,
@@ -101,9 +107,12 @@ __export(index_exports, {
101
107
  migrateLatticeData: () => migrateLatticeData,
102
108
  normalizeEntityFiles: () => normalizeEntityFiles,
103
109
  openTargetLatticeForMigration: () => openTargetLatticeForMigration,
110
+ organizeSource: () => organizeSource,
104
111
  parseConfigFile: () => parseConfigFile,
105
112
  parseConfigString: () => parseConfigString,
106
113
  parseMarkdownEntries: () => parseMarkdownEntries,
114
+ parseMatches: () => parseMatches,
115
+ parseObjects: () => parseObjects,
107
116
  parseSessionMD: () => parseSessionMD,
108
117
  parseSessionWrites: () => parseSessionWrites,
109
118
  probeCloud: () => probeCloud,
@@ -126,6 +135,7 @@ __export(index_exports, {
126
135
  saveDbCredentialForTeam: () => saveDbCredentialForTeam,
127
136
  setActiveWorkspace: () => setActiveWorkspace,
128
137
  slugify: () => slugify,
138
+ summarizeText: () => summarizeText,
129
139
  toSafeDirName: () => toSafeDirName,
130
140
  truncate: () => truncate,
131
141
  validateEntryId: () => validateEntryId,
@@ -430,6 +440,9 @@ var PostgresAdapter = class {
430
440
  connectionString: this._connectionString,
431
441
  max: this._poolSize
432
442
  });
443
+ this._pool.on("error", (err) => {
444
+ console.error("[latticesql] recovered from idle Postgres client error:", err.message);
445
+ });
433
446
  this._polyfillsReady = this._registerPolyfills();
434
447
  this._opened = true;
435
448
  }
@@ -964,6 +977,15 @@ var SchemaManager = class {
964
977
  }
965
978
  this._entityContexts.set(table, def);
966
979
  }
980
+ /**
981
+ * Register OR replace an entity context — unlike {@link defineEntityContext}
982
+ * (which throws on a redefine), this overwrites. Used to re-derive a
983
+ * runtime-created table's canonical context after a schema change (e.g. a new
984
+ * junction adds a hasMany rollup to the tables it links) without a reopen.
985
+ */
986
+ redefineEntityContext(table, def) {
987
+ this._entityContexts.set(table, def);
988
+ }
967
989
  getTables() {
968
990
  return this._tables;
969
991
  }
@@ -2039,20 +2061,25 @@ function cleanupEntityContexts(outputDir, entityContexts, currentSlugsByTable, m
2039
2061
  }
2040
2062
 
2041
2063
  // src/render/engine.ts
2064
+ var NOOP_RENDER = () => "";
2042
2065
  var RenderEngine = class {
2043
2066
  _schema;
2044
2067
  _adapter;
2045
2068
  _getTaskContext;
2046
- constructor(schema, adapter, getTaskContext) {
2069
+ /** When true, skip the read + write for spec-less (no-op render) tables. */
2070
+ _skipEmpty;
2071
+ constructor(schema, adapter, getTaskContext, options) {
2047
2072
  this._schema = schema;
2048
2073
  this._adapter = adapter;
2049
2074
  this._getTaskContext = getTaskContext ?? (() => "");
2075
+ this._skipEmpty = options?.skipEmpty ?? false;
2050
2076
  }
2051
2077
  async render(outputDir) {
2052
2078
  const start = Date.now();
2053
2079
  const filesWritten = [];
2054
2080
  const counters = { skipped: 0 };
2055
2081
  for (const [name, def] of this._schema.getTables()) {
2082
+ if (this._skipEmpty && def.render === NOOP_RENDER) continue;
2056
2083
  let rows = await this._schema.queryTable(this._adapter, name);
2057
2084
  if (def.relevanceFilter) {
2058
2085
  const ctx = this._getTaskContext();
@@ -4386,7 +4413,9 @@ var Lattice = class _Lattice {
4386
4413
  this._adapter = options.adapter ?? buildAdapter(dbPath, options);
4387
4414
  this._schema = new SchemaManager();
4388
4415
  this._sanitizer = new Sanitizer(options.security);
4389
- this._render = new RenderEngine(this._schema, this._adapter, () => this._taskContext);
4416
+ this._render = new RenderEngine(this._schema, this._adapter, () => this._taskContext, {
4417
+ skipEmpty: options.renderSkipsEmpty ?? false
4418
+ });
4390
4419
  this._reverseSync = new ReverseSyncEngine(this._schema, this._adapter);
4391
4420
  this._reverseSeedEngine = new ReverseSeedEngine(this._schema, this._adapter);
4392
4421
  this._loop = new SyncLoop(this._render);
@@ -4507,7 +4536,7 @@ var Lattice = class _Lattice {
4507
4536
  table,
4508
4537
  this._schema,
4509
4538
  this._adapter
4510
- ) : () => "",
4539
+ ) : NOOP_RENDER,
4511
4540
  outputFile: def.outputFile ?? `.schema-only/${table}.md`,
4512
4541
  ...renderTemplateName ? { _renderTemplateName: renderTemplateName } : {}
4513
4542
  };
@@ -4523,6 +4552,15 @@ var Lattice = class _Lattice {
4523
4552
  this._schema.defineEntityContext(table, def);
4524
4553
  return this;
4525
4554
  }
4555
+ /**
4556
+ * Register or REPLACE an entity context (overwrites instead of throwing on a
4557
+ * redefine — see {@link SchemaManager.redefineEntityContext}). Used to refresh
4558
+ * a canonical context at runtime after a related schema change.
4559
+ */
4560
+ redefineEntityContext(table, def) {
4561
+ this._schema.redefineEntityContext(table, def);
4562
+ return this;
4563
+ }
4526
4564
  /**
4527
4565
  * All entity contexts currently registered on this Lattice — both those
4528
4566
  * declared in `lattice.config.yml` and those added programmatically via
@@ -6548,6 +6586,39 @@ var NATIVE_ENTITY_DEFS = {
6548
6586
  },
6549
6587
  render: () => "",
6550
6588
  outputFile: ".lattice-native/notes.md"
6589
+ },
6590
+ chat_threads: {
6591
+ // An assistant conversation. Native so chat history survives across
6592
+ // sessions and is queryable/renderable like any other Lattice entity.
6593
+ columns: {
6594
+ id: "TEXT PRIMARY KEY",
6595
+ title: "TEXT",
6596
+ created_at: "TEXT NOT NULL DEFAULT (datetime('now'))",
6597
+ updated_at: "TEXT NOT NULL DEFAULT (datetime('now'))",
6598
+ deleted_at: "TEXT"
6599
+ },
6600
+ render: () => "",
6601
+ outputFile: ".lattice-native/chat-threads.md"
6602
+ },
6603
+ chat_messages: {
6604
+ // One turn (or feed entry) within a chat_thread.
6605
+ columns: {
6606
+ id: "TEXT PRIMARY KEY",
6607
+ // Soft reference to chat_threads.id. Kept as a plain column (no FK)
6608
+ // to match the generic, dialect-agnostic native-entity style.
6609
+ thread_id: "TEXT",
6610
+ // user | assistant | tool | feed | system
6611
+ role: "TEXT NOT NULL DEFAULT 'user'",
6612
+ // JSON payload: text, tool_use / tool_result blocks, attachments, or
6613
+ // (for role='feed') the feed-event details.
6614
+ content_json: "TEXT",
6615
+ // ai | gui | cli | ingest — meaningful for role='feed'.
6616
+ source: "TEXT",
6617
+ created_at: "TEXT NOT NULL DEFAULT (datetime('now'))",
6618
+ deleted_at: "TEXT"
6619
+ },
6620
+ render: () => "",
6621
+ outputFile: ".lattice-native/chat-messages.md"
6551
6622
  }
6552
6623
  };
6553
6624
  var NATIVE_ENTITY_NAMES = new Set(Object.keys(NATIVE_ENTITY_DEFS));
@@ -8920,6 +8991,568 @@ function archiveLocalSqlite(dbPath) {
8920
8991
  }
8921
8992
  return backupPath;
8922
8993
  }
8994
+
8995
+ // src/ai/llm-client.ts
8996
+ var import_node_module2 = require("module");
8997
+ var DEFAULT_MODEL = "claude-haiku-4-5";
8998
+
8999
+ // src/ai/summarize.ts
9000
+ var SUMMARY_SYSTEM = 'You write a one or two sentence factual description of a document for a knowledge base, focused on what it is and what it contains. No preamble, no "This document". Plain text only.';
9001
/**
 * Ask the model for a short factual description of a document.
 *
 * @param {object} client - LLM client exposing `runTurn(request)`; the resolved
 *   value must carry a `text` string.
 * @param {string} text - Document text. Only the first 12,000 characters are sent.
 * @param {string} name - File name shown to the model alongside the content.
 * @returns {Promise<string>} The model's reply with surrounding whitespace removed.
 */
async function summarizeText(client, text, name) {
  const prompt = [
    `File name: ${name}`,
    "",
    "Content:",
    text.slice(0, 12e3),
    "",
    "Describe it in 1-2 sentences."
  ].join("\n");
  const request = {
    model: DEFAULT_MODEL,
    system: SUMMARY_SYSTEM,
    messages: [{ role: "user", content: prompt }],
    tools: [],
    // Streamed deltas are ignored; only the final text of the turn is used.
    onText: () => void 0
  };
  const reply = (await client.runTurn(request)).text;
  return reply.trim();
}
9021
+ var CLASSIFY_SYSTEM = 'You decide which existing records a newly added document relates to. You are given a catalog of record types (with descriptions) and their records. Return ONLY a JSON array of {"table","id"} objects for records the document clearly relates to \u2014 an empty array if none. Output the JSON in a ```json fenced block and nothing else.';
9022
/**
 * Render the record catalog as markdown for the classification prompt.
 *
 * Each catalog entry becomes a `## table` heading (followed by an em dash and
 * the description when one is set) and a bullet per record. Entries without
 * records get a "- (no records)" placeholder. Entries are separated by a
 * blank line.
 *
 * @param {Array<{table: string, description?: string, records: Array<{id: *, label: *}>}>} catalog
 * @returns {string} The markdown block ("" for an empty catalog).
 */
function buildCatalogBlock(catalog) {
  const sections = [];
  for (const entry of catalog) {
    const suffix = entry.description ? ` \u2014 ${entry.description}` : "";
    const bullets = [];
    for (const record of entry.records) {
      bullets.push(`- id=${record.id} | ${record.label}`);
    }
    const listing = bullets.length > 0 ? bullets.join("\n") : "- (no records)";
    sections.push(`## ${entry.table}${suffix}\n${listing}`);
  }
  return sections.join("\n\n");
}
9030
/**
 * Parse the classifier's reply into a list of catalog matches.
 *
 * The reply is expected to contain a JSON array of `{table, id}` objects,
 * preferably inside a ```json fenced block; when no fence is present the whole
 * reply is parsed as JSON. Anything that is not valid JSON, not an array, or
 * that names a table/id pair absent from `catalog` is dropped, so the result
 * only ever references records that were offered to the model.
 *
 * Repeated pairs are collapsed to their first occurrence so that a caller
 * acting once per match does not act twice on the same record.
 *
 * @param {string} raw - Raw model output.
 * @param {Array<{table: string, records: Array<{id: string}>}>} catalog
 * @returns {Array<{table: string, id: string}>} Unique, validated matches in reply order.
 */
function parseMatches(raw, catalog) {
  const fence = /```json\s*([\s\S]*?)```/i.exec(raw);
  const body = fence ? fence[1] : raw;
  let parsed;
  try {
    parsed = JSON.parse((body ?? "").trim());
  } catch {
    // Unparseable output is treated as "no matches" rather than an error.
    return [];
  }
  if (!Array.isArray(parsed)) return [];
  const valid = new Map(catalog.map((e) => [e.table, new Set(e.records.map((r) => r.id))]));
  const seen = /* @__PURE__ */ new Set();
  const out = [];
  for (const item of parsed) {
    if (!item || typeof item !== "object") continue;
    const { table, id } = item;
    if (typeof table !== "string" || typeof id !== "string") continue;
    if (!valid.get(table)?.has(id)) continue;
    // JSON-encode the pair so the key cannot collide across table/id boundaries.
    const key = JSON.stringify([table, id]);
    if (seen.has(key)) continue;
    seen.add(key);
    out.push({ table, id });
  }
  return out;
}
9052
+ var ID_RE = /^[a-z][a-z0-9_]*$/;
9053
+ var RESERVED_COLS = /* @__PURE__ */ new Set(["id", "deleted_at", "created_at", "updated_at"]);
9054
+ var EXTRACT_SYSTEM = 'You build a knowledge base by extracting the key structured objects a document is ABOUT \u2014 e.g. an invoice, a person, a project, a contract, a meeting. You are given the existing entity types (tables) and their columns. For each salient object: reuse an existing entity when one clearly fits; otherwise propose a NEW entity with a short snake_case PLURAL name and 2-6 simple snake_case columns. Extract only objects the document is genuinely about \u2014 prefer 1-3, never more than 3, and never invent data not in the document. Return ONLY a JSON array of objects {"entity","isNew","columns","values","label"}, where "values" is an OBJECT mapping each column name to its value \u2014 e.g. {"invoice_number":"INV-114","total":"6400"} \u2014 in a ```json fenced block.';
9055
/**
 * Render the existing entity types as markdown for the extraction prompt.
 *
 * @param {Array<{table: string, columns: string[]}>} existing
 * @returns {string} One `## table` section per entity listing its columns,
 *   separated by blank lines, or a fixed placeholder when there are none.
 */
function buildSchemaBlock(existing) {
  if (existing.length === 0) {
    return "(no entities yet \u2014 propose new ones)";
  }
  const sections = [];
  for (const entity of existing) {
    const columnList = entity.columns.join(", ");
    sections.push(`## ${entity.table}\ncolumns: ${columnList}`);
  }
  return sections.join("\n\n");
}
9060
/**
 * Parse the extractor's reply into at most three sanitized object proposals.
 *
 * The reply should hold a JSON array (optionally inside a ```json fence) of
 * `{entity, isNew, columns, values, label}` items. An item is kept only when:
 *  - `entity` (trimmed, lower-cased) matches ID_RE and `label` is non-empty;
 *  - at least one value survives filtering. A value survives when its column
 *    name (trimmed, lower-cased) matches ID_RE, is not in RESERVED_COLS, and
 *    the value is a string or number. Values are stringified and capped at
 *    2,000 characters.
 * `values` may be an object, or an array paired positionally with `columns`.
 * The returned `columns` is the de-duplicated union of the valid declared
 * columns and the surviving value keys, capped at 8 entries.
 *
 * @param {string} raw - Raw model output.
 * @returns {Array<{entity: string, isNew: boolean, columns: string[], values: Object<string, string>, label: string}>}
 *   Empty when the reply is not a JSON array.
 */
function parseObjects(raw) {
  const fence = /```json\s*([\s\S]*?)```/i.exec(raw);
  const payload = fence ? fence[1] : raw;
  let parsed;
  try {
    parsed = JSON.parse(payload?.trim() ?? "");
  } catch {
    return [];
  }
  if (!Array.isArray(parsed)) return [];

  const isUsableColumn = (col) => ID_RE.test(col) && !RESERVED_COLS.has(col);
  const normalize = (value) => String(value).trim().toLowerCase();
  const results = [];

  for (const candidate of parsed) {
    if (!candidate || typeof candidate !== "object") continue;

    const entity = typeof candidate.entity === "string" ? candidate.entity.trim().toLowerCase() : "";
    const label = typeof candidate.label === "string" ? candidate.label.trim() : "";
    if (!ID_RE.test(entity) || !label) continue;

    const declaredIsArray = Array.isArray(candidate.columns);

    // Accept either {col: value} or parallel columns[] / values[] arrays.
    let rawValues = {};
    if (Array.isArray(candidate.values) && declaredIsArray) {
      for (const [index, column] of candidate.columns.entries()) {
        rawValues[String(column)] = candidate.values[index];
      }
    } else if (candidate.values && typeof candidate.values === "object") {
      rawValues = candidate.values;
    }

    const values = {};
    for (const key of Object.keys(rawValues)) {
      const value = rawValues[key];
      const col = key.trim().toLowerCase();
      if (!isUsableColumn(col)) continue;
      if (typeof value !== "string" && typeof value !== "number") continue;
      values[col] = String(value).slice(0, 2e3);
    }
    const valueKeys = Object.keys(values);
    if (valueKeys.length === 0) continue;

    const declared = declaredIsArray ? candidate.columns.map(normalize).filter(isUsableColumn) : [];
    const columns = [...new Set(declared.concat(valueKeys))].slice(0, 8);

    results.push({ entity, isNew: candidate.isNew === true, columns, values, label });
    if (results.length >= 3) break;
  }
  return results;
}
9099
/**
 * Ask the model which structured objects a document is about and parse the reply.
 *
 * @param {object} client - LLM client exposing `runTurn(request)`.
 * @param {string} text - Document text; blank text short-circuits to `[]`
 *   without calling the model. Only the first 12,000 characters are sent.
 * @param {string} name - Document name shown to the model.
 * @param {Array<{table: string, columns: string[]}>} existing - Entity types already present.
 * @param {number} [temperature] - Forwarded to the client only when provided.
 * @returns {Promise<Array>} Whatever `parseObjects` extracts from the reply.
 */
async function extractObjects(client, text, name, existing, temperature) {
  if (text.trim().length === 0) return [];
  const prompt = [
    "# Existing entities",
    buildSchemaBlock(existing),
    "",
    `# Document: ${name}`,
    "",
    text.slice(0, 12e3),
    "",
    "# Task",
    "Return the JSON array of objects to create."
  ].join("\n");
  const request = {
    model: DEFAULT_MODEL,
    system: EXTRACT_SYSTEM,
    messages: [{ role: "user", content: prompt }],
    tools: []
  };
  // Leave the key off entirely when no temperature was requested.
  if (temperature !== void 0) request.temperature = temperature;
  request.onText = () => void 0;
  const reply = await client.runTurn(request);
  return parseObjects(reply.text);
}
9124
/**
 * Ask the model which catalog records a document relates to.
 *
 * Returns `[]` without calling the model when the catalog is empty or the text
 * is blank. Streamed deltas are accumulated and used as the reply only when
 * the turn's final `text` is empty.
 *
 * @param {object} client - LLM client exposing `runTurn(request)`.
 * @param {string} text - Document text; only the first 12,000 characters are sent.
 * @param {string} name - Document name shown to the model.
 * @param {Array<{table: string, description?: string, records: Array}>} catalog
 * @returns {Promise<Array<{table: string, id: string}>>} Matches as validated by `parseMatches`.
 */
async function classifyLinks(client, text, name, catalog) {
  const nothingToDo = catalog.length === 0 || text.trim().length === 0;
  if (nothingToDo) return [];
  const prompt = [
    "# Catalog",
    buildCatalogBlock(catalog),
    "",
    `# Document: ${name}`,
    "",
    text.slice(0, 12e3),
    "",
    "# Task",
    "Return the JSON array of matching {table,id}."
  ].join("\n");
  let streamed = "";
  const reply = await client.runTurn({
    model: DEFAULT_MODEL,
    system: CLASSIFY_SYSTEM,
    messages: [{ role: "user", content: prompt }],
    tools: [],
    onText: (delta) => {
      streamed += delta;
    }
  });
  const answer = reply.text || streamed;
  return parseMatches(answer, catalog);
}
9151
+
9152
+ // src/ai/organize.ts
9153
/**
 * Summarize a newly added source, link it to matching records, and create a
 * fallback record when nothing matched.
 *
 * Steps, in order: summarize the text, classify it against `catalog`, call
 * `linkExisting` for each match (a truthy result counts as linked), and — only
 * when nothing was linked, `createIfNecessary` is on, and the text is not
 * blank — call `createFallback` with a title derived from the file name (its
 * last extension removed, "Note" when that leaves nothing) and a body (the
 * summary, or the first 2,000 characters of text when the summary is empty).
 *
 * @param {object} db - Store exposing `insert(table, row)`; used only by the
 *   default link/fallback handlers.
 * @param {object} opts
 * @param {*} opts.fileId - Id of the source file being organized.
 * @param {string} opts.text - Extracted text of the source.
 * @param {string} opts.name - File name of the source.
 * @param {Array} opts.catalog - Records offered to the classifier.
 * @param {object} [opts.client] - LLM client; when missing the call is skipped.
 * @param {string} [opts.linkTable="file_links"]
 * @param {string} [opts.fallbackTable="notes"]
 * @param {boolean} [opts.createIfNecessary=true]
 * @param {Function} [opts.linkExisting] - Override for linking one match.
 * @param {Function} [opts.createFallback] - Override for creating the fallback record.
 * @returns {Promise<{skipped: boolean, description: string, linked: Array, created: Array, message: string}>}
 */
async function organizeSource(db, opts) {
  const { fileId, text, name, catalog, client } = opts;
  if (!client) {
    return { skipped: true, description: "", linked: [], created: [], message: "" };
  }
  const linkTable = opts.linkTable ?? "file_links";
  const fallbackTable = opts.fallbackTable ?? "notes";
  const createIfNecessary = opts.createIfNecessary ?? true;

  // Default handlers write straight to the db; callers can override either one.
  async function defaultLinkExisting(match) {
    const row = { file_id: fileId, table_name: match.table, row_id: match.id, relevance: "related" };
    await db.insert(linkTable, row);
    return true;
  }
  async function defaultCreateFallback(title, body) {
    const id = await db.insert(fallbackTable, { title, body });
    const row = { file_id: fileId, table_name: fallbackTable, row_id: id, relevance: "primary" };
    await db.insert(linkTable, row);
    return { table: fallbackTable, id };
  }
  const linkExisting = opts.linkExisting ?? defaultLinkExisting;
  const createFallback = opts.createFallback ?? defaultCreateFallback;

  const summary = await summarizeText(client, text, name);
  const description = summary.trim();
  const matches = await classifyLinks(client, text, name, catalog);

  const linked = [];
  for (const match of matches) {
    const didLink = await linkExisting(match);
    if (didLink) linked.push({ table: match.table, id: match.id });
  }

  const created = [];
  const needsFallback = linked.length === 0 && createIfNecessary && text.trim().length > 0;
  if (needsFallback) {
    const title = name.replace(/\.[^./\\]+$/, "").trim() || "Note";
    const body = description.length > 0 ? description : text.slice(0, 2e3);
    const record = await createFallback(title, body);
    if (record) created.push({ table: record.table, id: record.id, title });
  }

  const message = buildMessage(linked, created);
  return { skipped: false, description, linked, created, message };
}
9201
/**
 * Compose the user-facing sentence(s) describing what organizing a source did.
 *
 * @param {Array<{table: string}>} linked - Existing records the source was linked to.
 * @param {Array<{table: string, title: string}>} created - Records newly created for it.
 * @returns {string} Space-joined sentences: a per-table link summary, one
 *   sentence per created record, a default sentence when neither applies, and
 *   a closing reminder that is always appended.
 */
function buildMessage(linked, created) {
  const sentences = [];
  if (linked.length > 0) {
    // Count links per table, keeping first-seen table order.
    const counts = /* @__PURE__ */ new Map();
    for (const { table } of linked) {
      counts.set(table, (counts.get(table) ?? 0) + 1);
    }
    const breakdown = Array.from(counts, ([table, count]) => `${String(count)} in ${table}`).join(", ");
    const plural = linked.length === 1 ? "" : "s";
    sentences.push(`Linked it to ${String(linked.length)} existing record${plural} (${breakdown}).`);
  }
  for (const record of created) {
    sentences.push(
      `Created a new ${singular(record.table)} "${record.title}" because it didn't fit any existing record.`
    );
  }
  if (sentences.length === 0) {
    sentences.push("Saved it; nothing else needed organizing.");
  }
  sentences.push("You can change any of this anytime.");
  return sentences.join(" ");
}
9220
/**
 * Naive singular form of a table name, for use in user-facing messages.
 *
 * A trailing "ies" (any case) becomes "y"; otherwise a single trailing "s"
 * (any case, but not "ss") is dropped; anything else is returned unchanged.
 *
 * @param {string} table
 * @returns {string}
 */
function singular(table) {
  const endsInIes = /ies$/i.test(table);
  if (endsInIes) {
    return `${table.slice(0, -3)}y`;
  }
  const endsInSingleS = /s$/i.test(table) && !/ss$/i.test(table);
  return endsInSingleS ? table.slice(0, -1) : table;
}
9225
+
9226
+ // src/ai/crawl.ts
9227
+ var import_jsdom = require("jsdom");
9228
+ var import_readability = require("@mozilla/readability");
9229
+ var import_node_path18 = require("path");
9230
+ var import_node_module3 = require("module");
9231
+ var import_meta2 = {};
9232
+ var DEFAULT_MAX_BYTES2 = 25 * 1024 * 1024;
9233
+ var DEFAULT_TIMEOUT_MS = 3e4;
9234
+ var DEFAULT_UA = "LatticeSQL/2.0 (+https://latticesql.com)";
9235
/**
 * Fetch a URL and extract a title, readable text, and excerpt from it.
 *
 * Flow:
 *  1. The URL is checked by `assertSafeUrl` and fetched through `safeFetch`
 *     under an abort timer. The timer stays armed until the response body has
 *     been read, so a server that sends headers and then stalls the body is
 *     aborted too instead of hanging the call.
 *  2. A missing or "application/octet-stream" content type is replaced by a
 *     sniffed one when sniffing yields a result.
 *  3. Content that is neither HTML/XML nor `text/*` is returned as-is (UTF-8
 *     decoded) with a title derived from the URL.
 *  4. Otherwise the body is parsed with JSDOM and Readability, falling back to
 *     the stripped body text. When that yields under 200 characters and
 *     `opts.noJs` is not set, a Playwright render is attempted and used if it
 *     produces more text.
 *
 * Note: the whole body is buffered before `maxBytes` is applied, so the cap
 * limits what is parsed, not what is downloaded. `byteLength` reports the
 * full downloaded size.
 *
 * @param {string} rawUrl - URL to crawl.
 * @param {object} [opts]
 * @param {boolean} [opts.allowPrivate=false] - Forwarded to the URL safety checks.
 * @param {Function} [opts.fetcher] - Fetch implementation; defaults to global `fetch`.
 * @param {number} [opts.timeoutMs] - Abort deadline covering request and body read.
 * @param {string} [opts.userAgent] - Overrides the default user-agent header.
 * @param {number} [opts.maxBytes] - Maximum number of body bytes to parse.
 * @param {boolean} [opts.noJs] - Disable the Playwright fallback.
 * @returns {Promise<{url: string, title: string, text: string, excerpt: string, mime: string, byteLength: number}>}
 * @throws {Error} When the response status is not ok, or when the fetch / body
 *   read fails or is aborted.
 */
async function crawlUrl(rawUrl, opts = {}) {
  const allowPrivate = opts.allowPrivate ?? false;
  const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const u = await assertSafeUrl(rawUrl, allowPrivate);
  const fetchImpl = opts.fetcher ?? fetch;
  const controller = new AbortController();
  const timer = setTimeout(() => {
    controller.abort();
  }, timeoutMs);
  let res;
  let raw;
  try {
    res = await safeFetch(u.toString(), fetchImpl, {
      allowPrivate,
      init: {
        signal: controller.signal,
        headers: {
          "user-agent": opts.userAgent ?? DEFAULT_UA,
          accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        }
      }
    });
    if (!res.ok) {
      throw new Error(`Lattice: crawl failed for ${rawUrl}: HTTP ${String(res.status)}`);
    }
    // Read the body while the abort timer is still armed.
    raw = Buffer.from(await res.arrayBuffer());
  } finally {
    clearTimeout(timer);
  }
  let mime = (res.headers.get("content-type") ?? "").split(";")[0]?.trim().toLowerCase() ?? "";
  const maxBytes = opts.maxBytes ?? DEFAULT_MAX_BYTES2;
  const body = raw.length > maxBytes ? raw.subarray(0, maxBytes) : raw;
  const finalUrl = res.url || u.toString();
  if (mime === "" || mime === "application/octet-stream") {
    mime = await sniffMime(body) || mime;
  }
  const isHtml = mime.includes("html") || mime.includes("xml");
  if (mime && !isHtml && !mime.startsWith("text/")) {
    return {
      url: finalUrl,
      title: titleFromUrl(finalUrl),
      text: body.toString("utf-8"),
      excerpt: "",
      mime,
      byteLength: raw.length
    };
  }
  const dom = new import_jsdom.JSDOM(body.toString("utf-8"), { url: finalUrl });
  const doc = dom.window.document;
  let title = (doc.title || "").trim();
  let text = "";
  let excerpt = "";
  try {
    const article = new import_readability.Readability(doc).parse();
    if (article) {
      text = article.textContent.trim();
      const articleTitle = article.title.trim();
      if (articleTitle.length > 0) title = articleTitle;
      excerpt = article.excerpt.trim();
    }
  } catch {
    // Best effort: a Readability failure falls through to the stripped-body text.
  }
  if (text.length === 0) text = strippedBodyText(dom);
  if (!opts.noJs && text.length < 200) {
    const rendered = await renderViaPlaywright(finalUrl, timeoutMs);
    if (rendered) {
      const rdom = new import_jsdom.JSDOM(rendered, { url: finalUrl });
      const rdoc = rdom.window.document;
      try {
        const a = new import_readability.Readability(rdoc).parse();
        // Only adopt the rendered article when it actually yields more text.
        if (a && a.textContent.trim().length > text.length) {
          text = a.textContent.trim();
          if (a.title.trim().length > 0) title = a.title.trim();
          if (a.excerpt.trim().length > 0) excerpt = a.excerpt.trim();
        }
      } catch {
        // Best effort: keep whatever the static pass produced.
      }
      if (text.length === 0) text = strippedBodyText(rdom);
    }
  }
  if (title.length === 0) title = titleFromUrl(finalUrl);
  return { url: finalUrl, title, text, excerpt, mime: mime || "text/html", byteLength: raw.length };
}
9315
/**
 * Fallback text extraction: the document body's text with non-content
 * elements removed and whitespace tidied.
 *
 * Removes every script/style/noscript/template element from the document
 * (mutating the DOM), then collapses whitespace that precedes a newline and
 * runs of two or more spaces/tabs, and trims the result.
 *
 * @param {{window: {document: Document}}} dom - A JSDOM-like object.
 * @returns {string}
 */
function strippedBodyText(dom) {
  const { document } = dom.window;
  const noise = document.querySelectorAll("script, style, noscript, template");
  Array.from(noise).forEach((node) => {
    node.remove();
  });
  const rawText = document.body.textContent ?? "";
  const withoutTrailingSpace = rawText.replace(/\s+\n/g, "\n");
  const collapsed = withoutTrailingSpace.replace(/[ \t]{2,}/g, " ");
  return collapsed.trim();
}
9322
/**
 * Derive a display title from a URL: the last path segment when there is one,
 * otherwise the host name. Input that does not parse as a URL is returned
 * unchanged.
 *
 * @param {string} rawUrl
 * @returns {string}
 */
function titleFromUrl(rawUrl) {
  try {
    const { pathname, hostname } = new URL(rawUrl);
    const leaf = (0, import_node_path18.basename)(pathname);
    if (leaf && leaf !== "/") {
      return leaf;
    }
    return hostname;
  } catch {
    return rawUrl;
  }
}
9331
/**
 * Best-effort MIME detection from file content via the optional `file-type`
 * package.
 *
 * @param {Buffer|Uint8Array} body - Bytes to inspect.
 * @returns {Promise<string>} The detected MIME type, or "" when nothing was
 *   detected or when loading/running `file-type` fails for any reason.
 */
async function sniffMime(body) {
  let detected;
  try {
    const fileType = await import("file-type");
    detected = await fileType.fileTypeFromBuffer(body);
  } catch {
    // Treated as "unknown" — sniffing is optional.
    return "";
  }
  return detected?.mime ?? "";
}
9340
/**
 * Render a page in headless Chromium through the optional `playwright`
 * package and return its HTML.
 *
 * @param {string} url - Page to load.
 * @param {number} timeoutMs - Navigation timeout passed to `page.goto`.
 * @returns {Promise<string|null>} The page HTML once the network is idle, or
 *   `null` when `playwright` cannot be loaded or any browser step fails.
 */
async function renderViaPlaywright(url, timeoutMs) {
  let chromium;
  try {
    // Use a require bound to the module URL when one is available, else plain require.
    const metaUrl = import_meta2.url;
    const load = metaUrl ? (0, import_node_module3.createRequire)(metaUrl) : require;
    ({ chromium } = load("playwright"));
  } catch {
    return null;
  }

  let browser = null;
  try {
    browser = await chromium.launch({ headless: true });
    const page = await browser.newPage();
    const navigation = { waitUntil: "networkidle", timeout: timeoutMs };
    await page.goto(url, navigation);
    const html = await page.content();
    return html;
  } catch {
    return null;
  } finally {
    // Always release the browser; a failing close is ignored.
    if (browser) {
      await browser.close().catch(() => void 0);
    }
  }
}
9362
+
9363
+ // src/ai/enrich.ts
9364
+ var ENRICH_SYSTEM = 'You are writing the body of a knowledge-base entry by synthesizing several source documents into one coherent, factual summary. Integrate concrete facts (dates, names, amounts) and note relationships across sources. Do not invent anything; if the sources are thin, keep it short. Output ONLY the body markdown \u2014 no title, no headings like "Sources", no preamble, no fences.';
9365
/**
 * Rewrite thin knowledge entries by synthesizing the text of their linked sources.
 *
 * For each row of the knowledge table, in query order, the entry is rewritten
 * only when all of these hold:
 *  - its `id` is a non-empty string;
 *  - at least `minSources` link rows point at it;
 *  - its current body is shorter than `thinBodyChars`;
 *  - at least `minSources` linked sources have non-blank text (each capped at
 *    4,000 characters);
 *  - the model call succeeds and `isBetter` accepts the new body.
 * `examined` counts entries that passed the first three checks. A failed model
 * call skips that entry. Processing stops once `maxObjects` entries have been
 * enriched.
 *
 * @param {object} db - Store exposing `query(table)`, `get(table, id)` and
 *   `update(table, id, patch)`.
 * @param {object} opts
 * @param {object} [opts.client] - LLM client; when missing nothing is done.
 * @param {string} [opts.knowledgeTable="notes"]
 * @param {string} [opts.bodyColumn="body"]
 * @param {string} [opts.linkTable="file_links"]
 * @param {string} [opts.sourceTable="files"]
 * @param {string} [opts.sourceTextColumn="extracted_text"]
 * @param {number} [opts.minSources=2]
 * @param {number} [opts.thinBodyChars=500]
 * @param {number} [opts.maxObjects=40]
 * @returns {Promise<{skipped: boolean, enriched: string[], examined: number}>}
 */
async function enrichKnowledge(db, opts) {
  const { client } = opts;
  if (!client) {
    return { skipped: true, enriched: [], examined: 0 };
  }
  const knowledgeTable = opts.knowledgeTable ?? "notes";
  const bodyColumn = opts.bodyColumn ?? "body";
  const linkTable = opts.linkTable ?? "file_links";
  const sourceTable = opts.sourceTable ?? "files";
  const sourceTextColumn = opts.sourceTextColumn ?? "extracted_text";
  const minSources = opts.minSources ?? 2;
  const thinBodyChars = opts.thinBodyChars ?? 500;
  const maxObjects = opts.maxObjects ?? 40;

  const links = await db.query(linkTable);
  const objects = await db.query(knowledgeTable);

  // Index source file ids by the knowledge row they are linked to.
  const sourceIdsByObject = /* @__PURE__ */ new Map();
  for (const link of links) {
    if (String(link.table_name) !== knowledgeTable) continue;
    const rowId = String(link.row_id);
    const bucket = sourceIdsByObject.get(rowId);
    if (bucket) {
      bucket.push(String(link.file_id));
    } else {
      sourceIdsByObject.set(rowId, [String(link.file_id)]);
    }
  }

  const enriched = [];
  let examined = 0;
  for (const entry of objects) {
    if (enriched.length >= maxObjects) break;

    const id = typeof entry.id === "string" ? entry.id : "";
    if (id.length === 0) continue;

    const sourceIds = sourceIdsByObject.get(id) ?? [];
    if (sourceIds.length < minSources) continue;

    const storedBody = entry[bodyColumn];
    const currentBody = typeof storedBody === "string" ? storedBody : "";
    if (currentBody.length >= thinBodyChars) continue;
    examined += 1;

    const snippets = [];
    for (const sourceId of sourceIds) {
      const source = await db.get(sourceTable, sourceId);
      const sourceText = source ? source[sourceTextColumn] : "";
      if (typeof sourceText === "string" && sourceText.trim().length > 0) {
        snippets.push(sourceText.slice(0, 4e3));
      }
    }
    if (snippets.length < minSources) continue;

    const titleCandidate = entry.title ?? entry.name;
    const title = typeof titleCandidate === "string" && titleCandidate.length > 0 ? titleCandidate : id;
    const sourceSections = snippets.map((snippet, index) => `## Source ${String(index + 1)}\n${snippet}`);
    const userBlock = [
      `# Entry: ${title}`,
      "",
      "Current body:",
      currentBody || "(empty)",
      "",
      sourceSections.join("\n\n"),
      "",
      "# Task",
      "Write the improved body."
    ].join("\n");

    let newBody = "";
    try {
      const reply = await client.runTurn({
        model: DEFAULT_MODEL,
        system: ENRICH_SYSTEM,
        messages: [{ role: "user", content: userBlock }],
        tools: [],
        onText: () => void 0
      });
      newBody = reply.text.trim();
    } catch {
      // A failed model call skips this entry; the others are still processed.
      continue;
    }

    if (isBetter(newBody, currentBody)) {
      await db.update(knowledgeTable, id, { [bodyColumn]: newBody });
      enriched.push(id);
    }
  }
  return { skipped: false, enriched, examined };
}
9440
/**
 * Decide whether a regenerated body is worth replacing the current one.
 *
 * An empty candidate never wins. Otherwise the candidate wins when the current
 * body is nearly empty (under 40 characters once trimmed) and the candidate
 * exceeds 120 characters, or when the candidate is more than 80 characters
 * longer than the current body.
 *
 * @param {string} next - Candidate body.
 * @param {string} prev - Current body.
 * @returns {boolean}
 */
function isBetter(next, prev) {
  if (next.length === 0) {
    return false;
  }
  const prevIsNearlyEmpty = prev.trim().length < 40;
  const fillsNearlyEmpty = prevIsNearlyEmpty && next.length > 120;
  const clearlyLonger = next.length > prev.length + 80;
  return fillsNearlyEmpty || clearlyLonger;
}
9445
+
9446
+ // src/ai/vision.ts
9447
+ var import_node_module4 = require("module");
9448
+ var import_promises3 = require("fs/promises");
9449
+ var import_meta3 = {};
9450
+ var DEFAULT_PROMPT = "Describe this image for a knowledge base in 2-4 factual sentences: what it shows, any visible text, and notable details. No preamble.";
9451
+ var MAX_DIM = 1568;
9452
/**
 * Describe an image file with a vision-capable model.
 *
 * The image is first passed through `normalizeImage` (byte budget
 * `opts.maxBytes`, default 1,400,000) and sent base64-encoded with the
 * "image/jpeg" media type.
 *
 * @param {object} auth - Credentials handed to the default sender.
 * @param {string} path2 - Path of the image file.
 * @param {object} [opts]
 * @param {number} [opts.maxBytes] - Byte budget passed to `normalizeImage`.
 * @param {Function} [opts.sender] - Replaces the default sender.
 * @param {string} [opts.prompt] - Replaces the default prompt.
 * @param {string} [opts.model] - Replaces the default model.
 * @returns {Promise<string>} The sender's reply, trimmed.
 */
async function describeImage(auth, path2, opts = {}) {
  const byteBudget = opts.maxBytes ?? 14e5;
  const jpeg = await normalizeImage(path2, byteBudget);
  const data = jpeg.toString("base64");
  const send = opts.sender ?? defaultSender(auth);
  const reply = await send({
    media_type: "image/jpeg",
    data,
    prompt: opts.prompt ?? DEFAULT_PROMPT,
    model: opts.model ?? DEFAULT_MODEL
  });
  return reply.trim();
}
9463
+ var DEFAULT_PDF_PROMPT = "Read this document for a knowledge base. First transcribe its readable text, then add a 2-4 sentence factual summary of what it is and its key details. It may be a scanned/image-only PDF \u2014 read the text from the page images. No preamble.";
9464
/**
 * Have a model read a PDF file directly and return its transcription/summary.
 *
 * @param {object} auth - Credentials handed to the default PDF sender.
 * @param {string} path2 - Path of the PDF file.
 * @param {object} [opts]
 * @param {number} [opts.maxBytes=30000000] - Largest file size accepted.
 * @param {Function} [opts.sender] - Replaces the default sender.
 * @param {string} [opts.prompt] - Replaces the default prompt.
 * @param {string} [opts.model] - Replaces the default model.
 * @returns {Promise<string>} The sender's reply, trimmed.
 * @throws {Error} When the file is larger than `maxBytes`, or cannot be read.
 */
async function describePdf(auth, path2, opts = {}) {
  const pdf = await (0, import_promises3.readFile)(path2);
  const limit = opts.maxBytes ?? 3e7;
  if (pdf.length > limit) {
    const actual = String(pdf.length);
    const allowed = String(limit);
    throw new Error(`PDF too large for a direct model read (${actual} > ${allowed} bytes)`);
  }
  const send = opts.sender ?? defaultPdfSender(auth);
  const request = {
    data: pdf.toString("base64"),
    prompt: opts.prompt ?? DEFAULT_PDF_PROMPT,
    model: opts.model ?? DEFAULT_MODEL
  };
  const reply = await send(request);
  return reply.trim();
}
9480
/**
 * Re-encode an image as JPEG, lowering quality until it fits a byte budget.
 *
 * Encoding starts at quality 80 and drops in steps of 15 while the result is
 * larger than `maxBytes`, stopping at quality 35. The result may therefore
 * still exceed `maxBytes` if the lowest quality is not enough.
 *
 * @param {string} path2 - Path of the source image.
 * @param {number} maxBytes - Target maximum size of the encoded JPEG.
 * @returns {Promise<Buffer>} The encoded JPEG.
 */
async function normalizeImage(path2, maxBytes) {
  const { default: sharp } = await import("sharp");
  let quality = 80 + 15;
  let encoded;
  do {
    quality -= 15;
    encoded = await renderJpeg(sharp, path2, quality);
  } while (encoded.length > maxBytes && quality > 35);
  return encoded;
}
9491
/**
 * Encode one JPEG rendition of an image with the given sharp factory.
 *
 * Calls, in order: `rotate()` with no arguments, `resize()` to fit inside
 * MAX_DIM × MAX_DIM without enlarging, `jpeg({ quality })`, `toBuffer()`.
 *
 * @param {Function} sharp - The sharp factory function.
 * @param {string} path2 - Path of the source image.
 * @param {number} quality - JPEG quality to encode at.
 * @returns {*} Whatever `toBuffer()` returns (a promise of a Buffer with sharp).
 */
function renderJpeg(sharp, path2, quality) {
  const bounds = {
    width: MAX_DIM,
    height: MAX_DIM,
    fit: "inside",
    withoutEnlargement: true
  };
  const pipeline = sharp(path2).rotate().resize(bounds).jpeg({ quality });
  return pipeline.toBuffer();
}
9494
/**
 * Build the default image sender backed by `@anthropic-ai/sdk`.
 *
 * The SDK is loaded when the sender is first called, not when it is built.
 * `auth.authToken` takes precedence over `auth.apiKey`; `auth.betaHeader`, when
 * set, is sent as the "anthropic-beta" default header.
 *
 * @param {{authToken?: string, apiKey?: string, betaHeader?: string}} auth
 * @returns {(input: {model: string, media_type: string, data: string, prompt: string}) => Promise<string>}
 *   A sender that resolves to the concatenated text blocks of the reply.
 */
function defaultSender(auth) {
  return async function sendImage(input) {
    const metaUrl = import_meta3.url;
    const load = metaUrl ? (0, import_node_module4.createRequire)(metaUrl) : require;
    const sdk = load("@anthropic-ai/sdk");
    const Anthropic = sdk.Anthropic ?? sdk.default;
    if (!Anthropic) {
      throw new Error("Could not resolve Anthropic from '@anthropic-ai/sdk'");
    }

    const credentials = auth.authToken ? { authToken: auth.authToken } : auth.apiKey ? { apiKey: auth.apiKey } : {};
    const headers = auth.betaHeader ? { defaultHeaders: { "anthropic-beta": auth.betaHeader } } : {};
    const client = new Anthropic({ ...credentials, ...headers });

    const imageBlock = {
      type: "image",
      source: { type: "base64", media_type: input.media_type, data: input.data }
    };
    const promptBlock = { type: "text", text: input.prompt };
    const res = await client.messages.create({
      model: input.model,
      max_tokens: 1024,
      messages: [{ role: "user", content: [imageBlock, promptBlock] }]
    });

    // Keep only the text blocks of the reply, in order.
    let text = "";
    for (const block of res.content) {
      if (block.type === "text") text += block.text ?? "";
    }
    return text;
  };
}
9525
/**
 * Build the default PDF sender backed by `@anthropic-ai/sdk`.
 *
 * The SDK is loaded when the sender is first called, not when it is built.
 * `auth.authToken` takes precedence over `auth.apiKey`; `auth.betaHeader`, when
 * set, is sent as the "anthropic-beta" default header. The PDF is sent as a
 * base64 "document" block followed by the text prompt.
 *
 * @param {{authToken?: string, apiKey?: string, betaHeader?: string}} auth
 * @returns {(input: {model: string, data: string, prompt: string}) => Promise<string>}
 *   A sender that resolves to the concatenated text blocks of the reply.
 */
function defaultPdfSender(auth) {
  return async function sendPdf(input) {
    const metaUrl = import_meta3.url;
    const load = metaUrl ? (0, import_node_module4.createRequire)(metaUrl) : require;
    const sdk = load("@anthropic-ai/sdk");
    const Anthropic = sdk.Anthropic ?? sdk.default;
    if (!Anthropic) {
      throw new Error("Could not resolve Anthropic from '@anthropic-ai/sdk'");
    }

    const credentials = auth.authToken ? { authToken: auth.authToken } : auth.apiKey ? { apiKey: auth.apiKey } : {};
    const headers = auth.betaHeader ? { defaultHeaders: { "anthropic-beta": auth.betaHeader } } : {};
    const client = new Anthropic({ ...credentials, ...headers });

    const documentBlock = {
      type: "document",
      source: { type: "base64", media_type: "application/pdf", data: input.data }
    };
    const promptBlock = { type: "text", text: input.prompt };
    const res = await client.messages.create({
      model: input.model,
      max_tokens: 4096,
      messages: [{ role: "user", content: [documentBlock, promptBlock] }]
    });

    // Keep only the text blocks of the reply, in order.
    let text = "";
    for (const block of res.content) {
      if (block.type === "text") text += block.text ?? "";
    }
    return text;
  };
}
8923
9556
  // Annotate the CommonJS export names for ESM import in node:
8924
9557
  0 && (module.exports = {
8925
9558
  CONFIG_SUBDIR,
@@ -8950,8 +9583,10 @@ function archiveLocalSqlite(dbPath) {
8950
9583
  attachBlob,
8951
9584
  autoFtsColumns,
8952
9585
  autoUpdate,
9586
+ classifyLinks,
8953
9587
  configDir,
8954
9588
  contentHash,
9589
+ crawlUrl,
8955
9590
  createReadOnlyHeader,
8956
9591
  createSQLiteStateStore,
8957
9592
  decrypt,
@@ -8960,11 +9595,15 @@ function archiveLocalSqlite(dbPath) {
8960
9595
  deleteToken,
8961
9596
  deriveCanonicalContexts,
8962
9597
  deriveKey,
9598
+ describeImage,
9599
+ describePdf,
8963
9600
  encrypt,
9601
+ enrichKnowledge,
8964
9602
  ensureFtsIndex,
8965
9603
  ensureLatticeRoot,
8966
9604
  entityFileNames,
8967
9605
  estimateTokens,
9606
+ extractObjects,
8968
9607
  findLatticeRoot,
8969
9608
  fixSchemaConflicts,
8970
9609
  frontmatter,
@@ -8993,9 +9632,12 @@ function archiveLocalSqlite(dbPath) {
8993
9632
  migrateLatticeData,
8994
9633
  normalizeEntityFiles,
8995
9634
  openTargetLatticeForMigration,
9635
+ organizeSource,
8996
9636
  parseConfigFile,
8997
9637
  parseConfigString,
8998
9638
  parseMarkdownEntries,
9639
+ parseMatches,
9640
+ parseObjects,
8999
9641
  parseSessionMD,
9000
9642
  parseSessionWrites,
9001
9643
  probeCloud,
@@ -9018,6 +9660,7 @@ function archiveLocalSqlite(dbPath) {
9018
9660
  saveDbCredentialForTeam,
9019
9661
  setActiveWorkspace,
9020
9662
  slugify,
9663
+ summarizeText,
9021
9664
  toSafeDirName,
9022
9665
  truncate,
9023
9666
  validateEntryId,