latticesql 1.16.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -298,6 +298,9 @@ var PostgresAdapter = class {
298
298
  connectionString: this._connectionString,
299
299
  max: this._poolSize
300
300
  });
301
+ this._pool.on("error", (err) => {
302
+ console.error("[latticesql] recovered from idle Postgres client error:", err.message);
303
+ });
301
304
  this._polyfillsReady = this._registerPolyfills();
302
305
  this._opened = true;
303
306
  }
@@ -832,6 +835,15 @@ var SchemaManager = class {
832
835
  }
833
836
  this._entityContexts.set(table, def);
834
837
  }
838
+ /**
839
+ * Register OR replace an entity context — unlike {@link defineEntityContext}
840
+ * (which throws on a redefine), this overwrites. Used to re-derive a
841
+ * runtime-created table's canonical context after a schema change (e.g. a new
842
+ * junction adds a hasMany rollup to the tables it links) without a reopen.
843
+ */
844
+ redefineEntityContext(table, def) {
845
+ this._entityContexts.set(table, def);
846
+ }
835
847
  getTables() {
836
848
  return this._tables;
837
849
  }
@@ -1907,20 +1919,25 @@ function cleanupEntityContexts(outputDir, entityContexts, currentSlugsByTable, m
1907
1919
  }
1908
1920
 
1909
1921
  // src/render/engine.ts
1922
+ var NOOP_RENDER = () => "";
1910
1923
  var RenderEngine = class {
1911
1924
  _schema;
1912
1925
  _adapter;
1913
1926
  _getTaskContext;
1914
- constructor(schema, adapter, getTaskContext) {
1927
+ /** When true, skip the read + write for spec-less (no-op render) tables. */
1928
+ _skipEmpty;
1929
+ constructor(schema, adapter, getTaskContext, options) {
1915
1930
  this._schema = schema;
1916
1931
  this._adapter = adapter;
1917
1932
  this._getTaskContext = getTaskContext ?? (() => "");
1933
+ this._skipEmpty = options?.skipEmpty ?? false;
1918
1934
  }
1919
1935
  async render(outputDir) {
1920
1936
  const start = Date.now();
1921
1937
  const filesWritten = [];
1922
1938
  const counters = { skipped: 0 };
1923
1939
  for (const [name, def] of this._schema.getTables()) {
1940
+ if (this._skipEmpty && def.render === NOOP_RENDER) continue;
1924
1941
  let rows = await this._schema.queryTable(this._adapter, name);
1925
1942
  if (def.relevanceFilter) {
1926
1943
  const ctx = this._getTaskContext();
@@ -4262,7 +4279,9 @@ var Lattice = class _Lattice {
4262
4279
  this._adapter = options.adapter ?? buildAdapter(dbPath, options);
4263
4280
  this._schema = new SchemaManager();
4264
4281
  this._sanitizer = new Sanitizer(options.security);
4265
- this._render = new RenderEngine(this._schema, this._adapter, () => this._taskContext);
4282
+ this._render = new RenderEngine(this._schema, this._adapter, () => this._taskContext, {
4283
+ skipEmpty: options.renderSkipsEmpty ?? false
4284
+ });
4266
4285
  this._reverseSync = new ReverseSyncEngine(this._schema, this._adapter);
4267
4286
  this._reverseSeedEngine = new ReverseSeedEngine(this._schema, this._adapter);
4268
4287
  this._loop = new SyncLoop(this._render);
@@ -4383,7 +4402,7 @@ var Lattice = class _Lattice {
4383
4402
  table,
4384
4403
  this._schema,
4385
4404
  this._adapter
4386
- ) : () => "",
4405
+ ) : NOOP_RENDER,
4387
4406
  outputFile: def.outputFile ?? `.schema-only/${table}.md`,
4388
4407
  ...renderTemplateName ? { _renderTemplateName: renderTemplateName } : {}
4389
4408
  };
@@ -4399,6 +4418,15 @@ var Lattice = class _Lattice {
4399
4418
  this._schema.defineEntityContext(table, def);
4400
4419
  return this;
4401
4420
  }
4421
+ /**
4422
+ * Register or REPLACE an entity context (overwrites instead of throwing on a
4423
+ * redefine — see {@link SchemaManager.redefineEntityContext}). Used to refresh
4424
+ * a canonical context at runtime after a related schema change.
4425
+ */
4426
+ redefineEntityContext(table, def) {
4427
+ this._schema.redefineEntityContext(table, def);
4428
+ return this;
4429
+ }
4402
4430
  /**
4403
4431
  * All entity contexts currently registered on this Lattice — both those
4404
4432
  * declared in `lattice.config.yml` and those added programmatically via
@@ -6424,6 +6452,39 @@ var NATIVE_ENTITY_DEFS = {
6424
6452
  },
6425
6453
  render: () => "",
6426
6454
  outputFile: ".lattice-native/notes.md"
6455
+ },
6456
+ chat_threads: {
6457
+ // An assistant conversation. Native so chat history survives across
6458
+ // sessions and is queryable/renderable like any other Lattice entity.
6459
+ columns: {
6460
+ id: "TEXT PRIMARY KEY",
6461
+ title: "TEXT",
6462
+ created_at: "TEXT NOT NULL DEFAULT (datetime('now'))",
6463
+ updated_at: "TEXT NOT NULL DEFAULT (datetime('now'))",
6464
+ deleted_at: "TEXT"
6465
+ },
6466
+ render: () => "",
6467
+ outputFile: ".lattice-native/chat-threads.md"
6468
+ },
6469
+ chat_messages: {
6470
+ // One turn (or feed entry) within a chat_thread.
6471
+ columns: {
6472
+ id: "TEXT PRIMARY KEY",
6473
+ // Soft reference to chat_threads.id. Kept as a plain column (no FK)
6474
+ // to match the generic, dialect-agnostic native-entity style.
6475
+ thread_id: "TEXT",
6476
+ // user | assistant | tool | feed | system
6477
+ role: "TEXT NOT NULL DEFAULT 'user'",
6478
+ // JSON payload: text, tool_use / tool_result blocks, attachments, or
6479
+ // (for role='feed') the feed-event details.
6480
+ content_json: "TEXT",
6481
+ // ai | gui | cli | ingest — meaningful for role='feed'.
6482
+ source: "TEXT",
6483
+ created_at: "TEXT NOT NULL DEFAULT (datetime('now'))",
6484
+ deleted_at: "TEXT"
6485
+ },
6486
+ render: () => "",
6487
+ outputFile: ".lattice-native/chat-messages.md"
6427
6488
  }
6428
6489
  };
6429
6490
  var NATIVE_ENTITY_NAMES = new Set(Object.keys(NATIVE_ENTITY_DEFS));
@@ -8796,6 +8857,566 @@ function archiveLocalSqlite(dbPath) {
8796
8857
  }
8797
8858
  return backupPath;
8798
8859
  }
8860
+
8861
+ // src/ai/llm-client.ts
8862
+ import { createRequire as createRequire2 } from "module";
8863
+ var DEFAULT_MODEL = "claude-haiku-4-5";
8864
+
8865
+ // src/ai/summarize.ts
8866
+ var SUMMARY_SYSTEM = 'You write a one or two sentence factual description of a document for a knowledge base, focused on what it is and what it contains. No preamble, no "This document". Plain text only.';
8867
/**
 * Ask the model for a one or two sentence factual description of a document.
 *
 * @param {{ runTurn: Function }} client - LLM client; only `runTurn` is used.
 * @param {string} text - Document text. Only the first 12,000 characters are sent.
 * @param {string} name - File name shown to the model next to the content.
 * @returns {Promise<string>} The trimmed description, or "" when `text` is blank.
 */
async function summarizeText(client, text, name) {
  // A blank document has nothing to describe: skip the model call instead of
  // paying for a turn that can only produce an invented description. This
  // mirrors the blank-text guards in extractObjects and classifyLinks.
  if (text.trim().length === 0) return "";
  const turn = await client.runTurn({
    model: DEFAULT_MODEL,
    system: SUMMARY_SYSTEM,
    messages: [
      {
        role: "user",
        content: `File name: ${name}\n\nContent:\n${text.slice(0, 12e3)}\n\nDescribe it in 1-2 sentences.`
      }
    ],
    tools: [],
    // Streaming deltas are not needed; the final turn text is used instead.
    onText: () => void 0
  });
  // Tolerate a client that reports no final text.
  return (turn.text ?? "").trim();
}
8887
+ var CLASSIFY_SYSTEM = 'You decide which existing records a newly added document relates to. You are given a catalog of record types (with descriptions) and their records. Return ONLY a JSON array of {"table","id"} objects for records the document clearly relates to \u2014 an empty array if none. Output the JSON in a ```json fenced block and nothing else.';
8888
/**
 * Render the record catalog as markdown-style sections for the classify prompt.
 *
 * Each entry becomes a `## table — description` heading (the description part
 * is omitted when falsy) followed by one `- id=… | label` line per record, or
 * a "(no records)" placeholder. Sections are separated by a blank line.
 *
 * @param {Array<{table: string, description?: string, records: Array<{id: *, label: *}>}>} catalog
 * @returns {string}
 */
function buildCatalogBlock(catalog) {
  const sections = [];
  for (const entry of catalog) {
    const suffix = entry.description ? ` \u2014 ${entry.description}` : "";
    const listing = entry.records.map((rec) => `- id=${rec.id} | ${rec.label}`).join("\n");
    sections.push(`## ${entry.table}${suffix}\n${listing || "- (no records)"}`);
  }
  return sections.join("\n\n");
}
8896
/**
 * Parse the model's classify reply into validated `{table, id}` matches.
 *
 * The reply is expected to be a JSON array, normally inside a ```json fence.
 * A bare ``` fence is accepted as a fallback, then the raw text. Anything
 * that does not parse to an array yields `[]`. Only pairs that exist in
 * `catalog` are kept, so the model cannot invent tables or ids.
 *
 * @param {string} raw - Raw model output.
 * @param {Array<{table: string, records: Array<{id: string}>}>} catalog
 * @returns {Array<{table: string, id: string}>} Unique matches in reply order.
 */
function parseMatches(raw, catalog) {
  // Prefer the labelled fence; fall back to an unlabelled one.
  const fence = /```json\s*([\s\S]*?)```/i.exec(raw) ?? /```\s*([\s\S]*?)```/.exec(raw);
  const body = fence ? fence[1] : raw;
  let parsed;
  try {
    parsed = JSON.parse((body ?? "").trim());
  } catch {
    return [];
  }
  if (!Array.isArray(parsed)) return [];
  const valid = new Map(catalog.map((e) => [e.table, new Set(e.records.map((r) => r.id))]));
  // The model sometimes repeats a pair; each record must be returned at most
  // once so callers do not create duplicate links.
  const seen = new Set();
  const out = [];
  for (const item of parsed) {
    if (!item || typeof item !== "object") continue;
    const { table } = item;
    // Models occasionally emit numeric-looking ids as JSON numbers.
    const id = typeof item.id === "number" ? String(item.id) : item.id;
    if (typeof table !== "string" || typeof id !== "string") continue;
    if (!valid.get(table)?.has(id)) continue;
    const key = `${table}\u0000${id}`;
    if (seen.has(key)) continue;
    seen.add(key);
    out.push({ table, id });
  }
  return out;
}
8918
+ var ID_RE = /^[a-z][a-z0-9_]*$/;
8919
+ var RESERVED_COLS = /* @__PURE__ */ new Set(["id", "deleted_at", "created_at", "updated_at"]);
8920
+ var EXTRACT_SYSTEM = 'You build a knowledge base by extracting the key structured objects a document is ABOUT \u2014 e.g. an invoice, a person, a project, a contract, a meeting. You are given the existing entity types (tables) and their columns. For each salient object: reuse an existing entity when one clearly fits; otherwise propose a NEW entity with a short snake_case PLURAL name and 2-6 simple snake_case columns. Extract only objects the document is genuinely about \u2014 prefer 1-3, never more than 3, and never invent data not in the document. Return ONLY a JSON array of objects {"entity","isNew","columns","values","label"}, where "values" is an OBJECT mapping each column name to its value \u2014 e.g. {"invoice_number":"INV-114","total":"6400"} \u2014 in a ```json fenced block.';
8921
/**
 * Render the existing entity types for the extraction prompt.
 *
 * @param {Array<{table: string, columns: string[]}>} existing
 * @returns {string} One `## table` / `columns: a, b` section per entity,
 *   separated by blank lines, or a placeholder when there are none.
 */
function buildSchemaBlock(existing) {
  if (existing.length === 0) return "(no entities yet \u2014 propose new ones)";
  const sections = [];
  for (const entity of existing) {
    const columnList = entity.columns.join(", ");
    sections.push(`## ${entity.table}\ncolumns: ${columnList}`);
  }
  return sections.join("\n\n");
}
8926
/**
 * Parse the model's extraction reply into at most three sanitized objects.
 *
 * The reply should be a JSON array (normally in a ```json fence) of
 * `{entity, isNew, columns, values, label}`. Rules applied per item:
 *  - `entity` is trimmed, lower-cased and must match ID_RE; `label` must be a
 *    non-empty string. Otherwise the item is dropped.
 *  - `values` may be an object, or an array positionally paired with `columns`.
 *  - Only string/number values under identifier-shaped, non-reserved column
 *    names are kept. Values are stringified and cut to 2,000 characters.
 *  - Items with no usable values are dropped.
 *  - `columns` is the union of declared and value-bearing columns, capped at 8.
 *    `values` only ever contains columns present in `columns`.
 *
 * @param {string} raw - Raw model output.
 * @returns {Array<{entity: string, isNew: boolean, columns: string[], values: Object<string,string>, label: string}>}
 */
function parseObjects(raw) {
  const MAX_COLUMNS = 8;
  const MAX_OBJECTS = 3;
  const fence = /```json\s*([\s\S]*?)```/i.exec(raw);
  let parsed;
  try {
    parsed = JSON.parse((fence ? fence[1] : raw)?.trim() ?? "");
  } catch {
    return [];
  }
  if (!Array.isArray(parsed)) return [];
  const out = [];
  for (const item of parsed) {
    if (!item || typeof item !== "object") continue;
    const o = item;
    const entity = typeof o.entity === "string" ? o.entity.trim().toLowerCase() : "";
    const label = typeof o.label === "string" ? o.label.trim() : "";
    if (!ID_RE.test(entity) || !label) continue;
    let valuesRaw = {};
    if (Array.isArray(o.values) && Array.isArray(o.columns)) {
      // Positional form: values[i] belongs to columns[i].
      o.columns.forEach((c, i) => {
        valuesRaw[String(c)] = o.values[i];
      });
    } else if (o.values && typeof o.values === "object") {
      valuesRaw = o.values;
    }
    let values = {};
    for (const [k, v] of Object.entries(valuesRaw)) {
      const col = k.trim().toLowerCase();
      if (ID_RE.test(col) && !RESERVED_COLS.has(col) && (typeof v === "string" || typeof v === "number")) {
        values[col] = String(v).slice(0, 2e3);
      }
    }
    if (Object.keys(values).length === 0) continue;
    const cols = Array.isArray(o.columns) ? o.columns.map((c) => String(c).trim().toLowerCase()).filter((c) => ID_RE.test(c) && !RESERVED_COLS.has(c)) : [];
    let columns = Array.from(/* @__PURE__ */ new Set([...cols, ...Object.keys(values)]));
    if (columns.length > MAX_COLUMNS) {
      // Over the cap: keep the columns that carry data first, then fill the
      // remaining slots with declared-only columns. A plain slice could drop a
      // value-bearing column and leave `values` referring to a column that is
      // not in `columns`.
      const keep = new Set(Object.keys(values).slice(0, MAX_COLUMNS));
      for (const c of columns) {
        if (keep.size >= MAX_COLUMNS) break;
        keep.add(c);
      }
      columns = columns.filter((c) => keep.has(c));
      values = Object.fromEntries(Object.entries(values).filter(([k]) => keep.has(k)));
    }
    out.push({ entity, isNew: o.isNew === true, columns, values, label });
    if (out.length >= MAX_OBJECTS) break;
  }
  return out;
}
8965
/**
 * Ask the model which structured objects a document is about.
 *
 * @param {{ runTurn: Function }} client - LLM client; only `runTurn` is used.
 * @param {string} text - Document text; blank text short-circuits to `[]`.
 *   Only the first 12,000 characters are sent.
 * @param {string} name - Document name shown to the model.
 * @param {Array} existing - Existing entity types, rendered by buildSchemaBlock.
 * @param {number} [temperature] - Forwarded to the client only when defined.
 * @returns {Promise<Array>} The reply as interpreted by parseObjects.
 */
async function extractObjects(client, text, name, existing, temperature) {
  if (text.trim().length === 0) return [];
  const prompt = [
    "# Existing entities",
    buildSchemaBlock(existing),
    "",
    `# Document: ${name}`,
    "",
    text.slice(0, 12e3),
    "",
    "# Task",
    "Return the JSON array of objects to create."
  ].join("\n");
  const request = {
    model: DEFAULT_MODEL,
    system: EXTRACT_SYSTEM,
    messages: [{ role: "user", content: prompt }],
    tools: []
  };
  // Leave the key out entirely when the caller did not choose a temperature.
  if (temperature !== void 0) request.temperature = temperature;
  request.onText = () => void 0;
  const turn = await client.runTurn(request);
  return parseObjects(turn.text);
}
8990
/**
 * Ask the model which catalog records a document relates to.
 *
 * @param {{ runTurn: Function }} client - LLM client; only `runTurn` is used.
 * @param {string} text - Document text. Only the first 12,000 characters are sent.
 * @param {string} name - Document name shown to the model.
 * @param {Array} catalog - Record catalog, rendered by buildCatalogBlock.
 * @returns {Promise<Array>} Matches validated by parseMatches; `[]` when the
 *   catalog is empty or the text is blank (no model call is made).
 */
async function classifyLinks(client, text, name, catalog) {
  const nothingToClassify = catalog.length === 0 || text.trim().length === 0;
  if (nothingToClassify) return [];
  const prompt = [
    "# Catalog",
    buildCatalogBlock(catalog),
    "",
    `# Document: ${name}`,
    "",
    text.slice(0, 12e3),
    "",
    "# Task",
    "Return the JSON array of matching {table,id}."
  ].join("\n");
  // Streamed deltas are accumulated as a fallback for turns whose final
  // `text` comes back empty.
  let streamed = "";
  const turn = await client.runTurn({
    model: DEFAULT_MODEL,
    system: CLASSIFY_SYSTEM,
    messages: [{ role: "user", content: prompt }],
    tools: [],
    onText: (delta) => {
      streamed = `${streamed}${delta}`;
    }
  });
  return parseMatches(turn.text || streamed, catalog);
}
9017
+
9018
+ // src/ai/organize.ts
9019
/**
 * Describe a newly added source document and file it in the knowledge base.
 *
 * Steps: summarize the text, ask which catalog records it relates to, link
 * each match, and — when nothing was linked, `createIfNecessary` is on and
 * the text is non-blank — create one fallback record titled after the file.
 *
 * @param {{ insert: Function }} db - Store used by the default link/create callbacks.
 * @param {object} opts
 * @param {*} opts.fileId - Id of the source file being organized.
 * @param {string} opts.text - Extracted document text.
 * @param {string} opts.name - File name; its extension is stripped for the fallback title.
 * @param {Array} opts.catalog - Record catalog passed to classifyLinks.
 * @param {object} [opts.client] - LLM client; without one the call is skipped.
 * @param {string} [opts.linkTable="file_links"]
 * @param {string} [opts.fallbackTable="notes"]
 * @param {boolean} [opts.createIfNecessary=true]
 * @param {Function} [opts.linkExisting] - `(match) => truthy` when a link was made.
 * @param {Function} [opts.createFallback] - `(title, body) => {table, id}` or falsy.
 * @returns {Promise<{skipped: boolean, description: string, linked: Array, created: Array, message: string}>}
 */
async function organizeSource(db, opts) {
  const { fileId, text, name, catalog, client } = opts;
  const linkTable = opts.linkTable ?? "file_links";
  const fallbackTable = opts.fallbackTable ?? "notes";
  const createIfNecessary = opts.createIfNecessary ?? true;
  if (!client) {
    return { skipped: true, description: "", linked: [], created: [], message: "" };
  }

  // Shared writer for the two default callbacks below.
  const insertLink = (tableName, rowId, relevance) => db.insert(linkTable, {
    file_id: fileId,
    table_name: tableName,
    row_id: rowId,
    relevance
  });
  const linkExisting = opts.linkExisting ?? (async (match) => {
    await insertLink(match.table, match.id, "related");
    return true;
  });
  const createFallback = opts.createFallback ?? (async (title, body) => {
    const id = await db.insert(fallbackTable, { title, body });
    await insertLink(fallbackTable, id, "primary");
    return { table: fallbackTable, id };
  });

  const summary = await summarizeText(client, text, name);
  const description = summary.trim();
  const matches = await classifyLinks(client, text, name, catalog);

  const linked = [];
  for (const match of matches) {
    const didLink = await linkExisting(match);
    if (didLink) linked.push({ table: match.table, id: match.id });
  }

  const created = [];
  const needsFallback = linked.length === 0 && createIfNecessary && text.trim().length > 0;
  if (needsFallback) {
    // Title is the file name without its final extension.
    const title = name.replace(/\.[^./\\]+$/, "").trim() || "Note";
    const body = description.length > 0 ? description : text.slice(0, 2e3);
    const record = await createFallback(title, body);
    if (record) created.push({ table: record.table, id: record.id, title });
  }

  const message = buildMessage(linked, created);
  return { skipped: false, description, linked, created, message };
}
9067
/**
 * Build the user-facing sentence(s) describing what organizing a file did.
 *
 * @param {Array<{table: string}>} linked - Existing records the file was linked to.
 * @param {Array<{table: string, title: string}>} created - Records newly created for it.
 * @returns {string} Space-joined sentences, always ending with the
 *   "You can change any of this anytime." reminder.
 */
function buildMessage(linked, created) {
  const sentences = [];
  if (linked.length > 0) {
    // Count links per table, preserving first-seen table order.
    const perTable = /* @__PURE__ */ new Map();
    linked.forEach((link) => {
      perTable.set(link.table, (perTable.get(link.table) ?? 0) + 1);
    });
    const where = Array.from(perTable, ([table, count]) => `${String(count)} in ${table}`).join(", ");
    sentences.push(
      `Linked it to ${String(linked.length)} existing record${linked.length === 1 ? "" : "s"} (${where}).`
    );
  }
  created.forEach((record) => {
    sentences.push(
      `Created a new ${singular(record.table)} "${record.title}" because it didn't fit any existing record.`
    );
  });
  if (sentences.length === 0) {
    sentences.push("Saved it; nothing else needed organizing.");
  }
  sentences.push("You can change any of this anytime.");
  return sentences.join(" ");
}
9086
/**
 * Best-effort singular form of a (plural) table name, for user-facing text.
 *
 * Heuristic only: "-ies" becomes "-y"; "-sses", "-shes" and "-xes" drop
 * "es"; any other trailing "s" (but not "ss") is dropped. Names matching
 * none of these are returned unchanged.
 *
 * @param {string} table
 * @returns {string}
 */
function singular(table) {
  if (/ies$/i.test(table)) return table.replace(/ies$/i, "y");
  // "addresses" -> "address", "boxes" -> "box": stripping only the "s" would
  // leave a stray "e".
  if (/(?:ss|sh|x)es$/i.test(table)) return table.replace(/es$/i, "");
  if (/s$/i.test(table) && !/ss$/i.test(table)) return table.replace(/s$/i, "");
  return table;
}
9091
+
9092
+ // src/ai/crawl.ts
9093
+ import { JSDOM } from "jsdom";
9094
+ import { Readability } from "@mozilla/readability";
9095
+ import { basename as basename5 } from "path";
9096
+ import { createRequire as createRequire3 } from "module";
9097
+ var DEFAULT_MAX_BYTES2 = 25 * 1024 * 1024;
9098
+ var DEFAULT_TIMEOUT_MS = 3e4;
9099
+ var DEFAULT_UA = "LatticeSQL/2.0 (+https://latticesql.com)";
9100
/**
 * Fetch a URL and extract its readable text.
 *
 * The URL is validated with assertSafeUrl and fetched through safeFetch.
 * Non-HTML, non-text responses are returned as raw UTF-8 text with a title
 * derived from the URL. HTML/XML/text responses go through Readability, then
 * fall back to stripped body text. When that yields fewer than 200 characters
 * and `opts.noJs` is not set, a Playwright render is attempted.
 *
 * @param {string} rawUrl
 * @param {object} [opts]
 * @param {boolean} [opts.allowPrivate=false] - Forwarded to the URL/fetch guards.
 * @param {Function} [opts.fetcher=fetch] - Fetch implementation to use.
 * @param {number} [opts.timeoutMs] - Abort deadline covering the request AND
 *   the body download; defaults to DEFAULT_TIMEOUT_MS.
 * @param {number} [opts.maxBytes] - Bytes of the body that are parsed; the
 *   rest is ignored. Defaults to DEFAULT_MAX_BYTES2.
 * @param {string} [opts.userAgent] - Defaults to DEFAULT_UA.
 * @param {boolean} [opts.noJs] - Skip the Playwright fallback.
 * @returns {Promise<{url: string, title: string, text: string, excerpt: string, mime: string, byteLength: number}>}
 * @throws {Error} On a non-2xx response, or whatever the guards/fetch reject with
 *   (including the abort error when the deadline passes).
 */
async function crawlUrl(rawUrl, opts = {}) {
  const u = await assertSafeUrl(rawUrl, opts.allowPrivate ?? false);
  const fetchImpl = opts.fetcher ?? fetch;
  const controller = new AbortController();
  const timer = setTimeout(() => {
    controller.abort();
  }, opts.timeoutMs ?? DEFAULT_TIMEOUT_MS);
  let res;
  let raw;
  try {
    res = await safeFetch(u.toString(), fetchImpl, {
      allowPrivate: opts.allowPrivate ?? false,
      init: {
        signal: controller.signal,
        headers: {
          "user-agent": opts.userAgent ?? DEFAULT_UA,
          accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        }
      }
    });
    if (!res.ok) {
      throw new Error(`Lattice: crawl failed for ${rawUrl}: HTTP ${String(res.status)}`);
    }
    // Read the body while the abort timer is still armed: a server that sends
    // headers and then stalls would otherwise hang this call indefinitely.
    raw = Buffer.from(await res.arrayBuffer());
  } finally {
    clearTimeout(timer);
  }
  let mime = (res.headers.get("content-type") ?? "").split(";")[0]?.trim().toLowerCase() ?? "";
  const maxBytes = opts.maxBytes ?? DEFAULT_MAX_BYTES2;
  const body = raw.length > maxBytes ? raw.subarray(0, maxBytes) : raw;
  const finalUrl = res.url || u.toString();
  // Missing or generic content type: sniff the bytes instead.
  if (mime === "" || mime === "application/octet-stream") {
    mime = await sniffMime(body) || mime;
  }
  const isHtml = mime.includes("html") || mime.includes("xml");
  if (mime && !isHtml && !mime.startsWith("text/")) {
    // Known non-text type: no DOM parsing, hand back the decoded bytes.
    return {
      url: finalUrl,
      title: titleFromUrl(finalUrl),
      text: body.toString("utf-8"),
      excerpt: "",
      mime,
      byteLength: raw.length
    };
  }
  const dom = new JSDOM(body.toString("utf-8"), { url: finalUrl });
  const doc = dom.window.document;
  let title = (doc.title || "").trim();
  let text = "";
  let excerpt = "";
  try {
    const article = new Readability(doc).parse();
    if (article) {
      text = article.textContent.trim();
      const articleTitle = article.title.trim();
      if (articleTitle.length > 0) title = articleTitle;
      excerpt = article.excerpt.trim();
    }
  } catch {
    // Best effort: a Readability failure falls through to the stripped-body fallback.
  }
  if (text.length === 0) text = strippedBodyText(dom);
  if (!opts.noJs && text.length < 200) {
    // Very little static text: the page may be rendered client-side.
    const rendered = await renderViaPlaywright(finalUrl, opts.timeoutMs ?? DEFAULT_TIMEOUT_MS);
    if (rendered) {
      const rdom = new JSDOM(rendered, { url: finalUrl });
      const rdoc = rdom.window.document;
      try {
        const a = new Readability(rdoc).parse();
        // Only adopt the rendered article when it has more text than before.
        if (a && a.textContent.trim().length > text.length) {
          text = a.textContent.trim();
          if (a.title.trim().length > 0) title = a.title.trim();
          if (a.excerpt.trim().length > 0) excerpt = a.excerpt.trim();
        }
      } catch {
        // Best effort, as above.
      }
      if (text.length === 0) text = strippedBodyText(rdom);
    }
  }
  if (title.length === 0) title = titleFromUrl(finalUrl);
  return { url: finalUrl, title, text, excerpt, mime: mime || "text/html", byteLength: raw.length };
}
9180
/**
 * Fallback text extraction: drop non-content elements from the document and
 * return the body's text with whitespace tidied up.
 *
 * Mutates the DOM (script/style/noscript/template elements are removed).
 *
 * @param {{ window: { document: Document } }} dom - A JSDOM-like wrapper.
 * @returns {string}
 */
function strippedBodyText(dom) {
  const { document } = dom.window;
  const nonContent = Array.from(document.querySelectorAll("script, style, noscript, template"));
  nonContent.forEach((node) => {
    node.remove();
  });
  const bodyText = document.body.textContent ?? "";
  // Drop whitespace that precedes a newline, then squeeze space/tab runs.
  const withoutTrailingSpace = bodyText.replace(/\s+\n/g, "\n");
  const squeezed = withoutTrailingSpace.replace(/[ \t]{2,}/g, " ");
  return squeezed.trim();
}
9187
/**
 * Derive a human-readable title from a URL: the last path segment (with
 * percent-escapes decoded), or the hostname when the path has none.
 *
 * @param {string} rawUrl
 * @returns {string} The title, or `rawUrl` itself when it is not a valid URL.
 */
function titleFromUrl(rawUrl) {
  let parsed;
  try {
    parsed = new URL(rawUrl);
  } catch {
    return rawUrl;
  }
  const last = basename5(parsed.pathname);
  if (!last || last === "/") return parsed.hostname;
  try {
    // "My%20File.pdf" should read "My File.pdf" in a title.
    return decodeURIComponent(last).trim() || last;
  } catch {
    // Malformed escape sequence: keep the segment as-is.
    return last;
  }
}
9196
/**
 * Guess a MIME type from the leading bytes of a buffer using the optional
 * `file-type` package.
 *
 * @param {Uint8Array} body
 * @returns {Promise<string>} The detected MIME type, or "" when nothing was
 *   detected or `file-type` could not be loaded or failed.
 */
async function sniffMime(body) {
  return import("file-type")
    .then((fileType) => fileType.fileTypeFromBuffer(body))
    .then((detected) => detected?.mime ?? "")
    // Best effort: any load or detection failure means "unknown".
    .catch(() => "");
}
9205
/**
 * Render a page in headless Chromium through the optional `playwright`
 * package and return its serialized HTML.
 *
 * NOTE(review): the browser navigates on its own here, so any URL safety
 * guard applied by the caller's fetch path presumably does not cover
 * in-browser redirects or subresources — confirm this is acceptable.
 *
 * @param {string} url - Page to open.
 * @param {number} timeoutMs - Navigation timeout passed to `page.goto`.
 * @returns {Promise<string|null>} The rendered HTML, or null when Playwright
 *   cannot be loaded or any browser step fails.
 */
async function renderViaPlaywright(url, timeoutMs) {
  // Resolve Playwright lazily so it stays an optional dependency.
  const resolveChromium = () => {
    const importMetaUrl = import.meta.url;
    const load = importMetaUrl ? createRequire3(importMetaUrl) : __require;
    return load("playwright").chromium;
  };
  let chromium;
  try {
    chromium = resolveChromium();
  } catch {
    return null;
  }
  let browser = null;
  try {
    browser = await chromium.launch({ headless: true });
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "networkidle", timeout: timeoutMs });
    const html = await page.content();
    return html;
  } catch {
    return null;
  } finally {
    // Always release the browser; a failed close must not mask the result.
    if (browser) await browser.close().catch(() => void 0);
  }
}
9227
+
9228
+ // src/ai/enrich.ts
9229
+ var ENRICH_SYSTEM = 'You are writing the body of a knowledge-base entry by synthesizing several source documents into one coherent, factual summary. Integrate concrete facts (dates, names, amounts) and note relationships across sources. Do not invent anything; if the sources are thin, keep it short. Output ONLY the body markdown \u2014 no title, no headings like "Sources", no preamble, no fences.';
9230
/**
 * Rewrite thin knowledge entries by synthesizing the sources linked to them.
 *
 * An entry is a candidate when it has a string id, at least `minSources`
 * DISTINCT linked source files, and a body shorter than `thinBodyChars`.
 * For each candidate the linked sources' text (first 4,000 characters each)
 * is sent to the model; the body is replaced only when isBetter accepts it.
 *
 * @param {{ query: Function, get: Function, update: Function }} db
 * @param {object} opts
 * @param {object} [opts.client] - LLM client; without one the call is skipped.
 * @param {string} [opts.knowledgeTable="notes"]
 * @param {string} [opts.bodyColumn="body"]
 * @param {string} [opts.linkTable="file_links"]
 * @param {string} [opts.sourceTable="files"]
 * @param {string} [opts.sourceTextColumn="extracted_text"]
 * @param {number} [opts.minSources=2]
 * @param {number} [opts.thinBodyChars=500]
 * @param {number} [opts.maxObjects=40] - Stop after this many entries were enriched.
 * @returns {Promise<{skipped: boolean, enriched: string[], examined: number}>}
 */
async function enrichKnowledge(db, opts) {
  const { client } = opts;
  if (!client) return { skipped: true, enriched: [], examined: 0 };
  const knowledgeTable = opts.knowledgeTable ?? "notes";
  const bodyColumn = opts.bodyColumn ?? "body";
  const linkTable = opts.linkTable ?? "file_links";
  const sourceTable = opts.sourceTable ?? "files";
  const sourceTextColumn = opts.sourceTextColumn ?? "extracted_text";
  const minSources = opts.minSources ?? 2;
  const thinBodyChars = opts.thinBodyChars ?? 500;
  const maxObjects = opts.maxObjects ?? 40;
  const links = await db.query(linkTable);
  const objects = await db.query(knowledgeTable);
  // Row id -> distinct linked file ids. A Set is used so that repeated link
  // rows for the same file neither count as extra sources toward
  // `minSources` nor feed the same text to the model twice.
  const sourceIdsByObject = /* @__PURE__ */ new Map();
  for (const l of links) {
    if (String(l.table_name) !== knowledgeTable) continue;
    const rowId = String(l.row_id);
    const ids = sourceIdsByObject.get(rowId);
    if (ids) ids.add(String(l.file_id));
    else sourceIdsByObject.set(rowId, /* @__PURE__ */ new Set([String(l.file_id)]));
  }
  const enriched = [];
  let examined = 0;
  for (const obj of objects) {
    if (enriched.length >= maxObjects) break;
    const idVal = obj.id;
    const id = typeof idVal === "string" ? idVal : "";
    if (id.length === 0) continue;
    const sourceIds = [...sourceIdsByObject.get(id) ?? []];
    if (sourceIds.length < minSources) continue;
    const rawBody = obj[bodyColumn];
    const currentBody = typeof rawBody === "string" ? rawBody : "";
    if (currentBody.length >= thinBodyChars) continue;
    examined++;
    const snippets = [];
    for (const sid of sourceIds) {
      const src = await db.get(sourceTable, sid);
      const rawText = src ? src[sourceTextColumn] : "";
      if (typeof rawText === "string" && rawText.trim().length > 0) {
        snippets.push(rawText.slice(0, 4e3));
      }
    }
    // Sources without usable text do not count toward the minimum.
    if (snippets.length < minSources) continue;
    const titleVal = obj.title ?? obj.name;
    const title = typeof titleVal === "string" && titleVal.length > 0 ? titleVal : id;
    const userBlock = `# Entry: ${title}\n\nCurrent body:\n${currentBody || "(empty)"}\n\n` + snippets.map((s, i) => `## Source ${String(i + 1)}\n${s}`).join("\n\n") + `\n\n# Task\nWrite the improved body.`;
    let newBody = "";
    try {
      const turn = await client.runTurn({
        model: DEFAULT_MODEL,
        system: ENRICH_SYSTEM,
        messages: [{ role: "user", content: userBlock }],
        tools: [],
        onText: () => void 0
      });
      newBody = turn.text.trim();
    } catch {
      // Best effort: a failed model call skips this entry, not the whole run.
      continue;
    }
    if (isBetter(newBody, currentBody)) {
      await db.update(knowledgeTable, id, { [bodyColumn]: newBody });
      enriched.push(id);
    }
  }
  return { skipped: false, enriched, examined };
}
9305
/**
 * Decide whether a freshly generated body should replace the current one.
 *
 * An empty candidate never wins. Otherwise it wins when the current body is
 * nearly empty (under 40 trimmed characters) and the candidate exceeds 120
 * characters, or when the candidate is more than 80 characters longer.
 *
 * @param {string} next - Candidate body.
 * @param {string} prev - Current body.
 * @returns {boolean}
 */
function isBetter(next, prev) {
  if (next.length === 0) return false;
  const replacesStub = prev.trim().length < 40 && next.length > 120;
  const clearlyLonger = next.length > prev.length + 80;
  return replacesStub || clearlyLonger;
}
9310
+
9311
+ // src/ai/vision.ts
9312
+ import { createRequire as createRequire4 } from "module";
9313
+ import { readFile as readFile2 } from "fs/promises";
9314
+ var DEFAULT_PROMPT = "Describe this image for a knowledge base in 2-4 factual sentences: what it shows, any visible text, and notable details. No preamble.";
9315
+ var MAX_DIM = 1568;
9316
/**
 * Describe an image file with a vision model.
 *
 * The image is first normalized by normalizeImage (aiming for at most
 * `opts.maxBytes`, default 1,400,000) and sent as base64 JPEG.
 *
 * @param {object} auth - Credentials for the default sender; unused when
 *   `opts.sender` is given.
 * @param {string} path2 - Path of the image file.
 * @param {object} [opts]
 * @param {number} [opts.maxBytes]
 * @param {Function} [opts.sender] - `(payload) => Promise<string>` override.
 * @param {string} [opts.prompt] - Defaults to DEFAULT_PROMPT.
 * @param {string} [opts.model] - Defaults to DEFAULT_MODEL.
 * @returns {Promise<string>} The trimmed description.
 */
async function describeImage(auth, path2, opts = {}) {
  const jpeg = await normalizeImage(path2, opts.maxBytes ?? 14e5);
  const data = jpeg.toString("base64");
  const send = opts.sender ?? defaultSender(auth);
  const payload = {
    media_type: "image/jpeg",
    data,
    prompt: opts.prompt ?? DEFAULT_PROMPT,
    model: opts.model ?? DEFAULT_MODEL
  };
  const reply = await send(payload);
  return reply.trim();
}
9327
+ var DEFAULT_PDF_PROMPT = "Read this document for a knowledge base. First transcribe its readable text, then add a 2-4 sentence factual summary of what it is and its key details. It may be a scanned/image-only PDF \u2014 read the text from the page images. No preamble.";
9328
/**
 * Have a model read a PDF file directly and return its transcription/summary.
 *
 * @param {object} auth - Credentials for the default sender; unused when
 *   `opts.sender` is given.
 * @param {string} path2 - Path of the PDF file.
 * @param {object} [opts]
 * @param {number} [opts.maxBytes=30000000] - Files larger than this are rejected.
 * @param {Function} [opts.sender] - `(payload) => Promise<string>` override.
 * @param {string} [opts.prompt] - Defaults to DEFAULT_PDF_PROMPT.
 * @param {string} [opts.model] - Defaults to DEFAULT_MODEL.
 * @returns {Promise<string>} The trimmed model output.
 * @throws {Error} When the file exceeds `maxBytes` (or cannot be read).
 */
async function describePdf(auth, path2, opts = {}) {
  const pdfBytes = await readFile2(path2);
  const limit = opts.maxBytes ?? 3e7;
  const tooLarge = pdfBytes.length > limit;
  if (tooLarge) {
    throw new Error(
      `PDF too large for a direct model read (${String(pdfBytes.length)} > ${String(limit)} bytes)`
    );
  }
  const send = opts.sender ?? defaultPdfSender(auth);
  const payload = {
    data: pdfBytes.toString("base64"),
    prompt: opts.prompt ?? DEFAULT_PDF_PROMPT,
    model: opts.model ?? DEFAULT_MODEL
  };
  const reply = await send(payload);
  return reply.trim();
}
9344
/**
 * Re-encode an image as JPEG, lowering quality until it fits `maxBytes`.
 *
 * Quality starts at 80 and drops in steps of 15 down to 35. The last
 * encoding is returned even if it is still larger than `maxBytes`.
 *
 * @param {string} path2 - Path of the image file.
 * @param {number} maxBytes - Target upper bound for the encoded size.
 * @returns {Promise<Buffer>}
 */
async function normalizeImage(path2, maxBytes) {
  // `sharp` is loaded on demand so it is only required when images are used.
  const { default: sharp } = await import("sharp");
  for (let quality = 80; ; quality -= 15) {
    const encoded = await renderJpeg(sharp, path2, quality);
    if (encoded.length <= maxBytes || quality <= 35) return encoded;
  }
}
9355
/**
 * Build and run the sharp pipeline for one JPEG encoding: auto-rotate, fit
 * inside a MAX_DIM x MAX_DIM box without enlarging, encode at `quality`.
 *
 * @param {Function} sharp - The sharp factory.
 * @param {string} path2 - Path of the image file.
 * @param {number} quality - JPEG quality.
 * @returns {Promise<Buffer>} Whatever the pipeline's `toBuffer()` returns.
 */
function renderJpeg(sharp, path2, quality) {
  const oriented = sharp(path2).rotate();
  const bounded = oriented.resize({
    width: MAX_DIM,
    height: MAX_DIM,
    fit: "inside",
    withoutEnlargement: true
  });
  return bounded.jpeg({ quality }).toBuffer();
}
9358
/**
 * Construct an Anthropic SDK client for the vision senders.
 *
 * The SDK is required lazily so it is only needed when a default sender is
 * actually invoked. `authToken` takes precedence over `apiKey`; `betaHeader`,
 * when set, is sent as the `anthropic-beta` default header.
 *
 * @param {{authToken?: string, apiKey?: string, betaHeader?: string}} auth
 * @returns {object} An Anthropic client instance.
 * @throws {Error} When the SDK exposes neither `Anthropic` nor a default export.
 */
function resolveVisionClient(auth) {
  const importMetaUrl = import.meta.url;
  const req = importMetaUrl ? createRequire4(importMetaUrl) : __require;
  const sdk = req("@anthropic-ai/sdk");
  const Anthropic = sdk.Anthropic ?? sdk.default;
  if (!Anthropic) throw new Error("Could not resolve Anthropic from '@anthropic-ai/sdk'");
  const config = {};
  if (auth.authToken) config.authToken = auth.authToken;
  else if (auth.apiKey) config.apiKey = auth.apiKey;
  if (auth.betaHeader) config.defaultHeaders = { "anthropic-beta": auth.betaHeader };
  return new Anthropic(config);
}

/** Concatenate the text of every `text` block in a Messages API response. */
function joinVisionTextBlocks(content) {
  return content.filter((b) => b.type === "text").map((b) => b.text ?? "").join("");
}

/**
 * Default image sender: posts one base64 image plus a prompt to the Messages
 * API (max 1,024 output tokens) and returns the reply text.
 *
 * @param {{authToken?: string, apiKey?: string, betaHeader?: string}} auth
 * @returns {(input: {media_type: string, data: string, prompt: string, model: string}) => Promise<string>}
 */
function defaultSender(auth) {
  return async (input) => {
    const client = resolveVisionClient(auth);
    const res = await client.messages.create({
      model: input.model,
      max_tokens: 1024,
      messages: [
        {
          role: "user",
          content: [
            {
              type: "image",
              source: { type: "base64", media_type: input.media_type, data: input.data }
            },
            { type: "text", text: input.prompt }
          ]
        }
      ]
    });
    return joinVisionTextBlocks(res.content);
  };
}

/**
 * Default PDF sender: posts one base64 PDF document plus a prompt to the
 * Messages API (max 4,096 output tokens) and returns the reply text.
 *
 * @param {{authToken?: string, apiKey?: string, betaHeader?: string}} auth
 * @returns {(input: {data: string, prompt: string, model: string}) => Promise<string>}
 */
function defaultPdfSender(auth) {
  return async (input) => {
    const client = resolveVisionClient(auth);
    const res = await client.messages.create({
      model: input.model,
      max_tokens: 4096,
      messages: [
        {
          role: "user",
          content: [
            {
              type: "document",
              source: { type: "base64", media_type: "application/pdf", data: input.data }
            },
            { type: "text", text: input.prompt }
          ]
        }
      ]
    });
    return joinVisionTextBlocks(res.content);
  };
}
9419
+ }
8799
9420
  export {
8800
9421
  CONFIG_SUBDIR,
8801
9422
  DEFAULT_ENTRY_TYPES,
@@ -8825,8 +9446,10 @@ export {
8825
9446
  attachBlob,
8826
9447
  autoFtsColumns,
8827
9448
  autoUpdate,
9449
+ classifyLinks,
8828
9450
  configDir,
8829
9451
  contentHash,
9452
+ crawlUrl,
8830
9453
  createReadOnlyHeader,
8831
9454
  createSQLiteStateStore,
8832
9455
  decrypt,
@@ -8835,11 +9458,15 @@ export {
8835
9458
  deleteToken,
8836
9459
  deriveCanonicalContexts,
8837
9460
  deriveKey,
9461
+ describeImage,
9462
+ describePdf,
8838
9463
  encrypt,
9464
+ enrichKnowledge,
8839
9465
  ensureFtsIndex,
8840
9466
  ensureLatticeRoot,
8841
9467
  entityFileNames,
8842
9468
  estimateTokens,
9469
+ extractObjects,
8843
9470
  findLatticeRoot,
8844
9471
  fixSchemaConflicts,
8845
9472
  frontmatter,
@@ -8868,9 +9495,12 @@ export {
8868
9495
  migrateLatticeData,
8869
9496
  normalizeEntityFiles,
8870
9497
  openTargetLatticeForMigration,
9498
+ organizeSource,
8871
9499
  parseConfigFile,
8872
9500
  parseConfigString,
8873
9501
  parseMarkdownEntries,
9502
+ parseMatches,
9503
+ parseObjects,
8874
9504
  parseSessionMD,
8875
9505
  parseSessionWrites,
8876
9506
  probeCloud,
@@ -8893,6 +9523,7 @@ export {
8893
9523
  saveDbCredentialForTeam,
8894
9524
  setActiveWorkspace,
8895
9525
  slugify,
9526
+ summarizeText,
8896
9527
  toSafeDirName,
8897
9528
  truncate,
8898
9529
  validateEntryId,