latticesql 1.16.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +32 -0
- package/dist/cli.js +4860 -715
- package/dist/index.cjs +646 -3
- package/dist/index.d.cts +306 -1
- package/dist/index.d.ts +306 -1
- package/dist/index.js +634 -3
- package/package.json +9 -2
package/dist/index.js
CHANGED
|
@@ -298,6 +298,9 @@ var PostgresAdapter = class {
|
|
|
298
298
|
connectionString: this._connectionString,
|
|
299
299
|
max: this._poolSize
|
|
300
300
|
});
|
|
301
|
+
this._pool.on("error", (err) => {
|
|
302
|
+
console.error("[latticesql] recovered from idle Postgres client error:", err.message);
|
|
303
|
+
});
|
|
301
304
|
this._polyfillsReady = this._registerPolyfills();
|
|
302
305
|
this._opened = true;
|
|
303
306
|
}
|
|
@@ -832,6 +835,15 @@ var SchemaManager = class {
|
|
|
832
835
|
}
|
|
833
836
|
this._entityContexts.set(table, def);
|
|
834
837
|
}
|
|
838
|
+
/**
|
|
839
|
+
* Register OR replace an entity context — unlike {@link defineEntityContext}
|
|
840
|
+
* (which throws on a redefine), this overwrites. Used to re-derive a
|
|
841
|
+
* runtime-created table's canonical context after a schema change (e.g. a new
|
|
842
|
+
* junction adds a hasMany rollup to the tables it links) without a reopen.
|
|
843
|
+
*/
|
|
844
|
+
redefineEntityContext(table, def) {
|
|
845
|
+
this._entityContexts.set(table, def);
|
|
846
|
+
}
|
|
835
847
|
getTables() {
|
|
836
848
|
return this._tables;
|
|
837
849
|
}
|
|
@@ -1907,20 +1919,25 @@ function cleanupEntityContexts(outputDir, entityContexts, currentSlugsByTable, m
|
|
|
1907
1919
|
}
|
|
1908
1920
|
|
|
1909
1921
|
// src/render/engine.ts
|
|
1922
|
+
var NOOP_RENDER = () => "";
|
|
1910
1923
|
var RenderEngine = class {
|
|
1911
1924
|
_schema;
|
|
1912
1925
|
_adapter;
|
|
1913
1926
|
_getTaskContext;
|
|
1914
|
-
|
|
1927
|
+
/** When true, skip the read + write for spec-less (no-op render) tables. */
|
|
1928
|
+
_skipEmpty;
|
|
1929
|
+
constructor(schema, adapter, getTaskContext, options) {
|
|
1915
1930
|
this._schema = schema;
|
|
1916
1931
|
this._adapter = adapter;
|
|
1917
1932
|
this._getTaskContext = getTaskContext ?? (() => "");
|
|
1933
|
+
this._skipEmpty = options?.skipEmpty ?? false;
|
|
1918
1934
|
}
|
|
1919
1935
|
async render(outputDir) {
|
|
1920
1936
|
const start = Date.now();
|
|
1921
1937
|
const filesWritten = [];
|
|
1922
1938
|
const counters = { skipped: 0 };
|
|
1923
1939
|
for (const [name, def] of this._schema.getTables()) {
|
|
1940
|
+
if (this._skipEmpty && def.render === NOOP_RENDER) continue;
|
|
1924
1941
|
let rows = await this._schema.queryTable(this._adapter, name);
|
|
1925
1942
|
if (def.relevanceFilter) {
|
|
1926
1943
|
const ctx = this._getTaskContext();
|
|
@@ -4262,7 +4279,9 @@ var Lattice = class _Lattice {
|
|
|
4262
4279
|
this._adapter = options.adapter ?? buildAdapter(dbPath, options);
|
|
4263
4280
|
this._schema = new SchemaManager();
|
|
4264
4281
|
this._sanitizer = new Sanitizer(options.security);
|
|
4265
|
-
this._render = new RenderEngine(this._schema, this._adapter, () => this._taskContext
|
|
4282
|
+
this._render = new RenderEngine(this._schema, this._adapter, () => this._taskContext, {
|
|
4283
|
+
skipEmpty: options.renderSkipsEmpty ?? false
|
|
4284
|
+
});
|
|
4266
4285
|
this._reverseSync = new ReverseSyncEngine(this._schema, this._adapter);
|
|
4267
4286
|
this._reverseSeedEngine = new ReverseSeedEngine(this._schema, this._adapter);
|
|
4268
4287
|
this._loop = new SyncLoop(this._render);
|
|
@@ -4383,7 +4402,7 @@ var Lattice = class _Lattice {
|
|
|
4383
4402
|
table,
|
|
4384
4403
|
this._schema,
|
|
4385
4404
|
this._adapter
|
|
4386
|
-
) :
|
|
4405
|
+
) : NOOP_RENDER,
|
|
4387
4406
|
outputFile: def.outputFile ?? `.schema-only/${table}.md`,
|
|
4388
4407
|
...renderTemplateName ? { _renderTemplateName: renderTemplateName } : {}
|
|
4389
4408
|
};
|
|
@@ -4399,6 +4418,15 @@ var Lattice = class _Lattice {
|
|
|
4399
4418
|
this._schema.defineEntityContext(table, def);
|
|
4400
4419
|
return this;
|
|
4401
4420
|
}
|
|
4421
|
+
/**
|
|
4422
|
+
* Register or REPLACE an entity context (overwrites instead of throwing on a
|
|
4423
|
+
* redefine — see {@link SchemaManager.redefineEntityContext}). Used to refresh
|
|
4424
|
+
* a canonical context at runtime after a related schema change.
|
|
4425
|
+
*/
|
|
4426
|
+
redefineEntityContext(table, def) {
|
|
4427
|
+
this._schema.redefineEntityContext(table, def);
|
|
4428
|
+
return this;
|
|
4429
|
+
}
|
|
4402
4430
|
/**
|
|
4403
4431
|
* All entity contexts currently registered on this Lattice — both those
|
|
4404
4432
|
* declared in `lattice.config.yml` and those added programmatically via
|
|
@@ -6424,6 +6452,39 @@ var NATIVE_ENTITY_DEFS = {
|
|
|
6424
6452
|
},
|
|
6425
6453
|
render: () => "",
|
|
6426
6454
|
outputFile: ".lattice-native/notes.md"
|
|
6455
|
+
},
|
|
6456
|
+
chat_threads: {
|
|
6457
|
+
// An assistant conversation. Native so chat history survives across
|
|
6458
|
+
// sessions and is queryable/renderable like any other Lattice entity.
|
|
6459
|
+
columns: {
|
|
6460
|
+
id: "TEXT PRIMARY KEY",
|
|
6461
|
+
title: "TEXT",
|
|
6462
|
+
created_at: "TEXT NOT NULL DEFAULT (datetime('now'))",
|
|
6463
|
+
updated_at: "TEXT NOT NULL DEFAULT (datetime('now'))",
|
|
6464
|
+
deleted_at: "TEXT"
|
|
6465
|
+
},
|
|
6466
|
+
render: () => "",
|
|
6467
|
+
outputFile: ".lattice-native/chat-threads.md"
|
|
6468
|
+
},
|
|
6469
|
+
chat_messages: {
|
|
6470
|
+
// One turn (or feed entry) within a chat_thread.
|
|
6471
|
+
columns: {
|
|
6472
|
+
id: "TEXT PRIMARY KEY",
|
|
6473
|
+
// Soft reference to chat_threads.id. Kept as a plain column (no FK)
|
|
6474
|
+
// to match the generic, dialect-agnostic native-entity style.
|
|
6475
|
+
thread_id: "TEXT",
|
|
6476
|
+
// user | assistant | tool | feed | system
|
|
6477
|
+
role: "TEXT NOT NULL DEFAULT 'user'",
|
|
6478
|
+
// JSON payload: text, tool_use / tool_result blocks, attachments, or
|
|
6479
|
+
// (for role='feed') the feed-event details.
|
|
6480
|
+
content_json: "TEXT",
|
|
6481
|
+
// ai | gui | cli | ingest — meaningful for role='feed'.
|
|
6482
|
+
source: "TEXT",
|
|
6483
|
+
created_at: "TEXT NOT NULL DEFAULT (datetime('now'))",
|
|
6484
|
+
deleted_at: "TEXT"
|
|
6485
|
+
},
|
|
6486
|
+
render: () => "",
|
|
6487
|
+
outputFile: ".lattice-native/chat-messages.md"
|
|
6427
6488
|
}
|
|
6428
6489
|
};
|
|
6429
6490
|
var NATIVE_ENTITY_NAMES = new Set(Object.keys(NATIVE_ENTITY_DEFS));
|
|
@@ -8796,6 +8857,566 @@ function archiveLocalSqlite(dbPath) {
|
|
|
8796
8857
|
}
|
|
8797
8858
|
return backupPath;
|
|
8798
8859
|
}
|
|
8860
|
+
|
|
8861
|
+
// src/ai/llm-client.ts
|
|
8862
|
+
import { createRequire as createRequire2 } from "module";
|
|
8863
|
+
var DEFAULT_MODEL = "claude-haiku-4-5";
|
|
8864
|
+
|
|
8865
|
+
// src/ai/summarize.ts
|
|
8866
|
+
var SUMMARY_SYSTEM = 'You write a one or two sentence factual description of a document for a knowledge base, focused on what it is and what it contains. No preamble, no "This document". Plain text only.';
|
|
8867
|
+
/**
 * Ask the model for a short factual description of a document.
 *
 * Only the first 12,000 characters of `text` are sent. Streamed deltas are
 * ignored; the final turn text is returned with surrounding whitespace removed.
 *
 * @param {{ runTurn: Function }} client - LLM client exposing `runTurn`.
 * @param {string} text - Document text to describe.
 * @param {string} name - File name shown to the model.
 * @returns {Promise<string>} The trimmed description.
 */
async function summarizeText(client, text, name) {
  const excerpt = text.slice(0, 12e3);
  const prompt = `File name: ${name}

Content:
${excerpt}

Describe it in 1-2 sentences.`;
  const ignoreDelta = () => void 0;
  const { text: reply } = await client.runTurn({
    model: DEFAULT_MODEL,
    system: SUMMARY_SYSTEM,
    messages: [{ role: "user", content: prompt }],
    tools: [],
    onText: ignoreDelta
  });
  return reply.trim();
}
|
|
8887
|
+
var CLASSIFY_SYSTEM = 'You decide which existing records a newly added document relates to. You are given a catalog of record types (with descriptions) and their records. Return ONLY a JSON array of {"table","id"} objects for records the document clearly relates to \u2014 an empty array if none. Output the JSON in a ```json fenced block and nothing else.';
|
|
8888
|
+
/**
 * Render the catalog as markdown for the classification prompt.
 *
 * Each entry becomes a `## table` heading (followed by an em dash and its
 * description when one is set) and one bullet per record, or a single
 * placeholder bullet when the entry has no records. Sections are separated
 * by a blank line.
 *
 * @param {Array<{table: string, description?: string, records: Array<{id: unknown, label: unknown}>}>} catalog
 * @returns {string} The markdown block ("" for an empty catalog).
 */
function buildCatalogBlock(catalog) {
  const sections = [];
  for (const entry of catalog) {
    let heading = `## ${entry.table}`;
    if (entry.description) heading += ` \u2014 ${entry.description}`;
    const bullets = [];
    for (const record of entry.records) {
      bullets.push(`- id=${record.id} | ${record.label}`);
    }
    const list = bullets.join("\n") || "- (no records)";
    sections.push(`${heading}\n${list}`);
  }
  return sections.join("\n\n");
}
|
|
8896
|
+
/**
 * Parse the classifier's reply into validated `{ table, id }` matches.
 *
 * The reply is read from a ```json fenced block when present, otherwise from
 * the first fenced block of any kind, otherwise from the raw text. Anything
 * that does not parse to a JSON array yields `[]`.
 *
 * Only pairs whose `table` and `id` are strings AND that exist in `catalog`
 * are kept, so ids the model invented are dropped. Repeated pairs are
 * collapsed to their first occurrence so callers do not create duplicate
 * links for the same record.
 *
 * @param {string} raw - Model reply text.
 * @param {Array<{table: string, records: Array<{id: unknown}>}>} catalog - Known records.
 * @returns {Array<{table: string, id: string}>} Validated, de-duplicated matches in reply order.
 */
function parseMatches(raw, catalog) {
  const text = typeof raw === "string" ? raw : "";
  // Prefer the requested ```json fence; tolerate a bare or differently tagged fence.
  const fence = /```json\s*([\s\S]*?)```/i.exec(text) ?? /```[a-z]*\s*([\s\S]*?)```/i.exec(text);
  const body = fence ? fence[1] : text;
  let parsed;
  try {
    parsed = JSON.parse(body.trim());
  } catch {
    // Unparseable replies are treated as "no matches" rather than an error.
    return [];
  }
  if (!Array.isArray(parsed)) return [];
  const valid = new Map(catalog.map((e) => [e.table, new Set(e.records.map((r) => r.id))]));
  const seen = new Set();
  const out = [];
  for (const item of parsed) {
    if (!item || typeof item !== "object") continue;
    const { table, id } = item;
    if (typeof table !== "string" || typeof id !== "string") continue;
    if (!valid.get(table)?.has(id)) continue;
    // JSON-encode the pair so distinct (table, id) combinations cannot collide.
    const key = JSON.stringify([table, id]);
    if (seen.has(key)) continue;
    seen.add(key);
    out.push({ table, id });
  }
  return out;
}
|
|
8918
|
+
var ID_RE = /^[a-z][a-z0-9_]*$/;
|
|
8919
|
+
var RESERVED_COLS = /* @__PURE__ */ new Set(["id", "deleted_at", "created_at", "updated_at"]);
|
|
8920
|
+
var EXTRACT_SYSTEM = 'You build a knowledge base by extracting the key structured objects a document is ABOUT \u2014 e.g. an invoice, a person, a project, a contract, a meeting. You are given the existing entity types (tables) and their columns. For each salient object: reuse an existing entity when one clearly fits; otherwise propose a NEW entity with a short snake_case PLURAL name and 2-6 simple snake_case columns. Extract only objects the document is genuinely about \u2014 prefer 1-3, never more than 3, and never invent data not in the document. Return ONLY a JSON array of objects {"entity","isNew","columns","values","label"}, where "values" is an OBJECT mapping each column name to its value \u2014 e.g. {"invoice_number":"INV-114","total":"6400"} \u2014 in a ```json fenced block.';
|
|
8921
|
+
/**
 * Render the existing entity types for the extraction prompt.
 *
 * Each entity becomes a `## table` heading followed by a `columns:` line
 * listing its columns; sections are separated by a blank line. When there
 * are no entities a fixed placeholder sentence is returned instead.
 *
 * @param {Array<{table: string, columns: string[]}>} existing
 * @returns {string}
 */
function buildSchemaBlock(existing) {
  if (existing.length === 0) {
    return "(no entities yet \u2014 propose new ones)";
  }
  const sections = [];
  for (const { table, columns } of existing) {
    sections.push(`## ${table}\ncolumns: ${columns.join(", ")}`);
  }
  return sections.join("\n\n");
}
|
|
8926
|
+
/**
 * Parse the extractor's reply into at most three sanitized objects.
 *
 * The reply is read from a ```json fenced block when present, else from the
 * raw text; anything that is not a JSON array yields `[]`. For each item:
 *  - `entity` is trimmed and lower-cased and must match `ID_RE`; `label` must
 *    be a non-empty string after trimming;
 *  - `values` may be an object, or an array paired positionally with
 *    `columns`;
 *  - only string/number values under valid, non-reserved column names are
 *    kept, stringified and capped at 2000 characters;
 *  - items left with no values are skipped;
 *  - `columns` is the de-duplicated union of the declared (valid) columns and
 *    the kept value keys, capped at 8.
 *
 * @param {string} raw - Model reply text.
 * @returns {Array<{entity: string, isNew: boolean, columns: string[], values: Object, label: string}>}
 */
function parseObjects(raw) {
  const fenced = /```json\s*([\s\S]*?)```/i.exec(raw);
  let payload;
  try {
    const source = fenced ? fenced[1] : raw;
    payload = JSON.parse(source?.trim() ?? "");
  } catch {
    return [];
  }
  if (!Array.isArray(payload)) return [];

  const normalize = (key) => key.trim().toLowerCase();
  const isUsableColumn = (col) => ID_RE.test(col) && !RESERVED_COLS.has(col);

  const results = [];
  for (const candidate of payload) {
    if (!candidate || typeof candidate !== "object") continue;

    const entity = typeof candidate.entity === "string" ? normalize(candidate.entity) : "";
    const label = typeof candidate.label === "string" ? candidate.label.trim() : "";
    if (!ID_RE.test(entity) || !label) continue;

    // Accept either {col: value} or parallel columns/values arrays.
    const hasColumnList = Array.isArray(candidate.columns);
    let rawValues = {};
    if (hasColumnList && Array.isArray(candidate.values)) {
      for (let i = 0; i < candidate.columns.length; i++) {
        rawValues[String(candidate.columns[i])] = candidate.values[i];
      }
    } else if (candidate.values && typeof candidate.values === "object") {
      rawValues = candidate.values;
    }

    const values = {};
    for (const key of Object.keys(rawValues)) {
      const col = normalize(key);
      const value = rawValues[key];
      if (!isUsableColumn(col)) continue;
      if (typeof value !== "string" && typeof value !== "number") continue;
      values[col] = String(value).slice(0, 2e3);
    }
    const valueColumns = Object.keys(values);
    if (valueColumns.length === 0) continue;

    const declared = hasColumnList
      ? candidate.columns.map((c) => normalize(String(c))).filter(isUsableColumn)
      : [];
    const columns = [...new Set([...declared, ...valueColumns])].slice(0, 8);

    results.push({ entity, isNew: candidate.isNew === true, columns, values, label });
    if (results.length >= 3) break;
  }
  return results;
}
|
|
8965
|
+
/**
 * Ask the model which structured objects a document is about.
 *
 * Returns `[]` without calling the model when `text` is blank. Otherwise the
 * prompt lists the existing entities, then the document name and the first
 * 12,000 characters of its text, and the reply is parsed by `parseObjects`.
 *
 * @param {{ runTurn: Function }} client - LLM client exposing `runTurn`.
 * @param {string} text - Document text.
 * @param {string} name - Document name shown to the model.
 * @param {Array<{table: string, columns: string[]}>} existing - Known entity types.
 * @param {number} [temperature] - Forwarded to the model only when provided.
 * @returns {Promise<Array>} The parsed objects.
 */
async function extractObjects(client, text, name, existing, temperature) {
  if (text.trim().length === 0) return [];

  const prompt = [
    "# Existing entities",
    buildSchemaBlock(existing),
    "",
    `# Document: ${name}`,
    "",
    text.slice(0, 12e3),
    "",
    "# Task",
    "Return the JSON array of objects to create."
  ].join("\n");

  const request = {
    model: DEFAULT_MODEL,
    system: EXTRACT_SYSTEM,
    messages: [{ role: "user", content: prompt }],
    tools: []
  };
  // Leave the key out entirely when the caller did not choose a temperature.
  if (temperature !== void 0) request.temperature = temperature;
  request.onText = () => void 0;

  const turn = await client.runTurn(request);
  return parseObjects(turn.text);
}
|
|
8990
|
+
/**
 * Ask the model which catalog records a document relates to.
 *
 * Returns `[]` without calling the model when the catalog is empty or the
 * text is blank. The prompt contains the rendered catalog, the document name
 * and the first 12,000 characters of its text. The reply is taken from the
 * turn's final text, falling back to the concatenated streamed deltas when
 * that is empty, and validated by `parseMatches`.
 *
 * @param {{ runTurn: Function }} client - LLM client exposing `runTurn`.
 * @param {string} text - Document text.
 * @param {string} name - Document name shown to the model.
 * @param {Array} catalog - Record catalog (see `buildCatalogBlock`).
 * @returns {Promise<Array<{table: string, id: string}>>}
 */
async function classifyLinks(client, text, name, catalog) {
  const nothingToClassify = catalog.length === 0 || text.trim().length === 0;
  if (nothingToClassify) return [];

  const prompt = `# Catalog\n${buildCatalogBlock(catalog)}\n\n# Document: ${name}\n\n${text.slice(0, 12e3)}\n\n# Task\nReturn the JSON array of matching {table,id}.`;

  let streamed = "";
  const { text: finalText } = await client.runTurn({
    model: DEFAULT_MODEL,
    system: CLASSIFY_SYSTEM,
    messages: [{ role: "user", content: prompt }],
    tools: [],
    onText(delta) {
      streamed += delta;
    }
  });
  return parseMatches(finalText || streamed, catalog);
}
|
|
9017
|
+
|
|
9018
|
+
// src/ai/organize.ts
|
|
9019
|
+
/**
 * Describe a newly added source and attach it to the knowledge base.
 *
 * Steps, in order: summarize the text, classify it against `catalog`, link
 * it to every accepted match, and - only when nothing was linked,
 * `createIfNecessary` is on and the text is non-blank - create one fallback
 * record titled after the file name (extension removed, "Note" when empty).
 *
 * Without a `client` nothing is done and a `skipped: true` result is returned.
 *
 * @param {{ insert: Function }} db - Store used by the default link/create handlers.
 * @param {Object} opts
 * @param {string} opts.fileId - Id written to `file_id` on link rows.
 * @param {string} opts.text - Source text.
 * @param {string} opts.name - Source file name.
 * @param {Array} opts.catalog - Record catalog passed to the classifier.
 * @param {Object} [opts.client] - LLM client; absent means "skip".
 * @param {string} [opts.linkTable="file_links"]
 * @param {string} [opts.fallbackTable="notes"]
 * @param {boolean} [opts.createIfNecessary=true]
 * @param {Function} [opts.linkExisting] - `(match) => truthy` when the link was made.
 * @param {Function} [opts.createFallback] - `(title, body) => { table, id }` or falsy.
 * @returns {Promise<{skipped: boolean, description: string, linked: Array, created: Array, message: string}>}
 */
async function organizeSource(db, opts) {
  if (!opts.client) {
    return { skipped: true, description: "", linked: [], created: [], message: "" };
  }
  const { fileId, text, name, catalog, client } = opts;
  const linkTable = opts.linkTable ?? "file_links";
  const fallbackTable = opts.fallbackTable ?? "notes";
  const createIfNecessary = opts.createIfNecessary ?? true;

  const defaultLinkExisting = async (match) => {
    await db.insert(linkTable, {
      file_id: fileId,
      table_name: match.table,
      row_id: match.id,
      relevance: "related"
    });
    return true;
  };
  const defaultCreateFallback = async (title, body) => {
    const id = await db.insert(fallbackTable, { title, body });
    await db.insert(linkTable, {
      file_id: fileId,
      table_name: fallbackTable,
      row_id: id,
      relevance: "primary"
    });
    return { table: fallbackTable, id };
  };
  const linkExisting = opts.linkExisting ?? defaultLinkExisting;
  const createFallback = opts.createFallback ?? defaultCreateFallback;

  const summary = await summarizeText(client, text, name);
  const description = summary.trim();
  const matches = await classifyLinks(client, text, name, catalog);

  const linked = [];
  for (const match of matches) {
    const accepted = await linkExisting(match);
    if (accepted) linked.push({ table: match.table, id: match.id });
  }

  const created = [];
  const needsFallback = linked.length === 0 && createIfNecessary && text.trim().length > 0;
  if (needsFallback) {
    // Title is the file name minus its final extension.
    const stem = name.replace(/\.[^./\\]+$/, "").trim();
    const title = stem || "Note";
    const body = description.length > 0 ? description : text.slice(0, 2e3);
    const fallback = await createFallback(title, body);
    if (fallback) created.push({ table: fallback.table, id: fallback.id, title });
  }

  const message = buildMessage(linked, created);
  return { skipped: false, description, linked, created, message };
}
|
|
9067
|
+
/**
 * Build the user-facing summary of what organizing a source did.
 *
 * Produces one sentence for the links (with a per-table breakdown), one
 * sentence per created record, a fixed "nothing to do" sentence when neither
 * happened, and always ends with the "You can change..." reminder. Sentences
 * are joined with single spaces.
 *
 * @param {Array<{table: string, id: string}>} linked
 * @param {Array<{table: string, id: string, title: string}>} created
 * @returns {string}
 */
function buildMessage(linked, created) {
  const sentences = [];

  if (linked.length > 0) {
    const counts = /* @__PURE__ */ new Map();
    linked.forEach(({ table }) => {
      counts.set(table, (counts.get(table) ?? 0) + 1);
    });
    const breakdown = Array.from(counts, ([table, count]) => `${String(count)} in ${table}`).join(", ");
    const plural = linked.length === 1 ? "" : "s";
    sentences.push(`Linked it to ${String(linked.length)} existing record${plural} (${breakdown}).`);
  }

  created.forEach((record) => {
    sentences.push(
      `Created a new ${singular(record.table)} "${record.title}" because it didn't fit any existing record.`
    );
  });

  if (sentences.length === 0) {
    sentences.push("Saved it; nothing else needed organizing.");
  }
  sentences.push("You can change any of this anytime.");
  return sentences.join(" ");
}
|
|
9086
|
+
/**
 * Naive English singularization of a table name for display.
 *
 * Rules, first match wins (both case-insensitive): a trailing "ies" becomes
 * "y"; a trailing "s" that is not preceded by another "s" is dropped.
 * Anything else is returned unchanged.
 *
 * @param {string} table
 * @returns {string}
 */
function singular(table) {
  const rules = [
    [/ies$/i, "y"],
    // A final "s" that is not part of a double "ss" ending.
    [/(?<!s)s$/i, ""]
  ];
  for (const [suffix, replacement] of rules) {
    if (suffix.test(table)) return table.replace(suffix, replacement);
  }
  return table;
}
|
|
9091
|
+
|
|
9092
|
+
// src/ai/crawl.ts
|
|
9093
|
+
import { JSDOM } from "jsdom";
|
|
9094
|
+
import { Readability } from "@mozilla/readability";
|
|
9095
|
+
import { basename as basename5 } from "path";
|
|
9096
|
+
import { createRequire as createRequire3 } from "module";
|
|
9097
|
+
var DEFAULT_MAX_BYTES2 = 25 * 1024 * 1024;
|
|
9098
|
+
var DEFAULT_TIMEOUT_MS = 3e4;
|
|
9099
|
+
var DEFAULT_UA = "LatticeSQL/2.0 (+https://latticesql.com)";
|
|
9100
|
+
/**
 * Fetch a URL and extract its readable content.
 *
 * Flow: validate the URL (`assertSafeUrl`), fetch it through `safeFetch`
 * under an abort timer, sniff the MIME type when the server gave none (or
 * `application/octet-stream`), then:
 *  - non-HTML/XML, non-text responses are returned as-is (body decoded as
 *    UTF-8, title derived from the URL);
 *  - otherwise the page is parsed with JSDOM + Readability, falling back to
 *    the stripped body text, and - unless `opts.noJs` - re-rendered through
 *    Playwright when fewer than 200 characters were extracted.
 *
 * The abort timer stays armed until the response body has been fully read,
 * so a server that stalls mid-body is cut off after `timeoutMs` too.
 * Readability is always given a CLONE of the document because `parse()`
 * mutates its input; the stripped-body fallback therefore sees the page as
 * it was fetched.
 *
 * NOTE(review): the whole body is buffered before `maxBytes` truncation, so
 * `maxBytes` limits what is parsed, not what is downloaded.
 *
 * @param {string} rawUrl - URL to crawl.
 * @param {Object} [opts]
 * @param {boolean} [opts.allowPrivate=false] - Forwarded to the URL/fetch safety helpers.
 * @param {Function} [opts.fetcher=fetch] - Fetch implementation handed to `safeFetch`.
 * @param {number} [opts.timeoutMs] - Abort/render timeout; defaults to `DEFAULT_TIMEOUT_MS`.
 * @param {string} [opts.userAgent] - Overrides the default user-agent header.
 * @param {number} [opts.maxBytes] - Bytes of body to parse; defaults to `DEFAULT_MAX_BYTES2`.
 * @param {boolean} [opts.noJs] - Skip the Playwright re-render.
 * @returns {Promise<{url: string, title: string, text: string, excerpt: string, mime: string, byteLength: number}>}
 *   `byteLength` is the size of the full (untruncated) body.
 * @throws {Error} When the response status is not OK, or the request/body read is aborted or fails.
 */
async function crawlUrl(rawUrl, opts = {}) {
  const allowPrivate = opts.allowPrivate ?? false;
  const timeoutMs = opts.timeoutMs ?? DEFAULT_TIMEOUT_MS;
  const u = await assertSafeUrl(rawUrl, allowPrivate);
  const fetchImpl = opts.fetcher ?? fetch;
  const controller = new AbortController();
  const timer = setTimeout(() => {
    controller.abort();
  }, timeoutMs);
  let res;
  let raw;
  try {
    res = await safeFetch(u.toString(), fetchImpl, {
      allowPrivate,
      init: {
        signal: controller.signal,
        headers: {
          "user-agent": opts.userAgent ?? DEFAULT_UA,
          accept: "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
        }
      }
    });
    if (!res.ok) {
      throw new Error(`Lattice: crawl failed for ${rawUrl}: HTTP ${String(res.status)}`);
    }
    // Read the body while the timer is still armed: headers arriving does not
    // mean the download has finished.
    raw = Buffer.from(await res.arrayBuffer());
  } finally {
    clearTimeout(timer);
  }
  let mime = (res.headers.get("content-type") ?? "").split(";")[0]?.trim().toLowerCase() ?? "";
  const maxBytes = opts.maxBytes ?? DEFAULT_MAX_BYTES2;
  const body = raw.length > maxBytes ? raw.subarray(0, maxBytes) : raw;
  const finalUrl = res.url || u.toString();
  if (mime === "" || mime === "application/octet-stream") {
    mime = await sniffMime(body) || mime;
  }
  const isHtml = mime.includes("html") || mime.includes("xml");
  if (mime && !isHtml && !mime.startsWith("text/")) {
    // Not a page we can extract an article from; hand the bytes back decoded.
    return {
      url: finalUrl,
      title: titleFromUrl(finalUrl),
      text: body.toString("utf-8"),
      excerpt: "",
      mime,
      byteLength: raw.length
    };
  }
  const dom = new JSDOM(body.toString("utf-8"), { url: finalUrl });
  const doc = dom.window.document;
  let title = (doc.title || "").trim();
  let text = "";
  let excerpt = "";
  try {
    // parse() rewrites the document it is given, so work on a copy.
    const article = new Readability(doc.cloneNode(true)).parse();
    if (article) {
      text = article.textContent.trim();
      const articleTitle = article.title.trim();
      if (articleTitle.length > 0) title = articleTitle;
      excerpt = article.excerpt.trim();
    }
  } catch {
    // Best effort: fall through to the stripped-body fallback below.
  }
  if (text.length === 0) text = strippedBodyText(dom);
  if (!opts.noJs && text.length < 200) {
    // Very little text usually means a client-rendered page; try a real browser.
    const rendered = await renderViaPlaywright(finalUrl, timeoutMs);
    if (rendered) {
      const rdom = new JSDOM(rendered, { url: finalUrl });
      const rdoc = rdom.window.document;
      try {
        const a = new Readability(rdoc.cloneNode(true)).parse();
        if (a && a.textContent.trim().length > text.length) {
          text = a.textContent.trim();
          if (a.title.trim().length > 0) title = a.title.trim();
          if (a.excerpt.trim().length > 0) excerpt = a.excerpt.trim();
        }
      } catch {
        // Best effort: keep whatever the static pass produced.
      }
      if (text.length === 0) text = strippedBodyText(rdom);
    }
  }
  if (title.length === 0) title = titleFromUrl(finalUrl);
  return { url: finalUrl, title, text, excerpt, mime: mime || "text/html", byteLength: raw.length };
}
|
|
9180
|
+
/**
 * Fallback text extraction: the body's text with non-content elements removed.
 *
 * Removes every `script`, `style`, `noscript` and `template` element from
 * the document (this MUTATES `dom`), then tidies whitespace: whitespace runs
 * ending in a newline collapse to that newline, runs of two or more
 * spaces/tabs collapse to one space, and the result is trimmed.
 *
 * @param {{ window: { document: Document } }} dom - A JSDOM-like wrapper.
 * @returns {string}
 */
function strippedBodyText(dom) {
  const { document: page } = dom.window;
  const nonContent = Array.from(page.querySelectorAll("script, style, noscript, template"));
  nonContent.forEach((node) => {
    node.remove();
  });
  const rawText = page.body.textContent ?? "";
  const withoutTrailingSpace = rawText.replace(/\s+\n/g, "\n");
  const collapsed = withoutTrailingSpace.replace(/[ \t]{2,}/g, " ");
  return collapsed.trim();
}
|
|
9187
|
+
/**
 * Derive a display title from a URL.
 *
 * Uses the last path segment when there is one, otherwise the host name.
 * Input that cannot be parsed as a URL is returned unchanged.
 *
 * @param {string} rawUrl
 * @returns {string}
 */
function titleFromUrl(rawUrl) {
  try {
    const { pathname, hostname } = new URL(rawUrl);
    const leaf = basename5(pathname);
    if (leaf && leaf !== "/") return leaf;
    return hostname;
  } catch {
    return rawUrl;
  }
}
|
|
9196
|
+
/**
 * Best-effort MIME detection from the leading bytes of a buffer.
 *
 * Loads `file-type` lazily; when the package is unavailable, detection
 * throws, or no type is recognized, the empty string is returned.
 *
 * @param {Uint8Array} body - Bytes to inspect.
 * @returns {Promise<string>} Detected MIME type, or "".
 */
async function sniffMime(body) {
  try {
    const { fileTypeFromBuffer } = await import("file-type");
    const detected = await fileTypeFromBuffer(body);
    return detected?.mime ?? "";
  } catch {
    // Deliberately silent: sniffing is optional.
    return "";
  }
}
|
|
9205
|
+
/**
 * Render a page in headless Chromium and return its HTML, or `null`.
 *
 * Playwright is resolved lazily; `null` is returned when it cannot be
 * loaded, or when launching, navigating (waiting for network idle, bounded
 * by `timeoutMs`) or reading the page fails. A launched browser is always
 * closed, and a failure to close is ignored.
 *
 * @param {string} url - Page to render.
 * @param {number} timeoutMs - Navigation timeout in milliseconds.
 * @returns {Promise<string|null>} Serialized page HTML, or null on any failure.
 */
async function renderViaPlaywright(url, timeoutMs) {
  const loadChromium = () => {
    const importMetaUrl = import.meta.url;
    const req = importMetaUrl ? createRequire3(importMetaUrl) : __require;
    return req("playwright").chromium;
  };

  let chromium;
  try {
    chromium = loadChromium();
  } catch {
    // Playwright is not available in this environment.
    return null;
  }

  let browser = null;
  try {
    browser = await chromium.launch({ headless: true });
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "networkidle", timeout: timeoutMs });
    return await page.content();
  } catch {
    return null;
  } finally {
    if (browser) await browser.close().catch(() => void 0);
  }
}
|
|
9227
|
+
|
|
9228
|
+
// src/ai/enrich.ts
|
|
9229
|
+
var ENRICH_SYSTEM = 'You are writing the body of a knowledge-base entry by synthesizing several source documents into one coherent, factual summary. Integrate concrete facts (dates, names, amounts) and note relationships across sources. Do not invent anything; if the sources are thin, keep it short. Output ONLY the body markdown \u2014 no title, no headings like "Sources", no preamble, no fences.';
|
|
9230
|
+
/**
 * Rewrite thin knowledge entries by synthesizing their linked sources.
 *
 * An entry is a candidate when it has a string id, at least `minSources`
 * DISTINCT linked source files, and a body shorter than `thinBodyChars`.
 * Each candidate's sources are loaded (first 4000 characters of each
 * non-blank text); if at least `minSources` of them have text, the model is
 * asked for a new body, which is written back only when `isBetter` accepts
 * it. Processing stops once `maxObjects` entries have been enriched.
 *
 * Link rows are de-duplicated per entry: the same file linked twice counts
 * as one source and is sent to the model once.
 *
 * Without a `client` nothing is done and a `skipped: true` result is returned.
 *
 * @param {{ query: Function, get: Function, update: Function }} db
 * @param {Object} opts
 * @param {Object} [opts.client] - LLM client exposing `runTurn`; absent means "skip".
 * @param {string} [opts.knowledgeTable="notes"]
 * @param {string} [opts.bodyColumn="body"]
 * @param {string} [opts.linkTable="file_links"]
 * @param {string} [opts.sourceTable="files"]
 * @param {string} [opts.sourceTextColumn="extracted_text"]
 * @param {number} [opts.minSources=2]
 * @param {number} [opts.thinBodyChars=500]
 * @param {number} [opts.maxObjects=40]
 * @returns {Promise<{skipped: boolean, enriched: string[], examined: number}>}
 *   `examined` counts candidates that passed the id/source-count/thin-body checks.
 */
async function enrichKnowledge(db, opts) {
  const { client } = opts;
  if (!client) return { skipped: true, enriched: [], examined: 0 };
  const knowledgeTable = opts.knowledgeTable ?? "notes";
  const bodyColumn = opts.bodyColumn ?? "body";
  const linkTable = opts.linkTable ?? "file_links";
  const sourceTable = opts.sourceTable ?? "files";
  const sourceTextColumn = opts.sourceTextColumn ?? "extracted_text";
  const minSources = opts.minSources ?? 2;
  const thinBodyChars = opts.thinBodyChars ?? 500;
  const maxObjects = opts.maxObjects ?? 40;
  const links = await db.query(linkTable);
  const objects = await db.query(knowledgeTable);
  // entry id -> distinct file ids (a Set keeps first-seen order and drops repeats).
  const sourceIdsByObject = /* @__PURE__ */ new Map();
  for (const l of links) {
    if (String(l.table_name) !== knowledgeTable) continue;
    const rowId = String(l.row_id);
    let ids = sourceIdsByObject.get(rowId);
    if (!ids) {
      ids = /* @__PURE__ */ new Set();
      sourceIdsByObject.set(rowId, ids);
    }
    ids.add(String(l.file_id));
  }
  const enriched = [];
  let examined = 0;
  for (const obj of objects) {
    if (enriched.length >= maxObjects) break;
    const idVal = obj.id;
    // Entries whose id is not a string are skipped.
    const id = typeof idVal === "string" ? idVal : "";
    if (id.length === 0) continue;
    const sourceIds = sourceIdsByObject.get(id) ?? /* @__PURE__ */ new Set();
    if (sourceIds.size < minSources) continue;
    const rawBody = obj[bodyColumn];
    const currentBody = typeof rawBody === "string" ? rawBody : "";
    if (currentBody.length >= thinBodyChars) continue;
    examined++;
    const snippets = [];
    for (const sid of sourceIds) {
      const src = await db.get(sourceTable, sid);
      const rawText = src ? src[sourceTextColumn] : "";
      if (typeof rawText === "string" && rawText.trim().length > 0) {
        snippets.push(rawText.slice(0, 4e3));
      }
    }
    // Sources without usable text do not count toward the minimum.
    if (snippets.length < minSources) continue;
    const titleVal = obj.title ?? obj.name;
    const title = typeof titleVal === "string" && titleVal.length > 0 ? titleVal : id;
    const userBlock = `# Entry: ${title}

Current body:
${currentBody || "(empty)"}

` + snippets.map((s, i) => `## Source ${String(i + 1)}
${s}`).join("\n\n") + `

# Task
Write the improved body.`;
    let newBody = "";
    try {
      const turn = await client.runTurn({
        model: DEFAULT_MODEL,
        system: ENRICH_SYSTEM,
        messages: [{ role: "user", content: userBlock }],
        tools: [],
        onText: () => void 0
      });
      newBody = turn.text.trim();
    } catch {
      // Best effort: a failed model call skips this entry, not the whole run.
      continue;
    }
    if (isBetter(newBody, currentBody)) {
      await db.update(knowledgeTable, id, { [bodyColumn]: newBody });
      enriched.push(id);
    }
  }
  return { skipped: false, enriched, examined };
}
|
|
9305
|
+
/**
 * Decide whether a proposed body should replace the current one.
 *
 * An empty proposal never wins. Otherwise it wins when the current body is
 * a stub (under 40 characters once trimmed) and the proposal exceeds 120
 * characters, or when the proposal is more than 80 characters longer than
 * the current body.
 *
 * @param {string} next - Proposed body.
 * @param {string} prev - Current body.
 * @returns {boolean}
 */
function isBetter(next, prev) {
  const proposed = next.length;
  if (proposed === 0) return false;
  const replacesStub = prev.trim().length < 40 && proposed > 120;
  return replacesStub || proposed > prev.length + 80;
}
|
|
9310
|
+
|
|
9311
|
+
// src/ai/vision.ts
|
|
9312
|
+
import { createRequire as createRequire4 } from "module";
|
|
9313
|
+
import { readFile as readFile2 } from "fs/promises";
|
|
9314
|
+
var DEFAULT_PROMPT = "Describe this image for a knowledge base in 2-4 factual sentences: what it shows, any visible text, and notable details. No preamble.";
|
|
9315
|
+
var MAX_DIM = 1568;
|
|
9316
|
+
/**
 * Describe an image file with a vision-capable model.
 *
 * The image is first normalized to a JPEG (target size `opts.maxBytes`,
 * default 1,400,000 bytes), base64-encoded, and sent together with the
 * prompt and model name. The reply is returned trimmed.
 *
 * @param {Object} auth - Credentials for the default sender (unused when `opts.sender` is given).
 * @param {string} path2 - Path of the image file.
 * @param {Object} [opts]
 * @param {number} [opts.maxBytes] - Size target passed to `normalizeImage`.
 * @param {Function} [opts.sender] - `(input) => Promise<string>`; replaces the default sender.
 * @param {string} [opts.prompt] - Overrides `DEFAULT_PROMPT`.
 * @param {string} [opts.model] - Overrides `DEFAULT_MODEL`.
 * @returns {Promise<string>} The trimmed description.
 */
async function describeImage(auth, path2, opts = {}) {
  const jpeg = await normalizeImage(path2, opts.maxBytes ?? 14e5);
  const data = jpeg.toString("base64");
  const send = opts.sender ?? defaultSender(auth);
  const reply = await send({
    media_type: "image/jpeg",
    data,
    prompt: opts.prompt ?? DEFAULT_PROMPT,
    model: opts.model ?? DEFAULT_MODEL
  });
  return reply.trim();
}
|
|
9327
|
+
var DEFAULT_PDF_PROMPT = "Read this document for a knowledge base. First transcribe its readable text, then add a 2-4 sentence factual summary of what it is and its key details. It may be a scanned/image-only PDF \u2014 read the text from the page images. No preamble.";
|
|
9328
|
+
/**
 * Have the model read a PDF file directly and describe it.
 *
 * The file is loaded whole, rejected when larger than `opts.maxBytes`
 * (default 30,000,000 bytes), base64-encoded and sent with the prompt and
 * model name. The reply is returned trimmed.
 *
 * @param {Object} auth - Credentials for the default sender (unused when `opts.sender` is given).
 * @param {string|URL} path2 - Location of the PDF file.
 * @param {Object} [opts]
 * @param {number} [opts.maxBytes] - Largest file size accepted.
 * @param {Function} [opts.sender] - `(input) => Promise<string>`; replaces the default sender.
 * @param {string} [opts.prompt] - Overrides `DEFAULT_PDF_PROMPT`.
 * @param {string} [opts.model] - Overrides `DEFAULT_MODEL`.
 * @returns {Promise<string>} The trimmed model reply.
 * @throws {Error} When the file exceeds the size limit or cannot be read.
 */
async function describePdf(auth, path2, opts = {}) {
  const pdfBytes = await readFile2(path2);
  const limit = opts.maxBytes ?? 3e7;
  if (pdfBytes.length > limit) {
    throw new Error(
      `PDF too large for a direct model read (${String(pdfBytes.length)} > ${String(limit)} bytes)`
    );
  }
  const send = opts.sender ?? defaultPdfSender(auth);
  const reply = await send({
    data: pdfBytes.toString("base64"),
    prompt: opts.prompt ?? DEFAULT_PDF_PROMPT,
    model: opts.model ?? DEFAULT_MODEL
  });
  return reply.trim();
}
|
|
9344
|
+
/**
 * Re-encode an image as JPEG, lowering quality until it fits `maxBytes`.
 *
 * Encodes at quality 80, then retries at 65, 50 and 35 for as long as the
 * result is still larger than `maxBytes`. The last encoding is returned
 * even when it is still over the target.
 *
 * @param {string} path2 - Path of the image file.
 * @param {number} maxBytes - Size target in bytes.
 * @returns {Promise<Buffer>} The JPEG bytes.
 */
async function normalizeImage(path2, maxBytes) {
  const { default: sharp } = await import("sharp");
  let encoded = await renderJpeg(sharp, path2, 80);
  for (const quality of [65, 50, 35]) {
    if (!(encoded.length > maxBytes)) break;
    encoded = await renderJpeg(sharp, path2, quality);
  }
  return encoded;
}
|
|
9355
|
+
/**
 * Build and run the image pipeline: `rotate()` with no arguments, then
 * resize to fit inside MAX_DIM x MAX_DIM without enlarging, then encode as
 * JPEG at the given quality.
 *
 * @param {Function} sharp - The `sharp` factory.
 * @param {string} path2 - Path of the image file.
 * @param {number} quality - JPEG quality passed to the encoder.
 * @returns {Promise<Buffer>} Whatever the pipeline's `toBuffer()` returns.
 */
function renderJpeg(sharp, path2, quality) {
  const oriented = sharp(path2).rotate();
  const bounded = oriented.resize({
    width: MAX_DIM,
    height: MAX_DIM,
    fit: "inside",
    withoutEnlargement: true
  });
  return bounded.jpeg({ quality }).toBuffer();
}
|
|
9358
|
+
/**
 * Create the default image sender backed by `@anthropic-ai/sdk`.
 *
 * The SDK is resolved lazily on each call. The client is configured with
 * `auth.authToken` when set, otherwise `auth.apiKey`; `auth.betaHeader`, when
 * set, is sent as the `anthropic-beta` default header. The request carries
 * one user message (the base64 image followed by the prompt) with a
 * 1024-token cap, and the reply is the concatenation of its text blocks.
 *
 * @param {{authToken?: string, apiKey?: string, betaHeader?: string}} auth
 * @returns {(input: {model: string, media_type: string, data: string, prompt: string}) => Promise<string>}
 */
function defaultSender(auth) {
  return async (input) => {
    const importMetaUrl = import.meta.url;
    const req = importMetaUrl ? createRequire4(importMetaUrl) : __require;
    const sdk = req("@anthropic-ai/sdk");
    const Anthropic = sdk.Anthropic ?? sdk.default;
    if (!Anthropic) throw new Error("Could not resolve Anthropic from '@anthropic-ai/sdk'");

    // An auth token takes precedence over an API key; never send both.
    let credentials = {};
    if (auth.authToken) credentials = { authToken: auth.authToken };
    else if (auth.apiKey) credentials = { apiKey: auth.apiKey };
    const headers = auth.betaHeader ? { defaultHeaders: { "anthropic-beta": auth.betaHeader } } : {};
    const client = new Anthropic({ ...credentials, ...headers });

    const imageBlock = {
      type: "image",
      source: { type: "base64", media_type: input.media_type, data: input.data }
    };
    const promptBlock = { type: "text", text: input.prompt };
    const res = await client.messages.create({
      model: input.model,
      max_tokens: 1024,
      messages: [{ role: "user", content: [imageBlock, promptBlock] }]
    });

    let reply = "";
    for (const block of res.content) {
      if (block.type === "text") reply += block.text ?? "";
    }
    return reply;
  };
}
|
|
9389
|
+
/**
 * Create the default PDF sender backed by `@anthropic-ai/sdk`.
 *
 * The SDK is resolved lazily on each call. The client is configured with
 * `auth.authToken` when set, otherwise `auth.apiKey`; `auth.betaHeader`, when
 * set, is sent as the `anthropic-beta` default header. The request carries
 * one user message (the base64 PDF as a `document` block followed by the
 * prompt) with a 4096-token cap, and the reply is the concatenation of its
 * text blocks.
 *
 * @param {{authToken?: string, apiKey?: string, betaHeader?: string}} auth
 * @returns {(input: {model: string, data: string, prompt: string}) => Promise<string>}
 */
function defaultPdfSender(auth) {
  return async (input) => {
    const importMetaUrl = import.meta.url;
    const req = importMetaUrl ? createRequire4(importMetaUrl) : __require;
    const sdk = req("@anthropic-ai/sdk");
    const Anthropic = sdk.Anthropic ?? sdk.default;
    if (!Anthropic) throw new Error("Could not resolve Anthropic from '@anthropic-ai/sdk'");

    // An auth token takes precedence over an API key; never send both.
    let credentials = {};
    if (auth.authToken) credentials = { authToken: auth.authToken };
    else if (auth.apiKey) credentials = { apiKey: auth.apiKey };
    const headers = auth.betaHeader ? { defaultHeaders: { "anthropic-beta": auth.betaHeader } } : {};
    const client = new Anthropic({ ...credentials, ...headers });

    const documentBlock = {
      type: "document",
      source: { type: "base64", media_type: "application/pdf", data: input.data }
    };
    const promptBlock = { type: "text", text: input.prompt };
    const res = await client.messages.create({
      model: input.model,
      max_tokens: 4096,
      messages: [{ role: "user", content: [documentBlock, promptBlock] }]
    });

    let reply = "";
    for (const block of res.content) {
      if (block.type === "text") reply += block.text ?? "";
    }
    return reply;
  };
}
|
|
8799
9420
|
export {
|
|
8800
9421
|
CONFIG_SUBDIR,
|
|
8801
9422
|
DEFAULT_ENTRY_TYPES,
|
|
@@ -8825,8 +9446,10 @@ export {
|
|
|
8825
9446
|
attachBlob,
|
|
8826
9447
|
autoFtsColumns,
|
|
8827
9448
|
autoUpdate,
|
|
9449
|
+
classifyLinks,
|
|
8828
9450
|
configDir,
|
|
8829
9451
|
contentHash,
|
|
9452
|
+
crawlUrl,
|
|
8830
9453
|
createReadOnlyHeader,
|
|
8831
9454
|
createSQLiteStateStore,
|
|
8832
9455
|
decrypt,
|
|
@@ -8835,11 +9458,15 @@ export {
|
|
|
8835
9458
|
deleteToken,
|
|
8836
9459
|
deriveCanonicalContexts,
|
|
8837
9460
|
deriveKey,
|
|
9461
|
+
describeImage,
|
|
9462
|
+
describePdf,
|
|
8838
9463
|
encrypt,
|
|
9464
|
+
enrichKnowledge,
|
|
8839
9465
|
ensureFtsIndex,
|
|
8840
9466
|
ensureLatticeRoot,
|
|
8841
9467
|
entityFileNames,
|
|
8842
9468
|
estimateTokens,
|
|
9469
|
+
extractObjects,
|
|
8843
9470
|
findLatticeRoot,
|
|
8844
9471
|
fixSchemaConflicts,
|
|
8845
9472
|
frontmatter,
|
|
@@ -8868,9 +9495,12 @@ export {
|
|
|
8868
9495
|
migrateLatticeData,
|
|
8869
9496
|
normalizeEntityFiles,
|
|
8870
9497
|
openTargetLatticeForMigration,
|
|
9498
|
+
organizeSource,
|
|
8871
9499
|
parseConfigFile,
|
|
8872
9500
|
parseConfigString,
|
|
8873
9501
|
parseMarkdownEntries,
|
|
9502
|
+
parseMatches,
|
|
9503
|
+
parseObjects,
|
|
8874
9504
|
parseSessionMD,
|
|
8875
9505
|
parseSessionWrites,
|
|
8876
9506
|
probeCloud,
|
|
@@ -8893,6 +9523,7 @@ export {
|
|
|
8893
9523
|
saveDbCredentialForTeam,
|
|
8894
9524
|
setActiveWorkspace,
|
|
8895
9525
|
slugify,
|
|
9526
|
+
summarizeText,
|
|
8896
9527
|
toSafeDirName,
|
|
8897
9528
|
truncate,
|
|
8898
9529
|
validateEntryId,
|