latticesql 2.2.2 → 2.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/dist/cli.js +689 -89
- package/dist/index.d.cts +2 -2
- package/dist/index.d.ts +2 -2
- package/package.json +6 -2
package/README.md
CHANGED
|
@@ -2137,8 +2137,9 @@ Optional extras, each enabled by its own key/binary:
|
|
|
2137
2137
|
- **Voice** — set an OpenAI (Whisper) or ElevenLabs key to dictate into the composer.
|
|
2138
2138
|
- **File ingest** — reference a local file or paste text; it becomes a row in the
|
|
2139
2139
|
native `files` entity with extracted text + (with a Claude key) an
|
|
2140
|
-
LLM-written description and links to related records.
|
|
2141
|
-
|
|
2140
|
+
LLM-written description and links to related records. Documents (PDF, Word,
|
|
2141
|
+
PowerPoint, Excel, OpenDocument, EPUB, RTF) are parsed natively in-process —
|
|
2142
|
+
no external CLI.
|
|
2142
2143
|
|
|
2143
2144
|
Chat threads, files, and secrets are all stored as native Lattice entities.
|
|
2144
2145
|
|
|
@@ -2295,8 +2296,9 @@ the library API is unchanged and fully backwards-compatible.
|
|
|
2295
2296
|
entity. A subscription **Connect** link (PKCE) appears when the `ANTHROPIC_OAUTH_*`
|
|
2296
2297
|
values are set (see [`.env.example`](.env.example)).
|
|
2297
2298
|
- **Drop files / paste text / images / URLs.** Sources become native `files` rows
|
|
2298
|
-
(referenced, not copied) and are extracted —
|
|
2299
|
-
|
|
2299
|
+
(referenced, not copied) and are extracted — documents (PDF / Office /
|
|
2300
|
+
OpenDocument / EPUB / RTF) parsed **natively in-process**, **images via Claude
|
|
2301
|
+
vision**, a pasted **URL crawled** for readable text —
|
|
2300
2302
|
then summarized with **Claude Haiku** and classified against your records, and
|
|
2301
2303
|
**added, enriched, and linked** automatically, **auto-creating the junction table
|
|
2302
2304
|
when none exists** (and a new object when a source fits nothing). All audited and
|
package/dist/cli.js
CHANGED
|
@@ -6279,7 +6279,7 @@ async function checkForUpdate(pkgName, currentVersion) {
|
|
|
6279
6279
|
|
|
6280
6280
|
// src/gui/server.ts
|
|
6281
6281
|
import { createServer } from "http";
|
|
6282
|
-
import { spawn as
|
|
6282
|
+
import { spawn as spawn2 } from "child_process";
|
|
6283
6283
|
import {
|
|
6284
6284
|
existsSync as existsSync21,
|
|
6285
6285
|
mkdirSync as mkdirSync10,
|
|
@@ -7584,6 +7584,13 @@ var css = `
|
|
|
7584
7584
|
.grants-panel .grants-title { font-weight: 600; margin-bottom: 6px; }
|
|
7585
7585
|
.grants-panel .grants-row { display: flex; align-items: center; gap: 8px; padding: 3px 0; cursor: pointer; }
|
|
7586
7586
|
.grants-panel .grants-row input { accent-color: var(--accent); }
|
|
7587
|
+
/* Reconnect-required notice: a cloud opened via an unsupported direct
|
|
7588
|
+
database connection serves no data until reconnected through a server. */
|
|
7589
|
+
.cloud-reconnect {
|
|
7590
|
+
padding: 10px 16px; font-size: 13px; line-height: 1.4;
|
|
7591
|
+
background: rgba(239, 68, 68, 0.12); color: var(--text);
|
|
7592
|
+
border-bottom: 1px solid rgba(239, 68, 68, 0.5);
|
|
7593
|
+
}
|
|
7587
7594
|
|
|
7588
7595
|
/* Inline create-row at the bottom of every table */
|
|
7589
7596
|
tr.create-row td { background: var(--surface-2); }
|
|
@@ -9074,6 +9081,20 @@ var appJs = `
|
|
|
9074
9081
|
|
|
9075
9082
|
window.addEventListener('hashchange', renderRoute);
|
|
9076
9083
|
|
|
9084
|
+
// 2.2.3: a cloud reached via a raw postgres:// connection is refused (it
|
|
9085
|
+
// can't enforce per-user access). The server serves no cloud data; tell the
|
|
9086
|
+
// operator to reconnect through a user-authenticated server.
|
|
9087
|
+
function initCloudReconnectNotice() {
|
|
9088
|
+
fetchJson('/api/dbconfig').then(function (d) {
|
|
9089
|
+
if (!d || !d.cloudReconnectRequired) return;
|
|
9090
|
+
var bar = document.getElementById('cloud-reconnect');
|
|
9091
|
+
if (!bar) return;
|
|
9092
|
+
bar.textContent = 'This cloud is connected with a direct database connection, which is no longer supported. Reconnect through a server (sign in as a user) to access it securely.';
|
|
9093
|
+
bar.hidden = false;
|
|
9094
|
+
}).catch(function () { /* dbconfig unavailable (server mode) \u2014 nothing to show */ });
|
|
9095
|
+
}
|
|
9096
|
+
initCloudReconnectNotice();
|
|
9097
|
+
|
|
9077
9098
|
// \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
|
9078
9099
|
// Sidebar
|
|
9079
9100
|
// \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500
|
|
@@ -14198,6 +14219,7 @@ var guiAppHtml = `<!doctype html>
|
|
|
14198
14219
|
</svg>
|
|
14199
14220
|
</button>
|
|
14200
14221
|
</header>
|
|
14222
|
+
<div class="cloud-reconnect" id="cloud-reconnect" hidden></div>
|
|
14201
14223
|
<div class="layout">
|
|
14202
14224
|
<nav class="sidebar">
|
|
14203
14225
|
<label class="sidebar-advanced toggle" title="Advanced mode \u2014 row/table editor instead of the file workspace">
|
|
@@ -19359,7 +19381,10 @@ async function dispatchDbConfigRoute(req, res, ctx) {
|
|
|
19359
19381
|
// without a local `__lattice_team_connections` row (which doesn't
|
|
19360
19382
|
// exist when the team cloud itself is the active database).
|
|
19361
19383
|
teamId: ctx.teamMembership?.teamId ?? null,
|
|
19362
|
-
myUserId: ctx.teamMembership?.myUserId ?? null
|
|
19384
|
+
myUserId: ctx.teamMembership?.myUserId ?? null,
|
|
19385
|
+
// 2.2.3: a direct postgres:// cloud connection is refused — the SPA
|
|
19386
|
+
// shows a "reconnect through a server" prompt instead of cloud data.
|
|
19387
|
+
cloudReconnectRequired: ctx.cloudReconnectRequired
|
|
19363
19388
|
});
|
|
19364
19389
|
});
|
|
19365
19390
|
return true;
|
|
@@ -21533,10 +21558,538 @@ import { tmpdir as tmpdir2 } from "os";
|
|
|
21533
21558
|
import { basename as basename10, extname as extname2, resolve as resolve8, join as join20 } from "path";
|
|
21534
21559
|
|
|
21535
21560
|
// src/gui/ai/extract.ts
|
|
21536
|
-
import { readFile } from "fs/promises";
|
|
21561
|
+
import { readFile as readFile2 } from "fs/promises";
|
|
21537
21562
|
import { extname, basename as basename7 } from "path";
|
|
21538
|
-
|
|
21563
|
+
|
|
21564
|
+
// src/gui/ai/doc-extractors.ts
|
|
21565
|
+
import { readFile } from "fs/promises";
|
|
21539
21566
|
var MAX_TEXT = 2e5;
|
|
21567
|
+
var MAX_ENTRY_BYTES = 64 * 1024 * 1024;
|
|
21568
|
+
var MAX_TOTAL_BYTES = 256 * 1024 * 1024;
|
|
21569
|
+
var PDF_TIMEOUT_MS = 3e4;
|
|
21570
|
+
// One decoder instance is shared by every caller in this module.
var textDecoder = new TextDecoder("utf-8");

/**
 * Decode a byte buffer as UTF-8 text.
 *
 * @param bytes Raw bytes (for example an entry pulled out of a zip archive).
 * @returns The decoded string.
 */
function decodeUtf8(bytes) {
  const decoded = textDecoder.decode(bytes);
  return decoded;
}
|
|
21574
|
+
/**
 * Dynamically import an optional dependency.
 *
 * @param specifier Module specifier handed to `import()`.
 * @returns The loaded module namespace, or `null` when the import fails for
 *   any reason (the failure is intentionally not reported).
 */
async function loadOptional(specifier) {
  return import(specifier).catch(() => null);
}
|
|
21581
|
+
/**
 * Trim a string and collapse "nothing left" into `null`.
 *
 * @param s Text to normalise.
 * @returns The trimmed text, or `null` when only whitespace (or nothing) remains.
 */
function nullIfEmpty(s) {
  const trimmed = s.trim();
  if (trimmed.length === 0) return null;
  return trimmed;
}
|
|
21585
|
+
/**
 * Race a promise against a deadline.
 *
 * @param p Promise to wait for.
 * @param ms Deadline in milliseconds.
 * @param label Message of the `Error` used to reject when the deadline wins.
 * @returns A promise that settles like `p`, or rejects with `Error(label)`
 *   once `ms` milliseconds pass first.
 */
function withTimeout(p, ms, label) {
  let handle;
  const deadline = new Promise((_resolve, reject) => {
    handle = setTimeout(() => reject(new Error(label)), ms);
    // Where the timer supports it, do not let it keep the process alive.
    handle.unref?.();
  });
  // Whatever way `p` settles, the pending timer is cancelled.
  const guarded = p.finally(() => clearTimeout(handle));
  return Promise.race([guarded, deadline]);
}
|
|
21600
|
+
/**
 * Decode XML character references in text content.
 *
 * Handles the five predefined named entities plus decimal (`&#65;`) and
 * hexadecimal (`&#x41;`) numeric references. `&amp;` is decoded last so an
 * escaped ampersand such as `&amp;lt;` yields the literal text `&lt;` instead
 * of being decoded twice.
 *
 * @param s Text that may contain XML entities.
 * @returns The text with entities replaced by the characters they denote.
 */
function decodeXmlEntities(s) {
  return s
    .replace(/&lt;/g, "<")
    .replace(/&gt;/g, ">")
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    .replace(/&#x([0-9a-fA-F]+);/g, (_, h) => safeCodePoint(parseInt(h, 16)))
    .replace(/&#(\d+);/g, (_, d) => safeCodePoint(parseInt(d, 10)))
    .replace(/&amp;/g, "&");
}

/**
 * Convert a numeric code point to a string without ever throwing.
 *
 * @param n Candidate Unicode code point.
 * @returns The corresponding character, or `""` when `n` is not a finite
 *   number in the range 0..0x10FFFF or cannot be converted.
 */
function safeCodePoint(n) {
  if (!Number.isFinite(n) || n < 0 || n > 1114111) return "";
  try {
    return String.fromCodePoint(n);
  } catch {
    return "";
  }
}
|
|
21611
|
+
/**
 * Remove every `<...>` span from a string, keeping only the text between.
 *
 * A `<` with no matching `>` ends the scan: everything from that `<` onward
 * is discarded.
 *
 * @param s Markup to strip.
 * @returns The text outside of tags, concatenated in order.
 */
function stripTags(s) {
  const pieces = [];
  let pos = 0;
  while (pos < s.length) {
    const open = s.indexOf("<", pos);
    if (open === -1) {
      pieces.push(s.slice(pos));
      break;
    }
    pieces.push(s.slice(pos, open));
    const close = s.indexOf(">", open + 1);
    if (close === -1) break;
    pos = close + 1;
  }
  return pieces.join("");
}
|
|
21627
|
+
/**
 * Tell whether a UTF-16 code unit can directly follow a tag name.
 *
 * @param code Result of `charCodeAt` at the position after the name; `NaN`
 *   when that position is past the end of the string.
 * @returns `true` for tab, LF, CR, space, `/`, `>` and `NaN`; otherwise `false`.
 */
function isNameBoundary(code) {
  switch (code) {
    case 9: // tab
    case 10: // \n
    case 13: // \r
    case 32: // space
    case 47: // /
    case 62: // >
      return true;
    default:
      return Number.isNaN(code);
  }
}
|
|
21636
|
+
/**
 * Scan `xml` left to right and invoke `cb` for each `<tag ...>` element found.
 *
 * This is a flat text scan, not a parser: an element's body ends at the first
 * `</tag>` after its opening tag, and the scan resumes after that closing tag.
 * The scan stops silently at an unterminated opening tag or a missing closing
 * tag.
 *
 * @param xml Markup to scan.
 * @param tag Exact element name (including any prefix, e.g. `"a:p"`).
 * @param cb Called as `cb(attrs, inner, start)`: the raw text between the tag
 *   name and the end of the opening tag, the raw body (`""` for a
 *   self-closing element), and the index of the opening `<`.
 */
function eachElement(xml, tag, cb) {
  const openMarker = "<" + tag;
  const closeMarker = "</" + tag + ">";
  let cursor = 0;
  while (cursor < xml.length) {
    const start = xml.indexOf(openMarker, cursor);
    if (start === -1) return;
    const nameEnd = start + openMarker.length;
    // "<tagfoo" merely shares a prefix with "<tag": skip it.
    if (!isNameBoundary(xml.charCodeAt(nameEnd))) {
      cursor = nameEnd;
      continue;
    }
    const tagEnd = xml.indexOf(">", nameEnd);
    if (tagEnd === -1) return;
    if (xml.charCodeAt(tagEnd - 1) === 47) {
      // Self-closing: attributes exclude the trailing "/".
      cb(xml.slice(nameEnd, tagEnd - 1), "", start);
      cursor = tagEnd + 1;
      continue;
    }
    const bodyEnd = xml.indexOf(closeMarker, tagEnd + 1);
    if (bodyEnd === -1) return;
    cb(xml.slice(nameEnd, tagEnd), xml.slice(tagEnd + 1, bodyEnd), start);
    cursor = bodyEnd + closeMarker.length;
  }
}
|
|
21663
|
+
/**
 * Return `xml` with every `<tag ...>...</tag>` (or self-closing `<tag/>`)
 * element removed, body included.
 *
 * Uses the same flat scan as the other helpers: a body ends at the first
 * `</tag>`. If an opening tag never closes with `>`, the remainder is kept
 * verbatim; if the closing tag is missing, everything from the opening tag
 * onward is dropped.
 *
 * @param xml Markup to filter.
 * @param tag Exact element name to remove.
 * @returns The markup without those elements.
 */
function stripElement(xml, tag) {
  const openMarker = "<" + tag;
  const closeMarker = "</" + tag + ">";
  const kept = [];
  let cursor = 0;
  while (cursor < xml.length) {
    const start = xml.indexOf(openMarker, cursor);
    if (start === -1) {
      kept.push(xml.slice(cursor));
      break;
    }
    const nameEnd = start + openMarker.length;
    if (!isNameBoundary(xml.charCodeAt(nameEnd))) {
      // A longer tag name that merely starts with `tag`: keep it.
      kept.push(xml.slice(cursor, nameEnd));
      cursor = nameEnd;
      continue;
    }
    const tagEnd = xml.indexOf(">", nameEnd);
    if (tagEnd === -1) {
      kept.push(xml.slice(cursor));
      break;
    }
    kept.push(xml.slice(cursor, start));
    if (xml.charCodeAt(tagEnd - 1) === 47) {
      cursor = tagEnd + 1;
      continue;
    }
    const bodyEnd = xml.indexOf(closeMarker, tagEnd + 1);
    if (bodyEnd === -1) break;
    cursor = bodyEnd + closeMarker.length;
  }
  return kept.join("");
}
|
|
21696
|
+
/**
 * Concatenate the plain text of every `tag` element in `xml`.
 *
 * Each element body has nested tags stripped and XML entities decoded; the
 * results are joined with no separator.
 *
 * @param xml Markup to scan.
 * @param tag Element name whose text should be collected.
 * @returns The combined text (empty string when no element matches).
 */
function concatTagText(xml, tag) {
  const chunks = [];
  eachElement(xml, tag, (_attrs, inner) => {
    chunks.push(decodeXmlEntities(stripTags(inner)));
  });
  return chunks.join("");
}
|
|
21703
|
+
/**
 * Return the raw body of the first `tag` element in `xml`.
 *
 * The body is returned untouched: no tag stripping, no entity decoding.
 *
 * @param xml Markup to scan.
 * @param tag Element name to look for.
 * @returns The first element's inner text, or `""` when there is none.
 */
function firstTagText(xml, tag) {
  let first = null;
  eachElement(xml, tag, (_attrs, inner) => {
    if (first === null) first = inner;
  });
  return first === null ? "" : first;
}
|
|
21713
|
+
/**
 * Reduce an HTML/XHTML document to readable plain text.
 *
 * Removes `script` and `style` elements, strips the remaining tags, decodes
 * entities, then tidies whitespace: horizontal runs become one space, spaces
 * around newlines are dropped, and three or more newlines collapse to two.
 *
 * @param html Markup to convert.
 * @returns Trimmed plain text.
 */
function stripHtml(html) {
  const withoutScripts = stripElement(html, "script");
  const withoutStyles = stripElement(withoutScripts, "style");
  const plain = decodeXmlEntities(stripTags(withoutStyles));
  const collapsed = plain.replace(/[ \t\f\r]+/g, " ");
  const tidyBreaks = collapsed.replace(/ *\n */g, "\n");
  return tidyBreaks.replace(/\n{3,}/g, "\n\n").trim();
}
|
|
21718
|
+
/**
 * Read a zip archive from disk and inflate it in memory with the optional
 * `fflate` package.
 *
 * Entries are admitted through a filter that enforces two caps on the sizes
 * `fflate` reports as `originalSize`: `MAX_ENTRY_BYTES` per entry and
 * `MAX_TOTAL_BYTES` across the archive. Exceeding either aborts the whole
 * extraction.
 *
 * @param path2 Path of the archive.
 * @returns Whatever `fflate.unzipSync` returns (callers index it by entry
 *   name), or `null` when `fflate` is unavailable, the file cannot be read,
 *   a cap is exceeded, or inflation fails.
 */
async function unzip(path2) {
  const fflate = await loadOptional("fflate");
  if (!fflate || typeof fflate.unzipSync !== "function") return null;
  try {
    const buf = await readFile(path2);
    const bytes = new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength);
    let reportedTotal = 0;
    const admit = (file) => {
      const reported = file.originalSize || 0;
      if (reported > MAX_ENTRY_BYTES) throw new Error("zip entry exceeds size cap");
      reportedTotal += reported;
      if (reportedTotal > MAX_TOTAL_BYTES) throw new Error("zip total exceeds size cap");
      return true;
    };
    return fflate.unzipSync(bytes, { filter: admit });
  } catch {
    return null;
  }
}
|
|
21737
|
+
/**
 * Extract the raw text of a `.docx` file via the optional `mammoth` package.
 *
 * @param path2 Path of the document.
 * @returns Trimmed text, or `null` when `mammoth` is unavailable, extraction
 *   throws, or the document yields no text.
 */
async function extractDocx(path2) {
  const loaded = await loadOptional("mammoth");
  // Accept both an ESM default export and a CommonJS-style namespace.
  const mammoth = loaded?.default ?? loaded;
  if (!mammoth || typeof mammoth.extractRawText !== "function") return null;
  try {
    const result = await mammoth.extractRawText({ path: path2 });
    return nullIfEmpty(result.value);
  } catch {
    return null;
  }
}
|
|
21748
|
+
/**
 * Extract the body text of a legacy `.doc` file via the optional
 * `word-extractor` package.
 *
 * @param path2 Path of the document.
 * @returns Trimmed body text, or `null` when the package is unavailable, its
 *   export is not a constructor, extraction throws, or the body is empty.
 */
async function extractDoc(path2) {
  let Extractor = await loadOptional("word-extractor");
  // Unwrap a default export when the module namespace carries one.
  if (Extractor && "default" in Extractor) Extractor = Extractor.default;
  if (typeof Extractor !== "function") return null;
  try {
    const parsed = await new Extractor().extract(path2);
    return nullIfEmpty(parsed.getBody());
  } catch {
    return null;
  }
}
|
|
21761
|
+
/**
 * Extract the text layer of a PDF via the optional `unpdf` package.
 *
 * Parsing and text extraction together must finish within `PDF_TIMEOUT_MS`;
 * past that the attempt is abandoned and treated as a failure.
 *
 * @param path2 Path of the PDF.
 * @returns Trimmed text of all pages merged, or `null` when `unpdf` is
 *   unavailable, reading/parsing fails or times out, or no text is found.
 */
async function extractPdf(path2) {
  const unpdf = await loadOptional("unpdf");
  if (!unpdf || typeof unpdf.getDocumentProxy !== "function") return null;
  try {
    const buf = await readFile(path2);
    const data = new Uint8Array(buf.buffer, buf.byteOffset, buf.byteLength);
    const readAllPages = async () => {
      const pdf = await unpdf.getDocumentProxy(data);
      const extracted = await unpdf.extractText(pdf, { mergePages: true });
      return extracted.text;
    };
    const text = await withTimeout(readAllPages(), PDF_TIMEOUT_MS, "pdf extract timeout");
    return nullIfEmpty(text);
  } catch {
    return null;
  }
}
|
|
21781
|
+
/**
 * Pull the trailing number out of an archive part name such as
 * `ppt/slides/slide12.xml`, for numeric ordering.
 *
 * @param name Entry name inside the archive.
 * @returns The digits immediately before `.xml` as an integer, or `0` when
 *   the name does not end that way.
 */
function partNumber(name) {
  const match = /(\d+)\.xml$/.exec(name);
  if (!match || !match[1]) return 0;
  return parseInt(match[1], 10);
}
|
|
21785
|
+
/**
 * Collect the visible text of one slide's XML.
 *
 * Each `a:p` paragraph contributes one line made of its `a:t` runs. When no
 * paragraph produces text, all `a:t` runs in the slide are used as a single
 * line instead.
 *
 * @param xml Slide XML.
 * @returns Non-blank paragraphs joined with newlines (possibly empty).
 */
function slideText(xml) {
  const paragraphs = [];
  const keepIfText = (text) => {
    if (text.trim()) paragraphs.push(text);
  };
  eachElement(xml, "a:p", (_attrs, inner) => {
    keepIfText(concatTagText(inner, "a:t"));
  });
  if (paragraphs.length === 0) {
    keepIfText(concatTagText(xml, "a:t"));
  }
  return paragraphs.join("\n");
}
|
|
21797
|
+
/**
 * Extract slide text from a `.pptx` archive.
 *
 * Slides (`ppt/slides/slideN.xml`) are visited in numeric order and collected
 * until roughly `MAX_TEXT` characters have been gathered.
 *
 * @param path2 Path of the presentation.
 * @returns Slide texts separated by blank lines, or `null` when the archive
 *   cannot be opened, has no slides, or contains no text.
 */
async function extractPptx(path2) {
  const archive = await unzip(path2);
  if (!archive) return null;
  const slideNames = Object.keys(archive)
    .filter((entryName) => /^ppt\/slides\/slide\d+\.xml$/.test(entryName))
    .sort((left, right) => partNumber(left) - partNumber(right));
  if (slideNames.length === 0) return null;
  const slideTexts = [];
  let budgetUsed = 0;
  for (const slideName of slideNames) {
    if (budgetUsed >= MAX_TEXT) break;
    const raw = archive[slideName];
    if (!raw) continue;
    const cleaned = slideText(decodeUtf8(raw)).replace(/[ \t]+/g, " ").trim();
    if (!cleaned) continue;
    slideTexts.push(cleaned);
    // +2 accounts for the blank-line separator added by the join below.
    budgetUsed += cleaned.length + 2;
  }
  return nullIfEmpty(slideTexts.join("\n\n"));
}
|
|
21816
|
+
/**
 * Extract cell text from an `.xlsx` archive, one output line per row.
 *
 * Shared strings are loaded from `xl/sharedStrings.xml` (with `rPh` elements
 * removed first). Worksheets (`xl/worksheets/sheetN.xml`) are read in numeric
 * order; each non-empty cell contributes its resolved value and rows are
 * collected until roughly `MAX_TEXT` characters have been gathered.
 *
 * @param path2 Path of the workbook.
 * @returns Row lines joined with newlines, or `null` when the archive cannot
 *   be opened or holds no cell text.
 */
async function extractXlsx(path2) {
  const archive = await unzip(path2);
  if (!archive) return null;

  const sharedStrings = [];
  const sharedBytes = archive["xl/sharedStrings.xml"];
  if (sharedBytes) {
    eachElement(decodeUtf8(sharedBytes), "si", (_attrs, item) => {
      sharedStrings.push(concatTagText(stripElement(item, "rPh"), "t"));
    });
  }

  // Resolve one <c> cell to text according to its t="..." type attribute.
  const cellValue = (attrs, body) => {
    const cellType = /\bt="([^"]+)"/.exec(attrs)?.[1];
    if (cellType === "s") {
      // Shared-string cell: <v> holds an index into the shared table.
      const index = parseInt(firstTagText(body, "v"), 10);
      return Number.isInteger(index) ? sharedStrings[index] ?? "" : "";
    }
    if (cellType === "inlineStr") return concatTagText(body, "t");
    return decodeXmlEntities(firstTagText(body, "v"));
  };

  const sheetNames = Object.keys(archive)
    .filter((entryName) => /^xl\/worksheets\/sheet\d+\.xml$/.test(entryName))
    .sort((left, right) => partNumber(left) - partNumber(right));
  const lines = [];
  let budgetUsed = 0;
  for (const sheetName of sheetNames) {
    if (budgetUsed >= MAX_TEXT) break;
    const sheetBytes = archive[sheetName];
    if (!sheetBytes) continue;
    eachElement(decodeUtf8(sheetBytes), "row", (_attrs, rowInner) => {
      if (budgetUsed >= MAX_TEXT) return;
      const cells = [];
      eachElement(rowInner, "c", (cellAttrs, cellBody) => {
        const value = cellValue(cellAttrs, cellBody);
        if (value) cells.push(value);
      });
      if (cells.length === 0) return;
      const line = cells.join(" ");
      lines.push(line);
      budgetUsed += line.length + 1;
    });
  }
  return nullIfEmpty(lines.join("\n"));
}
|
|
21858
|
+
/**
 * Replace OpenDocument whitespace elements with literal characters before
 * tags are stripped.
 *
 * `text:tab` becomes a single separator character, `text:line-break` a
 * newline, and `text:s` one space, or `text:c` spaces when that count
 * attribute is present (capped at 100).
 *
 * @param s Raw paragraph XML.
 * @returns The XML with those elements substituted.
 */
function odfWhitespace(s) {
  let out = s.replace(/<text:tab\b[^>]{0,400}\/?>/g, " ");
  out = out.replace(/<text:line-break\b[^>]{0,400}\/?>/g, "\n");
  out = out.replace(
    /<text:s\b[^>]{0,400}\btext:c="(\d+)"[^>]{0,400}\/?>/g,
    (_match, count) => {
      const requested = parseInt(count, 10) || 1;
      return " ".repeat(requested > 100 ? 100 : requested);
    }
  );
  return out.replace(/<text:s\b[^>]{0,400}\/?>/g, " ");
}
|
|
21864
|
+
/**
 * Turn the inner XML of an OpenDocument paragraph into trimmed plain text.
 *
 * @param inner Raw paragraph body.
 * @returns Text with whitespace elements expanded, tags removed and entities
 *   decoded.
 */
function odfParagraph(inner) {
  const spaced = odfWhitespace(inner);
  const text = decodeXmlEntities(stripTags(spaced));
  return text.trim();
}
|
|
21867
|
+
/**
 * Extract paragraphs and headings from an OpenDocument archive's
 * `content.xml`.
 *
 * `text:p` and `text:h` elements are gathered separately, then merged back
 * into document order by their position in the XML. Lines are collected until
 * roughly `MAX_TEXT` characters have been gathered.
 *
 * @param path2 Path of the document.
 * @returns Lines joined with newlines, or `null` when the archive cannot be
 *   opened, lacks `content.xml`, or contains no text.
 */
async function extractOdfText(path2) {
  const archive = await unzip(path2);
  if (!archive) return null;
  const contentBytes = archive["content.xml"];
  if (!contentBytes) return null;
  const xml = decodeUtf8(contentBytes);

  const found = [];
  for (const tag of ["text:p", "text:h"]) {
    eachElement(xml, tag, (_attrs, inner, offset) => {
      const text = odfParagraph(inner);
      if (text) found.push({ offset, text });
    });
  }
  found.sort((left, right) => left.offset - right.offset);

  const lines = [];
  let budgetUsed = 0;
  for (const { text } of found) {
    if (budgetUsed >= MAX_TEXT) break;
    lines.push(text);
    budgetUsed += text.length + 1;
  }
  return nullIfEmpty(lines.join("\n"));
}
|
|
21890
|
+
/**
 * Extract cell text from an OpenDocument spreadsheet, one output line per
 * table row.
 *
 * A cell's value is the text of its `text:p` paragraphs; when there is none,
 * the first `office:*-value` attribute on the cell is used instead. Rows are
 * collected until roughly `MAX_TEXT` characters have been gathered.
 *
 * @param path2 Path of the spreadsheet.
 * @returns Row lines joined with newlines, or `null` when the archive cannot
 *   be opened, lacks `content.xml`, or holds no cell text.
 */
async function extractOds(path2) {
  const archive = await unzip(path2);
  if (!archive) return null;
  const contentBytes = archive["content.xml"];
  if (!contentBytes) return null;

  // Resolve one table cell to text: displayed paragraphs first, typed value second.
  const readCell = (attrs, body) => {
    const paragraphs = [];
    eachElement(body, "text:p", (_pAttrs, p) => {
      const text = odfParagraph(p);
      if (text) paragraphs.push(text);
    });
    const visible = paragraphs.join(" ").trim();
    if (visible) return visible;
    const typed = /\boffice:(?:value|date-value|time-value|string-value|boolean-value)="([^"]*)"/.exec(
      attrs
    )?.[1];
    return typed ? decodeXmlEntities(typed) : "";
  };

  const lines = [];
  let budgetUsed = 0;
  eachElement(decodeUtf8(contentBytes), "table:table-row", (_attrs, rowInner) => {
    if (budgetUsed >= MAX_TEXT) return;
    const cells = [];
    eachElement(rowInner, "table:table-cell", (cellAttrs, cellBody) => {
      const value = readCell(cellAttrs, cellBody);
      if (value) cells.push(value);
    });
    if (cells.length === 0) return;
    const line = cells.join(" ");
    lines.push(line);
    budgetUsed += line.length + 1;
  });
  return nullIfEmpty(lines.join("\n"));
}
|
|
21924
|
+
/**
 * Normalise a `/`-separated archive path.
 *
 * Empty and `.` segments are dropped; `..` removes the previous segment and
 * is ignored when there is nothing left to remove, so the result never
 * escapes the archive root.
 *
 * @param p Path to normalise.
 * @returns The cleaned path with no leading or trailing slash.
 */
function normalizeZipPath(p) {
  const stack = [];
  p.split("/").forEach((segment) => {
    switch (segment) {
      case "":
      case ".":
        return;
      case "..":
        stack.pop();
        return;
      default:
        stack.push(segment);
    }
  });
  return stack.join("/");
}
|
|
21933
|
+
/**
 * Resolve a manifest `href` against the directory of the file that declared
 * it, producing an archive entry name.
 *
 * The fragment (`#...`) and query (`?...`) are removed and percent-escapes
 * are decoded; a malformed escape sequence leaves the href undecoded.
 *
 * @param baseDir Directory prefix, either `""` or ending in `/`.
 * @param href Reference as written in the manifest.
 * @returns The normalised archive path.
 */
function resolveHref(baseDir, href) {
  const withoutFragment = href.split("#")[0];
  let target = withoutFragment?.split("?")[0] ?? "";
  try {
    target = decodeURIComponent(target);
  } catch {
    // Malformed percent-encoding: fall back to the raw text.
  }
  return normalizeZipPath(baseDir + target);
}
|
|
21941
|
+
/**
 * Extract the readable text of an EPUB in reading order.
 *
 * The package document is located through `META-INF/container.xml`; its
 * manifest maps item ids to hrefs and its spine (`itemref`) gives the order.
 * When no spine can be resolved, every `.htm/.html/.xhtml` entry is used in
 * natural (numeric-aware) name order. Documents are converted with
 * `stripHtml` and collected until roughly `MAX_TEXT` characters have been
 * gathered.
 *
 * Ids and paths come from the (untrusted) archive, so the manifest is a `Map`
 * and entries are only read as own properties. A spine `idref` or path such
 * as `constructor` must resolve to "missing", not to an inherited
 * `Object.prototype` member that would throw further down.
 *
 * @param path2 Path of the EPUB.
 * @returns Document texts separated by blank lines, or `null` when the
 *   archive cannot be opened or contains no text.
 */
async function extractEpub(path2) {
  const entries = await unzip(path2);
  if (!entries) return null;
  // Own-property lookup only: never hand back inherited members.
  const ownEntry = (name) =>
    Object.prototype.hasOwnProperty.call(entries, name) ? entries[name] : void 0;

  let order = [];
  const container = ownEntry("META-INF/container.xml");
  const opfPath = container ? /full-path="([^"]+)"/.exec(decodeUtf8(container))?.[1] : void 0;
  const opfBytes = opfPath ? ownEntry(opfPath) : void 0;
  if (opfPath && opfBytes) {
    const opf = decodeUtf8(opfBytes);
    const manifest = new Map();
    eachElement(opf, "item", (attrs) => {
      const id = /\bid="([^"]+)"/.exec(attrs)?.[1];
      const href = /\bhref="([^"]+)"/.exec(attrs)?.[1];
      if (id && href) manifest.set(id, href);
    });
    // Manifest hrefs are relative to the package document's directory.
    const baseDir = opfPath.includes("/") ? opfPath.slice(0, opfPath.lastIndexOf("/") + 1) : "";
    eachElement(opf, "itemref", (attrs) => {
      const idref = /\bidref="([^"]+)"/.exec(attrs)?.[1];
      const href = idref ? manifest.get(idref) : void 0;
      if (href) order.push(resolveHref(baseDir, href));
    });
  }
  if (order.length === 0) {
    order = Object.keys(entries)
      .filter((n) => /\.x?html?$/i.test(n))
      .sort((a, b) => a.localeCompare(b, void 0, { numeric: true }));
  }

  const parts = [];
  let total = 0;
  for (const n of order) {
    if (total >= MAX_TEXT) break;
    const bytes = ownEntry(n);
    if (!bytes) continue;
    const body = stripHtml(decodeUtf8(bytes));
    if (body) {
      parts.push(body);
      // +2 accounts for the blank-line separator added by the join below.
      total += body.length + 2;
    }
  }
  return nullIfEmpty(parts.join("\n\n"));
}
|
|
21979
|
+
// Matches the start of an RTF group (the text right after "{") that carries
// no body text: any "\*" destination, or one of the listed control words.
var RTF_IGNORED_DESTINATIONS = /^\\(?:\*|(?:fonttbl|colortbl|stylesheet|info|pict|themedata|colorschememapping|latentstyles|datastore|listtable|listoverridetable|rsidtbl|generator|operator|xmlnstbl|wgrffmtfilter|mmathPr)(?![a-zA-Z]))/;

/**
 * Remove ignorable destination groups from RTF source.
 *
 * Each `{...}` group whose opening matches `RTF_IGNORED_DESTINATIONS` is cut
 * out together with everything nested inside it (brace depth is tracked and a
 * backslash skips the character after it). When the text just before a
 * removed group ends in a control word, one space is inserted so that word
 * does not fuse with whatever follows the group.
 *
 * @param s RTF source.
 * @returns The source without those groups.
 */
function stripRtfDestinations(s) {
  const out = [];
  let segmentStart = 0;
  let pos = 0;
  while (pos < s.length) {
    const opensIgnored =
      s[pos] === "{" && RTF_IGNORED_DESTINATIONS.test(s.slice(pos + 1, pos + 40));
    if (!opensIgnored) {
      pos += 1;
      continue;
    }
    const before = s.slice(segmentStart, pos);
    out.push(before);
    if (/\\[a-zA-Z]+$/.test(before.slice(-40))) out.push(" ");
    // Walk to the brace that closes this group.
    let depth = 1;
    let scan = pos + 1;
    while (scan < s.length && depth > 0) {
      const ch = s[scan];
      if (ch === "\\") scan += 1;
      else if (ch === "{") depth += 1;
      else if (ch === "}") depth -= 1;
      scan += 1;
    }
    pos = scan;
    segmentStart = scan;
  }
  out.push(s.slice(segmentStart));
  return out.join("");
}
|
|
22007
|
+
// Windows-1252 bytes 0x80-0x9F mapped to Unicode code points. Bytes missing
// from the table (0x81, 0x8D, 0x8F, 0x90, 0x9D) have no entry.
var CP1252_HIGH = {
  0x80: 0x20ac,
  0x82: 0x201a,
  0x83: 0x0192,
  0x84: 0x201e,
  0x85: 0x2026,
  0x86: 0x2020,
  0x87: 0x2021,
  0x88: 0x02c6,
  0x89: 0x2030,
  0x8a: 0x0160,
  0x8b: 0x2039,
  0x8c: 0x0152,
  0x8e: 0x017d,
  0x91: 0x2018,
  0x92: 0x2019,
  0x93: 0x201c,
  0x94: 0x201d,
  0x95: 0x2022,
  0x96: 0x2013,
  0x97: 0x2014,
  0x98: 0x02dc,
  0x99: 0x2122,
  0x9a: 0x0161,
  0x9b: 0x203a,
  0x9c: 0x0153,
  0x9e: 0x017e,
  0x9f: 0x0178
};

/**
 * Decode a single byte as Windows-1252.
 *
 * @param byte Byte value.
 * @returns The character for that byte; `""` for a byte in 0x80-0x9F that
 *   has no table entry. Bytes outside that range map to the code point of the
 *   same value.
 */
function cp1252Char(byte) {
  const inRemappedRange = byte >= 128 && byte <= 159;
  if (!inRemappedRange) return safeCodePoint(byte);
  const mapped = CP1252_HIGH[byte];
  return mapped ? safeCodePoint(mapped) : "";
}
|
|
22043
|
+
/**
 * Convert RTF source to plain text.
 *
 * Steps, in order:
 *  1. drop ignorable destination groups (fonts, colours, pictures, ...);
 *  2. decode `\'hh` hex escapes as Windows-1252 bytes;
 *  3. decode `\uN` escapes (negative values are 16-bit wrapped), consuming an
 *     optional following space and `?` fallback character;
 *  4. turn `\par`/`\pard`, `\line`, `\sect` and `\page` into newlines and
 *     `\tab` into a tab;
 *  5. delete all remaining control words, control symbols and braces;
 *  6. tidy whitespace: a run of spaces/tabs collapses to one tab if it
 *     contained a tab, otherwise to one space; whitespace before a newline is
 *     dropped; three or more newlines collapse to two.
 *
 * NOTE(review): in the rendering this was taken from, the tab literals in
 * steps 4 and 6 appeared as plain spaces, which made step 6's conditional a
 * no-op. They are restored here as "\t"; confirm against the published bundle.
 *
 * @param rtf RTF source, read so that each byte is one character.
 * @returns Trimmed plain text.
 */
function rtfToText(rtf) {
  let s = stripRtfDestinations(rtf);
  s = s.replace(/\\'([0-9a-fA-F]{2})/g, (_, h) => cp1252Char(parseInt(h, 16)));
  s = s.replace(/\\u(-?\d+)\s?\??/g, (_, d) => {
    let n = parseInt(d, 10);
    // RTF stores code units as signed 16-bit values.
    if (n < 0) n += 65536;
    return safeCodePoint(n);
  });
  s = s
    .replace(/\\par[d]?\b/g, "\n")
    .replace(/\\line\b/g, "\n")
    .replace(/\\sect\b/g, "\n")
    .replace(/\\page\b/g, "\n")
    .replace(/\\tab\b/g, "\t");
  s = s.replace(/\\[a-zA-Z]+-?\d*\s?/g, "").replace(/\\[^a-zA-Z]/g, "");
  s = s.replace(/[{}]/g, "");
  return s
    .replace(/[ \t]+/g, (m) => (m.includes("\t") ? "\t" : " "))
    .replace(/[ \t]\n/g, "\n")
    .replace(/\n{3,}/g, "\n\n")
    .trim();
}
|
|
22056
|
+
/**
 * Extract plain text from an `.rtf` file.
 *
 * The file is read as latin1 so every byte maps to exactly one character,
 * and must begin with the `{\rtf` signature.
 *
 * @param path2 Path of the document.
 * @returns Trimmed text, or `null` when the file cannot be read, is not RTF,
 *   conversion throws, or no text remains.
 */
async function extractRtf(path2) {
  try {
    const contents = await readFile(path2, "latin1");
    return contents.startsWith("{\\rtf") ? nullIfEmpty(rtfToText(contents)) : null;
  } catch {
    return null;
  }
}
|
|
22065
|
+
/**
 * Dispatch a document to the extractor registered for its extension.
 *
 * @param path2 Path of the file on disk.
 * @param ext File extension including the leading dot; it is matched
 *   exactly as given against the lower-case keys below.
 * @returns The extractor's result (text or `null`), or `null` when no
 *   extractor handles `ext`.
 */
async function extractDocument(path2, ext) {
  const extractors = new Map([
    [".docx", extractDocx],
    [".doc", extractDoc],
    [".pdf", extractPdf],
    [".pptx", extractPptx],
    [".xlsx", extractXlsx],
    [".odt", extractOdfText],
    [".odp", extractOdfText],
    [".ods", extractOds],
    [".epub", extractEpub],
    [".rtf", extractRtf]
  ]);
  const extractor = extractors.get(ext);
  return extractor ? extractor(path2) : null;
}
|
|
22090
|
+
|
|
22091
|
+
// src/gui/ai/extract.ts
|
|
22092
|
+
var MAX_TEXT2 = 2e5;
|
|
21540
22093
|
var CODE_LANGS = {
|
|
21541
22094
|
".ts": "typescript",
|
|
21542
22095
|
".tsx": "typescript",
|
|
@@ -21584,81 +22137,25 @@ var TEXT_EXT = /* @__PURE__ */ new Set([
|
|
|
21584
22137
|
".htm"
|
|
21585
22138
|
]);
|
|
21586
22139
|
var TEXT_MIME = /^(text\/|application\/(json|xml|xhtml\+xml|x-yaml|yaml|toml))/;
|
|
21587
|
-
var MARKITDOWN_EXT = /* @__PURE__ */ new Set([
|
|
21588
|
-
".pdf",
|
|
21589
|
-
".docx",
|
|
21590
|
-
".doc",
|
|
21591
|
-
".pptx",
|
|
21592
|
-
".ppt",
|
|
21593
|
-
".xlsx",
|
|
21594
|
-
".xls",
|
|
21595
|
-
".epub",
|
|
21596
|
-
".rtf",
|
|
21597
|
-
".odt",
|
|
21598
|
-
".ods",
|
|
21599
|
-
".odp"
|
|
21600
|
-
]);
|
|
21601
|
-
var MARKITDOWN_TIMEOUT_MS = 12e4;
|
|
21602
|
-
var MARKITDOWN_MAX_BYTES = 5e7;
|
|
21603
|
-
function runMarkitdown(path2) {
|
|
21604
|
-
return new Promise((resolve12) => {
|
|
21605
|
-
const bin = process.env.MARKITDOWN_BIN ?? "markitdown";
|
|
21606
|
-
let child;
|
|
21607
|
-
try {
|
|
21608
|
-
child = spawn2(bin, [path2], { stdio: ["ignore", "pipe", "ignore"] });
|
|
21609
|
-
} catch {
|
|
21610
|
-
resolve12(null);
|
|
21611
|
-
return;
|
|
21612
|
-
}
|
|
21613
|
-
let out = "";
|
|
21614
|
-
let bytes = 0;
|
|
21615
|
-
let settled = false;
|
|
21616
|
-
const finish = (v) => {
|
|
21617
|
-
if (settled) return;
|
|
21618
|
-
settled = true;
|
|
21619
|
-
clearTimeout(timer);
|
|
21620
|
-
resolve12(v);
|
|
21621
|
-
};
|
|
21622
|
-
const timer = setTimeout(() => {
|
|
21623
|
-
child.kill();
|
|
21624
|
-
finish(null);
|
|
21625
|
-
}, MARKITDOWN_TIMEOUT_MS);
|
|
21626
|
-
child.stdout.on("data", (c) => {
|
|
21627
|
-
bytes += c.length;
|
|
21628
|
-
if (bytes > MARKITDOWN_MAX_BYTES) {
|
|
21629
|
-
child.kill();
|
|
21630
|
-
finish(null);
|
|
21631
|
-
} else {
|
|
21632
|
-
out += c.toString("utf8");
|
|
21633
|
-
}
|
|
21634
|
-
});
|
|
21635
|
-
child.on("error", () => {
|
|
21636
|
-
finish(null);
|
|
21637
|
-
});
|
|
21638
|
-
child.on("close", (code) => {
|
|
21639
|
-
finish(code === 0 && out.trim() ? out.trim() : null);
|
|
21640
|
-
});
|
|
21641
|
-
});
|
|
21642
|
-
}
|
|
21643
22140
|
function languageOf(name) {
|
|
21644
22141
|
return CODE_LANGS[extname(name).toLowerCase()] ?? null;
|
|
21645
22142
|
}
|
|
21646
22143
|
function truncate(s) {
|
|
21647
|
-
return s.length >
|
|
22144
|
+
return s.length > MAX_TEXT2 ? s.slice(0, MAX_TEXT2) : s;
|
|
21648
22145
|
}
|
|
21649
22146
|
async function parseFile(path2, mimeHint, originalName) {
|
|
21650
22147
|
const name = originalName ?? basename7(path2);
|
|
21651
22148
|
const ext = extname(name).toLowerCase();
|
|
21652
22149
|
const lang = languageOf(name);
|
|
21653
22150
|
if (lang) {
|
|
21654
|
-
return { text: truncate(await
|
|
22151
|
+
return { text: truncate(await readFile2(path2, "utf8")), language: lang };
|
|
21655
22152
|
}
|
|
21656
22153
|
if (mimeHint && TEXT_MIME.test(mimeHint) || TEXT_EXT.has(ext)) {
|
|
21657
|
-
return { text: truncate(await
|
|
22154
|
+
return { text: truncate(await readFile2(path2, "utf8")) };
|
|
21658
22155
|
}
|
|
21659
|
-
|
|
21660
|
-
|
|
21661
|
-
|
|
22156
|
+
const doc = await extractDocument(path2, ext);
|
|
22157
|
+
if (doc != null) {
|
|
22158
|
+
return { text: truncate(doc) };
|
|
21662
22159
|
}
|
|
21663
22160
|
return { text: "", skip: true };
|
|
21664
22161
|
}
|
|
@@ -21672,7 +22169,7 @@ function describe(text, mime, name) {
|
|
|
21672
22169
|
|
|
21673
22170
|
// src/ai/vision.ts
|
|
21674
22171
|
import { createRequire as createRequire5 } from "module";
|
|
21675
|
-
import { readFile as
|
|
22172
|
+
import { readFile as readFile3 } from "fs/promises";
|
|
21676
22173
|
var DEFAULT_PROMPT = "Describe this image for a knowledge base in 2-4 factual sentences: what it shows, any visible text, and notable details. No preamble.";
|
|
21677
22174
|
var MAX_DIM = 1568;
|
|
21678
22175
|
async function describeImage(auth, path2, opts = {}) {
|
|
@@ -21688,7 +22185,7 @@ async function describeImage(auth, path2, opts = {}) {
|
|
|
21688
22185
|
}
|
|
21689
22186
|
var DEFAULT_PDF_PROMPT = "Read this document for a knowledge base. First transcribe its readable text, then add a 2-4 sentence factual summary of what it is and its key details. It may be a scanned/image-only PDF \u2014 read the text from the page images. No preamble.";
|
|
21690
22187
|
async function describePdf(auth, path2, opts = {}) {
|
|
21691
|
-
const buf = await
|
|
22188
|
+
const buf = await readFile3(path2);
|
|
21692
22189
|
const maxBytes = opts.maxBytes ?? 3e7;
|
|
21693
22190
|
if (buf.length > maxBytes) {
|
|
21694
22191
|
throw new Error(
|
|
@@ -22037,6 +22534,10 @@ function fileSlug(name, id) {
|
|
|
22037
22534
|
const base = slugify(name.replace(/\.[^./\\]+$/, "")) || "file";
|
|
22038
22535
|
return `${base}-${id.slice(0, 8)}`;
|
|
22039
22536
|
}
|
|
22537
|
+
function fileIdentity(displayName, id) {
|
|
22538
|
+
const label = displayName.trim() || "file";
|
|
22539
|
+
return { slug: fileSlug(displayName, id), name: label, title: label };
|
|
22540
|
+
}
|
|
22040
22541
|
var MIME_BY_EXT = {
|
|
22041
22542
|
".pdf": "application/pdf",
|
|
22042
22543
|
".png": "image/png",
|
|
@@ -22105,6 +22606,45 @@ function labelColumn(cols) {
|
|
|
22105
22606
|
const text = Object.keys(cols).find((c) => !STRUCTURAL.has(c) && !c.endsWith("_id"));
|
|
22106
22607
|
return text ?? null;
|
|
22107
22608
|
}
|
|
22609
|
+
var TEXT_COL_RE = /\b(TEXT|VARCHAR|CHAR|CLOB|CHARACTER|STRING|NAME|CITEXT)\b/i;
|
|
22610
|
+
async function requiredTextFileColumns(db) {
|
|
22611
|
+
const out = /* @__PURE__ */ new Set();
|
|
22612
|
+
try {
|
|
22613
|
+
if (db.getDialect() === "postgres") {
|
|
22614
|
+
const rows = await allAsyncOrSync(
|
|
22615
|
+
db.adapter,
|
|
22616
|
+
`SELECT column_name AS name, data_type AS type, is_nullable, column_default AS dflt
|
|
22617
|
+
FROM information_schema.columns
|
|
22618
|
+
WHERE table_name = 'files' AND table_schema = current_schema()`
|
|
22619
|
+
);
|
|
22620
|
+
for (const r of rows) {
|
|
22621
|
+
if (String(r.is_nullable).toUpperCase() === "NO" && r.dflt == null && TEXT_COL_RE.test(String(r.type))) {
|
|
22622
|
+
out.add(String(r.name));
|
|
22623
|
+
}
|
|
22624
|
+
}
|
|
22625
|
+
} else {
|
|
22626
|
+
const rows = await allAsyncOrSync(db.adapter, `PRAGMA table_info("files")`);
|
|
22627
|
+
for (const r of rows) {
|
|
22628
|
+
if (Number(r.notnull) === 1 && r.dflt_value == null && Number(r.pk) === 0 && TEXT_COL_RE.test(String(r.type))) {
|
|
22629
|
+
out.add(String(r.name));
|
|
22630
|
+
}
|
|
22631
|
+
}
|
|
22632
|
+
}
|
|
22633
|
+
} catch {
|
|
22634
|
+
}
|
|
22635
|
+
return out;
|
|
22636
|
+
}
|
|
22637
|
+
async function requiredFileDefaults(db, displayName, id, provided) {
|
|
22638
|
+
const required = await requiredTextFileColumns(db);
|
|
22639
|
+
const label = displayName.trim() || "file";
|
|
22640
|
+
const out = {};
|
|
22641
|
+
for (const col of required) {
|
|
22642
|
+
if (STRUCTURAL.has(col)) continue;
|
|
22643
|
+
if (provided[col] != null) continue;
|
|
22644
|
+
out[col] = /slug/i.test(col) ? fileSlug(displayName, id) : label;
|
|
22645
|
+
}
|
|
22646
|
+
return out;
|
|
22647
|
+
}
|
|
22108
22648
|
async function buildCatalog(db, descriptions) {
|
|
22109
22649
|
const out = [];
|
|
22110
22650
|
for (const name of db.getRegisteredTableNames()) {
|
|
@@ -22341,7 +22881,8 @@ function looksLikeUrl(s) {
|
|
|
22341
22881
|
const t = s.trim();
|
|
22342
22882
|
return /^https?:\/\/\S+$/i.test(t) && !/\s/.test(t);
|
|
22343
22883
|
}
|
|
22344
|
-
|
|
22884
|
+
var MAX_INGEST_BYTES = 5e7;
|
|
22885
|
+
function readBuffer2(req, maxBytes = MAX_INGEST_BYTES) {
|
|
22345
22886
|
return new Promise((resolve_, reject) => {
|
|
22346
22887
|
const chunks = [];
|
|
22347
22888
|
let size = 0;
|
|
@@ -22405,9 +22946,19 @@ async function dispatchIngestRoute(req, res, ctx) {
|
|
|
22405
22946
|
await rm(tmp, { force: true }).catch(() => void 0);
|
|
22406
22947
|
}
|
|
22407
22948
|
const fileId = crypto.randomUUID();
|
|
22408
|
-
const
|
|
22949
|
+
const rawFilePath = typeof req.headers["x-filepath"] === "string" && req.headers["x-filepath"] || "";
|
|
22950
|
+
let realPath = "";
|
|
22951
|
+
if (rawFilePath) {
|
|
22952
|
+
try {
|
|
22953
|
+
realPath = decodeURIComponent(rawFilePath);
|
|
22954
|
+
} catch {
|
|
22955
|
+
realPath = rawFilePath;
|
|
22956
|
+
}
|
|
22957
|
+
}
|
|
22958
|
+
const uploadRow = {
|
|
22409
22959
|
id: fileId,
|
|
22410
|
-
|
|
22960
|
+
...fileIdentity(name2, fileId),
|
|
22961
|
+
...realPath ? { path: realPath } : {},
|
|
22411
22962
|
original_name: name2,
|
|
22412
22963
|
mime: mime2,
|
|
22413
22964
|
size_bytes: buf.length,
|
|
@@ -22415,6 +22966,10 @@ async function dispatchIngestRoute(req, res, ctx) {
|
|
|
22415
22966
|
description: describe(result.text, mime2, name2),
|
|
22416
22967
|
extraction_status: result.skip ? "skipped" : "extracted",
|
|
22417
22968
|
...blob ? { ref_kind: "blob", blob_path: blob.blob_path, sha256: blob.sha256 } : {}
|
|
22969
|
+
};
|
|
22970
|
+
const { id: id2 } = await createRow(mctx, "files", {
|
|
22971
|
+
...await requiredFileDefaults(ctx.db, name2, fileId, uploadRow),
|
|
22972
|
+
...uploadRow
|
|
22418
22973
|
});
|
|
22419
22974
|
let suggestedLinks = [];
|
|
22420
22975
|
if (!result.skip) {
|
|
@@ -22459,9 +23014,9 @@ async function dispatchIngestRoute(req, res, ctx) {
|
|
|
22459
23014
|
}
|
|
22460
23015
|
}
|
|
22461
23016
|
const textFileId = crypto.randomUUID();
|
|
22462
|
-
const
|
|
23017
|
+
const textRow = {
|
|
22463
23018
|
id: textFileId,
|
|
22464
|
-
|
|
23019
|
+
...fileIdentity(title, textFileId),
|
|
22465
23020
|
original_name: title,
|
|
22466
23021
|
mime: mime2,
|
|
22467
23022
|
size_bytes: Buffer.byteLength(content, "utf8"),
|
|
@@ -22469,6 +23024,10 @@ async function dispatchIngestRoute(req, res, ctx) {
|
|
|
22469
23024
|
description: describe(content, mime2, title),
|
|
22470
23025
|
extraction_status: "extracted",
|
|
22471
23026
|
...sourceUrl ? { ref_kind: "cloud_ref", ref_uri: sourceUrl, ref_provider: "web" } : {}
|
|
23027
|
+
};
|
|
23028
|
+
const { id: id2 } = await createRow(mctx, "files", {
|
|
23029
|
+
...await requiredFileDefaults(ctx.db, title, textFileId, textRow),
|
|
23030
|
+
...textRow
|
|
22472
23031
|
});
|
|
22473
23032
|
const suggestedLinks = await enrichOrFail(mctx, ctx.db, id2, content, title, ctx, res);
|
|
22474
23033
|
if (suggestedLinks === null) return true;
|
|
@@ -22493,17 +23052,25 @@ async function dispatchIngestRoute(req, res, ctx) {
|
|
|
22493
23052
|
sendJson5(res, { error: `file not found: ${abs}` }, 400);
|
|
22494
23053
|
return true;
|
|
22495
23054
|
}
|
|
23055
|
+
if (size > MAX_INGEST_BYTES) {
|
|
23056
|
+
sendJson5(res, { error: "file too large" }, 413);
|
|
23057
|
+
return true;
|
|
23058
|
+
}
|
|
22496
23059
|
const name = basename10(abs);
|
|
22497
23060
|
const mime = mimeFor(name);
|
|
22498
23061
|
const localFileId = crypto.randomUUID();
|
|
22499
|
-
const
|
|
23062
|
+
const localRow = {
|
|
22500
23063
|
id: localFileId,
|
|
22501
|
-
|
|
23064
|
+
...fileIdentity(name, localFileId),
|
|
22502
23065
|
path: abs,
|
|
22503
23066
|
original_name: name,
|
|
22504
23067
|
mime,
|
|
22505
23068
|
size_bytes: size,
|
|
22506
23069
|
extraction_status: "pending"
|
|
23070
|
+
};
|
|
23071
|
+
const { id } = await createRow(mctx, "files", {
|
|
23072
|
+
...await requiredFileDefaults(ctx.db, name, localFileId, localRow),
|
|
23073
|
+
...localRow
|
|
22507
23074
|
});
|
|
22508
23075
|
try {
|
|
22509
23076
|
const result = await extractSource(ctx.db, abs, mime, name);
|
|
@@ -22608,7 +23175,7 @@ function sendText(res, body, status = 200, contentType = "text/plain; charset=ut
|
|
|
22608
23175
|
function openUrl(url) {
|
|
22609
23176
|
const command = process.platform === "darwin" ? "open" : process.platform === "win32" ? "cmd" : "xdg-open";
|
|
22610
23177
|
const args = process.platform === "win32" ? ["/c", "start", "", url] : [url];
|
|
22611
|
-
const child =
|
|
23178
|
+
const child = spawn2(command, args, { stdio: "ignore", detached: true });
|
|
22612
23179
|
child.unref();
|
|
22613
23180
|
}
|
|
22614
23181
|
function listen(server, port, host) {
|
|
@@ -22858,7 +23425,7 @@ function resolveOutputDirForConfig(configPath) {
|
|
|
22858
23425
|
}
|
|
22859
23426
|
return resolve9(base, "context");
|
|
22860
23427
|
}
|
|
22861
|
-
async function openConfig(configPath, outputDir, autoRender = false) {
|
|
23428
|
+
async function openConfig(configPath, outputDir, autoRender = false, teamCloud = false) {
|
|
22862
23429
|
const parsed = parseConfigFile(configPath);
|
|
22863
23430
|
if (!/^postgres(ql)?:\/\//i.test(parsed.dbPath) && !parsed.dbPath.startsWith("file:") && parsed.dbPath !== ":memory:") {
|
|
22864
23431
|
mkdirSync10(dirname11(parsed.dbPath), { recursive: true });
|
|
@@ -22987,6 +23554,7 @@ async function openConfig(configPath, outputDir, autoRender = false) {
|
|
|
22987
23554
|
}
|
|
22988
23555
|
}
|
|
22989
23556
|
let teamContext = null;
|
|
23557
|
+
let cloudReconnectRequired = false;
|
|
22990
23558
|
if (db.getDialect() === "postgres") {
|
|
22991
23559
|
let teamEnabled = false;
|
|
22992
23560
|
try {
|
|
@@ -22994,7 +23562,14 @@ async function openConfig(configPath, outputDir, autoRender = false) {
|
|
|
22994
23562
|
} catch {
|
|
22995
23563
|
teamEnabled = false;
|
|
22996
23564
|
}
|
|
22997
|
-
|
|
23565
|
+
const directGuiPostgres = !teamCloud && isPostgresUrl(parsed.dbPath);
|
|
23566
|
+
if (directGuiPostgres) {
|
|
23567
|
+
if (teamEnabled) {
|
|
23568
|
+
cloudReconnectRequired = true;
|
|
23569
|
+
teamEnabled = false;
|
|
23570
|
+
validTables.clear();
|
|
23571
|
+
}
|
|
23572
|
+
} else if (!teamEnabled) {
|
|
22998
23573
|
try {
|
|
22999
23574
|
const rawDb = parseDocument3(readFileSync15(configPath, "utf8")).get("db");
|
|
23000
23575
|
const dbLine = typeof rawDb === "string" ? rawDb.trim() : "";
|
|
@@ -23036,7 +23611,7 @@ async function openConfig(configPath, outputDir, autoRender = false) {
|
|
|
23036
23611
|
}
|
|
23037
23612
|
}
|
|
23038
23613
|
let realtime = null;
|
|
23039
|
-
if (db.getDialect() === "postgres") {
|
|
23614
|
+
if (db.getDialect() === "postgres" && !cloudReconnectRequired) {
|
|
23040
23615
|
try {
|
|
23041
23616
|
realtime = new RealtimeBroker(parsed.dbPath);
|
|
23042
23617
|
await realtime.start();
|
|
@@ -23075,6 +23650,8 @@ async function openConfig(configPath, outputDir, autoRender = false) {
|
|
|
23075
23650
|
teamsClient,
|
|
23076
23651
|
validTables,
|
|
23077
23652
|
teamContext,
|
|
23653
|
+
teamCloud,
|
|
23654
|
+
cloudReconnectRequired,
|
|
23078
23655
|
junctionTables,
|
|
23079
23656
|
entityContextByTable,
|
|
23080
23657
|
manifest,
|
|
@@ -23169,7 +23746,7 @@ async function disposeActive(active) {
|
|
|
23169
23746
|
async function reopenSameConfig(active, autoRender) {
|
|
23170
23747
|
const feed = active.feed;
|
|
23171
23748
|
await disposeActive(active);
|
|
23172
|
-
const next = await openConfig(active.configPath, active.outputDir, autoRender);
|
|
23749
|
+
const next = await openConfig(active.configPath, active.outputDir, autoRender, active.teamCloud);
|
|
23173
23750
|
next.feed = feed;
|
|
23174
23751
|
return next;
|
|
23175
23752
|
}
|
|
@@ -23304,7 +23881,7 @@ async function applySchemaConfig(active, entry, direction, autoRender) {
|
|
|
23304
23881
|
for (const sql of ddl) await execSql(active.db, sql);
|
|
23305
23882
|
saveConfigDoc(active.configPath, doc);
|
|
23306
23883
|
await disposeActive(active);
|
|
23307
|
-
return openConfig(active.configPath, active.outputDir, autoRender);
|
|
23884
|
+
return openConfig(active.configPath, active.outputDir, autoRender, active.teamCloud);
|
|
23308
23885
|
}
|
|
23309
23886
|
function schemaReverseSummary(verb, entry) {
|
|
23310
23887
|
const what = entry.operation.replace("schema.", "").replace(/_/g, " ");
|
|
@@ -23318,7 +23895,7 @@ async function startGuiServer(options) {
|
|
|
23318
23895
|
const teamCloud = options.teamCloud ?? false;
|
|
23319
23896
|
const autoRender = options.autoRender ?? false;
|
|
23320
23897
|
const sessionId = crypto.randomUUID();
|
|
23321
|
-
let active = await openConfig(configPath, outputDir, autoRender);
|
|
23898
|
+
let active = await openConfig(configPath, outputDir, autoRender, teamCloud);
|
|
23322
23899
|
const latticeRoot = findLatticeRoot(dirname11(configPath));
|
|
23323
23900
|
let currentWorkspaceId = null;
|
|
23324
23901
|
if (latticeRoot) {
|
|
@@ -24508,7 +25085,7 @@ data: ${JSON.stringify(data)}
|
|
|
24508
25085
|
const paths = resolveWorkspacePaths(latticeRoot, ws);
|
|
24509
25086
|
let next;
|
|
24510
25087
|
try {
|
|
24511
|
-
next = await openConfig(paths.configPath, paths.contextDir, autoRender);
|
|
25088
|
+
next = await openConfig(paths.configPath, paths.contextDir, autoRender, teamCloud);
|
|
24512
25089
|
} catch (e) {
|
|
24513
25090
|
const err = e;
|
|
24514
25091
|
sendJson(
|
|
@@ -24550,7 +25127,12 @@ data: ${JSON.stringify(data)}
|
|
|
24550
25127
|
const newPaths = resolveWorkspacePaths(latticeRoot, created);
|
|
24551
25128
|
let newActive;
|
|
24552
25129
|
try {
|
|
24553
|
-
newActive = await openConfig(
|
|
25130
|
+
newActive = await openConfig(
|
|
25131
|
+
newPaths.configPath,
|
|
25132
|
+
newPaths.contextDir,
|
|
25133
|
+
autoRender,
|
|
25134
|
+
teamCloud
|
|
25135
|
+
);
|
|
24554
25136
|
} catch (e) {
|
|
24555
25137
|
sendJson(
|
|
24556
25138
|
res,
|
|
@@ -24609,7 +25191,12 @@ data: ${JSON.stringify(data)}
|
|
|
24609
25191
|
const fbPaths = resolveWorkspacePaths(latticeRoot, fallback);
|
|
24610
25192
|
let next;
|
|
24611
25193
|
try {
|
|
24612
|
-
next = await openConfig(
|
|
25194
|
+
next = await openConfig(
|
|
25195
|
+
fbPaths.configPath,
|
|
25196
|
+
fbPaths.contextDir,
|
|
25197
|
+
autoRender,
|
|
25198
|
+
teamCloud
|
|
25199
|
+
);
|
|
24613
25200
|
} catch (e) {
|
|
24614
25201
|
const err = e;
|
|
24615
25202
|
const codePrefix = err.code ? `[${err.code}] ` : "";
|
|
@@ -24701,7 +25288,12 @@ data: ${JSON.stringify(data)}
|
|
|
24701
25288
|
}
|
|
24702
25289
|
let next;
|
|
24703
25290
|
try {
|
|
24704
|
-
next = await openConfig(
|
|
25291
|
+
next = await openConfig(
|
|
25292
|
+
newPath,
|
|
25293
|
+
resolveOutputDirForConfig(newPath),
|
|
25294
|
+
autoRender,
|
|
25295
|
+
teamCloud
|
|
25296
|
+
);
|
|
24705
25297
|
} catch (e) {
|
|
24706
25298
|
const err = e;
|
|
24707
25299
|
console.error(`[dbconfig.switch] openConfig(${newPath}) failed:`, err);
|
|
@@ -24728,7 +25320,8 @@ data: ${JSON.stringify(data)}
|
|
|
24728
25320
|
const next = await openConfig(
|
|
24729
25321
|
newConfigPath,
|
|
24730
25322
|
resolveOutputDirForConfig(newConfigPath),
|
|
24731
|
-
autoRender
|
|
25323
|
+
autoRender,
|
|
25324
|
+
teamCloud
|
|
24732
25325
|
);
|
|
24733
25326
|
await disposeActive(active);
|
|
24734
25327
|
active = next;
|
|
@@ -24770,7 +25363,8 @@ data: ${JSON.stringify(data)}
|
|
|
24770
25363
|
next = await openConfig(
|
|
24771
25364
|
fallback.path,
|
|
24772
25365
|
resolveOutputDirForConfig(fallback.path),
|
|
24773
|
-
autoRender
|
|
25366
|
+
autoRender,
|
|
25367
|
+
teamCloud
|
|
24774
25368
|
);
|
|
24775
25369
|
} catch (e) {
|
|
24776
25370
|
const err = e;
|
|
@@ -25170,8 +25764,14 @@ data: ${JSON.stringify(data)}
|
|
|
25170
25764
|
teamId: active.teamContext.teamId,
|
|
25171
25765
|
myUserId: active.teamContext.myUserId
|
|
25172
25766
|
} : null,
|
|
25767
|
+
cloudReconnectRequired: active.cloudReconnectRequired,
|
|
25173
25768
|
swap: async () => {
|
|
25174
|
-
const next = await openConfig(
|
|
25769
|
+
const next = await openConfig(
|
|
25770
|
+
active.configPath,
|
|
25771
|
+
active.outputDir,
|
|
25772
|
+
autoRender,
|
|
25773
|
+
active.teamCloud
|
|
25774
|
+
);
|
|
25175
25775
|
await disposeActive(active);
|
|
25176
25776
|
active = next;
|
|
25177
25777
|
}
|
package/dist/index.d.cts
CHANGED
|
@@ -4509,8 +4509,8 @@ interface PdfOptions {
|
|
|
4509
4509
|
}
|
|
4510
4510
|
/**
|
|
4511
4511
|
* Read a PDF with Claude's native document support — works on text PDFs AND
|
|
4512
|
-
* scanned/image-only PDFs (no text layer),
|
|
4513
|
-
* AI-gated; the model call is injectable for tests.
|
|
4512
|
+
* scanned/image-only PDFs (no text layer), where in-process text extraction
|
|
4513
|
+
* finds nothing. AI-gated; the model call is injectable for tests.
|
|
4514
4514
|
*/
|
|
4515
4515
|
declare function describePdf(auth: ClaudeAuth, path: string, opts?: PdfOptions): Promise<string>;
|
|
4516
4516
|
|
package/dist/index.d.ts
CHANGED
|
@@ -4509,8 +4509,8 @@ interface PdfOptions {
|
|
|
4509
4509
|
}
|
|
4510
4510
|
/**
|
|
4511
4511
|
* Read a PDF with Claude's native document support — works on text PDFs AND
|
|
4512
|
-
* scanned/image-only PDFs (no text layer),
|
|
4513
|
-
* AI-gated; the model call is injectable for tests.
|
|
4512
|
+
* scanned/image-only PDFs (no text layer), where in-process text extraction
|
|
4513
|
+
* finds nothing. AI-gated; the model call is injectable for tests.
|
|
4514
4514
|
*/
|
|
4515
4515
|
declare function describePdf(auth: ClaudeAuth, path: string, opts?: PdfOptions): Promise<string>;
|
|
4516
4516
|
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "latticesql",
|
|
3
|
-
"version": "2.2.
|
|
3
|
+
"version": "2.2.4",
|
|
4
4
|
"description": "Persistent structured memory for AI agent systems — pluggable SQLite or Postgres backend, LLM context bridge",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "./dist/index.js",
|
|
@@ -57,10 +57,14 @@
|
|
|
57
57
|
},
|
|
58
58
|
"optionalDependencies": {
|
|
59
59
|
"@anthropic-ai/sdk": "^0.71.2",
|
|
60
|
+
"fflate": "^0.8.3",
|
|
60
61
|
"file-type": "^19.6.0",
|
|
62
|
+
"mammoth": "^1.12.0",
|
|
61
63
|
"pg": "^8.11.0",
|
|
62
64
|
"playwright": "^1.48.0",
|
|
63
|
-
"sharp": "^0.33.5"
|
|
65
|
+
"sharp": "^0.33.5",
|
|
66
|
+
"unpdf": "^1.6.2",
|
|
67
|
+
"word-extractor": "^1.0.4"
|
|
64
68
|
},
|
|
65
69
|
"devDependencies": {
|
|
66
70
|
"@anthropic-ai/sdk": "^0.71.0",
|