wayfind 2.0.36 → 2.0.38
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- package/bin/content-store.js +108 -0
- package/bin/storage/sqlite-backend.js +20 -15
- package/package.json +1 -1
package/bin/content-store.js
CHANGED
|
@@ -1852,6 +1852,114 @@ async function indexSignals(options = {}) {
|
|
|
1852
1852
|
}
|
|
1853
1853
|
}
|
|
1854
1854
|
|
|
1855
|
+
// ── Chunk long signal entries for better embedding retrieval ──────────────
|
|
1856
|
+
// Split signal content by ## headings into section-level entries.
|
|
1857
|
+
// Each chunk gets its own embedding so semantic search matches at section level.
|
|
1858
|
+
const MIN_CHUNK_CHARS = 200;
|
|
1859
|
+
const MAX_CHUNK_CHARS = 3000;
|
|
1860
|
+
|
|
1861
|
+
// Collect all signal files across all channels for chunking
|
|
1862
|
+
const allSignalFiles = [];
|
|
1863
|
+
for (const ch of channels) {
|
|
1864
|
+
const chDir = path.join(signalsDir, ch);
|
|
1865
|
+
try {
|
|
1866
|
+
const entries = fs.readdirSync(chDir, { withFileTypes: true });
|
|
1867
|
+
for (const e of entries) {
|
|
1868
|
+
if (e.isFile() && e.name.endsWith('.md')) {
|
|
1869
|
+
allSignalFiles.push({ filePath: path.join(chDir, e.name), file: e.name, repo: 'signals/' + ch, channel: ch });
|
|
1870
|
+
}
|
|
1871
|
+
}
|
|
1872
|
+
for (const ownerEntry of entries) {
|
|
1873
|
+
if (!ownerEntry.isDirectory()) continue;
|
|
1874
|
+
const ownerDir = path.join(chDir, ownerEntry.name);
|
|
1875
|
+
let repoEntries;
|
|
1876
|
+
try { repoEntries = fs.readdirSync(ownerDir, { withFileTypes: true }); } catch { continue; }
|
|
1877
|
+
for (const repoEntry of repoEntries) {
|
|
1878
|
+
if (!repoEntry.isDirectory()) continue;
|
|
1879
|
+
const repoDir = path.join(ownerDir, repoEntry.name);
|
|
1880
|
+
let repoFiles;
|
|
1881
|
+
try { repoFiles = fs.readdirSync(repoDir).filter(f => f.endsWith('.md')); } catch { continue; }
|
|
1882
|
+
for (const f of repoFiles) {
|
|
1883
|
+
allSignalFiles.push({ filePath: path.join(repoDir, f), file: f, repo: `${ownerEntry.name}/${repoEntry.name}`, channel: ch });
|
|
1884
|
+
}
|
|
1885
|
+
}
|
|
1886
|
+
}
|
|
1887
|
+
} catch { continue; }
|
|
1888
|
+
}
|
|
1889
|
+
|
|
1890
|
+
for (const { filePath, file, repo, channel: ch } of allSignalFiles) {
|
|
1891
|
+
let content;
|
|
1892
|
+
try {
|
|
1893
|
+
content = fs.readFileSync(filePath, 'utf8');
|
|
1894
|
+
} catch {
|
|
1895
|
+
continue;
|
|
1896
|
+
}
|
|
1897
|
+
if (content.length < MIN_CHUNK_CHARS * 2) continue; // Too short to chunk
|
|
1898
|
+
|
|
1899
|
+
const dateMatch = file.match(/^(\d{4}-\d{2}-\d{2})/);
|
|
1900
|
+
const date = dateMatch ? dateMatch[1] : file.replace(/\.md$/, '');
|
|
1901
|
+
const titleMatch = content.match(/^#\s+(.+)$/m);
|
|
1902
|
+
const parentTitle = titleMatch ? titleMatch[1].trim() : file.replace(/\.md$/, '');
|
|
1903
|
+
const parentId = generateEntryId(date, repo, file.replace(/\.md$/, ''));
|
|
1904
|
+
|
|
1905
|
+
// Split by ## headings
|
|
1906
|
+
const sections = content.split(/^(?=##\s)/m).filter(s => s.trim().length >= MIN_CHUNK_CHARS);
|
|
1907
|
+
if (sections.length <= 1) continue; // Only one section — parent embedding is sufficient
|
|
1908
|
+
|
|
1909
|
+
for (let i = 0; i < sections.length; i++) {
|
|
1910
|
+
let section = sections[i];
|
|
1911
|
+
const headingMatch = section.match(/^##\s+(.+)$/m);
|
|
1912
|
+
const sectionTitle = headingMatch ? headingMatch[1].trim() : `Section ${i + 1}`;
|
|
1913
|
+
const chunkTitle = `${parentTitle} — ${sectionTitle}`;
|
|
1914
|
+
|
|
1915
|
+
if (section.length > MAX_CHUNK_CHARS) {
|
|
1916
|
+
section = section.slice(0, MAX_CHUNK_CHARS);
|
|
1917
|
+
}
|
|
1918
|
+
|
|
1919
|
+
const chunkId = generateEntryId(date, repo, `chunk-${i}-${file.replace(/\.md$/, '')}`);
|
|
1920
|
+
const chunkHash = contentHash(section);
|
|
1921
|
+
const existingChunk = existingIndex.entries[chunkId];
|
|
1922
|
+
|
|
1923
|
+
if (existingChunk && existingChunk.contentHash === chunkHash) {
|
|
1924
|
+
if (doEmbeddings && !existingChunk.hasEmbedding) {
|
|
1925
|
+
try {
|
|
1926
|
+
const vec = await llm.generateEmbedding(section);
|
|
1927
|
+
existingEmbeddings[chunkId] = vec;
|
|
1928
|
+
existingChunk.hasEmbedding = true;
|
|
1929
|
+
} catch {
|
|
1930
|
+
// Skip
|
|
1931
|
+
}
|
|
1932
|
+
}
|
|
1933
|
+
continue;
|
|
1934
|
+
}
|
|
1935
|
+
|
|
1936
|
+
existingIndex.entries[chunkId] = {
|
|
1937
|
+
date,
|
|
1938
|
+
repo,
|
|
1939
|
+
title: chunkTitle,
|
|
1940
|
+
source: 'signal-chunk',
|
|
1941
|
+
parentId,
|
|
1942
|
+
chunkIndex: i,
|
|
1943
|
+
user: '',
|
|
1944
|
+
drifted: false,
|
|
1945
|
+
contentHash: chunkHash,
|
|
1946
|
+
contentLength: section.length,
|
|
1947
|
+
tags: [ch, sectionTitle.toLowerCase()],
|
|
1948
|
+
hasEmbedding: false,
|
|
1949
|
+
};
|
|
1950
|
+
|
|
1951
|
+
if (doEmbeddings) {
|
|
1952
|
+
try {
|
|
1953
|
+
const vec = await llm.generateEmbedding(section);
|
|
1954
|
+
existingEmbeddings[chunkId] = vec;
|
|
1955
|
+
existingIndex.entries[chunkId].hasEmbedding = true;
|
|
1956
|
+
} catch {
|
|
1957
|
+
// Continue without embedding
|
|
1958
|
+
}
|
|
1959
|
+
}
|
|
1960
|
+
}
|
|
1961
|
+
}
|
|
1962
|
+
|
|
1855
1963
|
// Save
|
|
1856
1964
|
existingIndex.entryCount = Object.keys(existingIndex.entries).length;
|
|
1857
1965
|
backend.saveIndex(existingIndex);
|
|
package/bin/storage/sqlite-backend.js
CHANGED
|
@@ -155,6 +155,26 @@ class SqliteBackend {
|
|
|
155
155
|
fs.mkdirSync(this.storePath, { recursive: true });
|
|
156
156
|
this.db = new Database(this.dbPath);
|
|
157
157
|
this.db.pragma('journal_mode = WAL');
|
|
158
|
+
|
|
159
|
+
// Migrate existing databases BEFORE running full schema (which creates
|
|
160
|
+
// indexes on columns that may not exist yet in pre-v2.0.29 databases).
|
|
161
|
+
const tables = this.db.prepare("SELECT name FROM sqlite_master WHERE type='table'").all().map(r => r.name);
|
|
162
|
+
if (tables.includes('decisions')) {
|
|
163
|
+
const cols = this.db.prepare('PRAGMA table_info(decisions)').all().map(c => c.name);
|
|
164
|
+
if (!cols.includes('quality_score')) {
|
|
165
|
+
this.db.exec('ALTER TABLE decisions ADD COLUMN quality_score INTEGER DEFAULT 0');
|
|
166
|
+
}
|
|
167
|
+
if (!cols.includes('distill_tier')) {
|
|
168
|
+
this.db.exec("ALTER TABLE decisions ADD COLUMN distill_tier TEXT DEFAULT 'raw'");
|
|
169
|
+
}
|
|
170
|
+
if (!cols.includes('distilled_from')) {
|
|
171
|
+
this.db.exec('ALTER TABLE decisions ADD COLUMN distilled_from TEXT DEFAULT NULL');
|
|
172
|
+
}
|
|
173
|
+
if (!cols.includes('distilled_at')) {
|
|
174
|
+
this.db.exec('ALTER TABLE decisions ADD COLUMN distilled_at INTEGER DEFAULT NULL');
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
|
|
158
178
|
this.db.exec(SCHEMA_SQL);
|
|
159
179
|
fs.chmodSync(this.dbPath, 0o600);
|
|
160
180
|
|
|
@@ -162,21 +182,6 @@ class SqliteBackend {
|
|
|
162
182
|
if (!existing) {
|
|
163
183
|
this.db.prepare('INSERT INTO metadata (key, value) VALUES (?, ?)').run('schema_version', SCHEMA_VERSION);
|
|
164
184
|
}
|
|
165
|
-
|
|
166
|
-
// Migrate existing databases: add new columns if they don't exist
|
|
167
|
-
const cols = this.db.prepare('PRAGMA table_info(decisions)').all().map(c => c.name);
|
|
168
|
-
if (!cols.includes('quality_score')) {
|
|
169
|
-
this.db.exec('ALTER TABLE decisions ADD COLUMN quality_score INTEGER DEFAULT 0');
|
|
170
|
-
}
|
|
171
|
-
if (!cols.includes('distill_tier')) {
|
|
172
|
-
this.db.exec('ALTER TABLE decisions ADD COLUMN distill_tier TEXT DEFAULT \'raw\'');
|
|
173
|
-
}
|
|
174
|
-
if (!cols.includes('distilled_from')) {
|
|
175
|
-
this.db.exec('ALTER TABLE decisions ADD COLUMN distilled_from TEXT DEFAULT NULL');
|
|
176
|
-
}
|
|
177
|
-
if (!cols.includes('distilled_at')) {
|
|
178
|
-
this.db.exec('ALTER TABLE decisions ADD COLUMN distilled_at INTEGER DEFAULT NULL');
|
|
179
|
-
}
|
|
180
185
|
}
|
|
181
186
|
|
|
182
187
|
close() {
|
package/package.json
CHANGED