@voidwire/lore 0.9.1 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli.ts +66 -1
- package/lib/config.ts +134 -0
- package/lib/db.ts +2 -2
- package/lib/indexer.ts +213 -0
- package/lib/indexers/blogs.ts +146 -0
- package/lib/indexers/captures.ts +105 -0
- package/lib/indexers/commits.ts +90 -0
- package/lib/indexers/development.ts +68 -0
- package/lib/indexers/events.ts +61 -0
- package/lib/indexers/explorations.ts +89 -0
- package/lib/indexers/flux.ts +142 -0
- package/lib/indexers/index.ts +41 -0
- package/lib/indexers/insights.ts +53 -0
- package/lib/indexers/learnings.ts +53 -0
- package/lib/indexers/observations.ts +53 -0
- package/lib/indexers/obsidian.ts +151 -0
- package/lib/indexers/personal.ts +262 -0
- package/lib/indexers/readmes.ts +49 -0
- package/lib/indexers/sessions.ts +127 -0
- package/lib/indexers/teachings.ts +52 -0
- package/lib/info.ts +4 -8
- package/lib/list.ts +25 -39
- package/lib/projects.ts +28 -37
- package/lib/realtime.ts +16 -23
- package/lib/search.ts +6 -12
- package/lib/semantic.ts +6 -31
- package/package.json +3 -2
package/cli.ts
CHANGED
|
@@ -57,6 +57,8 @@ import {
|
|
|
57
57
|
type ObservationConfidence,
|
|
58
58
|
} from "./index";
|
|
59
59
|
import { isValidLoreType, LORE_TYPES } from "./lib/types";
|
|
60
|
+
import { runIndexer } from "./lib/indexer";
|
|
61
|
+
import { indexers } from "./lib/indexers/index";
|
|
60
62
|
|
|
61
63
|
// ============================================================================
|
|
62
64
|
// Argument Parsing
|
|
@@ -99,6 +101,8 @@ const BOOLEAN_FLAGS = new Set([
|
|
|
99
101
|
"exact",
|
|
100
102
|
"semantic",
|
|
101
103
|
"brief",
|
|
104
|
+
"list",
|
|
105
|
+
"rebuild",
|
|
102
106
|
]);
|
|
103
107
|
|
|
104
108
|
function getPositionalArgs(args: string[]): string[] {
|
|
@@ -830,6 +834,63 @@ async function handleCapture(args: string[]): Promise<void> {
|
|
|
830
834
|
}
|
|
831
835
|
}
|
|
832
836
|
|
|
837
|
+
// ============================================================================
|
|
838
|
+
// Index Command
|
|
839
|
+
// ============================================================================
|
|
840
|
+
|
|
841
|
+
/**
 * Entry point for `lore index`.
 *
 * Handles `--help` (prints usage and exits) and `--list` (prints the keys of
 * the indexer registry and exits). Otherwise runs the indexer named by the
 * first positional argument, or every registered indexer when none is given,
 * passing along whether `--rebuild` was set. Exits 0 on success; any error
 * thrown by the run is reported through `fail` with exit code 2.
 */
async function handleIndex(args: string[]): Promise<void> {
  if (hasFlag(args, "help")) {
    showIndexHelp();
  }

  if (hasFlag(args, "list")) {
    console.log("Registered indexers:");
    const registered = Object.keys(indexers);
    if (registered.length > 0) {
      for (const name of registered) {
        console.log(` - ${name}`);
      }
    } else {
      console.log(" (none)");
    }
    process.exit(0);
  }

  // First positional argument selects a single source; default is everything.
  const positionalArgs = getPositionalArgs(args);
  let source = "all";
  if (positionalArgs.length > 0) {
    source = positionalArgs[0];
  }
  const rebuild = hasFlag(args, "rebuild");

  try {
    await runIndexer(source, rebuild, indexers);
    process.exit(0);
  } catch (error) {
    let message = "Unknown error";
    if (error instanceof Error) {
      message = error.message;
    }
    fail(`Index failed: ${message}`, 2);
  }
}
|
|
869
|
+
|
|
870
|
+
/**
 * Print usage for `lore index` and terminate the process with exit code 0.
 *
 * Commands and options are indented and their descriptions column-aligned so
 * that each command is visually separated from its explanation.
 */
function showIndexHelp(): void {
  console.log(`
lore index - Run indexers to populate the search database

Usage:
  lore index                  Run all registered indexers
  lore index <source>         Run a specific indexer
  lore index --rebuild        Clear and rebuild all sources
  lore index --list           List registered indexers

Options:
  --rebuild                   Clear existing entries before indexing
  --list                      Show registered indexers and exit
  --help                      Show this help

Examples:
  lore index --list
  lore index obsidian
  lore index --rebuild
  lore index commits --rebuild
`);
  process.exit(0);
}
|
|
893
|
+
|
|
833
894
|
// ============================================================================
|
|
834
895
|
// Help & Main
|
|
835
896
|
// ============================================================================
|
|
@@ -853,6 +914,7 @@ Usage:
|
|
|
853
914
|
lore about <project> Aggregate view of project knowledge
|
|
854
915
|
lore about <project> --brief Compact project summary
|
|
855
916
|
lore capture task|knowledge|note|teaching Capture knowledge
|
|
917
|
+
lore index [source] [--rebuild] [--list] Run indexers
|
|
856
918
|
|
|
857
919
|
Search Options:
|
|
858
920
|
--exact Use FTS5 text search (bypasses semantic search)
|
|
@@ -1234,9 +1296,12 @@ async function main(): Promise<void> {
|
|
|
1234
1296
|
case "capture":
|
|
1235
1297
|
await handleCapture(commandArgs);
|
|
1236
1298
|
break;
|
|
1299
|
+
case "index":
|
|
1300
|
+
await handleIndex(commandArgs);
|
|
1301
|
+
break;
|
|
1237
1302
|
default:
|
|
1238
1303
|
fail(
|
|
1239
|
-
`Unknown command: ${command}. Use: search, list, sources, info, projects, about, or
|
|
1304
|
+
`Unknown command: ${command}. Use: search, list, sources, info, projects, about, capture, or index`,
|
|
1240
1305
|
);
|
|
1241
1306
|
}
|
|
1242
1307
|
}
|
package/lib/config.ts
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/config.ts - TOML configuration reader
|
|
3
|
+
*
|
|
4
|
+
* Reads ~/.config/lore/config.toml, validates required fields,
|
|
5
|
+
* resolves ~ to absolute paths, and caches the result.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* import { getConfig } from "./config";
|
|
9
|
+
* const config = getConfig();
|
|
10
|
+
* console.log(config.paths.data); // /Users/rudy/.local/share/lore
|
|
11
|
+
* console.log(config.database.sqlite); // /Users/rudy/.local/share/lore/lore.db
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { readFileSync } from "fs";
|
|
15
|
+
import { homedir } from "os";
|
|
16
|
+
import { parse as parseToml } from "@iarna/toml";
|
|
17
|
+
|
|
18
|
+
/**
 * Validated configuration as returned by getConfig.
 *
 * All path values have had a leading `~` expanded by the time callers see
 * them.
 */
export interface LoreConfig {
  /** Values from the [paths] table of config.toml. */
  paths: {
    // Required entries: getConfig rejects a config where any of these is
    // missing or not a string.
    data: string;
    obsidian: string;
    explorations: string;
    blogs: string;
    projects: string;
    personal: string;
    // Optional entries: left undefined when absent or not a string.
    session_events?: string;
    flux?: string;
    flux_projects?: string;
  };
  /** Values from the [database] table of config.toml. */
  database: {
    /** Path of the SQLite database file (required). */
    sqlite: string;
  };
}
|
|
34
|
+
|
|
35
|
+
let cachedConfig: LoreConfig | null = null;
|
|
36
|
+
|
|
37
|
+
/**
 * Expand a leading `~` to the current user's home directory.
 *
 * Only a bare `~` or a `~` immediately followed by a path separator is
 * expanded. Anything else — including `~otheruser/...` and paths that merely
 * contain a tilde — is returned unchanged, because gluing the current home
 * directory onto `otheruser` would produce a path that exists nowhere.
 *
 * The home directory is concatenated rather than passed to
 * `String.prototype.replace` as a replacement pattern, so a `$` in the home
 * path cannot be misinterpreted.
 *
 * @param path - Path as written in the config file.
 * @returns The path with a leading `~` expanded, or the input unchanged.
 */
function resolvePath(path: string): string {
  if (path === "~") {
    return homedir();
  }
  if (path.startsWith("~/") || path.startsWith("~\\")) {
    return homedir() + path.slice(1);
  }
  return path;
}
|
|
43
|
+
|
|
44
|
+
/**
 * Load, validate and cache the lore configuration.
 *
 * Reads `config.toml` from `<home>/.config/lore/`, checks that the [paths]
 * and [database] tables and their required string fields are present,
 * expands a leading `~` in every path via resolvePath, and stores the result
 * in `cachedConfig` so later calls return the same object without touching
 * the filesystem again.
 *
 * @returns The validated configuration with resolved paths.
 * @throws Error when the file does not exist, cannot be read, is not valid
 *   TOML, or lacks a required section or string field.
 */
export function getConfig(): LoreConfig {
  if (cachedConfig) return cachedConfig;

  const configPath = `${homedir()}/.config/lore/config.toml`;

  let raw: string;
  try {
    raw = readFileSync(configPath, "utf-8");
  } catch (err) {
    // Only a missing file gets the "create it" hint; permission problems and
    // other I/O failures are reported as what they are.
    const code =
      typeof err === "object" && err !== null
        ? (err as Record<string, unknown>).code
        : undefined;
    if (code !== "ENOENT") {
      const message = err instanceof Error ? err.message : String(err);
      throw new Error(`Failed to read config file ${configPath}: ${message}`);
    }
    throw new Error(
      `Config file not found: ${configPath}\n` +
        `Create it with [paths] and [database] sections.\n` +
        `See: https://github.com/nickpending/llmcli-tools/tree/main/packages/lore#configuration`,
    );
  }

  let parsed: Record<string, unknown>;
  try {
    parsed = parseToml(raw) as Record<string, unknown>;
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    throw new Error(`Failed to parse config.toml: ${message}`);
  }

  // A TOML table parses to a plain object; arrays are objects too, so they
  // are excluded explicitly.
  const isTable = (value: unknown): value is Record<string, unknown> =>
    typeof value === "object" && value !== null && !Array.isArray(value);

  const requireString = (
    table: Record<string, unknown>,
    section: string,
    field: string,
  ): string => {
    const value = table[field];
    if (typeof value !== "string") {
      throw new Error(
        `Invalid config: ${section}.${field} is missing or not a string`,
      );
    }
    return value;
  };

  const pathsTable = parsed.paths;
  if (!isTable(pathsTable)) {
    throw new Error("Invalid config: missing [paths] section in config.toml");
  }
  const databaseTable = parsed.database;
  if (!isTable(databaseTable)) {
    throw new Error(
      "Invalid config: missing [database] section in config.toml",
    );
  }

  const requiredPath = (field: string): string =>
    resolvePath(requireString(pathsTable, "paths", field));

  const optionalPath = (field: string): string | undefined => {
    const value = pathsTable[field];
    return typeof value === "string" ? resolvePath(value) : undefined;
  };

  // Required paths are checked in declaration order, then database.sqlite,
  // so the first reported problem is the same one as before.
  const paths: LoreConfig["paths"] = {
    data: requiredPath("data"),
    obsidian: requiredPath("obsidian"),
    explorations: requiredPath("explorations"),
    blogs: requiredPath("blogs"),
    projects: requiredPath("projects"),
    personal: requiredPath("personal"),
    session_events: optionalPath("session_events"),
    flux: optionalPath("flux"),
    flux_projects: optionalPath("flux_projects"),
  };
  const sqlite = resolvePath(requireString(databaseTable, "database", "sqlite"));

  const config: LoreConfig = { paths, database: { sqlite } };
  cachedConfig = config;
  return config;
}
|
package/lib/db.ts
CHANGED
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
|
|
8
8
|
import { Database } from "bun:sqlite";
|
|
9
9
|
import { existsSync } from "fs";
|
|
10
|
-
import {
|
|
10
|
+
import { getConfig } from "./config";
|
|
11
11
|
|
|
12
12
|
// Use Homebrew SQLite on macOS to enable extension loading
|
|
13
13
|
// Must be called before any Database instances are created
|
|
@@ -20,7 +20,7 @@ if (existsSync(HOMEBREW_SQLITE)) {
|
|
|
20
20
|
* Get the path to the lore database
|
|
21
21
|
*/
|
|
22
22
|
export function getDatabasePath(): string {
|
|
23
|
-
return
|
|
23
|
+
return getConfig().database.sqlite;
|
|
24
24
|
}
|
|
25
25
|
|
|
26
26
|
/**
|
package/lib/indexer.ts
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/indexer.ts - Indexer framework core
|
|
3
|
+
*
|
|
4
|
+
* Shared framework for all indexers. Handles:
|
|
5
|
+
* - IndexEntry/IndexerContext interfaces
|
|
6
|
+
* - Content chunking (2500 chars, 200 overlap, sentence boundaries)
|
|
7
|
+
* - Content hash dedup (SHA-256)
|
|
8
|
+
* - Entry validation (no topic/content in metadata, no internals)
|
|
9
|
+
* - FTS5 parameterized INSERT
|
|
10
|
+
* - Orchestration (runIndexer)
|
|
11
|
+
*
|
|
12
|
+
* Usage:
|
|
13
|
+
* import { runIndexer, type IndexerFunction } from "./indexer";
|
|
14
|
+
* const myIndexer: IndexerFunction = async (ctx) => {
|
|
15
|
+
* ctx.insert({ source: "mySource", title: "...", content: "...", topic: "..." });
|
|
16
|
+
* };
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { Database } from "bun:sqlite";
|
|
20
|
+
import { createHash } from "crypto";
|
|
21
|
+
import { getConfig, type LoreConfig } from "./config";
|
|
22
|
+
|
|
23
|
+
/**
 * One logical document handed to `ctx.insert` by an indexer.
 *
 * The insert helper hashes `content` for de-duplication, splits it into
 * chunks, and writes one row per chunk to the `search` table.
 */
export interface IndexEntry {
  /** Value written to the `source` column. */
  source: string;
  /** Value written to the `title` column (repeated on every chunk). */
  title: string;
  /** Full text; may be split across several rows. */
  content: string;
  /** Value written to the `topic` column. */
  topic: string;
  /** Written to the `type` column; stored as "" when omitted. */
  type?: string;
  /** Written to the `timestamp` column; stored as "" when omitted. */
  timestamp?: string;
  /**
   * Extra fields, stored as JSON. Must not carry `content_hash`,
   * `chunk_idx` or `total_chunks`; `topic` and `content` here trigger a
   * warning.
   */
  metadata?: Record<string, unknown>;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Everything an indexer receives when it runs.
 */
export interface IndexerContext {
  /** Open handle to the lore SQLite database. */
  db: Database;
  /** Configuration loaded via getConfig. */
  config: LoreConfig;
  /** Validate, de-duplicate, chunk and store a single entry. */
  insert: (entry: IndexEntry) => void;
  /** True when the run was started with the rebuild option. */
  rebuild: boolean;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Signature every registered indexer implements: it is given the shared
 * context and resolves once it has finished emitting entries.
 */
export type IndexerFunction = (ctx: IndexerContext) => Promise<void>;
|
|
41
|
+
|
|
42
|
+
/**
 * Split content into overlapping chunks for indexing.
 *
 * Content of at most 2500 characters is returned as a single chunk. Longer
 * content is cut into windows of up to 2500 characters, and each window
 * after the first starts 200 characters before the previous one ended.
 *
 * A window is shortened so that it ends on a natural boundary when one lies
 * in its final 500 characters: preferably just after the last paragraph
 * break ("\n\n"), otherwise just after the last sentence break (".", "!" or
 * "?" followed by whitespace and then a capital letter). If neither is
 * found the window is cut at the full 2500 characters.
 *
 * @param content - Text to split.
 * @returns The chunks in document order; never empty.
 */
function chunkContent(content: string): string[] {
  const CHUNK_SIZE = 2500;
  const OVERLAP = 200;
  // A boundary is only honoured if it leaves the chunk longer than this.
  const MIN_BREAK_OFFSET = CHUNK_SIZE - 500;
  const SENTENCE_BREAK = /[.!?]\s+(?=[A-Z])/g;

  if (content.length <= CHUNK_SIZE) return [content];

  const chunks: string[] = [];
  let start = 0;

  while (start < content.length) {
    let end = Math.min(start + CHUNK_SIZE, content.length);

    if (end < content.length) {
      const segment = content.slice(start, end);
      const paragraphBreak = segment.lastIndexOf("\n\n");

      if (paragraphBreak > MIN_BREAK_OFFSET) {
        end = start + paragraphBreak + 2;
      } else {
        // Use the same pattern to locate the cut as to detect a sentence
        // break, keeping the last qualifying match rather than the first.
        let sentenceEnd = -1;
        for (const match of segment.matchAll(SENTENCE_BREAK)) {
          const index = match.index ?? -1;
          if (index > MIN_BREAK_OFFSET) {
            sentenceEnd = index + match[0].length;
          }
        }
        if (sentenceEnd > -1) {
          end = start + sentenceEnd;
        }
      }
    }

    chunks.push(content.slice(start, end));

    if (end >= content.length) break;
    // Every chunk is longer than MIN_BREAK_OFFSET, so stepping back by the
    // overlap still moves the window forward.
    start = end - OVERLAP;
  }

  return chunks;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Check an entry's metadata before it is stored.
 *
 * `topic` and `content` have their own columns, so finding either inside
 * metadata only produces a console warning. `content_hash`, `chunk_idx` and
 * `total_chunks` are reserved for the framework; the first one found makes
 * this function throw.
 *
 * @throws Error when metadata contains a reserved framework key.
 */
function validateEntry(entry: IndexEntry): void {
  const metadata = entry.metadata || {};
  // Built lazily so the label is only formatted when a message is needed.
  const describe = (): string => `${entry.source}:${entry.title}`;

  for (const promoted of ["topic", "content"]) {
    if (promoted in metadata) {
      console.warn(
        `WARNING: ${promoted} should not be in metadata for ${describe()}`,
      );
    }
  }

  const reserved = ["content_hash", "chunk_idx", "total_chunks"].find(
    (key) => key in metadata,
  );
  if (reserved !== undefined) {
    throw new Error(
      `Framework internal '${reserved}' found in metadata for ${describe()}`,
    );
  }
}
|
|
117
|
+
|
|
118
|
+
/**
 * Build the context object passed to indexers.
 *
 * The returned `insert` validates the entry, skips it when the SHA-256 of its
 * content is already in `seenHashes` (recording the hash otherwise), splits
 * the content into chunks and writes one `search` row per chunk through a
 * single prepared statement.
 *
 * @param db - Database the rows are written to.
 * @param config - Configuration exposed to indexers.
 * @param rebuild - Rebuild flag exposed to indexers.
 * @param seenHashes - Content hashes already inserted; mutated by `insert`.
 */
export function createIndexerContext(
  db: Database,
  config: LoreConfig,
  rebuild: boolean,
  seenHashes: Set<string>,
): IndexerContext {
  const statement = db.prepare(
    "INSERT INTO search (source, title, content, metadata, topic, type, timestamp) VALUES (?, ?, ?, ?, ?, ?, ?)",
  );

  function insert(entry: IndexEntry): void {
    validateEntry(entry);

    // Identical content is stored once, whichever entry arrives first.
    const digest = createHash("sha256").update(entry.content).digest("hex");
    if (seenHashes.has(digest)) return;
    seenHashes.add(digest);

    chunkContent(entry.content).forEach((piece) => {
      statement.run(
        entry.source,
        entry.title,
        piece,
        JSON.stringify(entry.metadata || {}),
        entry.topic,
        entry.type || "",
        entry.timestamp || "",
      );
    });
  }

  return { db, config, rebuild, insert };
}
|
|
168
|
+
|
|
169
|
+
/**
 * Run one registered indexer, or all of them.
 *
 * The requested names are checked against the registry's own keys before the
 * configuration is loaded or the database is opened, so a mistyped source
 * fails immediately instead of being logged and then reported as a
 * successful run. Inherited object keys such as `toString` are not treated
 * as indexers.
 *
 * For each selected indexer, when `rebuild` is true the rows whose `source`
 * equals the registry key are deleted first (this assumes each indexer
 * writes rows under its own registry key — confirm when adding indexers).
 * The database handle is closed whether or not an indexer throws.
 *
 * @param source - Registry key of the indexer to run, or "all".
 * @param rebuild - Delete a source's existing rows before re-indexing it.
 * @param registry - Map from source name to indexer function.
 * @throws Error when `source` names an indexer that is not registered, and
 *   any error raised by getConfig, the database, or an indexer.
 */
export async function runIndexer(
  source: string | "all",
  rebuild: boolean,
  registry: Record<string, IndexerFunction>,
): Promise<void> {
  const requested = source === "all" ? Object.keys(registry) : [source];

  const plan: Array<{ name: string; indexer: IndexerFunction }> = [];
  const unknown: string[] = [];
  for (const name of requested) {
    const indexer = Object.prototype.hasOwnProperty.call(registry, name)
      ? registry[name]
      : undefined;
    if (indexer) {
      plan.push({ name, indexer });
    } else {
      unknown.push(name);
    }
  }

  if (unknown.length > 0) {
    const known = Object.keys(registry).join(", ") || "(none)";
    throw new Error(
      `Unknown source: ${unknown.join(", ")}. Registered indexers: ${known}`,
    );
  }

  const config = getConfig();
  const db = new Database(config.database.sqlite);

  try {
    db.run("PRAGMA busy_timeout = 5000");

    // Shared across all indexers in this run so duplicates are skipped
    // regardless of which source produced them first.
    const seenHashes = new Set<string>();
    const ctx = createIndexerContext(db, config, rebuild, seenHashes);

    for (const { name, indexer } of plan) {
      console.log(`Indexing ${name}...`);

      if (rebuild) {
        db.run("DELETE FROM search WHERE source = ?", [name]);
      }

      await indexer(ctx);
    }

    console.log("Indexing complete");
  } finally {
    db.close();
  }
}
|
package/lib/indexers/blogs.ts
ADDED
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* lib/indexers/blogs.ts - Hugo blog posts indexer
|
|
3
|
+
*
|
|
4
|
+
* Scans blog content/posts directory for markdown files.
|
|
5
|
+
* Extracts title, date, categories, tags from frontmatter.
|
|
6
|
+
* Derives URL from filename when slug not available.
|
|
7
|
+
*
|
|
8
|
+
* Source: blogs
|
|
9
|
+
* Topic: frontmatter categories joined (empty if none)
|
|
10
|
+
* Type: (empty)
|
|
11
|
+
* Timestamp: frontmatter date or file mtime as ISO 8601
|
|
12
|
+
*/
|
|
13
|
+
|
|
14
|
+
import { readdirSync, readFileSync, statSync, existsSync } from "fs";
|
|
15
|
+
import { join, basename } from "path";
|
|
16
|
+
import type { IndexerContext } from "../indexer";
|
|
17
|
+
|
|
18
|
+
/**
 * Recursively collect the paths of all `.md` files under a directory.
 *
 * @param dir - Directory to scan; a non-existent directory contributes nothing.
 * @param files - Accumulator the results are appended to (also the return value).
 * @returns `files`, with every markdown file found beneath `dir` appended.
 */
function walkMarkdownFiles(dir: string, files: string[] = []): string[] {
  if (existsSync(dir)) {
    readdirSync(dir, { withFileTypes: true }).forEach((dirent) => {
      const candidate = join(dir, dirent.name);

      if (dirent.isDirectory()) {
        // Descend in place so results keep directory-listing order.
        walkMarkdownFiles(candidate, files);
        return;
      }

      if (dirent.isFile() && dirent.name.endsWith(".md")) {
        files.push(candidate);
      }
    });
  }

  return files;
}
|
|
35
|
+
|
|
36
|
+
/** Base URL of published posts; the post slug and a trailing "/" are appended. */
const BLOG_POST_BASE_URL = "https://labs.voidwire.info/posts/";

/** Trim a value and remove one pair of matching surrounding quotes, if any. */
function stripQuotes(value: string): string {
  const trimmed = value.trim();
  const quoted = trimmed.match(/^(["'])(.*)\1$/);
  return quoted ? (quoted[2] ?? "").trim() : trimmed;
}

/** Read a single-line `key: value` front matter field; undefined when absent or empty. */
function readFrontmatterScalar(
  frontmatter: string,
  key: string,
): string | undefined {
  const match = frontmatter.match(new RegExp(`^${key}:[ \\t]*(.+)$`, "m"));
  if (!match) return undefined;
  const value = stripQuotes(match[1] ?? "");
  return value === "" ? undefined : value;
}

/**
 * Read a list-valued front matter field written either inline
 * (`key: [a, "b"]`) or as a YAML block list (`key:` followed by `- a` lines,
 * indented or not). Returns an empty array when the field is absent.
 */
function readFrontmatterList(frontmatter: string, key: string): string[] {
  const inline = frontmatter.match(
    new RegExp(`^${key}:\\s*\\[(.+)\\][ \\t]*$`, "m"),
  );
  if (inline) {
    return (inline[1] ?? "")
      .split(",")
      .map((item) => stripQuotes(item))
      .filter(Boolean);
  }

  const block = frontmatter.match(
    new RegExp(`^${key}:\\s*\\n((?:[ \\t]*-[ \\t]+.+\\n?)+)`, "m"),
  );
  if (!block) return [];

  return (block[1] ?? "")
    .split("\n")
    .map((line) => stripQuotes(line.replace(/^[ \t]*-[ \t]+/, "")))
    .filter(Boolean);
}

/**
 * Index Hugo blog posts found under `<paths.blogs>/content/posts`.
 *
 * For every markdown file the front matter (if present) supplies the title,
 * date, slug, categories and tags; the filename is the fallback for title
 * and slug, and the file's mtime is the fallback timestamp. Each post is
 * inserted with source "blogs", a "[blog] " title prefix, the categories
 * joined by spaces as topic, and metadata holding the post URL and the word
 * count of the post body. Tags are appended to the indexed content as a
 * final "Tags: ..." line so they are searchable; that line is not included
 * in the word count.
 *
 * Front matter is recognised with either LF or CRLF line endings. A file
 * that cannot be read or inserted is reported with a warning and skipped.
 *
 * @param ctx - Indexer context providing the config and the insert helper.
 */
export async function indexBlogs(ctx: IndexerContext): Promise<void> {
  const postsDir = join(ctx.config.paths.blogs, "content", "posts");

  if (!existsSync(postsDir)) {
    console.log(`Blog posts directory not found: ${postsDir}`);
    return;
  }

  for (const filePath of walkMarkdownFiles(postsDir)) {
    try {
      const raw = readFileSync(filePath, "utf-8");
      const fileStem = basename(filePath, ".md");

      let body = raw;
      let title = fileStem;
      let date: string | undefined;
      let slug: string | undefined;
      let categories: string[] = [];
      let tags: string[] = [];

      const frontmatterMatch = raw.match(
        /^---\r?\n([\s\S]*?)\r?\n---(?:\r?\n|$)/,
      );
      if (frontmatterMatch) {
        // Normalise line endings once so the field patterns only deal with "\n".
        const frontmatter = (frontmatterMatch[1] ?? "").replace(/\r\n/g, "\n");

        title = readFrontmatterScalar(frontmatter, "title") ?? title;
        date = readFrontmatterScalar(frontmatter, "date");
        slug = readFrontmatterScalar(frontmatter, "slug");
        categories = readFrontmatterList(frontmatter, "categories");
        tags = readFrontmatterList(frontmatter, "tags");

        body = raw.slice(frontmatterMatch[0].length);
      }

      // Count words before the synthetic tags line is appended.
      const wordCount = body.split(/\s+/).filter(Boolean).length;

      const content =
        tags.length > 0 ? `${body}\nTags: ${tags.join(", ")}` : body;

      const metadata: Record<string, unknown> = {
        url: `${BLOG_POST_BASE_URL}${slug || fileStem}/`,
      };
      if (wordCount) metadata.word_count = wordCount;

      ctx.insert({
        source: "blogs",
        title: `[blog] ${title}`,
        content,
        topic: categories.join(" "),
        timestamp: date || statSync(filePath).mtime.toISOString(),
        metadata,
      });
    } catch (e) {
      const message = e instanceof Error ? e.message : String(e);
      console.warn(`Failed to index ${filePath}: ${message}`);
    }
  }
}
|