ex-brain 0.2.7 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/ai/ax-pipeline.ts +114 -0
- package/src/ai/compiler.ts +118 -113
- package/src/ai/entity-link.ts +96 -78
- package/src/ai/timeline-extractor.ts +110 -99
- package/src/commands/compile-cmd.ts +1 -1
- package/src/commands/entity-links.ts +105 -0
- package/src/commands/import-cmd.ts +464 -0
- package/src/commands/index.ts +30 -2314
- package/src/commands/misc-cmds.ts +190 -0
- package/src/commands/misc-commands.ts +252 -0
- package/src/commands/put-cmd.ts +525 -0
- package/src/commands/query-cmd.ts +486 -0
- package/src/commands/shared.ts +109 -0
- package/src/commands/timeline-cmd.ts +159 -0
- package/src/config/index.ts +53 -0
- package/src/config/init.ts +50 -0
- package/src/config/paths.ts +21 -0
- package/src/config/schema.ts +121 -0
- package/src/config/settings.ts +168 -0
- package/src/db/client.ts +1 -1
- package/src/markdown/document-loader.ts +30 -2
- package/src/repositories/brain-repo.ts +43 -1
- package/src/settings.ts +27 -282
- /package/src/{config.ts → slug-utils.ts} +0 -0
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
import { Command } from "commander";
|
|
2
|
+
import { loadSettings } from "../settings";
|
|
3
|
+
import { addDryRun, isDryRun, withRepo, isJson, print } from "./shared";
|
|
4
|
+
import { createProgress, formatDuration } from "../utils/progress";
|
|
5
|
+
|
|
6
|
+
/**
 * Parse the raw `--limit` option text into a non-negative integer.
 *
 * @param raw - Option value as delivered by the CLI parser (a string), or
 *   `undefined` when no value is available.
 * @param fallback - Limit to use when `raw` is `undefined`.
 * @returns The parsed limit.
 * @throws Error when `raw` is blank or is not a non-negative integer, so that
 *   `NaN`, negative or fractional limits never reach the repository.
 */
function parseLimitOption(raw: string | undefined, fallback: number): number {
  if (raw === undefined) {
    return fallback;
  }
  const text = raw.trim();
  const parsed = Number(text);
  if (text === "" || !Number.isInteger(parsed) || parsed < 0) {
    throw new Error(`invalid --limit value: ${raw} (expected a non-negative integer)`);
  }
  return parsed;
}

/**
 * Register the `timeline` command group (`list`, `add`, `extract`, `global`)
 * on the given commander program.
 *
 * Every subcommand runs its repository work through `withRepo` and reports
 * its result through `print`; `add` and `extract` additionally honour the
 * dry-run option installed by `addDryRun`.
 *
 * @param program - Root commander program the `timeline` command is attached to.
 */
export function registerTimelineCommand(program: Command): void {
  const timelineCmd = program
    .command("timeline")
    .description("manage timeline entries");

  // timeline list
  timelineCmd
    .command("list")
    .argument("<slug>", "page slug")
    .option("--limit <number>", "max results", "50")
    .description("list timeline entries for a page")
    .addHelpText("after", `
Examples:
ebrain timeline list projects/alpha
ebrain timeline list projects/alpha --limit 10
`)
    .action(async (slug: string, opts: Record<string, string>) => {
      await withRepo(program, async (repo) => {
        // Validated inside the callback so a bad value fails the same way as
        // the other errors thrown from repository work.
        const limit = parseLimitOption(opts.limit, 50);
        const rows = await repo.timeline(slug, limit);
        print(program, rows);
      });
    });

  // timeline add
  addDryRun(
    timelineCmd
      .command("add")
      .argument("<slug>", "page slug")
      .requiredOption("--date <date>", "date (YYYY-MM-DD or ISO)")
      .requiredOption("--summary <summary>", "one-line summary")
      .option("--source <source>", "event source", "manual")
      .option("--detail <detail>", "detail markdown", "")
      .description("add a timeline entry")
      .addHelpText("after", `
Examples:
ebrain timeline add projects/alpha --date 2025-03-15 --summary "v1.0 shipped"
ebrain timeline add projects/alpha --date 2025-03-15 --summary "launch" --source release
ebrain timeline add projects/alpha --date 2025-03-15 --summary "launch" --dry-run
`),
  ).action(async (slug: string, opts: {
    date: string;
    summary: string;
    source?: string;
    detail?: string;
    dryRun?: boolean;
  }) => {
    const source = opts.source ?? "manual";

    // Dry run: describe what would be written without opening the repository.
    if (isDryRun(opts)) {
      print(program, {
        dryRun: true,
        action: "timeline-add",
        slug,
        date: opts.date,
        summary: opts.summary,
        source,
      });
      return;
    }

    await withRepo(program, async (repo) => {
      await repo.timelineAdd({
        pageSlug: slug,
        date: opts.date,
        source,
        summary: opts.summary,
        detail: opts.detail ?? "",
      });
      print(program, {
        ok: true,
        action: "timeline-add",
        slug,
        date: opts.date,
      });
    });
  });

  // timeline extract
  addDryRun(
    timelineCmd
      .command("extract")
      .argument("<slug>", "page slug")
      .option("--source <source>", "source identifier", "extracted")
      .option("--default-date <date>", "default date (YYYY-MM-DD)")
      .description("extract timeline events from page content using AI")
      .addHelpText("after", `
Examples:
ebrain timeline extract companies/river-ai
ebrain timeline extract docs/meeting --source meeting_notes --default-date 2024-03-15
`),
  ).action(async (slug: string, opts: { source?: string; defaultDate?: string; dryRun?: boolean }) => {
    // Resolve the defaults once so the dry-run report and the real run agree.
    const source = opts.source ?? "extracted";
    const defaultDate = opts.defaultDate ?? new Date().toISOString().slice(0, 10);

    if (isDryRun(opts)) {
      print(program, {
        dryRun: true,
        action: "timeline-extract",
        slug,
        source,
        defaultDate,
      });
      return;
    }

    await withRepo(program, async (repo) => {
      const page = await repo.getPage(slug);
      if (!page) {
        throw new Error(`page not found: ${slug}`);
      }
      const settings = await loadSettings();

      const progress = createProgress();
      progress.start(`Extracting timeline from ${slug}...`);
      const startTime = Date.now();

      // Tracks whether the progress indicator has reached a final state, so a
      // failure during extraction does not leave it running.
      let progressSettled = false;
      try {
        const result = await repo.extractAndAddTimeline(
          slug,
          page.compiledTruth,
          source,
          defaultDate,
          settings.llm,
        );

        const duration = formatDuration(Date.now() - startTime);

        if (result.entries.length > 0) {
          progress.succeed(`${result.entries.length} events extracted (${duration})`);
        } else {
          progress.stop();
          process.stderr.write(`No events found (${duration})\n`);
        }
        progressSettled = true;

        print(program, {
          ok: true,
          action: "timeline-extract",
          slug,
          entriesAdded: result.entries.length,
          entries: result.entries,
          confidence: result.confidence,
        });
      } finally {
        if (!progressSettled) {
          progress.stop();
        }
      }
    });
  });

  // timeline global
  timelineCmd
    .command("global")
    .option("--limit <number>", "max results", "100")
    .description("list timeline entries across all pages")
    .addHelpText("after", `
Examples:
ebrain timeline global
ebrain timeline global --limit 20
`)
    .action(async (opts: Record<string, string>) => {
      await withRepo(program, async (repo) => {
        const limit = parseLimitOption(opts.limit, 100);
        const entries = await repo.timelineGlobal(limit);
        print(program, entries);
      });
    });
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
// ---------------------------------------------------------------------------
|
|
2
|
+
// Public API — re-export from individual modules
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
|
|
5
|
+
export {
|
|
6
|
+
SETTINGS_DIR,
|
|
7
|
+
SETTINGS_PATH,
|
|
8
|
+
DEFAULT_DB_PATH,
|
|
9
|
+
expandTilde,
|
|
10
|
+
} from "./paths";
|
|
11
|
+
|
|
12
|
+
export {
|
|
13
|
+
SettingsSchema,
|
|
14
|
+
RemoteDbSchema,
|
|
15
|
+
EmbedSchema,
|
|
16
|
+
LLMSchema,
|
|
17
|
+
type RawSettings,
|
|
18
|
+
type ResolvedSettings,
|
|
19
|
+
type ResolvedExtraction,
|
|
20
|
+
type ResolvedRemoteDb,
|
|
21
|
+
type ResolvedEmbed,
|
|
22
|
+
type ResolvedLLM,
|
|
23
|
+
DEFAULT_REMOTE,
|
|
24
|
+
DEFAULT_EMBED,
|
|
25
|
+
DEFAULT_LLM,
|
|
26
|
+
DEFAULT_EXTRACTION,
|
|
27
|
+
} from "./schema";
|
|
28
|
+
|
|
29
|
+
export {
|
|
30
|
+
type EnvSource,
|
|
31
|
+
readSettingsFile,
|
|
32
|
+
resolveSettings,
|
|
33
|
+
loadSettings,
|
|
34
|
+
} from "./settings";
|
|
35
|
+
|
|
36
|
+
export {
|
|
37
|
+
createDefaultSettings,
|
|
38
|
+
} from "./init";
|
|
39
|
+
|
|
40
|
+
// ---------------------------------------------------------------------------
|
|
41
|
+
// Slug utilities (re-exported from ../slug-utils)
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
|
|
44
|
+
export {
|
|
45
|
+
DEFAULT_DB_NAME,
|
|
46
|
+
PAGES_COLLECTION,
|
|
47
|
+
MAX_SLUG_LENGTH,
|
|
48
|
+
nowIso,
|
|
49
|
+
slugToTitle,
|
|
50
|
+
inferTypeFromSlug,
|
|
51
|
+
slugify,
|
|
52
|
+
normalizeLongSlug,
|
|
53
|
+
} from "../slug-utils";
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
import { SETTINGS_PATH, SETTINGS_DIR } from "./paths";
|
|
2
|
+
import { fileExists } from "../markdown/io";
|
|
3
|
+
import { mkdirSync, writeFileSync } from "node:fs";
|
|
4
|
+
|
|
5
|
+
/**
 * Generate a minimal settings.json if it doesn't already exist.
 *
 * The file is a template in which every field is present but blank (empty
 * strings, zero numbers) for the user to fill in. Because it is meant to hold
 * a database password and API keys, it is created readable and writable by
 * the owner only.
 *
 * @returns `true` if a new file was created, `false` if a settings file was
 *   already present (including one that appeared between the existence check
 *   and the write).
 * @throws Any filesystem error other than "file already exists".
 */
export async function createDefaultSettings(): Promise<boolean> {
  if (await fileExists(SETTINGS_PATH)) {
    return false;
  }

  mkdirSync(SETTINGS_DIR, { recursive: true });

  // All fields present but empty — user fills in their values
  const defaults = {
    db: {
      path: "",
      remote: {
        host: "",
        port: 0,
        user: "",
        password: "",
        database: "",
        tenant: "",
      },
    },
    embed: {
      provider: "hash",
      baseURL: "",
      model: "",
      dimensions: 0,
      apiKey: "",
      apiKeyEnv: "",
    },
    llm: {
      baseURL: "",
      model: "",
      apiKey: "",
      apiKeyEnv: "",
    },
    extraction: {
      confidenceThreshold: 0.7,
    },
  };

  try {
    writeFileSync(SETTINGS_PATH, JSON.stringify(defaults, null, 2) + "\n", {
      encoding: "utf-8",
      // Owner-only permissions: the file is a home for secrets.
      mode: 0o600,
      // Exclusive create: never overwrite a file that appeared after the check above.
      flag: "wx",
    });
  } catch (err: unknown) {
    const alreadyExists =
      typeof err === "object" &&
      err !== null &&
      (err as { code?: unknown }).code === "EEXIST";
    if (alreadyExists) {
      return false;
    }
    throw err;
  }
  return true;
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import { homedir } from "node:os";
|
|
2
|
+
import { join, resolve } from "node:path";
|
|
3
|
+
|
|
4
|
+
/** Directory that holds ebrain's per-user files: `.ebrain` inside the home directory. */
export const SETTINGS_DIR = join(homedir(), ".ebrain");

/** Location of the settings file: `settings.json` inside {@link SETTINGS_DIR}. */
export const SETTINGS_PATH = join(SETTINGS_DIR, "settings.json");

/** Database path used when none is configured: `data/ebrain.db` inside {@link SETTINGS_DIR}. */
export const DEFAULT_DB_PATH = resolve(SETTINGS_DIR, "data", "ebrain.db");
|
|
12
|
+
|
|
13
|
+
/**
 * Resolve a path that may use the `~` home-directory shorthand.
 *
 * Only a bare `~` or a `~` immediately followed by a path separator (`~/…`,
 * `~\…`) refers to the current user's home directory. Any other path,
 * including names that merely begin with a tilde such as `~backup/db`, is
 * resolved against the current working directory like an ordinary path.
 *
 * @param p - Path as written by the user.
 * @returns An absolute path.
 */
export function expandTilde(p: string): string {
  const isHomeShorthand = p === "~" || p.startsWith("~/") || p.startsWith("~\\");
  if (isHomeShorthand) {
    // Drop the tilde and join the remainder onto the home directory.
    return join(homedir(), p.slice(1));
  }
  return resolve(p);
}
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { z } from "zod";
|
|
2
|
+
|
|
3
|
+
// ---------------------------------------------------------------------------
|
|
4
|
+
// Raw schema (matches settings.json structure)
|
|
5
|
+
// ---------------------------------------------------------------------------
|
|
6
|
+
|
|
7
|
+
/** Shape of `db.remote` in settings.json; every field may be omitted. */
export const RemoteDbSchema = z
  .object({
    host: z.string(),
    port: z.number(),
    user: z.string(),
    password: z.string(),
    database: z.string(),
    tenant: z.string(),
  })
  .partial();

/** Shape of `embed` in settings.json; every field may be omitted. */
export const EmbedSchema = z
  .object({
    provider: z.enum(["hash", "openai_compatible"]),
    baseURL: z.string(),
    model: z.string(),
    dimensions: z.number(),
    apiKey: z.string(),
    apiKeyEnv: z.string(),
  })
  .partial();

/** Shape of `llm` in settings.json; every field may be omitted. */
export const LLMSchema = z
  .object({
    baseURL: z.string(),
    model: z.string(),
    apiKey: z.string(),
    apiKeyEnv: z.string(),
  })
  .partial();

/**
 * Shape of the whole settings.json document. Every section, and every field
 * within a section, may be omitted; `extraction.confidenceThreshold` must lie
 * in the range 0..1 when present.
 */
export const SettingsSchema = z
  .object({
    db: z
      .object({
        path: z.string(),
        remote: RemoteDbSchema,
      })
      .partial(),
    embed: EmbedSchema,
    llm: LLMSchema,
    extraction: z
      .object({
        confidenceThreshold: z.number().min(0).max(1),
      })
      .partial(),
  })
  .partial();

/** Settings exactly as parsed from settings.json, before defaults and env values are merged in. */
export type RawSettings = z.infer<typeof SettingsSchema>;
|
|
49
|
+
|
|
50
|
+
// ---------------------------------------------------------------------------
|
|
51
|
+
// Resolved types (all values present after defaults + env merge)
|
|
52
|
+
// ---------------------------------------------------------------------------
|
|
53
|
+
|
|
54
|
+
/** Fully resolved configuration: every value is present. */
export interface ResolvedSettings {
  /** Path of the local database file. */
  dbPath: string;
  /** Remote database connection, or `null` when no remote is configured. */
  remote: ResolvedRemoteDb | null;
  /** Embedding configuration. */
  embed: ResolvedEmbed;
  /** LLM configuration. */
  llm: ResolvedLLM;
  /** Extraction tuning. */
  extraction: ResolvedExtraction;
}

/** Resolved extraction tuning. */
export interface ResolvedExtraction {
  confidenceThreshold: number;
}

/** Resolved connection details for a remote database. */
export interface ResolvedRemoteDb {
  host: string;
  port: number;
  user: string;
  password: string;
  database: string;
  tenant: string;
}

/** Resolved embedding configuration. */
export interface ResolvedEmbed {
  provider: "hash" | "openai_compatible";
  baseURL: string;
  model: string;
  dimensions: number;
  apiKey: string;
  /** Name of an environment variable associated with the API key. */
  apiKeyEnv: string;
}

/** Resolved LLM configuration. */
export interface ResolvedLLM {
  baseURL: string;
  model: string;
  apiKey: string;
  /** Name of an environment variable associated with the API key. */
  apiKeyEnv: string;
}

// ---------------------------------------------------------------------------
// Default values
// ---------------------------------------------------------------------------

/** Defaults for a remote connection; `host` has no default and is excluded from the type. */
export const DEFAULT_REMOTE: Omit<ResolvedRemoteDb, "host"> = {
  port: 3306,
  user: "root",
  password: "",
  database: "ebrain",
  tenant: "",
};

/** Embedding defaults; the default provider is `"hash"`. */
export const DEFAULT_EMBED: Omit<ResolvedEmbed, "provider"> & { provider: "hash" } = {
  provider: "hash",
  baseURL: "https://dashscope.aliyuncs.com/compatible-mode/v1",
  model: "text-embedding-v4",
  dimensions: 1024,
  apiKey: "",
  apiKeyEnv: "DASHSCOPE_API_KEY",
};

/** LLM defaults; note that `baseURL` defaults to an empty string. */
export const DEFAULT_LLM: ResolvedLLM = {
  baseURL: "",
  model: "qwen-plus",
  apiKey: "",
  apiKeyEnv: "DASHSCOPE_API_KEY",
};

/** Extraction defaults. */
export const DEFAULT_EXTRACTION: ResolvedExtraction = {
  confidenceThreshold: 0.7,
};
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
import { SETTINGS_PATH, SETTINGS_DIR, expandTilde, DEFAULT_DB_PATH } from "./paths";
|
|
2
|
+
import {
|
|
3
|
+
SettingsSchema,
|
|
4
|
+
DEFAULT_REMOTE,
|
|
5
|
+
DEFAULT_EMBED,
|
|
6
|
+
DEFAULT_LLM,
|
|
7
|
+
DEFAULT_EXTRACTION,
|
|
8
|
+
type RawSettings,
|
|
9
|
+
type ResolvedSettings,
|
|
10
|
+
type ResolvedRemoteDb,
|
|
11
|
+
type ResolvedEmbed,
|
|
12
|
+
type ResolvedLLM,
|
|
13
|
+
type ResolvedExtraction,
|
|
14
|
+
} from "./schema";
|
|
15
|
+
|
|
16
|
+
// ---------------------------------------------------------------------------
|
|
17
|
+
// Env abstraction for testability
|
|
18
|
+
// ---------------------------------------------------------------------------
|
|
19
|
+
|
|
20
|
+
/**
 * Read-only lookup of environment-style variables. Settings resolution goes
 * through this interface instead of `process.env` directly so tests can
 * supply their own values.
 */
export interface EnvSource {
  /** Look up the variable named `key`. */
  get(key: string): string | undefined;
}

/** Default source: reads straight from `process.env`. */
const defaultEnv: EnvSource = {
  get(key) {
    return process.env[key];
  },
};
|
|
25
|
+
|
|
26
|
+
// ---------------------------------------------------------------------------
|
|
27
|
+
// File I/O
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
/**
 * Read and JSON-decode the settings file at `SETTINGS_PATH`.
 *
 * @returns The decoded JSON value (not validated), or `null` when the file
 *   does not exist or does not contain valid JSON. A warning is logged in the
 *   invalid-JSON case.
 */
export async function readSettingsFile(): Promise<unknown | null> {
  // Loaded lazily at call time rather than at module load.
  const { fileExists, readTextFile } = await import("../markdown/io");

  const present = await fileExists(SETTINGS_PATH);
  if (!present) {
    return null;
  }

  const contents = await readTextFile(SETTINGS_PATH);
  let decoded: unknown;
  try {
    decoded = JSON.parse(contents);
  } catch {
    console.warn(
      `[ebrain] Failed to parse ${SETTINGS_PATH}, using defaults.`,
    );
    return null;
  }
  return decoded;
}
|
|
45
|
+
|
|
46
|
+
// ---------------------------------------------------------------------------
|
|
47
|
+
// Resolution: raw settings + env → resolved settings
|
|
48
|
+
// ---------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
/**
 * Merge parsed settings with environment variables and built-in defaults.
 *
 * For each remote-database field the precedence is: non-blank value from the
 * settings file, then the matching `EBRAIN_SEEKDB_*` environment variable,
 * then the default. Blank strings and non-positive ports in the settings file
 * count as "not set", so the placeholder values of a freshly generated
 * settings file do not mask environment variables.
 *
 * Remote mode is selected when a host is available from either source;
 * otherwise `remote` is `null`. In both modes `dbPath` is the configured path
 * with a leading `~` expanded, or `DEFAULT_DB_PATH` when no path is set.
 *
 * @param parsed - Settings as validated by `SettingsSchema`.
 * @param env - Environment lookup; defaults to the process environment.
 * @returns Settings with every value filled in.
 */
export function resolveSettings(
  parsed: RawSettings,
  env: EnvSource = defaultEnv,
): ResolvedSettings {
  const dbConf = parsed.db ?? {};
  const remoteConf = dbConf.remote ?? {};

  // Settings-file value when non-blank, else env var when non-blank, else fallback.
  const pick = (fileValue: string | undefined, envKey: string, fallback: string): string =>
    nonEmpty(fileValue, "") || nonEmpty(env.get(envKey), fallback);

  const dbPath = dbConf.path ? expandTilde(dbConf.path) : DEFAULT_DB_PATH;
  const embed = resolveEmbed(parsed.embed ?? {}, env);
  const llm = resolveLLM(parsed.llm ?? {}, env);
  const extraction = resolveExtraction(parsed.extraction ?? {}, env);

  const assemble = (remote: ResolvedRemoteDb | null): ResolvedSettings => ({
    dbPath,
    remote,
    embed,
    llm,
    extraction,
  });

  // Remote: settings → env → defaults
  const host = pick(remoteConf.host, "EBRAIN_SEEKDB_HOST", "");
  if (!host) {
    // Local mode
    return assemble(null);
  }

  // A port of 0 (or below) is a placeholder, not a usable port.
  let port = DEFAULT_REMOTE.port;
  if (typeof remoteConf.port === "number" && remoteConf.port > 0) {
    port = remoteConf.port;
  } else {
    const envPort = numOr(env.get("EBRAIN_SEEKDB_PORT"), DEFAULT_REMOTE.port);
    if (envPort > 0) {
      port = envPort;
    }
  }

  return assemble({
    host,
    port,
    user: pick(remoteConf.user, "EBRAIN_SEEKDB_USER", DEFAULT_REMOTE.user),
    password: pick(remoteConf.password, "EBRAIN_SEEKDB_PASSWORD", DEFAULT_REMOTE.password),
    database: pick(remoteConf.database, "EBRAIN_SEEKDB_DATABASE", DEFAULT_REMOTE.database),
    tenant: pick(remoteConf.tenant, "EBRAIN_SEEKDB_TENANT", ""),
  });
}
|
|
95
|
+
|
|
96
|
+
/**
 * Resolve the embedding configuration.
 *
 * For each field the precedence is: non-blank value from the settings file,
 * then the matching `EBRAIN_EMBED_*` environment variable, then
 * `DEFAULT_EMBED`. Blank strings and non-positive `dimensions` count as
 * "not set", so placeholder values do not mask environment variables or
 * defaults.
 *
 * @param conf - The `embed` section of the parsed settings.
 * @param env - Environment lookup; defaults to the process environment.
 * @returns Embedding configuration with every value filled in.
 * @throws Error when the resolved provider is neither `"hash"` nor
 *   `"openai_compatible"` (only possible via `EBRAIN_EMBED_PROVIDER`, since the
 *   settings file is validated by the schema).
 */
function resolveEmbed(
  conf: NonNullable<RawSettings["embed"]>,
  env: EnvSource = defaultEnv,
): ResolvedEmbed {
  // Settings-file value when non-blank, else env var when non-blank, else fallback.
  const pick = (fileValue: string | undefined, envKey: string, fallback: string): string =>
    nonEmpty(fileValue, "") || nonEmpty(env.get(envKey), fallback);

  const providerName = pick(
    conf.provider,
    "EBRAIN_EMBED_PROVIDER",
    DEFAULT_EMBED.provider,
  ).toLowerCase();
  let provider: ResolvedEmbed["provider"];
  switch (providerName) {
    case "hash":
    case "openai_compatible":
      provider = providerName;
      break;
    default:
      throw new Error(
        `[ebrain] invalid embed provider "${providerName}" (expected "hash" or "openai_compatible")`,
      );
  }

  // A dimension count of 0 (or below) is a placeholder, not a usable size.
  let dimensions = DEFAULT_EMBED.dimensions;
  if (typeof conf.dimensions === "number" && conf.dimensions > 0) {
    dimensions = conf.dimensions;
  } else {
    const envDimensions = numOr(env.get("EBRAIN_EMBED_DIMENSIONS"), DEFAULT_EMBED.dimensions);
    if (envDimensions > 0) {
      dimensions = envDimensions;
    }
  }

  return {
    provider,
    baseURL: pick(conf.baseURL, "EBRAIN_EMBED_BASE_URL", DEFAULT_EMBED.baseURL),
    model: pick(conf.model, "EBRAIN_EMBED_MODEL", DEFAULT_EMBED.model),
    dimensions,
    apiKey: pick(conf.apiKey, "EBRAIN_EMBED_API_KEY", ""),
    apiKeyEnv: pick(conf.apiKeyEnv, "EBRAIN_EMBED_API_KEY_ENV", DEFAULT_EMBED.apiKeyEnv),
  };
}
|
|
111
|
+
|
|
112
|
+
/**
 * Resolve the LLM configuration: each field is the trimmed settings-file
 * value, or the matching `DEFAULT_LLM` value when it is missing or blank.
 *
 * @param conf - The `llm` section of the parsed settings.
 * @param env - Accepted for signature parity with the other resolvers; this
 *   function does not read it.
 * @returns LLM configuration with every value filled in.
 */
function resolveLLM(
  conf: NonNullable<RawSettings["llm"]>,
  env: EnvSource = defaultEnv,
): ResolvedLLM {
  return {
    baseURL: nonEmpty(conf.baseURL, DEFAULT_LLM.baseURL),
    model: nonEmpty(conf.model, DEFAULT_LLM.model),
    apiKey: nonEmpty(conf.apiKey, DEFAULT_LLM.apiKey),
    apiKeyEnv: nonEmpty(conf.apiKeyEnv, DEFAULT_LLM.apiKeyEnv),
  };
}
|
|
122
|
+
|
|
123
|
+
/**
 * Resolve the extraction configuration.
 *
 * The confidence threshold comes from the settings file when present,
 * otherwise from `EBRAIN_CONFIDENCE_THRESHOLD`, otherwise from
 * `DEFAULT_EXTRACTION`. An environment value that does not parse as a number
 * falls back to the default instead of producing `NaN`. The result is clamped
 * to the range 0..1.
 *
 * @param conf - The `extraction` section of the parsed settings.
 * @param env - Environment lookup; defaults to the process environment.
 * @returns Extraction configuration with every value filled in.
 */
function resolveExtraction(
  conf: NonNullable<RawSettings["extraction"]>,
  env: EnvSource = defaultEnv,
): ResolvedExtraction {
  const fromFile = conf.confidenceThreshold;

  let candidate: number;
  if (typeof fromFile === "number") {
    candidate = fromFile;
  } else {
    const fromEnv = env.get("EBRAIN_CONFIDENCE_THRESHOLD");
    candidate = fromEnv ? Number.parseFloat(fromEnv) : Number.NaN;
  }

  // NaN would pass straight through Math.min/Math.max, so replace it first.
  const value = Number.isNaN(candidate)
    ? DEFAULT_EXTRACTION.confidenceThreshold
    : candidate;

  return { confidenceThreshold: Math.max(0, Math.min(1, value)) };
}
|
|
133
|
+
|
|
134
|
+
// ---------------------------------------------------------------------------
|
|
135
|
+
// Public load function
|
|
136
|
+
// ---------------------------------------------------------------------------
|
|
137
|
+
|
|
138
|
+
/**
 * Load the effective settings: read the settings file, validate it against
 * `SettingsSchema`, and merge it with environment values and defaults.
 *
 * A missing or unparseable file is treated as an empty settings object.
 *
 * @param env - Environment lookup; defaults to the process environment.
 * @returns Settings with every value filled in.
 * @throws Whatever `SettingsSchema.parse` throws when the file content does
 *   not match the schema.
 */
export async function loadSettings(
  env: EnvSource = defaultEnv,
): Promise<ResolvedSettings> {
  const fileContents = (await readSettingsFile()) ?? {};
  const validated = SettingsSchema.parse(fileContents);
  return resolveSettings(validated, env);
}
|
|
145
|
+
|
|
146
|
+
// ---------------------------------------------------------------------------
|
|
147
|
+
// Helpers
|
|
148
|
+
// ---------------------------------------------------------------------------
|
|
149
|
+
|
|
150
|
+
/**
 * Return `val` with surrounding whitespace removed, or `fallback` when `val`
 * is `undefined`, empty, or whitespace-only.
 */
function nonEmpty(val: string | undefined, fallback: string): string {
  if (val === undefined) {
    return fallback;
  }
  const cleaned = val.trim();
  return cleaned.length > 0 ? cleaned : fallback;
}
|
|
154
|
+
|
|
155
|
+
/**
 * Coerce a settings or environment value to a finite number.
 *
 * @param val - A number, a numeric string (surrounding whitespace allowed),
 *   or `undefined`.
 * @param fallback - Returned when `val` is `undefined`, a blank string, a
 *   string that is not a finite number, or a non-finite number.
 * @returns The numeric value, or `fallback`.
 */
function numOr(val: number | string | undefined, fallback: number): number {
  if (typeof val === "number") {
    return Number.isFinite(val) ? val : fallback;
  }
  if (typeof val === "string") {
    const text = val.trim();
    // Number("") is 0, so a blank string must be rejected before converting.
    if (text !== "") {
      const n = Number(text);
      if (Number.isFinite(n)) return n;
    }
  }
  return fallback;
}
|
|
163
|
+
|
|
164
|
+
// ---------------------------------------------------------------------------
|
|
165
|
+
// Re-export paths for backward compatibility
|
|
166
|
+
// ---------------------------------------------------------------------------
|
|
167
|
+
|
|
168
|
+
export { SETTINGS_DIR, SETTINGS_PATH, DEFAULT_DB_PATH, expandTilde };
|
package/src/db/client.ts
CHANGED
|
@@ -4,7 +4,7 @@ import { SeekdbAdminClient, SeekdbClient, DEFAULT_PORT, DEFAULT_USER } from "see
|
|
|
4
4
|
import type { Collection } from "seekdb";
|
|
5
5
|
import type { ResolvedSettings } from "../settings";
|
|
6
6
|
import { createBrainEmbeddingFunction } from "../ai/embed-factory";
|
|
7
|
-
import { DEFAULT_DB_NAME, PAGES_COLLECTION } from "../
|
|
7
|
+
import { DEFAULT_DB_NAME, PAGES_COLLECTION } from "../slug-utils";
|
|
8
8
|
import { SQL_SCHEMA } from "./schema";
|
|
9
9
|
import { DbError, wrapDbError, DbErrorCategory } from "./errors";
|
|
10
10
|
|
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { readFile, stat } from "node:fs/promises";
|
|
2
|
-
import { basename, extname, resolve } from "node:path";
|
|
1
|
+
import { readFile, readdir, stat } from "node:fs/promises";
|
|
2
|
+
import { basename, extname, join, resolve } from "node:path";
|
|
3
3
|
|
|
4
4
|
/** Supported document kinds for ingestion. */
|
|
5
5
|
export type DocumentKind =
|
|
@@ -484,3 +484,31 @@ function looksLikeText(bytes: Buffer): boolean {
|
|
|
484
484
|
}
|
|
485
485
|
return textLike / sample.length >= 0.95;
|
|
486
486
|
}
|
|
487
|
+
|
|
488
|
+
/** Extensions (lower-case, no leading dot) of the document formats eligible for ingestion. */
const DOCUMENT_EXTENSIONS = new Set(["pdf", "docx"]);

/**
 * Find every `.pdf` and `.docx` file at any depth below `dir`.
 *
 * The extension match ignores case. Only entries reported as regular files
 * are collected, and only entries reported as directories are descended into.
 *
 * @param dir - Directory to search; resolved to an absolute path first.
 * @returns Absolute paths of the matching files, sorted.
 */
export async function collectDocumentFiles(dir: string): Promise<string[]> {
  const matches: string[] = [];
  // Directories still to be scanned; used as a stack instead of recursing.
  const pending: string[] = [resolve(dir)];

  for (let current = pending.pop(); current !== undefined; current = pending.pop()) {
    const entries = await readdir(current, { withFileTypes: true });
    for (const entry of entries) {
      const fullPath = join(current, entry.name);
      if (entry.isDirectory()) {
        pending.push(fullPath);
        continue;
      }
      if (!entry.isFile()) {
        continue;
      }
      const extension = extname(entry.name).slice(1).toLowerCase();
      if (DOCUMENT_EXTENSIONS.has(extension)) {
        matches.push(fullPath);
      }
    }
  }

  // Visit order does not matter: the result is sorted before returning.
  matches.sort();
  return matches;
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { nowIso } from "../
|
|
1
|
+
import { nowIso } from "../slug-utils";
|
|
2
2
|
import type {
|
|
3
3
|
BrainStats,
|
|
4
4
|
PageRecord,
|
|
@@ -630,6 +630,48 @@ export class BrainRepository {
|
|
|
630
630
|
}
|
|
631
631
|
}
|
|
632
632
|
|
|
633
|
+
/**
 * Bring the stored tags of a page in line with the `tags` entry of its
 * frontmatter, so that `ebrain list --tag <tag>` finds pages created via
 * `put` with frontmatter tags.
 *
 * `frontmatter.tags` may be an array (non-string items are ignored) or a
 * single string. Tags missing from the store are added first; stored tags
 * absent from the frontmatter are then removed.
 *
 * NOTE(review): when the frontmatter yields no tags at all (key missing,
 * falsy, or an array without strings) the method returns early and leaves the
 * stored tags untouched — confirm this is intended for `tags: []`.
 *
 * @param slug - Slug of the page whose tags are synchronised.
 * @param frontmatter - Parsed frontmatter of the page.
 * @returns The number of tags added plus the number removed.
 */
async syncTagsFromFrontmatter(slug: string, frontmatter: Record<string, unknown>): Promise<number> {
  const rawTags = frontmatter.tags;
  if (!rawTags) {
    return 0;
  }

  let wanted: string[] = [];
  if (Array.isArray(rawTags)) {
    wanted = rawTags.filter((item): item is string => typeof item === "string");
  } else if (typeof rawTags === "string") {
    wanted = [rawTags];
  }
  if (wanted.length === 0) {
    return 0;
  }

  const stored = new Set(await this.tags(slug));
  const desired = new Set(wanted);

  // Work out both change lists up front; additions are applied before removals.
  const toAdd = [...desired].filter((tag) => !stored.has(tag));
  const toRemove = [...stored].filter((tag) => !desired.has(tag));

  for (const tag of toAdd) {
    await this.tag(slug, tag);
  }
  for (const tag of toRemove) {
    await this.untag(slug, tag);
  }

  return toAdd.length + toRemove.length;
}
|
|
674
|
+
|
|
633
675
|
async readRaw(slug: string, source?: string): Promise<unknown[]> {
|
|
634
676
|
try {
|
|
635
677
|
const params: unknown[] = [slug];
|