@rekal/mem 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/db-BMh1OP4b.mjs +294 -0
- package/dist/doc-DnYN4jAU.mjs +116 -0
- package/dist/embed-rUMZxqed.mjs +100 -0
- package/dist/fs-DMp26Byo.mjs +32 -0
- package/dist/glob.d.mts +27 -0
- package/dist/glob.mjs +132 -0
- package/dist/index.d.mts +1465 -0
- package/dist/index.mjs +351 -0
- package/dist/llama-CT3dc9Cn.mjs +75 -0
- package/dist/models-DFQSgBNr.mjs +77 -0
- package/dist/openai-j2_2GM4J.mjs +76 -0
- package/dist/progress-B1JdNapX.mjs +263 -0
- package/dist/query-VFSpErTB.mjs +125 -0
- package/dist/runtime.node-DlQPaGrV.mjs +35 -0
- package/dist/search-BllHWtZF.mjs +166 -0
- package/dist/store-DE7S35SS.mjs +137 -0
- package/dist/transformers-CJ3QA2PK.mjs +55 -0
- package/dist/uri-CehXVDGB.mjs +28 -0
- package/dist/util-DNyrmcA3.mjs +11 -0
- package/dist/vfs-CNQbkhsf.mjs +222 -0
- package/foo.ts +3 -0
- package/foo2.ts +20 -0
- package/package.json +61 -0
- package/src/context.ts +77 -0
- package/src/db.ts +464 -0
- package/src/doc.ts +163 -0
- package/src/embed/base.ts +122 -0
- package/src/embed/index.ts +67 -0
- package/src/embed/llama.ts +111 -0
- package/src/embed/models.ts +104 -0
- package/src/embed/openai.ts +95 -0
- package/src/embed/transformers.ts +81 -0
- package/src/frecency.ts +58 -0
- package/src/fs.ts +36 -0
- package/src/glob.ts +163 -0
- package/src/index.ts +15 -0
- package/src/log.ts +60 -0
- package/src/md.ts +204 -0
- package/src/progress.ts +121 -0
- package/src/query.ts +131 -0
- package/src/runtime.bun.ts +33 -0
- package/src/runtime.node.ts +47 -0
- package/src/search.ts +230 -0
- package/src/snippet.ts +248 -0
- package/src/sqlite.ts +1 -0
- package/src/store.ts +180 -0
- package/src/uri.ts +28 -0
- package/src/util.ts +21 -0
- package/src/vfs.ts +257 -0
- package/test/doc.test.ts +61 -0
- package/test/fixtures/ignore-test/keep.md +0 -0
- package/test/fixtures/ignore-test/skip.log +0 -0
- package/test/fixtures/ignore-test/sub/keep.md +0 -0
- package/test/fixtures/store/agent/index.md +9 -0
- package/test/fixtures/store/agent/lessons.md +21 -0
- package/test/fixtures/store/agent/soul.md +28 -0
- package/test/fixtures/store/agent/tools.md +25 -0
- package/test/fixtures/store/concepts/frecency.md +30 -0
- package/test/fixtures/store/concepts/index.md +9 -0
- package/test/fixtures/store/concepts/memory-coherence.md +33 -0
- package/test/fixtures/store/concepts/rag.md +27 -0
- package/test/fixtures/store/index.md +9 -0
- package/test/fixtures/store/projects/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/architecture.md +41 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/index.md +9 -0
- package/test/fixtures/store/projects/rekall-inc/decisions/no-military.md +20 -0
- package/test/fixtures/store/projects/rekall-inc/index.md +28 -0
- package/test/fixtures/store/user/family.md +13 -0
- package/test/fixtures/store/user/index.md +9 -0
- package/test/fixtures/store/user/preferences.md +29 -0
- package/test/fixtures/store/user/profile.md +29 -0
- package/test/fs.test.ts +15 -0
- package/test/glob.test.ts +190 -0
- package/test/md.test.ts +177 -0
- package/test/query.test.ts +105 -0
- package/test/uri.test.ts +46 -0
- package/test/util.test.ts +62 -0
- package/test/vfs.test.ts +164 -0
- package/tsconfig.json +3 -0
- package/tsdown.config.ts +8 -0
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
import type { Context } from "../context.ts"
|
|
2
|
+
import type {
|
|
3
|
+
EmbedderBackend,
|
|
4
|
+
EmbedderChunk,
|
|
5
|
+
EmbedderDoc,
|
|
6
|
+
EmbedderOptions,
|
|
7
|
+
ModelBackend,
|
|
8
|
+
ResolvedEmbedderModel,
|
|
9
|
+
ResolvedEmbedderOptions,
|
|
10
|
+
} from "./index.ts"
|
|
11
|
+
|
|
12
|
+
import { availableParallelism } from "node:os"
|
|
13
|
+
import { chunkMarkdown } from "../md.ts"
|
|
14
|
+
import { Progress } from "../progress.ts"
|
|
15
|
+
import { loadModel, parseModelUri, resolveModel } from "./models.ts"
|
|
16
|
+
|
|
17
|
+
const defaults = {
|
|
18
|
+
batchSize: 0, // 0 = auto
|
|
19
|
+
maxDims: 512,
|
|
20
|
+
maxTokens: 512,
|
|
21
|
+
useGpu: true,
|
|
22
|
+
} satisfies EmbedderOptions
|
|
23
|
+
|
|
24
|
+
const backend_defaults: Record<ModelBackend, EmbedderOptions> = {
|
|
25
|
+
llama: {},
|
|
26
|
+
openai: { batchSize: 50 },
|
|
27
|
+
transformers: {},
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
function isChunk(input: any): input is EmbedderChunk {
|
|
31
|
+
return typeof (input as EmbedderChunk | undefined)?.prompt === "string"
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export class Embedder {
|
|
35
|
+
#loading?: Promise<EmbedderBackend>
|
|
36
|
+
opts: ResolvedEmbedderOptions
|
|
37
|
+
model: ResolvedEmbedderModel
|
|
38
|
+
status = new Progress("embedder")
|
|
39
|
+
#backend?: EmbedderBackend
|
|
40
|
+
|
|
41
|
+
constructor(public ctx: Context) {
|
|
42
|
+
const opts = ctx.opts.embedder ?? {}
|
|
43
|
+
this.model = resolveModel(opts.model)
|
|
44
|
+
const { backend } = parseModelUri(this.model.uri)
|
|
45
|
+
const base = { ...defaults, ...backend_defaults[backend] }
|
|
46
|
+
const threads = Math.max(1, opts.threads ?? Math.min(8, availableParallelism() - 2))
|
|
47
|
+
this.opts = {
|
|
48
|
+
threads,
|
|
49
|
+
...base,
|
|
50
|
+
...opts,
|
|
51
|
+
model: this.model,
|
|
52
|
+
}
|
|
53
|
+
}
|
|
54
|
+
|
|
55
|
+
info() {
|
|
56
|
+
return parseModelUri(this.model.uri)
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
async backend() {
|
|
60
|
+
this.#loading ??= (async () => {
|
|
61
|
+
this.status.name = `Loading model \`${this.model.uri}\``
|
|
62
|
+
|
|
63
|
+
const t = setTimeout(() => {
|
|
64
|
+
this.opts.onProgress?.(this.status)
|
|
65
|
+
}, 500)
|
|
66
|
+
|
|
67
|
+
this.#backend = await loadModel({
|
|
68
|
+
logger: this.ctx,
|
|
69
|
+
opts: this.opts,
|
|
70
|
+
root: this.ctx.root,
|
|
71
|
+
status: this.status,
|
|
72
|
+
})
|
|
73
|
+
|
|
74
|
+
clearTimeout(t)
|
|
75
|
+
|
|
76
|
+
this.opts.maxTokens = Math.min(this.opts.maxTokens, this.#backend.maxTokens)
|
|
77
|
+
this.opts.maxDims = Math.min(this.opts.maxDims, this.#backend.dims)
|
|
78
|
+
if (this.opts.batchSize === 0) this.opts.batchSize = this.#backend.device === "gpu" ? 50 : 1
|
|
79
|
+
this.ctx.debug({
|
|
80
|
+
batchSize: this.opts.batchSize,
|
|
81
|
+
device: this.#backend.device,
|
|
82
|
+
threads: this.opts.threads,
|
|
83
|
+
useGpu: this.opts.useGpu,
|
|
84
|
+
})
|
|
85
|
+
this.status.stop()
|
|
86
|
+
return this.#backend
|
|
87
|
+
})()
|
|
88
|
+
return (this.#backend ??= await this.#loading)
|
|
89
|
+
}
|
|
90
|
+
|
|
91
|
+
transform(input: string | EmbedderDoc | EmbedderChunk): string {
|
|
92
|
+
if (isChunk(input)) return input.prompt
|
|
93
|
+
const { prompt } = this.model
|
|
94
|
+
return typeof input === "string" ? prompt.query(input) : prompt.document(input)
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
async embed(input: string | EmbedderDoc | EmbedderChunk): Promise<number[]>
|
|
98
|
+
async embed(input: (string | EmbedderDoc | EmbedderChunk)[]): Promise<number[][]>
|
|
99
|
+
async embed(
|
|
100
|
+
input: string | EmbedderDoc | EmbedderChunk | (string | EmbedderDoc | EmbedderChunk)[]
|
|
101
|
+
): Promise<number[][] | number[]> {
|
|
102
|
+
const single = !Array.isArray(input)
|
|
103
|
+
const todo = single ? [input] : input
|
|
104
|
+
const backend = await this.backend()
|
|
105
|
+
const ret = await backend.embed(todo.map((item) => this.transform(item)))
|
|
106
|
+
return single ? ret[0] : ret
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
async chunk(input: string | EmbedderDoc): Promise<EmbedderChunk[]>
|
|
110
|
+
async chunk(input: string | EmbedderDoc): Promise<EmbedderChunk[]> {
|
|
111
|
+
const backend = await this.backend()
|
|
112
|
+
const isQuery = typeof input === "string"
|
|
113
|
+
const fixed = this.transform(isQuery ? "" : { text: "", title: input.title })
|
|
114
|
+
const chunkText = isQuery ? input : input.text
|
|
115
|
+
const tokens = this.opts.maxTokens - backend.toks(fixed)
|
|
116
|
+
return chunkMarkdown(chunkText, backend, tokens).map((text, seq) => ({
|
|
117
|
+
prompt: isQuery ? this.transform(text) : this.transform({ text, title: input.title }),
|
|
118
|
+
seq,
|
|
119
|
+
text,
|
|
120
|
+
}))
|
|
121
|
+
}
|
|
122
|
+
}
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import type { SetOptional, Merge } from "type-fest"
|
|
2
|
+
import type { Logger } from "../log.ts"
|
|
3
|
+
import type { Progress } from "../progress.ts"
|
|
4
|
+
|
|
5
|
+
export * from "./base.ts"
|
|
6
|
+
|
|
7
|
+
/** Minimal interface for anything that can count model tokens in a string. */
export type TokenCounter = {
  toks(input: string): number
}

export type EmbedderDevice = "cpu" | "gpu" | "api"
/** Runtime contract implemented by every embedding backend (llama, openai, transformers). */
export interface EmbedderBackend extends TokenCounter {
  device: EmbedderDevice // what hardware/service this backend uses
  maxTokens: number // maximum number of tokens that can be embedded
  dims: number // dimensionality of the output vectors
  embed(input: string[]): Promise<number[][]>
}

/** A document to embed: body text plus an optional title. */
export type EmbedderDoc = {
  text: string
  title?: string
}

/** Per-model prompt templates applied to inputs before embedding. */
export type EmbedderPrompt = {
  document?: (doc: EmbedderDoc) => string
  query?: (query: string) => string
}

/** Declarative model configuration, keyed by URI in the model registry. */
export type EmbedderModel = {
  uri: string
  base?: string // base model to inherit options from (e.g. for ONNX variants)
  models?: string[] // models based on this configuration
  prompt?: EmbedderPrompt
  pooling?: "none" | "mean" | "cls" | "first_token" | "eos" | "last_token" // transformers-only
}

export type ModelBackend = "transformers" | "llama" | "openai"

/** Everything a backend needs at load time. */
export type EmbedderContext = {
  opts: ResolvedEmbedderOptions
  root: string
  status: Progress
  logger: Logger
}

export type EmbedderChunk = {
  seq: number
  text: string // raw chunk text (e.g. a passage from a document)
  prompt: string // the actual text sent to the model for embedding (e.g. with instructions or title added)
  embedding?: number[] // populated after embedding
}

export type EmbedderOptions = {
  batchSize?: number // Embed this many texts at a time. 0 = auto.
  model?: EmbedderModel | string
  maxTokens?: number // Chunk input into pieces of this many tokens or fewer (depending on the model).
  maxDims?: number // Truncate output to this many dimensions. Only supported by some backends.
  threads?: number // CPU threads for local inference. 0 = auto. Defaults to half available cores.
  useGpu?: boolean // Whether to use GPU for local inference if available. Defaults to true.
  onProgress?: (status: Progress) => void
}

/** EmbedderModel with prompt templates guaranteed present. */
export type ResolvedEmbedderModel = Merge<EmbedderModel, { prompt: Required<EmbedderPrompt> }>
/** EmbedderOptions with every field resolved; onProgress stays optional. */
export type ResolvedEmbedderOptions = Merge<
  SetOptional<Required<EmbedderOptions>, "onProgress">,
  { model: ResolvedEmbedderModel }
>
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
import type { Llama, LlamaEmbeddingContext, LlamaModel } from "node-llama-cpp"
|
|
2
|
+
import type { LogLevel } from "../log.ts"
|
|
3
|
+
import type { EmbedderBackend, EmbedderContext, EmbedderDevice } from "./index.ts"
|
|
4
|
+
|
|
5
|
+
import { LlamaLogLevel } from "node-llama-cpp"
|
|
6
|
+
import { availableParallelism } from "node:os"
|
|
7
|
+
import { join } from "pathe"
|
|
8
|
+
import { parseModelUri } from "./models.ts"
|
|
9
|
+
|
|
10
|
+
export class LlamaBackend implements EmbedderBackend {
|
|
11
|
+
static llama?: Llama
|
|
12
|
+
device: EmbedderDevice
|
|
13
|
+
maxTokens: number
|
|
14
|
+
dims: number
|
|
15
|
+
#contexts: LlamaEmbeddingContext[] = []
|
|
16
|
+
#model: LlamaModel
|
|
17
|
+
#poolSize: number
|
|
18
|
+
#threadsPerCtx: number
|
|
19
|
+
#ctx: EmbedderContext
|
|
20
|
+
|
|
21
|
+
private constructor(llama: Llama, model: LlamaModel, ctx: EmbedderContext) {
|
|
22
|
+
this.#model = model
|
|
23
|
+
this.#ctx = ctx
|
|
24
|
+
this.maxTokens = model.trainContextSize
|
|
25
|
+
this.dims = model.embeddingVectorSize
|
|
26
|
+
this.device = llama.gpu ? "gpu" : "cpu"
|
|
27
|
+
|
|
28
|
+
const useGpu = llama.gpu && ctx.opts.useGpu
|
|
29
|
+
const cores = availableParallelism()
|
|
30
|
+
const poolSize = useGpu
|
|
31
|
+
? Math.min(8, Math.max(1, Math.floor(cores / 4)))
|
|
32
|
+
: Math.min(8, Math.max(1, Math.floor(cores / 4)))
|
|
33
|
+
|
|
34
|
+
this.#threadsPerCtx = useGpu ? 0 : Math.max(1, Math.floor(ctx.opts.threads / poolSize))
|
|
35
|
+
this.#poolSize = poolSize
|
|
36
|
+
ctx.logger.debug({
|
|
37
|
+
cores,
|
|
38
|
+
gpu: llama.gpu,
|
|
39
|
+
poolSize,
|
|
40
|
+
threadsPerCtx: this.#threadsPerCtx,
|
|
41
|
+
})
|
|
42
|
+
}
|
|
43
|
+
|
|
44
|
+
static async load(this: void, ctx: EmbedderContext) {
|
|
45
|
+
const { model, variant } = parseModelUri(ctx.opts.model.uri)
|
|
46
|
+
const { getLlama, resolveModelFile } = await import("node-llama-cpp")
|
|
47
|
+
const modelsDir = join(ctx.root, "models")
|
|
48
|
+
const uri = `hf:${model}${variant ? `:${variant}` : ""}`
|
|
49
|
+
const modelPath = await resolveModelFile(uri, modelsDir)
|
|
50
|
+
|
|
51
|
+
LlamaBackend.llama ??= await getLlama({
|
|
52
|
+
gpu: ctx.opts.useGpu ? "auto" : false,
|
|
53
|
+
logLevel: LlamaLogLevel.error,
|
|
54
|
+
logger: (level, message) => {
|
|
55
|
+
const l = level as LogLevel
|
|
56
|
+
// oxlint-disable-next-line typescript/no-unnecessary-condition
|
|
57
|
+
;(ctx.logger[l] ?? ctx.logger.log)(level.toString(), message)
|
|
58
|
+
},
|
|
59
|
+
})
|
|
60
|
+
|
|
61
|
+
const lm = await LlamaBackend.llama.loadModel({
|
|
62
|
+
// the below makes GPU super slow
|
|
63
|
+
// defaultContextFlashAttention: true,
|
|
64
|
+
modelPath,
|
|
65
|
+
})
|
|
66
|
+
return new LlamaBackend(LlamaBackend.llama, lm, ctx)
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/** Get or create up to `count` embedding contexts */
|
|
70
|
+
private async acquire(count: number): Promise<LlamaEmbeddingContext[]> {
|
|
71
|
+
const needed = Math.min(count, this.#poolSize) - this.#contexts.length
|
|
72
|
+
for (let i = 0; i < needed; i++) {
|
|
73
|
+
try {
|
|
74
|
+
this.#ctx.logger.debug(
|
|
75
|
+
`Creating embedding context ${this.#contexts.length + 1}/${this.#poolSize}...`
|
|
76
|
+
)
|
|
77
|
+
this.#contexts.push(
|
|
78
|
+
// oxlint-disable-next-line no-await-in-loop
|
|
79
|
+
await this.#model.createEmbeddingContext({
|
|
80
|
+
contextSize: this.#ctx.opts.maxTokens,
|
|
81
|
+
threads: this.#threadsPerCtx,
|
|
82
|
+
})
|
|
83
|
+
)
|
|
84
|
+
} catch {
|
|
85
|
+
this.#ctx.logger.warn(
|
|
86
|
+
`Failed to create embedding context ${this.#contexts.length + 1}. Adjusting pool size down to ${this.#contexts.length}.`
|
|
87
|
+
)
|
|
88
|
+
// adjust pool size down if we fail to create contexts,
|
|
89
|
+
// which can happen if we run out of VRAM or hit some other resource limit
|
|
90
|
+
this.#poolSize = this.#contexts.length
|
|
91
|
+
break
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
return this.#contexts
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
async embed(texts: string[]): Promise<number[][]> {
|
|
98
|
+
const contexts = await this.acquire(texts.length)
|
|
99
|
+
return Promise.all(
|
|
100
|
+
texts.map((text, idx) =>
|
|
101
|
+
contexts[idx % contexts.length]
|
|
102
|
+
.getEmbeddingFor(text)
|
|
103
|
+
.then((embedding) => [...embedding.vector])
|
|
104
|
+
)
|
|
105
|
+
)
|
|
106
|
+
}
|
|
107
|
+
|
|
108
|
+
toks(input: string) {
|
|
109
|
+
return this.#model.tokenize(input).length
|
|
110
|
+
}
|
|
111
|
+
}
|
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import type { SetOptional } from "type-fest"
|
|
2
|
+
import type {
|
|
3
|
+
EmbedderBackend,
|
|
4
|
+
EmbedderContext,
|
|
5
|
+
EmbedderModel,
|
|
6
|
+
ModelBackend,
|
|
7
|
+
ResolvedEmbedderModel,
|
|
8
|
+
} from "./index.ts"
|
|
9
|
+
|
|
10
|
+
import { defu } from "defu"
|
|
11
|
+
|
|
12
|
+
export function parseModelUri(uri: string) {
|
|
13
|
+
const [backend, model, variant] = uri.split(":")
|
|
14
|
+
if (!backend || !model)
|
|
15
|
+
throw new Error(`Invalid model URI: ${uri}. Expected format "provider:model[:variant]"`)
|
|
16
|
+
return { backend, model, variant } as { model: string; backend: ModelBackend; variant?: string }
|
|
17
|
+
}
|
|
18
|
+
|
|
19
|
+
export function resolveModel(opts?: string | EmbedderModel): ResolvedEmbedderModel {
|
|
20
|
+
const uri = typeof opts === "string" ? opts : (opts?.uri ?? DEFAULTS.uri)
|
|
21
|
+
const options = typeof opts === "string" ? { uri } : (opts ?? { uri })
|
|
22
|
+
const base = MODELS[options.base ?? MODELS[uri]?.base ?? ""] ?? {}
|
|
23
|
+
const model = MODELS[uri] ?? {}
|
|
24
|
+
return defu(options, model, base, DEFAULTS) as ResolvedEmbedderModel
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
export function loadModel(ctx: EmbedderContext): Promise<EmbedderBackend> {
|
|
28
|
+
const { backend } = parseModelUri(ctx.opts.model.uri) as { backend: string }
|
|
29
|
+
if (backend === "transformers") {
|
|
30
|
+
return import("./transformers.ts").then(({ TransformersBackend }) =>
|
|
31
|
+
TransformersBackend.load(ctx)
|
|
32
|
+
)
|
|
33
|
+
} else if (backend === "llama") {
|
|
34
|
+
return import("./llama.ts").then(({ LlamaBackend }) => LlamaBackend.load(ctx))
|
|
35
|
+
} else if (backend === "openai") {
|
|
36
|
+
return import("./openai.ts").then(({ OpenAIBackend }) => OpenAIBackend.load(ctx))
|
|
37
|
+
} else {
|
|
38
|
+
throw new Error(`Unsupported model backend: ${String(backend)}`)
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
// Fallback model configuration used when a model has no registry entry.
const DEFAULTS: ResolvedEmbedderModel = {
  prompt: {
    document: (doc) => {
      // If the title isn't found near the start of the text,
      // prepend it to ensure it's included in the embedding.
      const title = doc.title?.trim()
      if (title?.length) {
        const idx = doc.text.indexOf(title)
        if (idx === -1 || idx > 10) return `${title}\n\n${doc.text}`
      }
      return doc.text
    },
    // Queries are embedded as-is by default.
    query: (query) => query,
  },
  uri: "transformers:Snowflake/snowflake-arctic-embed-s:q8",
}
|
|
58
|
+
|
|
59
|
+
// Model registry. Family entries (e.g. "BAAI", "snowflake") hold shared config
// that concrete models inherit via `base`; entries keyed by full URI override
// per-model settings. The loop below back-fills one entry per listed model URI.
const MODELS: Record<string, SetOptional<EmbedderModel, "uri">> = {
  BAAI: {
    models: [
      "transformers:BAAI/bge-large-en-v1.5", // 55.44%, 335M params, 304M active, 1024 dims, 512 context
      "transformers:BAAI/bge-base-en-v1.5", // 54.75%, 109M params, 86M active, 768 dims, 512 context
      "transformers:BAAI/bge-small-en-v1.5", // 53.86%, 33M params, 22M active, 512 dims, 512 context
    ],
    pooling: "cls",
    prompt: {
      query: (query) => `Represent this sentence for searching relevant passages: ${query}`,
    },
  },
  openai: {
    models: [
      "openai:text-embedding-3-small",
      "openai:text-embedding-3-large",
      "openai:text-embedding-ada-002",
    ],
  },
  snowflake: {
    models: [
      "transformers:Snowflake/snowflake-arctic-embed-m-v2.0:q8", // 58.41%, 305M params, 113M active, 768 dims, 8192 context
      "transformers:Snowflake/snowflake-arctic-embed-s:q8", // 54.85%, 33M params, 22M active, 384 dims, 512 context
      "transformers:Snowflake/snowflake-arctic-embed-xs:q8", // 52.65%, 23M params, 11M active, 384 dims, 512 context
      "llama:mradermacher/snowflake-arctic-embed-s-GGUF:Q4_K_M",
    ],
    pooling: "cls",
    prompt: {
      query: (query) => `Represent this sentence for searching relevant passages: ${query}`,
    },
  },
  "transformers:onnx-community/embeddinggemma-300m-ONNX:q8": {
    pooling: "mean",
    // 55.69%, 308M params, 106M active, 768 dims, 2048 context
    prompt: {
      document: (doc) => `title: ${doc.title ?? "none"} | ${doc.text}`,
      query: (query) => `task: search result | query: ${query}`,
    },
  },
}
|
|
99
|
+
|
|
100
|
+
for (const [base, options] of Object.entries(MODELS)) {
|
|
101
|
+
for (const model of options.models ?? []) {
|
|
102
|
+
MODELS[model] = { base, uri: model }
|
|
103
|
+
}
|
|
104
|
+
}
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import type { encode as Tokenize } from "gpt-tokenizer"
|
|
2
|
+
import type { EmbedderBackend, EmbedderContext } from "./index.ts"
|
|
3
|
+
|
|
4
|
+
import { parseModelUri } from "./models.ts"
|
|
5
|
+
|
|
6
|
+
const OPENAI_EMBEDDING_URL = "https://api.openai.com/v1/embeddings"

// Context sizes for known OpenAI embedding models.
// Models not listed here get their vector size probed at load time.
const MODEL_INFO: Record<string, { contextSize: number; vectorSize: number } | undefined> = {
  "text-embedding-3-large": { contextSize: 8191, vectorSize: 3072 },
  "text-embedding-3-small": { contextSize: 8191, vectorSize: 1536 },
  "text-embedding-3-ada-002": { contextSize: 8191, vectorSize: 1536 },
}
|
|
14
|
+
|
|
15
|
+
export class OpenAIBackend implements EmbedderBackend {
|
|
16
|
+
device = "api" as const
|
|
17
|
+
maxTokens: number
|
|
18
|
+
dims: number
|
|
19
|
+
#model: string
|
|
20
|
+
#apiKey: string
|
|
21
|
+
#tokenizer: typeof Tokenize
|
|
22
|
+
#ctx: EmbedderContext
|
|
23
|
+
|
|
24
|
+
// oxlint-disable-next-line max-params
|
|
25
|
+
private constructor(
|
|
26
|
+
model: string,
|
|
27
|
+
apiKey: string,
|
|
28
|
+
info: { contextSize: number; vectorSize: number },
|
|
29
|
+
tokenizer: typeof Tokenize,
|
|
30
|
+
ctx: EmbedderContext
|
|
31
|
+
) {
|
|
32
|
+
this.#model = model
|
|
33
|
+
this.#apiKey = apiKey
|
|
34
|
+
this.maxTokens = info.contextSize
|
|
35
|
+
this.dims = info.vectorSize
|
|
36
|
+
this.#tokenizer = tokenizer
|
|
37
|
+
this.#ctx = ctx
|
|
38
|
+
}
|
|
39
|
+
|
|
40
|
+
static async load(this: void, ctx: EmbedderContext): Promise<OpenAIBackend> {
|
|
41
|
+
const { model } = parseModelUri(ctx.opts.model.uri)
|
|
42
|
+
const apiKey = process.env.OPENAI_API_KEY
|
|
43
|
+
if (!apiKey)
|
|
44
|
+
throw new Error("Missing `OPENAI_API_KEY` environment variable for OpenAI embeddings.")
|
|
45
|
+
|
|
46
|
+
const { encode } = await import(`gpt-tokenizer/model/${model}`)
|
|
47
|
+
// Probe the model for dimensions if not in our known list
|
|
48
|
+
let info = MODEL_INFO[model]
|
|
49
|
+
if (!info) {
|
|
50
|
+
ctx.status.status = "probing model dimensions..."
|
|
51
|
+
const backend = new OpenAIBackend(
|
|
52
|
+
model,
|
|
53
|
+
apiKey,
|
|
54
|
+
{ contextSize: 8191, vectorSize: 0 },
|
|
55
|
+
encode,
|
|
56
|
+
ctx
|
|
57
|
+
)
|
|
58
|
+
const result = await backend.embed(["test"])
|
|
59
|
+
info = { contextSize: 8191, vectorSize: result[0].length }
|
|
60
|
+
}
|
|
61
|
+
|
|
62
|
+
return new OpenAIBackend(model, apiKey, info, encode, ctx)
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
async embed(texts: string[]): Promise<number[][]> {
|
|
66
|
+
const response = await fetch(OPENAI_EMBEDDING_URL, {
|
|
67
|
+
body: JSON.stringify({
|
|
68
|
+
dimensions: this.#ctx.opts.maxDims,
|
|
69
|
+
input: texts,
|
|
70
|
+
model: this.#model,
|
|
71
|
+
}),
|
|
72
|
+
headers: {
|
|
73
|
+
Authorization: `Bearer ${this.#apiKey}`,
|
|
74
|
+
"Content-Type": "application/json",
|
|
75
|
+
},
|
|
76
|
+
method: "POST",
|
|
77
|
+
})
|
|
78
|
+
|
|
79
|
+
if (!response.ok) {
|
|
80
|
+
const error = await response.text()
|
|
81
|
+
throw new Error(`OpenAI embedding API error (${response.status}): ${error}`)
|
|
82
|
+
}
|
|
83
|
+
|
|
84
|
+
const data = (await response.json()) as {
|
|
85
|
+
data: { embedding: number[]; index: number }[]
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
// Sort by index to maintain input order
|
|
89
|
+
return data.data.toSorted((a, b) => a.index - b.index).map((d) => d.embedding)
|
|
90
|
+
}
|
|
91
|
+
|
|
92
|
+
toks(input: string): number {
|
|
93
|
+
return this.#tokenizer(input).length
|
|
94
|
+
}
|
|
95
|
+
}
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
DataType,
|
|
3
|
+
FeatureExtractionPipeline,
|
|
4
|
+
ProgressInfo,
|
|
5
|
+
Tensor,
|
|
6
|
+
} from "@huggingface/transformers"
|
|
7
|
+
import type { EmbedderBackend, EmbedderContext, EmbedderDevice } from "./index.ts"
|
|
8
|
+
|
|
9
|
+
import { parseModelUri } from "./models.ts"
|
|
10
|
+
|
|
11
|
+
/** Embedding backend using @huggingface/transformers feature-extraction pipelines. */
export class TransformersBackend implements EmbedderBackend {
  device: EmbedderDevice = "cpu"
  maxTokens: number
  dims: number
  #pipeline: FeatureExtractionPipeline
  #ctx: EmbedderContext
  private normalize?: (tensor: Tensor) => Tensor // optional normalization function, e.g. for L2 normalization after truncation

  private constructor(pipeline: FeatureExtractionPipeline, ctx: EmbedderContext) {
    this.#pipeline = pipeline
    this.#ctx = ctx
    // Token/dimension limits come from the loaded model's config.
    this.maxTokens = pipeline.model.config.max_position_embeddings
    this.dims = (pipeline.model.config as { hidden_size?: number }).hidden_size ?? 0
    // presumably the ONNX session reports its execution device; fall back to "cpu"
    this.device = pipeline.model.sessions.model?.config?.device ?? "cpu"
  }

  /** Build the pipeline for the configured model and, when maxDims is smaller
   * than the native size, install a Matryoshka-style truncating normalizer. */
  static async load(this: void, ctx: EmbedderContext) {
    const parsed = parseModelUri(ctx.opts.model.uri)
    const { pipeline, layer_norm } = await import("@huggingface/transformers")

    const extractor = await pipeline("feature-extraction", parsed.model, {
      // device: "webgpu",
      dtype: (parsed.variant ?? "auto") as DataType, // URI variant selects quantization (e.g. q8)
      progress_callback: (event) => TransformersBackend.onProgress(ctx, event),
      session_options: { intraOpNumThreads: ctx.opts.threads },
    })
    const backend = new TransformersBackend(extractor, ctx)

    // Matryoshka: layer_norm → truncate
    const dims = ctx.opts.maxDims
    if (dims < backend.dims)
      backend.normalize = (output) =>
        layer_norm(output, [output.dims[1] ?? 0])
          // oxlint-disable-next-line unicorn/no-null
          .slice(null, [0, dims])
          .normalize(2, -1)

    return backend
  }

  /** Embed texts in one pipeline call.
   * When a custom normalizer is installed, the pipeline's own normalization is
   * disabled and the normalizer (layer_norm → truncate → L2) is applied instead. */
  async embed(texts: string[]): Promise<number[][]> {
    const output = await this.#pipeline(texts, {
      normalize: !this.normalize,
      pooling: this.#ctx.opts.model.pooling,
    })
    return (this.normalize?.(output) ?? output).tolist() as number[][]
  }

  /** Token count using the pipeline's tokenizer. */
  toks(input: string) {
    return this.#pipeline.tokenizer.tokenize(input).length
  }

  /** Route transformers.js download/load progress events into the Progress tree. */
  static onProgress(ctx: EmbedderContext, event: ProgressInfo) {
    if (event.status === "initiate") {
      ctx.status.child(event.name).child(event.file).status = event.status
    } else if (event.status === "download") {
      ctx.status.child(event.name).child(event.file).status = event.status
    } else if (event.status === "progress") {
      ctx.status.child(event.name).child(event.file).set({
        max: event.total,
        status: event.status,
        value: event.loaded,
      })
    } else if (event.status === "done") {
      ctx.status.child(event.name).child(event.file).set({ status: event.status }).stop()
    } else if (event.status === "ready") {
      ctx.status.name = `model \`${ctx.opts.model.uri}\` loaded`
      ctx.status.child(event.task).set({ status: event.status }).stop()
    }
  }
}
|
package/src/frecency.ts
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import type { Db, DocRow } from "./db.ts"
|
|
2
|
+
|
|
3
|
+
// Exponential decay frecency, based on:
|
|
4
|
+
// https://wiki.mozilla.org/User:Jesse/NewFrecency
|
|
5
|
+
// Ported from snacks.nvim picker frecency
|
|
6
|
+
|
|
7
|
+
// Decay parameters: a score halves every HALF_LIFE seconds.
const HALF_LIFE = 30 * 24 * 3600 // 30 days in seconds
const LAMBDA = Math.LN2 / HALF_LIFE // λ = ln(2) / half_life
const VISIT_VALUE = 1 // default points per visit
|
|
10
|
+
|
|
11
|
+
export class Frecency {
|
|
12
|
+
#db: Db
|
|
13
|
+
#now: number
|
|
14
|
+
|
|
15
|
+
constructor(db: Db, now = Date.now() / 1000) {
|
|
16
|
+
this.#db = db
|
|
17
|
+
this.#now = now
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
/** Convert a score into a deadline timestamp.
|
|
21
|
+
* deadline = now + ln(score) / λ */
|
|
22
|
+
toDeadline(score: number): number {
|
|
23
|
+
return this.#now + Math.log(score) / LAMBDA
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
/** Convert a deadline timestamp back into a current score.
|
|
27
|
+
* score = e^(λ * (deadline - now)) */
|
|
28
|
+
toScore(deadline: number): number {
|
|
29
|
+
return Math.exp(LAMBDA * (deadline - this.#now))
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
/** Get the current frecency score for a doc.
|
|
33
|
+
* If no deadline exists, estimates from updatedAt (a single visit at file mtime, decayed to now). */
|
|
34
|
+
get(doc: DocRow): number {
|
|
35
|
+
if (doc.deadline) return this.toScore(doc.deadline)
|
|
36
|
+
// Seed: treat updatedAt as a single past visit, decayed to now
|
|
37
|
+
// Since ln(1)/λ = 0 for VISIT_VALUE=1, deadline = timestamp
|
|
38
|
+
const ts = new Date(doc.updated_at).getTime() / 1000
|
|
39
|
+
return this.toScore(ts)
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
/** Record a visit — adds a decayed value to the doc's score.
|
|
43
|
+
* @param opts.value - points to add (default: 1)
|
|
44
|
+
* @param opts.ts - timestamp of the visit in seconds (default: now).
|
|
45
|
+
* Use file mtime for seeding from external edits. */
|
|
46
|
+
visit(doc: DocRow, opts?: { value?: number; ts?: number }) {
|
|
47
|
+
const value = opts?.value ?? VISIT_VALUE
|
|
48
|
+
const ts = opts?.ts ?? this.#now
|
|
49
|
+
const visitDeadline = ts + Math.log(value) / LAMBDA
|
|
50
|
+
const score = this.get(doc) + this.toScore(visitDeadline)
|
|
51
|
+
this.#db.setDeadline(doc.id, this.toDeadline(score))
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
/** Get frecency scores for multiple docs, sorted descending. */
|
|
55
|
+
rank(docs: DocRow[]): { doc: DocRow; score: number }[] {
|
|
56
|
+
return docs.map((doc) => ({ doc, score: this.get(doc) })).toSorted((a, b) => b.score - a.score)
|
|
57
|
+
}
|
|
58
|
+
}
|
package/src/fs.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { statSync, existsSync } from "node:fs"
|
|
2
|
+
import { stat } from "node:fs/promises"
|
|
3
|
+
import { homedir } from "node:os"
|
|
4
|
+
import { dirname, join, resolve } from "pathe"
|
|
5
|
+
|
|
6
|
+
export function sstat(path: string) {
|
|
7
|
+
try {
|
|
8
|
+
return statSync(path)
|
|
9
|
+
} catch {}
|
|
10
|
+
}
|
|
11
|
+
|
|
12
|
+
export async function astat(path: string) {
|
|
13
|
+
return await stat(path).catch(() => undefined)
|
|
14
|
+
}
|
|
15
|
+
|
|
16
|
+
export function findUp(root: string, name: string, stop?: string) {
|
|
17
|
+
let current = resolve(root)
|
|
18
|
+
// oxlint-disable-next-line typescript/no-unnecessary-condition
|
|
19
|
+
while (true) {
|
|
20
|
+
const check = join(current, name)
|
|
21
|
+
if (sstat(check)?.isFile()) return check
|
|
22
|
+
if (stop && existsSync(join(current, stop))) return // reached stop directory without finding the file
|
|
23
|
+
const next = dirname(current)
|
|
24
|
+
if (next === current) break // reached filesystem root
|
|
25
|
+
current = next
|
|
26
|
+
}
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
// Similar to path.resolve but also expands ~ to the user home directory
|
|
30
|
+
export function normPath(...paths: string[]) {
|
|
31
|
+
return resolve(...paths.map((p) => p.replace(/^~(?=\/|\\|$)/, homedir())))
|
|
32
|
+
}
|
|
33
|
+
|
|
34
|
+
export function gitRoot(path: string) {
|
|
35
|
+
return findUp(path, ".git")
|
|
36
|
+
}
|