@rekal/mem 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/dist/db-BMh1OP4b.mjs +294 -0
  2. package/dist/doc-DnYN4jAU.mjs +116 -0
  3. package/dist/embed-rUMZxqed.mjs +100 -0
  4. package/dist/fs-DMp26Byo.mjs +32 -0
  5. package/dist/glob.d.mts +27 -0
  6. package/dist/glob.mjs +132 -0
  7. package/dist/index.d.mts +1465 -0
  8. package/dist/index.mjs +351 -0
  9. package/dist/llama-CT3dc9Cn.mjs +75 -0
  10. package/dist/models-DFQSgBNr.mjs +77 -0
  11. package/dist/openai-j2_2GM4J.mjs +76 -0
  12. package/dist/progress-B1JdNapX.mjs +263 -0
  13. package/dist/query-VFSpErTB.mjs +125 -0
  14. package/dist/runtime.node-DlQPaGrV.mjs +35 -0
  15. package/dist/search-BllHWtZF.mjs +166 -0
  16. package/dist/store-DE7S35SS.mjs +137 -0
  17. package/dist/transformers-CJ3QA2PK.mjs +55 -0
  18. package/dist/uri-CehXVDGB.mjs +28 -0
  19. package/dist/util-DNyrmcA3.mjs +11 -0
  20. package/dist/vfs-CNQbkhsf.mjs +222 -0
  21. package/foo.ts +3 -0
  22. package/foo2.ts +20 -0
  23. package/package.json +61 -0
  24. package/src/context.ts +77 -0
  25. package/src/db.ts +464 -0
  26. package/src/doc.ts +163 -0
  27. package/src/embed/base.ts +122 -0
  28. package/src/embed/index.ts +67 -0
  29. package/src/embed/llama.ts +111 -0
  30. package/src/embed/models.ts +104 -0
  31. package/src/embed/openai.ts +95 -0
  32. package/src/embed/transformers.ts +81 -0
  33. package/src/frecency.ts +58 -0
  34. package/src/fs.ts +36 -0
  35. package/src/glob.ts +163 -0
  36. package/src/index.ts +15 -0
  37. package/src/log.ts +60 -0
  38. package/src/md.ts +204 -0
  39. package/src/progress.ts +121 -0
  40. package/src/query.ts +131 -0
  41. package/src/runtime.bun.ts +33 -0
  42. package/src/runtime.node.ts +47 -0
  43. package/src/search.ts +230 -0
  44. package/src/snippet.ts +248 -0
  45. package/src/sqlite.ts +1 -0
  46. package/src/store.ts +180 -0
  47. package/src/uri.ts +28 -0
  48. package/src/util.ts +21 -0
  49. package/src/vfs.ts +257 -0
  50. package/test/doc.test.ts +61 -0
  51. package/test/fixtures/ignore-test/keep.md +0 -0
  52. package/test/fixtures/ignore-test/skip.log +0 -0
  53. package/test/fixtures/ignore-test/sub/keep.md +0 -0
  54. package/test/fixtures/store/agent/index.md +9 -0
  55. package/test/fixtures/store/agent/lessons.md +21 -0
  56. package/test/fixtures/store/agent/soul.md +28 -0
  57. package/test/fixtures/store/agent/tools.md +25 -0
  58. package/test/fixtures/store/concepts/frecency.md +30 -0
  59. package/test/fixtures/store/concepts/index.md +9 -0
  60. package/test/fixtures/store/concepts/memory-coherence.md +33 -0
  61. package/test/fixtures/store/concepts/rag.md +27 -0
  62. package/test/fixtures/store/index.md +9 -0
  63. package/test/fixtures/store/projects/index.md +9 -0
  64. package/test/fixtures/store/projects/rekall-inc/architecture.md +41 -0
  65. package/test/fixtures/store/projects/rekall-inc/decisions/index.md +9 -0
  66. package/test/fixtures/store/projects/rekall-inc/decisions/no-military.md +20 -0
  67. package/test/fixtures/store/projects/rekall-inc/index.md +28 -0
  68. package/test/fixtures/store/user/family.md +13 -0
  69. package/test/fixtures/store/user/index.md +9 -0
  70. package/test/fixtures/store/user/preferences.md +29 -0
  71. package/test/fixtures/store/user/profile.md +29 -0
  72. package/test/fs.test.ts +15 -0
  73. package/test/glob.test.ts +190 -0
  74. package/test/md.test.ts +177 -0
  75. package/test/query.test.ts +105 -0
  76. package/test/uri.test.ts +46 -0
  77. package/test/util.test.ts +62 -0
  78. package/test/vfs.test.ts +164 -0
  79. package/tsconfig.json +3 -0
  80. package/tsdown.config.ts +8 -0
@@ -0,0 +1,122 @@
1
+ import type { Context } from "../context.ts"
2
+ import type {
3
+ EmbedderBackend,
4
+ EmbedderChunk,
5
+ EmbedderDoc,
6
+ EmbedderOptions,
7
+ ModelBackend,
8
+ ResolvedEmbedderModel,
9
+ ResolvedEmbedderOptions,
10
+ } from "./index.ts"
11
+
12
+ import { availableParallelism } from "node:os"
13
+ import { chunkMarkdown } from "../md.ts"
14
+ import { Progress } from "../progress.ts"
15
+ import { loadModel, parseModelUri, resolveModel } from "./models.ts"
16
+
17
// Baseline options shared by every embedding backend; per-backend overrides
// are layered on top via `backend_defaults` in the Embedder constructor.
const defaults = {
  batchSize: 0, // 0 = auto
  maxDims: 512,
  maxTokens: 512,
  useGpu: true,
} satisfies EmbedderOptions
23
+
24
// Per-backend option overrides, merged over `defaults` (see Embedder constructor).
// OpenAI gets a larger batch since each request is a network round-trip.
const backend_defaults: Record<ModelBackend, EmbedderOptions> = {
  llama: {},
  openai: { batchSize: 50 },
  transformers: {},
}
29
+
30
+ function isChunk(input: any): input is EmbedderChunk {
31
+ return typeof (input as EmbedderChunk | undefined)?.prompt === "string"
32
+ }
33
+
34
+ export class Embedder {
35
+ #loading?: Promise<EmbedderBackend>
36
+ opts: ResolvedEmbedderOptions
37
+ model: ResolvedEmbedderModel
38
+ status = new Progress("embedder")
39
+ #backend?: EmbedderBackend
40
+
41
+ constructor(public ctx: Context) {
42
+ const opts = ctx.opts.embedder ?? {}
43
+ this.model = resolveModel(opts.model)
44
+ const { backend } = parseModelUri(this.model.uri)
45
+ const base = { ...defaults, ...backend_defaults[backend] }
46
+ const threads = Math.max(1, opts.threads ?? Math.min(8, availableParallelism() - 2))
47
+ this.opts = {
48
+ threads,
49
+ ...base,
50
+ ...opts,
51
+ model: this.model,
52
+ }
53
+ }
54
+
55
+ info() {
56
+ return parseModelUri(this.model.uri)
57
+ }
58
+
59
+ async backend() {
60
+ this.#loading ??= (async () => {
61
+ this.status.name = `Loading model \`${this.model.uri}\``
62
+
63
+ const t = setTimeout(() => {
64
+ this.opts.onProgress?.(this.status)
65
+ }, 500)
66
+
67
+ this.#backend = await loadModel({
68
+ logger: this.ctx,
69
+ opts: this.opts,
70
+ root: this.ctx.root,
71
+ status: this.status,
72
+ })
73
+
74
+ clearTimeout(t)
75
+
76
+ this.opts.maxTokens = Math.min(this.opts.maxTokens, this.#backend.maxTokens)
77
+ this.opts.maxDims = Math.min(this.opts.maxDims, this.#backend.dims)
78
+ if (this.opts.batchSize === 0) this.opts.batchSize = this.#backend.device === "gpu" ? 50 : 1
79
+ this.ctx.debug({
80
+ batchSize: this.opts.batchSize,
81
+ device: this.#backend.device,
82
+ threads: this.opts.threads,
83
+ useGpu: this.opts.useGpu,
84
+ })
85
+ this.status.stop()
86
+ return this.#backend
87
+ })()
88
+ return (this.#backend ??= await this.#loading)
89
+ }
90
+
91
+ transform(input: string | EmbedderDoc | EmbedderChunk): string {
92
+ if (isChunk(input)) return input.prompt
93
+ const { prompt } = this.model
94
+ return typeof input === "string" ? prompt.query(input) : prompt.document(input)
95
+ }
96
+
97
+ async embed(input: string | EmbedderDoc | EmbedderChunk): Promise<number[]>
98
+ async embed(input: (string | EmbedderDoc | EmbedderChunk)[]): Promise<number[][]>
99
+ async embed(
100
+ input: string | EmbedderDoc | EmbedderChunk | (string | EmbedderDoc | EmbedderChunk)[]
101
+ ): Promise<number[][] | number[]> {
102
+ const single = !Array.isArray(input)
103
+ const todo = single ? [input] : input
104
+ const backend = await this.backend()
105
+ const ret = await backend.embed(todo.map((item) => this.transform(item)))
106
+ return single ? ret[0] : ret
107
+ }
108
+
109
+ async chunk(input: string | EmbedderDoc): Promise<EmbedderChunk[]>
110
+ async chunk(input: string | EmbedderDoc): Promise<EmbedderChunk[]> {
111
+ const backend = await this.backend()
112
+ const isQuery = typeof input === "string"
113
+ const fixed = this.transform(isQuery ? "" : { text: "", title: input.title })
114
+ const chunkText = isQuery ? input : input.text
115
+ const tokens = this.opts.maxTokens - backend.toks(fixed)
116
+ return chunkMarkdown(chunkText, backend, tokens).map((text, seq) => ({
117
+ prompt: isQuery ? this.transform(text) : this.transform({ text, title: input.title }),
118
+ seq,
119
+ text,
120
+ }))
121
+ }
122
+ }
@@ -0,0 +1,67 @@
1
import type { SetOptional, Merge } from "type-fest"
import type { Logger } from "../log.ts"
import type { Progress } from "../progress.ts"

export * from "./base.ts"

/** Anything that can count tokens for a string. */
export type TokenCounter = {
  toks(input: string): number
}

export type EmbedderDevice = "cpu" | "gpu" | "api"

/** Runtime contract every embedding backend implements. */
export interface EmbedderBackend extends TokenCounter {
  device: EmbedderDevice // what hardware/service this backend uses
  maxTokens: number // maximum number of tokens that can be embedded
  dims: number // dimensionality of the output vectors
  embed(input: string[]): Promise<number[][]>
}

/** A document to embed: raw text plus an optional title. */
export type EmbedderDoc = {
  text: string
  title?: string
}

/** Model-specific prompt templates applied before embedding. */
export type EmbedderPrompt = {
  document?: (doc: EmbedderDoc) => string
  query?: (query: string) => string
}

export type EmbedderModel = {
  uri: string
  base?: string // base model to inherit options from (e.g. for ONNX variants)
  models?: string[] // models based on this configuration
  prompt?: EmbedderPrompt
  pooling?: "none" | "mean" | "cls" | "first_token" | "eos" | "last_token" // transformers-only
}

export type ModelBackend = "transformers" | "llama" | "openai"

/** Everything a backend needs at load time (see models.ts `loadModel`). */
export type EmbedderContext = {
  opts: ResolvedEmbedderOptions
  root: string
  status: Progress
  logger: Logger
}

export type EmbedderChunk = {
  seq: number
  text: string // raw chunk text (e.g. a passage from a document)
  prompt: string // the actual text sent to the model for embedding (e.g. with instructions or title added)
  embedding?: number[] // populated after embedding
}

export type EmbedderOptions = {
  batchSize?: number // Embed this many texts at a time.
  model?: EmbedderModel | string
  maxTokens?: number // Chunk input into pieces of this many tokens or fewer (depending on the model).
  maxDims?: number // Truncate output to this many dimensions. Only supported by some backends.
  threads?: number // CPU threads for local inference. 0 = auto. Defaults to min(8, available cores - 2).
  useGpu?: boolean // Whether to use GPU for local inference if available. Defaults to true.
  onProgress?: (status: Progress) => void
}

/** EmbedderModel with prompt templates guaranteed to be present. */
export type ResolvedEmbedderModel = Merge<EmbedderModel, { prompt: Required<EmbedderPrompt> }>
/** EmbedderOptions after defaults are applied; only onProgress stays optional. */
export type ResolvedEmbedderOptions = Merge<
  SetOptional<Required<EmbedderOptions>, "onProgress">,
  { model: ResolvedEmbedderModel }
>
@@ -0,0 +1,111 @@
1
+ import type { Llama, LlamaEmbeddingContext, LlamaModel } from "node-llama-cpp"
2
+ import type { LogLevel } from "../log.ts"
3
+ import type { EmbedderBackend, EmbedderContext, EmbedderDevice } from "./index.ts"
4
+
5
+ import { LlamaLogLevel } from "node-llama-cpp"
6
+ import { availableParallelism } from "node:os"
7
+ import { join } from "pathe"
8
+ import { parseModelUri } from "./models.ts"
9
+
10
+ export class LlamaBackend implements EmbedderBackend {
11
+ static llama?: Llama
12
+ device: EmbedderDevice
13
+ maxTokens: number
14
+ dims: number
15
+ #contexts: LlamaEmbeddingContext[] = []
16
+ #model: LlamaModel
17
+ #poolSize: number
18
+ #threadsPerCtx: number
19
+ #ctx: EmbedderContext
20
+
21
+ private constructor(llama: Llama, model: LlamaModel, ctx: EmbedderContext) {
22
+ this.#model = model
23
+ this.#ctx = ctx
24
+ this.maxTokens = model.trainContextSize
25
+ this.dims = model.embeddingVectorSize
26
+ this.device = llama.gpu ? "gpu" : "cpu"
27
+
28
+ const useGpu = llama.gpu && ctx.opts.useGpu
29
+ const cores = availableParallelism()
30
+ const poolSize = useGpu
31
+ ? Math.min(8, Math.max(1, Math.floor(cores / 4)))
32
+ : Math.min(8, Math.max(1, Math.floor(cores / 4)))
33
+
34
+ this.#threadsPerCtx = useGpu ? 0 : Math.max(1, Math.floor(ctx.opts.threads / poolSize))
35
+ this.#poolSize = poolSize
36
+ ctx.logger.debug({
37
+ cores,
38
+ gpu: llama.gpu,
39
+ poolSize,
40
+ threadsPerCtx: this.#threadsPerCtx,
41
+ })
42
+ }
43
+
44
+ static async load(this: void, ctx: EmbedderContext) {
45
+ const { model, variant } = parseModelUri(ctx.opts.model.uri)
46
+ const { getLlama, resolveModelFile } = await import("node-llama-cpp")
47
+ const modelsDir = join(ctx.root, "models")
48
+ const uri = `hf:${model}${variant ? `:${variant}` : ""}`
49
+ const modelPath = await resolveModelFile(uri, modelsDir)
50
+
51
+ LlamaBackend.llama ??= await getLlama({
52
+ gpu: ctx.opts.useGpu ? "auto" : false,
53
+ logLevel: LlamaLogLevel.error,
54
+ logger: (level, message) => {
55
+ const l = level as LogLevel
56
+ // oxlint-disable-next-line typescript/no-unnecessary-condition
57
+ ;(ctx.logger[l] ?? ctx.logger.log)(level.toString(), message)
58
+ },
59
+ })
60
+
61
+ const lm = await LlamaBackend.llama.loadModel({
62
+ // the below makes GPU super slow
63
+ // defaultContextFlashAttention: true,
64
+ modelPath,
65
+ })
66
+ return new LlamaBackend(LlamaBackend.llama, lm, ctx)
67
+ }
68
+
69
+ /** Get or create up to `count` embedding contexts */
70
+ private async acquire(count: number): Promise<LlamaEmbeddingContext[]> {
71
+ const needed = Math.min(count, this.#poolSize) - this.#contexts.length
72
+ for (let i = 0; i < needed; i++) {
73
+ try {
74
+ this.#ctx.logger.debug(
75
+ `Creating embedding context ${this.#contexts.length + 1}/${this.#poolSize}...`
76
+ )
77
+ this.#contexts.push(
78
+ // oxlint-disable-next-line no-await-in-loop
79
+ await this.#model.createEmbeddingContext({
80
+ contextSize: this.#ctx.opts.maxTokens,
81
+ threads: this.#threadsPerCtx,
82
+ })
83
+ )
84
+ } catch {
85
+ this.#ctx.logger.warn(
86
+ `Failed to create embedding context ${this.#contexts.length + 1}. Adjusting pool size down to ${this.#contexts.length}.`
87
+ )
88
+ // adjust pool size down if we fail to create contexts,
89
+ // which can happen if we run out of VRAM or hit some other resource limit
90
+ this.#poolSize = this.#contexts.length
91
+ break
92
+ }
93
+ }
94
+ return this.#contexts
95
+ }
96
+
97
+ async embed(texts: string[]): Promise<number[][]> {
98
+ const contexts = await this.acquire(texts.length)
99
+ return Promise.all(
100
+ texts.map((text, idx) =>
101
+ contexts[idx % contexts.length]
102
+ .getEmbeddingFor(text)
103
+ .then((embedding) => [...embedding.vector])
104
+ )
105
+ )
106
+ }
107
+
108
+ toks(input: string) {
109
+ return this.#model.tokenize(input).length
110
+ }
111
+ }
@@ -0,0 +1,104 @@
1
+ import type { SetOptional } from "type-fest"
2
+ import type {
3
+ EmbedderBackend,
4
+ EmbedderContext,
5
+ EmbedderModel,
6
+ ModelBackend,
7
+ ResolvedEmbedderModel,
8
+ } from "./index.ts"
9
+
10
+ import { defu } from "defu"
11
+
12
+ export function parseModelUri(uri: string) {
13
+ const [backend, model, variant] = uri.split(":")
14
+ if (!backend || !model)
15
+ throw new Error(`Invalid model URI: ${uri}. Expected format "provider:model[:variant]"`)
16
+ return { backend, model, variant } as { model: string; backend: ModelBackend; variant?: string }
17
+ }
18
+
19
+ export function resolveModel(opts?: string | EmbedderModel): ResolvedEmbedderModel {
20
+ const uri = typeof opts === "string" ? opts : (opts?.uri ?? DEFAULTS.uri)
21
+ const options = typeof opts === "string" ? { uri } : (opts ?? { uri })
22
+ const base = MODELS[options.base ?? MODELS[uri]?.base ?? ""] ?? {}
23
+ const model = MODELS[uri] ?? {}
24
+ return defu(options, model, base, DEFAULTS) as ResolvedEmbedderModel
25
+ }
26
+
27
+ export function loadModel(ctx: EmbedderContext): Promise<EmbedderBackend> {
28
+ const { backend } = parseModelUri(ctx.opts.model.uri) as { backend: string }
29
+ if (backend === "transformers") {
30
+ return import("./transformers.ts").then(({ TransformersBackend }) =>
31
+ TransformersBackend.load(ctx)
32
+ )
33
+ } else if (backend === "llama") {
34
+ return import("./llama.ts").then(({ LlamaBackend }) => LlamaBackend.load(ctx))
35
+ } else if (backend === "openai") {
36
+ return import("./openai.ts").then(({ OpenAIBackend }) => OpenAIBackend.load(ctx))
37
+ } else {
38
+ throw new Error(`Unsupported model backend: ${String(backend)}`)
39
+ }
40
+ }
41
+
42
// Fallback model configuration; individual MODELS entries override these.
const DEFAULTS: ResolvedEmbedderModel = {
  prompt: {
    document: (doc) => {
      // If the title isn't found near the start of the text,
      // prepend it to ensure it's included in the embedding.
      const title = doc.title?.trim()
      if (title?.length) {
        const idx = doc.text.indexOf(title)
        if (idx === -1 || idx > 10) return `${title}\n\n${doc.text}`
      }
      return doc.text
    },
    query: (query) => query,
  },
  // Default model when none is configured.
  uri: "transformers:Snowflake/snowflake-arctic-embed-s:q8",
}
58
+
59
// Registry of known model configurations, keyed by family name or full URI.
// Entries under a family key act as a `base` for each URI in `models`
// (aliases are registered by the loop below this table). The trailing
// per-model comments are the author's stats annotations (score/params/dims/context).
const MODELS: Record<string, SetOptional<EmbedderModel, "uri">> = {
  BAAI: {
    models: [
      "transformers:BAAI/bge-large-en-v1.5", // 55.44%, 335M params, 304M active, 1024 dims, 512 context
      "transformers:BAAI/bge-base-en-v1.5", // 54.75%, 109M params, 86M active, 768 dims, 512 context
      "transformers:BAAI/bge-small-en-v1.5", // 53.86%, 33M params, 22M active, 512 dims, 512 context
    ],
    pooling: "cls",
    prompt: {
      query: (query) => `Represent this sentence for searching relevant passages: ${query}`,
    },
  },
  openai: {
    models: [
      "openai:text-embedding-3-small",
      "openai:text-embedding-3-large",
      "openai:text-embedding-ada-002",
    ],
  },
  snowflake: {
    models: [
      "transformers:Snowflake/snowflake-arctic-embed-m-v2.0:q8", // 58.41%, 305M params, 113M active, 768 dims, 8192 context
      "transformers:Snowflake/snowflake-arctic-embed-s:q8", // 54.85%, 33M params, 22M active, 384 dims, 512 context
      "transformers:Snowflake/snowflake-arctic-embed-xs:q8", // 52.65%, 23M params, 11M active, 384 dims, 512 context
      "llama:mradermacher/snowflake-arctic-embed-s-GGUF:Q4_K_M",
    ],
    pooling: "cls",
    prompt: {
      query: (query) => `Represent this sentence for searching relevant passages: ${query}`,
    },
  },
  "transformers:onnx-community/embeddinggemma-300m-ONNX:q8": {
    pooling: "mean",
    // 55.69%, 308M params, 106M active, 768 dims, 2048 context
    prompt: {
      document: (doc) => `title: ${doc.title ?? "none"} | ${doc.text}`,
      query: (query) => `task: search result | query: ${query}`,
    },
  },
}
99
+
100
// Register each listed model URI as its own MODELS entry, linking it back to
// the family entry via `base` so resolveModel can inherit its options.
for (const [base, options] of Object.entries(MODELS)) {
  for (const model of options.models ?? []) {
    MODELS[model] = { base, uri: model }
  }
}
@@ -0,0 +1,95 @@
1
+ import type { encode as Tokenize } from "gpt-tokenizer"
2
+ import type { EmbedderBackend, EmbedderContext } from "./index.ts"
3
+
4
+ import { parseModelUri } from "./models.ts"
5
+
6
// Endpoint for OpenAI's embeddings API (POST).
const OPENAI_EMBEDDING_URL = "https://api.openai.com/v1/embeddings"

// Context sizes for known OpenAI embedding models
// (unknown models are probed at load time instead).
const MODEL_INFO: Record<string, { contextSize: number; vectorSize: number } | undefined> = {
  "text-embedding-3-large": { contextSize: 8191, vectorSize: 3072 },
  "text-embedding-3-small": { contextSize: 8191, vectorSize: 1536 },
  "text-embedding-ada-002": { contextSize: 8191, vectorSize: 1536 },
}
14
+
15
+ export class OpenAIBackend implements EmbedderBackend {
16
+ device = "api" as const
17
+ maxTokens: number
18
+ dims: number
19
+ #model: string
20
+ #apiKey: string
21
+ #tokenizer: typeof Tokenize
22
+ #ctx: EmbedderContext
23
+
24
+ // oxlint-disable-next-line max-params
25
+ private constructor(
26
+ model: string,
27
+ apiKey: string,
28
+ info: { contextSize: number; vectorSize: number },
29
+ tokenizer: typeof Tokenize,
30
+ ctx: EmbedderContext
31
+ ) {
32
+ this.#model = model
33
+ this.#apiKey = apiKey
34
+ this.maxTokens = info.contextSize
35
+ this.dims = info.vectorSize
36
+ this.#tokenizer = tokenizer
37
+ this.#ctx = ctx
38
+ }
39
+
40
+ static async load(this: void, ctx: EmbedderContext): Promise<OpenAIBackend> {
41
+ const { model } = parseModelUri(ctx.opts.model.uri)
42
+ const apiKey = process.env.OPENAI_API_KEY
43
+ if (!apiKey)
44
+ throw new Error("Missing `OPENAI_API_KEY` environment variable for OpenAI embeddings.")
45
+
46
+ const { encode } = await import(`gpt-tokenizer/model/${model}`)
47
+ // Probe the model for dimensions if not in our known list
48
+ let info = MODEL_INFO[model]
49
+ if (!info) {
50
+ ctx.status.status = "probing model dimensions..."
51
+ const backend = new OpenAIBackend(
52
+ model,
53
+ apiKey,
54
+ { contextSize: 8191, vectorSize: 0 },
55
+ encode,
56
+ ctx
57
+ )
58
+ const result = await backend.embed(["test"])
59
+ info = { contextSize: 8191, vectorSize: result[0].length }
60
+ }
61
+
62
+ return new OpenAIBackend(model, apiKey, info, encode, ctx)
63
+ }
64
+
65
+ async embed(texts: string[]): Promise<number[][]> {
66
+ const response = await fetch(OPENAI_EMBEDDING_URL, {
67
+ body: JSON.stringify({
68
+ dimensions: this.#ctx.opts.maxDims,
69
+ input: texts,
70
+ model: this.#model,
71
+ }),
72
+ headers: {
73
+ Authorization: `Bearer ${this.#apiKey}`,
74
+ "Content-Type": "application/json",
75
+ },
76
+ method: "POST",
77
+ })
78
+
79
+ if (!response.ok) {
80
+ const error = await response.text()
81
+ throw new Error(`OpenAI embedding API error (${response.status}): ${error}`)
82
+ }
83
+
84
+ const data = (await response.json()) as {
85
+ data: { embedding: number[]; index: number }[]
86
+ }
87
+
88
+ // Sort by index to maintain input order
89
+ return data.data.toSorted((a, b) => a.index - b.index).map((d) => d.embedding)
90
+ }
91
+
92
+ toks(input: string): number {
93
+ return this.#tokenizer(input).length
94
+ }
95
+ }
@@ -0,0 +1,81 @@
1
+ import type {
2
+ DataType,
3
+ FeatureExtractionPipeline,
4
+ ProgressInfo,
5
+ Tensor,
6
+ } from "@huggingface/transformers"
7
+ import type { EmbedderBackend, EmbedderContext, EmbedderDevice } from "./index.ts"
8
+
9
+ import { parseModelUri } from "./models.ts"
10
+
11
/** Embedding backend built on @huggingface/transformers feature-extraction pipelines. */
export class TransformersBackend implements EmbedderBackend {
  device: EmbedderDevice = "cpu"
  maxTokens: number
  dims: number
  #pipeline: FeatureExtractionPipeline
  #ctx: EmbedderContext
  private normalize?: (tensor: Tensor) => Tensor // optional normalization function, e.g. for L2 normalization after truncation

  private constructor(pipeline: FeatureExtractionPipeline, ctx: EmbedderContext) {
    this.#pipeline = pipeline
    this.#ctx = ctx
    this.maxTokens = pipeline.model.config.max_position_embeddings
    // hidden_size is not in the typed config, hence the cast; 0 if absent.
    this.dims = (pipeline.model.config as { hidden_size?: number }).hidden_size ?? 0
    // NOTE(review): reads transformers.js internals (`sessions`) to detect the
    // execution device — may break across library versions; verify on upgrade.
    this.device = pipeline.model.sessions.model?.config?.device ?? "cpu"
  }

  /**
   * Build the pipeline for the parsed model (variant selects the dtype, e.g.
   * "q8") and, when maxDims is below the model's native dimensionality, set up
   * Matryoshka-style truncation of the output vectors.
   */
  static async load(this: void, ctx: EmbedderContext) {
    const parsed = parseModelUri(ctx.opts.model.uri)
    const { pipeline, layer_norm } = await import("@huggingface/transformers")

    const extractor = await pipeline("feature-extraction", parsed.model, {
      // device: "webgpu",
      dtype: (parsed.variant ?? "auto") as DataType,
      progress_callback: (event) => TransformersBackend.onProgress(ctx, event),
      session_options: { intraOpNumThreads: ctx.opts.threads },
    })
    const backend = new TransformersBackend(extractor, ctx)

    // Matryoshka: layer_norm → truncate
    const dims = ctx.opts.maxDims
    if (dims < backend.dims)
      backend.normalize = (output) =>
        layer_norm(output, [output.dims[1] ?? 0])
          // oxlint-disable-next-line unicorn/no-null
          .slice(null, [0, dims])
          .normalize(2, -1)

    return backend
  }

  /**
   * Embed texts through the pipeline. When a truncating `normalize` is set,
   * the pipeline's own normalization is disabled and ours is applied instead.
   */
  async embed(texts: string[]): Promise<number[][]> {
    const output = await this.#pipeline(texts, {
      normalize: !this.normalize,
      pooling: this.#ctx.opts.model.pooling,
    })
    return (this.normalize?.(output) ?? output).tolist() as number[][]
  }

  toks(input: string) {
    return this.#pipeline.tokenizer.tokenize(input).length
  }

  /** Route transformers.js download/load progress events into the Progress tree. */
  static onProgress(ctx: EmbedderContext, event: ProgressInfo) {
    if (event.status === "initiate") {
      ctx.status.child(event.name).child(event.file).status = event.status
    } else if (event.status === "download") {
      ctx.status.child(event.name).child(event.file).status = event.status
    } else if (event.status === "progress") {
      ctx.status.child(event.name).child(event.file).set({
        max: event.total,
        status: event.status,
        value: event.loaded,
      })
    } else if (event.status === "done") {
      ctx.status.child(event.name).child(event.file).set({ status: event.status }).stop()
    } else if (event.status === "ready") {
      ctx.status.name = `model \`${ctx.opts.model.uri}\` loaded`
      ctx.status.child(event.task).set({ status: event.status }).stop()
    }
  }
}
@@ -0,0 +1,58 @@
1
+ import type { Db, DocRow } from "./db.ts"
2
+
3
+ // Exponential decay frecency, based on:
4
+ // https://wiki.mozilla.org/User:Jesse/NewFrecency
5
+ // Ported from snacks.nvim picker frecency
6
+
7
+ const HALF_LIFE = 30 * 24 * 3600 // 30 days in seconds
8
+ const LAMBDA = Math.LN2 / HALF_LIFE // λ = ln(2) / half_life
9
+ const VISIT_VALUE = 1 // default points per visit
10
+
11
+ export class Frecency {
12
+ #db: Db
13
+ #now: number
14
+
15
+ constructor(db: Db, now = Date.now() / 1000) {
16
+ this.#db = db
17
+ this.#now = now
18
+ }
19
+
20
+ /** Convert a score into a deadline timestamp.
21
+ * deadline = now + ln(score) / λ */
22
+ toDeadline(score: number): number {
23
+ return this.#now + Math.log(score) / LAMBDA
24
+ }
25
+
26
+ /** Convert a deadline timestamp back into a current score.
27
+ * score = e^(λ * (deadline - now)) */
28
+ toScore(deadline: number): number {
29
+ return Math.exp(LAMBDA * (deadline - this.#now))
30
+ }
31
+
32
+ /** Get the current frecency score for a doc.
33
+ * If no deadline exists, estimates from updatedAt (a single visit at file mtime, decayed to now). */
34
+ get(doc: DocRow): number {
35
+ if (doc.deadline) return this.toScore(doc.deadline)
36
+ // Seed: treat updatedAt as a single past visit, decayed to now
37
+ // Since ln(1)/λ = 0 for VISIT_VALUE=1, deadline = timestamp
38
+ const ts = new Date(doc.updated_at).getTime() / 1000
39
+ return this.toScore(ts)
40
+ }
41
+
42
+ /** Record a visit — adds a decayed value to the doc's score.
43
+ * @param opts.value - points to add (default: 1)
44
+ * @param opts.ts - timestamp of the visit in seconds (default: now).
45
+ * Use file mtime for seeding from external edits. */
46
+ visit(doc: DocRow, opts?: { value?: number; ts?: number }) {
47
+ const value = opts?.value ?? VISIT_VALUE
48
+ const ts = opts?.ts ?? this.#now
49
+ const visitDeadline = ts + Math.log(value) / LAMBDA
50
+ const score = this.get(doc) + this.toScore(visitDeadline)
51
+ this.#db.setDeadline(doc.id, this.toDeadline(score))
52
+ }
53
+
54
+ /** Get frecency scores for multiple docs, sorted descending. */
55
+ rank(docs: DocRow[]): { doc: DocRow; score: number }[] {
56
+ return docs.map((doc) => ({ doc, score: this.get(doc) })).toSorted((a, b) => b.score - a.score)
57
+ }
58
+ }
package/src/fs.ts ADDED
@@ -0,0 +1,36 @@
1
+ import { statSync, existsSync } from "node:fs"
2
+ import { stat } from "node:fs/promises"
3
+ import { homedir } from "node:os"
4
+ import { dirname, join, resolve } from "pathe"
5
+
6
+ export function sstat(path: string) {
7
+ try {
8
+ return statSync(path)
9
+ } catch {}
10
+ }
11
+
12
+ export async function astat(path: string) {
13
+ return await stat(path).catch(() => undefined)
14
+ }
15
+
16
+ export function findUp(root: string, name: string, stop?: string) {
17
+ let current = resolve(root)
18
+ // oxlint-disable-next-line typescript/no-unnecessary-condition
19
+ while (true) {
20
+ const check = join(current, name)
21
+ if (sstat(check)?.isFile()) return check
22
+ if (stop && existsSync(join(current, stop))) return // reached stop directory without finding the file
23
+ const next = dirname(current)
24
+ if (next === current) break // reached filesystem root
25
+ current = next
26
+ }
27
+ }
28
+
29
+ // Similar to path.resolve but also expands ~ to the user home directory
30
+ export function normPath(...paths: string[]) {
31
+ return resolve(...paths.map((p) => p.replace(/^~(?=\/|\\|$)/, homedir())))
32
+ }
33
+
34
+ export function gitRoot(path: string) {
35
+ return findUp(path, ".git")
36
+ }