@loreai/core 0.17.1 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/bun/agents-file.d.ts +4 -0
- package/dist/bun/agents-file.d.ts.map +1 -1
- package/dist/bun/config.d.ts +2 -0
- package/dist/bun/config.d.ts.map +1 -1
- package/dist/bun/curator.d.ts +45 -0
- package/dist/bun/curator.d.ts.map +1 -1
- package/dist/bun/data-dir.d.ts +18 -0
- package/dist/bun/data-dir.d.ts.map +1 -0
- package/dist/bun/db.d.ts +85 -0
- package/dist/bun/db.d.ts.map +1 -1
- package/dist/bun/distillation.d.ts +2 -13
- package/dist/bun/distillation.d.ts.map +1 -1
- package/dist/bun/embedding-vendor.d.ts +22 -38
- package/dist/bun/embedding-vendor.d.ts.map +1 -1
- package/dist/bun/embedding-worker-types.d.ts +17 -12
- package/dist/bun/embedding-worker-types.d.ts.map +1 -1
- package/dist/bun/embedding-worker.d.ts +9 -2
- package/dist/bun/embedding-worker.d.ts.map +1 -1
- package/dist/bun/embedding-worker.js +38864 -33
- package/dist/bun/embedding-worker.js.map +4 -4
- package/dist/bun/embedding.d.ts +35 -23
- package/dist/bun/embedding.d.ts.map +1 -1
- package/dist/bun/gradient.d.ts +17 -1
- package/dist/bun/gradient.d.ts.map +1 -1
- package/dist/bun/import/detect.d.ts +14 -0
- package/dist/bun/import/detect.d.ts.map +1 -0
- package/dist/bun/import/extract.d.ts +43 -0
- package/dist/bun/import/extract.d.ts.map +1 -0
- package/dist/bun/import/history.d.ts +40 -0
- package/dist/bun/import/history.d.ts.map +1 -0
- package/dist/bun/import/index.d.ts +17 -0
- package/dist/bun/import/index.d.ts.map +1 -0
- package/dist/bun/import/providers/aider.d.ts +2 -0
- package/dist/bun/import/providers/aider.d.ts.map +1 -0
- package/dist/bun/import/providers/claude-code.d.ts +2 -0
- package/dist/bun/import/providers/claude-code.d.ts.map +1 -0
- package/dist/bun/import/providers/cline.d.ts +2 -0
- package/dist/bun/import/providers/cline.d.ts.map +1 -0
- package/dist/bun/import/providers/codex.d.ts +2 -0
- package/dist/bun/import/providers/codex.d.ts.map +1 -0
- package/dist/bun/import/providers/continue.d.ts +2 -0
- package/dist/bun/import/providers/continue.d.ts.map +1 -0
- package/dist/bun/import/providers/index.d.ts +19 -0
- package/dist/bun/import/providers/index.d.ts.map +1 -0
- package/dist/bun/import/providers/opencode.d.ts +2 -0
- package/dist/bun/import/providers/opencode.d.ts.map +1 -0
- package/dist/bun/import/providers/pi.d.ts +2 -0
- package/dist/bun/import/providers/pi.d.ts.map +1 -0
- package/dist/bun/import/types.d.ts +82 -0
- package/dist/bun/import/types.d.ts.map +1 -0
- package/dist/bun/index.d.ts +5 -2
- package/dist/bun/index.d.ts.map +1 -1
- package/dist/bun/index.js +3150 -439
- package/dist/bun/index.js.map +4 -4
- package/dist/bun/instruction-detect.d.ts +66 -0
- package/dist/bun/instruction-detect.d.ts.map +1 -0
- package/dist/bun/log.d.ts +9 -0
- package/dist/bun/log.d.ts.map +1 -1
- package/dist/bun/ltm.d.ts +139 -5
- package/dist/bun/ltm.d.ts.map +1 -1
- package/dist/bun/pattern-extract.d.ts +7 -0
- package/dist/bun/pattern-extract.d.ts.map +1 -1
- package/dist/bun/prompt.d.ts +1 -1
- package/dist/bun/prompt.d.ts.map +1 -1
- package/dist/bun/recall.d.ts.map +1 -1
- package/dist/bun/search.d.ts +5 -3
- package/dist/bun/search.d.ts.map +1 -1
- package/dist/bun/session-limiter.d.ts +26 -0
- package/dist/bun/session-limiter.d.ts.map +1 -0
- package/dist/bun/temporal.d.ts +2 -0
- package/dist/bun/temporal.d.ts.map +1 -1
- package/dist/bun/types.d.ts +1 -1
- package/dist/node/agents-file.d.ts +4 -0
- package/dist/node/agents-file.d.ts.map +1 -1
- package/dist/node/config.d.ts +2 -0
- package/dist/node/config.d.ts.map +1 -1
- package/dist/node/curator.d.ts +45 -0
- package/dist/node/curator.d.ts.map +1 -1
- package/dist/node/data-dir.d.ts +18 -0
- package/dist/node/data-dir.d.ts.map +1 -0
- package/dist/node/db.d.ts +85 -0
- package/dist/node/db.d.ts.map +1 -1
- package/dist/node/distillation.d.ts +2 -13
- package/dist/node/distillation.d.ts.map +1 -1
- package/dist/node/embedding-vendor.d.ts +22 -38
- package/dist/node/embedding-vendor.d.ts.map +1 -1
- package/dist/node/embedding-worker-types.d.ts +17 -12
- package/dist/node/embedding-worker-types.d.ts.map +1 -1
- package/dist/node/embedding-worker.d.ts +9 -2
- package/dist/node/embedding-worker.d.ts.map +1 -1
- package/dist/node/embedding-worker.js +38864 -33
- package/dist/node/embedding-worker.js.map +4 -4
- package/dist/node/embedding.d.ts +35 -23
- package/dist/node/embedding.d.ts.map +1 -1
- package/dist/node/gradient.d.ts +17 -1
- package/dist/node/gradient.d.ts.map +1 -1
- package/dist/node/import/detect.d.ts +14 -0
- package/dist/node/import/detect.d.ts.map +1 -0
- package/dist/node/import/extract.d.ts +43 -0
- package/dist/node/import/extract.d.ts.map +1 -0
- package/dist/node/import/history.d.ts +40 -0
- package/dist/node/import/history.d.ts.map +1 -0
- package/dist/node/import/index.d.ts +17 -0
- package/dist/node/import/index.d.ts.map +1 -0
- package/dist/node/import/providers/aider.d.ts +2 -0
- package/dist/node/import/providers/aider.d.ts.map +1 -0
- package/dist/node/import/providers/claude-code.d.ts +2 -0
- package/dist/node/import/providers/claude-code.d.ts.map +1 -0
- package/dist/node/import/providers/cline.d.ts +2 -0
- package/dist/node/import/providers/cline.d.ts.map +1 -0
- package/dist/node/import/providers/codex.d.ts +2 -0
- package/dist/node/import/providers/codex.d.ts.map +1 -0
- package/dist/node/import/providers/continue.d.ts +2 -0
- package/dist/node/import/providers/continue.d.ts.map +1 -0
- package/dist/node/import/providers/index.d.ts +19 -0
- package/dist/node/import/providers/index.d.ts.map +1 -0
- package/dist/node/import/providers/opencode.d.ts +2 -0
- package/dist/node/import/providers/opencode.d.ts.map +1 -0
- package/dist/node/import/providers/pi.d.ts +2 -0
- package/dist/node/import/providers/pi.d.ts.map +1 -0
- package/dist/node/import/types.d.ts +82 -0
- package/dist/node/import/types.d.ts.map +1 -0
- package/dist/node/index.d.ts +5 -2
- package/dist/node/index.d.ts.map +1 -1
- package/dist/node/index.js +3150 -439
- package/dist/node/index.js.map +4 -4
- package/dist/node/instruction-detect.d.ts +66 -0
- package/dist/node/instruction-detect.d.ts.map +1 -0
- package/dist/node/log.d.ts +9 -0
- package/dist/node/log.d.ts.map +1 -1
- package/dist/node/ltm.d.ts +139 -5
- package/dist/node/ltm.d.ts.map +1 -1
- package/dist/node/pattern-extract.d.ts +7 -0
- package/dist/node/pattern-extract.d.ts.map +1 -1
- package/dist/node/prompt.d.ts +1 -1
- package/dist/node/prompt.d.ts.map +1 -1
- package/dist/node/recall.d.ts.map +1 -1
- package/dist/node/search.d.ts +5 -3
- package/dist/node/search.d.ts.map +1 -1
- package/dist/node/session-limiter.d.ts +26 -0
- package/dist/node/session-limiter.d.ts.map +1 -0
- package/dist/node/temporal.d.ts +2 -0
- package/dist/node/temporal.d.ts.map +1 -1
- package/dist/node/types.d.ts +1 -1
- package/dist/types/agents-file.d.ts +4 -0
- package/dist/types/agents-file.d.ts.map +1 -1
- package/dist/types/config.d.ts +2 -0
- package/dist/types/config.d.ts.map +1 -1
- package/dist/types/curator.d.ts +45 -0
- package/dist/types/curator.d.ts.map +1 -1
- package/dist/types/data-dir.d.ts +18 -0
- package/dist/types/data-dir.d.ts.map +1 -0
- package/dist/types/db.d.ts +85 -0
- package/dist/types/db.d.ts.map +1 -1
- package/dist/types/distillation.d.ts +2 -13
- package/dist/types/distillation.d.ts.map +1 -1
- package/dist/types/embedding-vendor.d.ts +22 -38
- package/dist/types/embedding-vendor.d.ts.map +1 -1
- package/dist/types/embedding-worker-types.d.ts +17 -12
- package/dist/types/embedding-worker-types.d.ts.map +1 -1
- package/dist/types/embedding-worker.d.ts +9 -2
- package/dist/types/embedding-worker.d.ts.map +1 -1
- package/dist/types/embedding.d.ts +35 -23
- package/dist/types/embedding.d.ts.map +1 -1
- package/dist/types/gradient.d.ts +17 -1
- package/dist/types/gradient.d.ts.map +1 -1
- package/dist/types/import/detect.d.ts +14 -0
- package/dist/types/import/detect.d.ts.map +1 -0
- package/dist/types/import/extract.d.ts +43 -0
- package/dist/types/import/extract.d.ts.map +1 -0
- package/dist/types/import/history.d.ts +40 -0
- package/dist/types/import/history.d.ts.map +1 -0
- package/dist/types/import/index.d.ts +17 -0
- package/dist/types/import/index.d.ts.map +1 -0
- package/dist/types/import/providers/aider.d.ts +2 -0
- package/dist/types/import/providers/aider.d.ts.map +1 -0
- package/dist/types/import/providers/claude-code.d.ts +2 -0
- package/dist/types/import/providers/claude-code.d.ts.map +1 -0
- package/dist/types/import/providers/cline.d.ts +2 -0
- package/dist/types/import/providers/cline.d.ts.map +1 -0
- package/dist/types/import/providers/codex.d.ts +2 -0
- package/dist/types/import/providers/codex.d.ts.map +1 -0
- package/dist/types/import/providers/continue.d.ts +2 -0
- package/dist/types/import/providers/continue.d.ts.map +1 -0
- package/dist/types/import/providers/index.d.ts +19 -0
- package/dist/types/import/providers/index.d.ts.map +1 -0
- package/dist/types/import/providers/opencode.d.ts +2 -0
- package/dist/types/import/providers/opencode.d.ts.map +1 -0
- package/dist/types/import/providers/pi.d.ts +2 -0
- package/dist/types/import/providers/pi.d.ts.map +1 -0
- package/dist/types/import/types.d.ts +82 -0
- package/dist/types/import/types.d.ts.map +1 -0
- package/dist/types/index.d.ts +5 -2
- package/dist/types/index.d.ts.map +1 -1
- package/dist/types/instruction-detect.d.ts +66 -0
- package/dist/types/instruction-detect.d.ts.map +1 -0
- package/dist/types/log.d.ts +9 -0
- package/dist/types/log.d.ts.map +1 -1
- package/dist/types/ltm.d.ts +139 -5
- package/dist/types/ltm.d.ts.map +1 -1
- package/dist/types/pattern-extract.d.ts +7 -0
- package/dist/types/pattern-extract.d.ts.map +1 -1
- package/dist/types/prompt.d.ts +1 -1
- package/dist/types/prompt.d.ts.map +1 -1
- package/dist/types/recall.d.ts.map +1 -1
- package/dist/types/search.d.ts +5 -3
- package/dist/types/search.d.ts.map +1 -1
- package/dist/types/session-limiter.d.ts +26 -0
- package/dist/types/session-limiter.d.ts.map +1 -0
- package/dist/types/temporal.d.ts +2 -0
- package/dist/types/temporal.d.ts.map +1 -1
- package/dist/types/types.d.ts +1 -1
- package/package.json +3 -4
- package/src/agents-file.ts +41 -13
- package/src/config.ts +31 -18
- package/src/curator.ts +163 -75
- package/src/data-dir.ts +76 -0
- package/src/db.ts +457 -11
- package/src/distillation.ts +65 -16
- package/src/embedding-vendor.ts +23 -40
- package/src/embedding-worker-types.ts +19 -11
- package/src/embedding-worker.ts +111 -47
- package/src/embedding.ts +224 -174
- package/src/gradient.ts +192 -75
- package/src/import/detect.ts +37 -0
- package/src/import/extract.ts +137 -0
- package/src/import/history.ts +99 -0
- package/src/import/index.ts +45 -0
- package/src/import/providers/aider.ts +207 -0
- package/src/import/providers/claude-code.ts +339 -0
- package/src/import/providers/cline.ts +324 -0
- package/src/import/providers/codex.ts +369 -0
- package/src/import/providers/continue.ts +304 -0
- package/src/import/providers/index.ts +32 -0
- package/src/import/providers/opencode.ts +272 -0
- package/src/import/providers/pi.ts +332 -0
- package/src/import/types.ts +91 -0
- package/src/index.ts +13 -0
- package/src/instruction-detect.ts +275 -0
- package/src/log.ts +91 -3
- package/src/ltm.ts +789 -41
- package/src/pattern-extract.ts +41 -0
- package/src/prompt.ts +7 -1
- package/src/recall.ts +43 -5
- package/src/search.ts +7 -5
- package/src/session-limiter.ts +47 -0
- package/src/temporal.ts +18 -6
- package/src/types.ts +1 -1
package/src/distillation.ts
CHANGED
|
@@ -14,6 +14,7 @@ import {
|
|
|
14
14
|
} from "./prompt";
|
|
15
15
|
import { toolStripAnnotation } from "./gradient";
|
|
16
16
|
import { workerSessionIDs } from "./worker";
|
|
17
|
+
import { distillLimiter } from "./session-limiter";
|
|
17
18
|
import type { LLMClient } from "./types";
|
|
18
19
|
|
|
19
20
|
// Re-export for backwards compat — index.ts and others may still import from here.
|
|
@@ -610,8 +611,23 @@ function resetOrphans(projectPath: string, sessionID: string): number {
|
|
|
610
611
|
return orphans.length;
|
|
611
612
|
}
|
|
612
613
|
|
|
613
|
-
// Main distillation entry point — called on session.idle or when urgent
|
|
614
|
+
// Main distillation entry point — called on session.idle or when urgent.
|
|
615
|
+
// Serialized per session via p-limit(1) to prevent concurrent runs from
|
|
616
|
+
// reading the same undistilled messages and producing duplicate rows.
|
|
614
617
|
export async function run(input: {
|
|
618
|
+
llm: LLMClient;
|
|
619
|
+
projectPath: string;
|
|
620
|
+
sessionID: string;
|
|
621
|
+
model?: { providerID: string; modelID: string };
|
|
622
|
+
force?: boolean;
|
|
623
|
+
skipMeta?: boolean;
|
|
624
|
+
urgent?: boolean;
|
|
625
|
+
callType?: "batch" | "direct";
|
|
626
|
+
}): Promise<{ rounds: number; distilled: number }> {
|
|
627
|
+
return distillLimiter.get(input.sessionID)(() => runInner(input));
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
async function runInner(input: {
|
|
615
631
|
llm: LLMClient;
|
|
616
632
|
projectPath: string;
|
|
617
633
|
sessionID: string;
|
|
@@ -697,7 +713,8 @@ export async function run(input: {
|
|
|
697
713
|
gen0Count(input.projectPath, input.sessionID) >=
|
|
698
714
|
cfg.distillation.metaThreshold
|
|
699
715
|
) {
|
|
700
|
-
|
|
716
|
+
// Call inner directly — we're already under the per-session limiter.
|
|
717
|
+
await metaDistillInner({
|
|
701
718
|
llm: input.llm,
|
|
702
719
|
projectPath: input.projectPath,
|
|
703
720
|
sessionID: input.sessionID,
|
|
@@ -776,17 +793,29 @@ async function distillSegment(input: {
|
|
|
776
793
|
return null;
|
|
777
794
|
}
|
|
778
795
|
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
|
|
783
|
-
|
|
784
|
-
|
|
785
|
-
|
|
786
|
-
|
|
787
|
-
|
|
788
|
-
|
|
789
|
-
|
|
796
|
+
// Atomic: store distillation + mark source messages as distilled in one
|
|
797
|
+
// transaction. Without this, a crash between the two statements would leave
|
|
798
|
+
// messages undistilled but with an existing distillation row, causing
|
|
799
|
+
// re-processing on restart and duplicate distillation content.
|
|
800
|
+
let distillId: string;
|
|
801
|
+
db().exec("BEGIN IMMEDIATE");
|
|
802
|
+
try {
|
|
803
|
+
distillId = storeDistillation({
|
|
804
|
+
projectPath: input.projectPath,
|
|
805
|
+
sessionID: input.sessionID,
|
|
806
|
+
observations: result.observations,
|
|
807
|
+
sourceIDs: input.messages.map((m) => m.id),
|
|
808
|
+
generation: 0,
|
|
809
|
+
rCompression: rComp,
|
|
810
|
+
cNorm,
|
|
811
|
+
callType: input.callType,
|
|
812
|
+
});
|
|
813
|
+
temporal.markDistilled(input.messages.map((m) => m.id));
|
|
814
|
+
db().exec("COMMIT");
|
|
815
|
+
} catch (e) {
|
|
816
|
+
db().exec("ROLLBACK");
|
|
817
|
+
throw e;
|
|
818
|
+
}
|
|
790
819
|
|
|
791
820
|
log.info(
|
|
792
821
|
`distill segment: ${input.messages.length} msgs, ` +
|
|
@@ -811,7 +840,8 @@ async function distillSegment(input: {
|
|
|
811
840
|
|
|
812
841
|
// Fire-and-forget: extract decision/preference patterns → knowledge entries
|
|
813
842
|
if (config().knowledge.enabled) {
|
|
814
|
-
|
|
843
|
+
const patterns = extractPatterns(result.observations);
|
|
844
|
+
for (const pat of patterns) {
|
|
815
845
|
try {
|
|
816
846
|
ltm.create({
|
|
817
847
|
projectPath: input.projectPath,
|
|
@@ -825,6 +855,9 @@ async function distillSegment(input: {
|
|
|
825
855
|
// Dedup guard in ltm.create() handles duplicates — swallow errors
|
|
826
856
|
}
|
|
827
857
|
}
|
|
858
|
+
if (patterns.length > 0) {
|
|
859
|
+
log.info(`pattern extraction: ${patterns.length} entries from distillation`);
|
|
860
|
+
}
|
|
828
861
|
}
|
|
829
862
|
|
|
830
863
|
return result;
|
|
@@ -836,7 +869,8 @@ async function distillSegment(input: {
|
|
|
836
869
|
* via `<previous-meta-summary>` so the LLM updates in place rather than
|
|
837
870
|
* re-deriving from scratch.
|
|
838
871
|
*
|
|
839
|
-
*
|
|
872
|
+
* Serialized per session via the same p-limit(1) as `run()`. Exported for
|
|
873
|
+
* the idle handler which calls metaDistill() independently of run().
|
|
840
874
|
*/
|
|
841
875
|
export async function metaDistill(input: {
|
|
842
876
|
llm: LLMClient;
|
|
@@ -845,6 +879,17 @@ export async function metaDistill(input: {
|
|
|
845
879
|
model?: { providerID: string; modelID: string };
|
|
846
880
|
urgent?: boolean;
|
|
847
881
|
callType?: "batch" | "direct";
|
|
882
|
+
}): Promise<DistillationResult | null> {
|
|
883
|
+
return distillLimiter.get(input.sessionID)(() => metaDistillInner(input));
|
|
884
|
+
}
|
|
885
|
+
|
|
886
|
+
async function metaDistillInner(input: {
|
|
887
|
+
llm: LLMClient;
|
|
888
|
+
projectPath: string;
|
|
889
|
+
sessionID: string;
|
|
890
|
+
model?: { providerID: string; modelID: string };
|
|
891
|
+
urgent?: boolean;
|
|
892
|
+
callType?: "batch" | "direct";
|
|
848
893
|
}): Promise<DistillationResult | null> {
|
|
849
894
|
const existing = loadGen0(input.projectPath, input.sessionID);
|
|
850
895
|
|
|
@@ -928,7 +973,8 @@ export async function metaDistill(input: {
|
|
|
928
973
|
|
|
929
974
|
// Fire-and-forget: extract decision/preference patterns → knowledge entries
|
|
930
975
|
if (config().knowledge.enabled) {
|
|
931
|
-
|
|
976
|
+
const patterns = extractPatterns(result.observations);
|
|
977
|
+
for (const pat of patterns) {
|
|
932
978
|
try {
|
|
933
979
|
ltm.create({
|
|
934
980
|
projectPath: input.projectPath,
|
|
@@ -942,6 +988,9 @@ export async function metaDistill(input: {
|
|
|
942
988
|
// Dedup guard in ltm.create() handles duplicates — swallow errors
|
|
943
989
|
}
|
|
944
990
|
}
|
|
991
|
+
if (patterns.length > 0) {
|
|
992
|
+
log.info(`pattern extraction: ${patterns.length} entries from meta-distillation`);
|
|
993
|
+
}
|
|
945
994
|
}
|
|
946
995
|
|
|
947
996
|
return result;
|
package/src/embedding-vendor.ts
CHANGED
|
@@ -1,31 +1,19 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Vendored
|
|
2
|
+
* Vendored model registration for the standalone Lore binary.
|
|
3
3
|
*
|
|
4
|
-
* The Bun-compiled `lore` binary
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
4
|
+
* The Bun-compiled `lore` binary bundles `@huggingface/transformers`
|
|
5
|
+
* (which includes ONNX Runtime) into the executable. The model weights
|
|
6
|
+
* and tokenizer files (nomic-embed-text-v1.5, INT8 quantized) are
|
|
7
|
+
* embedded as Bun assets and extracted to a cache dir on first run.
|
|
8
8
|
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
* equivalent) for the actual ONNX Runtime computation library. Bun
|
|
14
|
-
* doesn't follow this kind of dependency. The binary's wrapper
|
|
15
|
-
* pre-loads these libs via `bun:ffi` *before* fastembed evaluates,
|
|
16
|
-
* so when the addon's dlopen fires it finds the cached handle.
|
|
17
|
-
*
|
|
18
|
-
* 2. **Model weights + tokenizer**. fastembed downloads from the HF
|
|
19
|
-
* Hub on first use; we want zero network on first run. The wrapper
|
|
20
|
-
* embeds the bge-small INT8 files as Bun assets, writes them to a
|
|
21
|
-
* real disk dir on first run, and sets `globalThis.__LORE_VENDOR_MODEL__`
|
|
22
|
-
* to that path. This module exposes that registration to the
|
|
23
|
-
* LocalProvider so it can hand the path to fastembed's CUSTOM-mode
|
|
24
|
-
* init (`modelAbsoluteDirPath` + `modelName`).
|
|
9
|
+
* The binary's wrapper sets `globalThis.__LORE_VENDOR_MODEL__` to the
|
|
10
|
+
* extraction path. This module exposes that registration to the
|
|
11
|
+
* `LocalProvider` so it can configure the transformers.js `env` to
|
|
12
|
+
* load from the local path instead of downloading from HuggingFace Hub.
|
|
25
13
|
*
|
|
26
14
|
* In npm-mode usage from `@loreai/opencode` / `@loreai/pi` the global
|
|
27
|
-
* is unset and `vendorModelInfo()` returns `null`, so
|
|
28
|
-
*
|
|
15
|
+
* is unset and `vendorModelInfo()` returns `null`, so transformers.js
|
|
16
|
+
* downloads the model from HF Hub on first use and caches it locally.
|
|
29
17
|
*/
|
|
30
18
|
|
|
31
19
|
// ---------------------------------------------------------------------------
|
|
@@ -34,13 +22,10 @@
|
|
|
34
22
|
|
|
35
23
|
/** What the binary wrapper writes to globalThis after extracting model files. */
|
|
36
24
|
export interface VendorRegistration {
|
|
37
|
-
/** Absolute path to the dir containing the
|
|
38
|
-
* (config.json, tokenizer.json, model_quantized.onnx, …).
|
|
39
|
-
*
|
|
40
|
-
|
|
41
|
-
/** Filename of the ONNX weights inside that dir. Pass to fastembed
|
|
42
|
-
* as `modelName` in CUSTOM init. */
|
|
43
|
-
modelName: string;
|
|
25
|
+
/** Absolute path to the dir containing the model files in HF layout
|
|
26
|
+
* (config.json, tokenizer.json, onnx/model_quantized.onnx, …).
|
|
27
|
+
* Passed to transformers.js as `env.localModelPath`. */
|
|
28
|
+
localModelPath: string;
|
|
44
29
|
/** Target identifier the binary was built for, e.g. "linux-x64".
|
|
45
30
|
* Diagnostic only — the runtime doesn't branch on it. */
|
|
46
31
|
target: string;
|
|
@@ -69,25 +54,23 @@ export function _setVendorRegistration(reg: VendorRegistration | null): void {
|
|
|
69
54
|
// Public entry
|
|
70
55
|
// ---------------------------------------------------------------------------
|
|
71
56
|
|
|
72
|
-
/** Subset of the registration
|
|
73
|
-
* diagnostic fields so the LocalProvider has exactly what it should
|
|
74
|
-
* hand to `FlagEmbedding.init`. */
|
|
57
|
+
/** Subset of the registration the LocalProvider needs. */
|
|
75
58
|
export interface VendorModelInfo {
|
|
76
|
-
|
|
77
|
-
|
|
59
|
+
/** Absolute path to the dir containing the model in HF layout.
|
|
60
|
+
* Set as `env.localModelPath` in the worker thread. */
|
|
61
|
+
localModelPath: string;
|
|
78
62
|
}
|
|
79
63
|
|
|
80
64
|
/**
|
|
81
|
-
* Resolve the
|
|
82
|
-
* `null` when no vendor is registered (npm-mode), so the caller
|
|
83
|
-
* through to
|
|
65
|
+
* Resolve the vendored model path for transformers.js local loading.
|
|
66
|
+
* Returns `null` when no vendor is registered (npm-mode), so the caller
|
|
67
|
+
* falls through to transformers.js's default HF Hub download + cache.
|
|
84
68
|
*/
|
|
85
69
|
export function vendorModelInfo(): VendorModelInfo | null {
|
|
86
70
|
const reg = getRegistration();
|
|
87
71
|
if (!reg) return null;
|
|
88
72
|
return {
|
|
89
|
-
|
|
90
|
-
modelName: reg.modelName,
|
|
73
|
+
localModelPath: reg.localModelPath,
|
|
91
74
|
};
|
|
92
75
|
}
|
|
93
76
|
|
package/src/embedding-worker-types.ts
CHANGED
|
@@ -1,10 +1,11 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Shared message types for the embedding worker thread.
|
|
3
3
|
*
|
|
4
|
-
* The embedding worker (`embedding-worker.ts`) runs
|
|
5
|
-
* in a separate `node:worker_threads` Worker
|
|
6
|
-
* stays free during inference. This file
|
|
7
|
-
* the main thread (`LocalProvider`
|
|
4
|
+
* The embedding worker (`embedding-worker.ts`) runs ONNX inference via
|
|
5
|
+
* `@huggingface/transformers` in a separate `node:worker_threads` Worker
|
|
6
|
+
* so the main thread's event loop stays free during inference. This file
|
|
7
|
+
* defines the message protocol between the main thread (`LocalProvider`
|
|
8
|
+
* in `embedding.ts`) and the worker.
|
|
8
9
|
*
|
|
9
10
|
* Imported by both sides — keep this file free of runtime dependencies.
|
|
10
11
|
*/
|
|
@@ -18,7 +19,7 @@ export interface EmbedRequest {
|
|
|
18
19
|
type: "embed";
|
|
19
20
|
/** Monotonic request ID for correlating responses. */
|
|
20
21
|
id: number;
|
|
21
|
-
/** Texts to embed. */
|
|
22
|
+
/** Texts to embed (already prefixed with task instruction by the caller). */
|
|
22
23
|
texts: string[];
|
|
23
24
|
/** "document" for storage, "query" for search. */
|
|
24
25
|
inputType: "document" | "query";
|
|
@@ -72,11 +73,18 @@ export type WorkerOutbound = EmbedResult | EmbedError | InitError;
|
|
|
72
73
|
|
|
73
74
|
/** Passed to the worker via `workerData` at construction time. */
|
|
74
75
|
export interface WorkerInitData {
|
|
75
|
-
/**
|
|
76
|
-
|
|
76
|
+
/** HuggingFace model ID, e.g. "nomic-ai/nomic-embed-text-v1.5". */
|
|
77
|
+
modelId: string;
|
|
78
|
+
/** Target embedding dimensions. For Nomic v1.5 with Matryoshka,
|
|
79
|
+
* this controls how many leading dims to keep (64–768). */
|
|
80
|
+
dimensions: number;
|
|
77
81
|
/** Vendored model info for binary mode, or null for npm mode.
|
|
78
|
-
*
|
|
79
|
-
*
|
|
80
|
-
*
|
|
81
|
-
vendorModel: {
|
|
82
|
+
* In binary mode, model files are pre-extracted to a local dir
|
|
83
|
+
* and we point transformers.js at that path instead of downloading
|
|
84
|
+
* from HuggingFace Hub. */
|
|
85
|
+
vendorModel: {
|
|
86
|
+
/** Absolute path to the dir containing model files
|
|
87
|
+
* (config.json, tokenizer.json, onnx/model_quantized.onnx, …). */
|
|
88
|
+
localModelPath: string;
|
|
89
|
+
} | null;
|
|
82
90
|
}
|
package/src/embedding-worker.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* Embedding worker thread — runs
|
|
2
|
+
* Embedding worker thread — runs ONNX inference via @huggingface/transformers
|
|
3
|
+
* off the main thread.
|
|
3
4
|
*
|
|
4
5
|
* This file is the entry point for a `node:worker_threads` Worker spawned by
|
|
5
|
-
* `LocalProvider` in `embedding.ts`. It owns the
|
|
6
|
+
* `LocalProvider` in `embedding.ts`. It owns the transformers.js pipeline
|
|
6
7
|
* and processes embed requests sequentially from a priority queue. Moving
|
|
7
8
|
* inference here keeps the main thread's event loop free — HTTP requests,
|
|
8
9
|
* SSE streams, and session APIs are no longer blocked during embedding.
|
|
@@ -10,6 +11,12 @@
|
|
|
10
11
|
* Communication uses `parentPort` message passing with structured clone.
|
|
11
12
|
* Float32Array vectors are sent back directly (Bun preserves identity).
|
|
12
13
|
*
|
|
14
|
+
* The worker applies Nomic's recommended post-processing:
|
|
15
|
+
* 1. Mean pooling (via pipeline option)
|
|
16
|
+
* 2. Layer normalization
|
|
17
|
+
* 3. Matryoshka dimension truncation (if dimensions < full 768)
|
|
18
|
+
* 4. L2 normalization
|
|
19
|
+
*
|
|
13
20
|
* @see embedding-worker-types.ts for the message protocol.
|
|
14
21
|
*/
|
|
15
22
|
|
|
@@ -25,53 +32,72 @@ import type {
|
|
|
25
32
|
// workerData
|
|
26
33
|
// ---------------------------------------------------------------------------
|
|
27
34
|
|
|
28
|
-
const {
|
|
35
|
+
const { modelId, dimensions, vendorModel } = workerData as WorkerInitData;
|
|
29
36
|
|
|
30
37
|
// ---------------------------------------------------------------------------
|
|
31
38
|
// Model lifecycle — lazy init on first embed request
|
|
32
39
|
// ---------------------------------------------------------------------------
|
|
33
40
|
|
|
34
|
-
/** The
|
|
35
|
-
type
|
|
36
|
-
|
|
37
|
-
|
|
41
|
+
/** The transformers.js pipeline instance, typed loosely since the exact
|
|
42
|
+
* return type depends on the pipeline task. */
|
|
43
|
+
type FeatureExtractionPipeline = {
|
|
44
|
+
(texts: string[], options?: Record<string, unknown>): Promise<{
|
|
45
|
+
dims: number[];
|
|
46
|
+
data: Float32Array;
|
|
47
|
+
tolist(): number[][];
|
|
48
|
+
}>;
|
|
49
|
+
dispose?(): Promise<void>;
|
|
38
50
|
};
|
|
39
51
|
|
|
40
|
-
let
|
|
52
|
+
let pipe: FeatureExtractionPipeline | null = null;
|
|
53
|
+
let layerNormFn: ((input: unknown, normalized_shape: number[]) => {
|
|
54
|
+
dims: number[];
|
|
55
|
+
data: Float32Array;
|
|
56
|
+
normalize(p: number, dim: number): { tolist(): number[][]; data: Float32Array; dims: number[] };
|
|
57
|
+
slice(...args: unknown[]): { normalize(p: number, dim: number): { tolist(): number[][]; data: Float32Array; dims: number[] } };
|
|
58
|
+
}) | null = null;
|
|
41
59
|
let initPromise: Promise<void> | null = null;
|
|
42
60
|
let initFailed = false;
|
|
43
61
|
let initError: string | null = null;
|
|
44
62
|
|
|
45
63
|
/**
|
|
46
|
-
* Ensure the
|
|
47
|
-
* dynamic import +
|
|
64
|
+
* Ensure the transformers.js pipeline is loaded. Lazy — first call triggers
|
|
65
|
+
* the dynamic import + pipeline creation, subsequent calls return immediately.
|
|
48
66
|
* On failure, marks the worker as permanently broken and posts `init-error`.
|
|
49
67
|
*/
|
|
50
|
-
async function
|
|
51
|
-
if (
|
|
52
|
-
if (initFailed) throw new Error(initError ?? "
|
|
68
|
+
async function ensurePipeline(): Promise<void> {
|
|
69
|
+
if (pipe) return;
|
|
70
|
+
if (initFailed) throw new Error(initError ?? "pipeline init previously failed");
|
|
53
71
|
|
|
54
72
|
if (!initPromise) {
|
|
55
73
|
initPromise = (async () => {
|
|
56
|
-
const
|
|
57
|
-
const {
|
|
74
|
+
const transformers = await import("@huggingface/transformers");
|
|
75
|
+
const { pipeline, env, layer_norm } = transformers;
|
|
76
|
+
|
|
77
|
+
// Configure transformers.js environment
|
|
78
|
+
env.allowRemoteModels = !vendorModel;
|
|
79
|
+
env.allowLocalModels = true;
|
|
58
80
|
|
|
59
|
-
let m: unknown;
|
|
60
81
|
if (vendorModel) {
|
|
61
|
-
// Binary mode:
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
modelAbsoluteDirPath: vendorModel.modelAbsoluteDirPath,
|
|
65
|
-
modelName: vendorModel.modelName,
|
|
66
|
-
});
|
|
67
|
-
} else {
|
|
68
|
-
// npm mode: resolve model name against fastembed's enum.
|
|
69
|
-
const enumValue = (EmbeddingModel as Record<string, string>)[modelName];
|
|
70
|
-
m = await FlagEmbedding.init({
|
|
71
|
-
model: enumValue ?? modelName,
|
|
72
|
-
} as { model: typeof EmbeddingModel.BGESmallENV15 });
|
|
82
|
+
// Binary mode: point at pre-extracted model files on disk.
|
|
83
|
+
env.localModelPath = vendorModel.localModelPath;
|
|
84
|
+
env.allowRemoteModels = false;
|
|
73
85
|
}
|
|
74
|
-
|
|
86
|
+
|
|
87
|
+
// Create feature-extraction pipeline with ONNX quantized model.
|
|
88
|
+
// dtype: 'q8' selects the INT8 quantized ONNX variant (model_quantized.onnx)
|
|
89
|
+
// which is ~137MB for Nomic v1.5 vs ~547MB for the full FP32 model.
|
|
90
|
+
//
|
|
91
|
+
// device: "cpu" — in npm mode, transformers.js uses onnxruntime-node
|
|
92
|
+
// (native CPU). In the compiled binary, onnxruntime-node is redirected
|
|
93
|
+
// to onnxruntime-web by the build plugin, which handles "cpu" via its
|
|
94
|
+
// WASM+SIMD backend (API-compatible, ~2x faster on batch workloads).
|
|
95
|
+
pipe = (await pipeline("feature-extraction", modelId, {
|
|
96
|
+
dtype: "q8",
|
|
97
|
+
device: "cpu",
|
|
98
|
+
})) as unknown as FeatureExtractionPipeline;
|
|
99
|
+
|
|
100
|
+
layerNormFn = layer_norm as typeof layerNormFn;
|
|
75
101
|
})().catch((err) => {
|
|
76
102
|
initFailed = true;
|
|
77
103
|
initError = err instanceof Error ? err.message : String(err);
|
|
@@ -83,8 +109,7 @@ async function ensureModel(): Promise<FastembedModel> {
|
|
|
83
109
|
}
|
|
84
110
|
|
|
85
111
|
await initPromise;
|
|
86
|
-
if (!
|
|
87
|
-
return model;
|
|
112
|
+
if (!pipe) throw new Error("pipeline init completed but pipe is null");
|
|
88
113
|
}
|
|
89
114
|
|
|
90
115
|
// ---------------------------------------------------------------------------
|
|
@@ -135,31 +160,70 @@ async function drain(): Promise<void> {
|
|
|
135
160
|
// Embed processing
|
|
136
161
|
// ---------------------------------------------------------------------------
|
|
137
162
|
|
|
163
|
+
/**
|
|
164
|
+
* Detect ONNX runtime out-of-memory errors. The runtime throws opaque
|
|
165
|
+
* numeric error codes (e.g. "287180544") for allocation failures rather
|
|
166
|
+
* than a readable message. We match on large numeric-only strings and
|
|
167
|
+
* known OOM patterns.
|
|
168
|
+
*/
|
|
169
|
+
function isOomError(msg: string): boolean {
|
|
170
|
+
// Pure numeric error codes ≥ 6 digits are ORT allocation failures
|
|
171
|
+
if (/^\d{6,}$/.test(msg)) return true;
|
|
172
|
+
// Explicit OOM messages from various ONNX backends
|
|
173
|
+
if (/out.of.memory|alloc.*fail|oom/i.test(msg)) return true;
|
|
174
|
+
return false;
|
|
175
|
+
}
|
|
176
|
+
|
|
138
177
|
async function processEmbed(req: EmbedRequest): Promise<void> {
|
|
139
178
|
try {
|
|
140
|
-
|
|
179
|
+
await ensurePipeline();
|
|
180
|
+
|
|
181
|
+
// Run feature extraction with mean pooling.
|
|
182
|
+
// truncation: true caps each text at the model's max length (8192 tokens
|
|
183
|
+
// for Nomic v1.5), preventing oversized inputs from causing OOM.
|
|
184
|
+
const output = await pipe!(req.texts, { pooling: "mean", truncation: true });
|
|
185
|
+
|
|
186
|
+
// Post-process following Nomic's recipe:
|
|
187
|
+
// 1. Layer normalization over the full hidden dimension
|
|
188
|
+
// 2. Matryoshka truncation to target dimensions
|
|
189
|
+
// 3. L2 normalization
|
|
190
|
+
const fullDim = output.dims[output.dims.length - 1]; // 768 for Nomic v1.5
|
|
191
|
+
const truncate = dimensions < fullDim;
|
|
192
|
+
|
|
193
|
+
let normalized: { tolist(): number[][]; data: Float32Array; dims: number[] };
|
|
194
|
+
if (truncate) {
|
|
195
|
+
// layer_norm → slice → L2 normalize
|
|
196
|
+
normalized = layerNormFn!(output, [fullDim])
|
|
197
|
+
.slice(null, [0, dimensions])
|
|
198
|
+
.normalize(2, -1);
|
|
199
|
+
} else {
|
|
200
|
+
// layer_norm → L2 normalize (no truncation)
|
|
201
|
+
normalized = layerNormFn!(output, [fullDim])
|
|
202
|
+
.normalize(2, -1);
|
|
203
|
+
}
|
|
141
204
|
|
|
142
|
-
|
|
205
|
+
// Extract per-text vectors from the batched tensor.
|
|
206
|
+
const numTexts = req.texts.length;
|
|
207
|
+
const vectors: Float32Array[] = [];
|
|
208
|
+
const dim = truncate ? dimensions : fullDim;
|
|
143
209
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
const vec =
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
// Batch document embedding via async generator.
|
|
150
|
-
vectors = [];
|
|
151
|
-
for await (const batch of m.passageEmbed(req.texts)) {
|
|
152
|
-
for (const vec of batch) {
|
|
153
|
-
vectors.push(new Float32Array(vec));
|
|
154
|
-
}
|
|
155
|
-
}
|
|
210
|
+
for (let i = 0; i < numTexts; i++) {
|
|
211
|
+
const start = i * dim;
|
|
212
|
+
const vec = new Float32Array(dim);
|
|
213
|
+
vec.set(normalized.data.subarray(start, start + dim));
|
|
214
|
+
vectors.push(vec);
|
|
156
215
|
}
|
|
157
216
|
|
|
158
217
|
post({ type: "result", id: req.id, vectors });
|
|
159
218
|
} catch (err) {
|
|
160
|
-
// Don't re-post init-error — it was already sent in
|
|
219
|
+
// Don't re-post init-error — it was already sent in ensurePipeline().
|
|
161
220
|
if (!initFailed) {
|
|
162
|
-
const
|
|
221
|
+
const raw = err instanceof Error ? err.message : String(err);
|
|
222
|
+
const msg = isOomError(raw)
|
|
223
|
+
? `ONNX runtime out of memory (batch=${req.texts.length}, ` +
|
|
224
|
+
`longest≈${Math.max(...req.texts.map((t) => t.length))} chars). ` +
|
|
225
|
+
`Try reducing batch size. Raw: ${raw}`
|
|
226
|
+
: raw;
|
|
163
227
|
post({ type: "error", id: req.id, error: msg });
|
|
164
228
|
}
|
|
165
229
|
}
|