@elizaos/plugin-local-ai 2.0.0-alpha.6 → 2.0.0-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,20 +1,4 @@
  import { createRequire } from "node:module";
- var __create = Object.create;
- var __getProtoOf = Object.getPrototypeOf;
- var __defProp = Object.defineProperty;
- var __getOwnPropNames = Object.getOwnPropertyNames;
- var __hasOwnProp = Object.prototype.hasOwnProperty;
- var __toESM = (mod, isNodeMode, target) => {
- target = mod != null ? __create(__getProtoOf(mod)) : {};
- const to = isNodeMode || !mod || !mod.__esModule ? __defProp(target, "default", { value: mod, enumerable: true }) : target;
- for (let key of __getOwnPropNames(mod))
- if (!__hasOwnProp.call(to, key))
- __defProp(to, key, {
- get: () => mod[key],
- enumerable: true
- });
- return to;
- };
  var __require = /* @__PURE__ */ createRequire(import.meta.url);
 
  // index.ts
@@ -22,11 +6,7 @@ import fs5 from "node:fs";
  import os3 from "node:os";
  import path5, { basename } from "node:path";
  import { Readable as Readable2 } from "node:stream";
- import {
- logger as logger8,
- ModelType,
- parseKeyValueXml
- } from "@elizaos/core";
+ import { EventType, logger as logger8, ModelType } from "@elizaos/core";
  import {
  getLlama,
  LlamaChatSession
@@ -35,16 +15,16 @@ import {
  // environment.ts
  import { logger } from "@elizaos/core";
  import { z } from "zod";
- var DEFAULT_SMALL_MODEL = "DeepHermes-3-Llama-3-3B-Preview-q4.gguf";
- var DEFAULT_LARGE_MODEL = "DeepHermes-3-Llama-3-8B-q4.gguf";
- var DEFAULT_EMBEDDING_MODEL = "bge-small-en-v1.5.Q4_K_M.gguf";
+ var DEFAULT_SMALL_MODEL = "text/eliza-1-mobile-1_7b-32k.gguf";
+ var DEFAULT_LARGE_MODEL = "text/eliza-1-desktop-9b-64k.gguf";
+ var DEFAULT_EMBEDDING_MODEL = "text/eliza-1-lite-0_6b-32k.gguf";
  var configSchema = z.object({
  LOCAL_SMALL_MODEL: z.string().optional().default(DEFAULT_SMALL_MODEL),
  LOCAL_LARGE_MODEL: z.string().optional().default(DEFAULT_LARGE_MODEL),
  LOCAL_EMBEDDING_MODEL: z.string().optional().default(DEFAULT_EMBEDDING_MODEL),
  MODELS_DIR: z.string().optional(),
  CACHE_DIR: z.string().optional(),
- LOCAL_EMBEDDING_DIMENSIONS: z.string().optional().default("384").transform((val) => parseInt(val, 10))
+ LOCAL_EMBEDDING_DIMENSIONS: z.string().optional().default("1024").transform((val) => parseInt(val, 10))
  });
  function validateConfig() {
  try {
@@ -56,67 +36,135 @@ function validateConfig() {
  CACHE_DIR: process.env.CACHE_DIR,
  LOCAL_EMBEDDING_DIMENSIONS: process.env.LOCAL_EMBEDDING_DIMENSIONS
  };
- logger.debug("Validating configuration for local AI plugin from env:", {
+ logger.debug({
  LOCAL_SMALL_MODEL: configToParse.LOCAL_SMALL_MODEL,
  LOCAL_LARGE_MODEL: configToParse.LOCAL_LARGE_MODEL,
  LOCAL_EMBEDDING_MODEL: configToParse.LOCAL_EMBEDDING_MODEL,
  MODELS_DIR: configToParse.MODELS_DIR,
  CACHE_DIR: configToParse.CACHE_DIR,
  LOCAL_EMBEDDING_DIMENSIONS: configToParse.LOCAL_EMBEDDING_DIMENSIONS
- });
+ }, "Validating configuration for local AI plugin from env:");
  const validatedConfig = configSchema.parse(configToParse);
- logger.info("Using local AI configuration:", validatedConfig);
+ logger.info(validatedConfig, "Using local AI configuration:");
  return validatedConfig;
  } catch (error) {
  if (error instanceof z.ZodError) {
- const errorMessages = error.errors.map((err) => `${err.path.join(".")}: ${err.message}`).join(`
+ const errorMessages = error.issues.map((issue) => `${issue.path.join(".")}: ${issue.message}`).join(`
  `);
- logger.error("Zod validation failed:", errorMessages);
+ logger.error(`Zod validation failed: ${errorMessages}`);
  throw new Error(`Configuration validation failed:
  ${errorMessages}`);
  }
- logger.error("Configuration validation failed:", {
+ logger.error({
  error: error instanceof Error ? error.message : String(error),
  stack: error instanceof Error ? error.stack : undefined
- });
+ }, "Configuration validation failed:");
  throw error;
  }
  }
 
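A change that recurs throughout this diff: every logger call moves from a message-first signature to an object-first, message-second form, so context objects are emitted as structured fields. A minimal sketch of the two shapes, assuming a pino-style logger (that @elizaos/core's logger is pino-backed is an assumption here; the call shape itself is what the diff shows):

```ts
import { pino } from "pino";

const logger = pino();

// Before (alpha): message first, context object second; the object may be
// interpolated into the message rather than logged as structured fields.
// logger.info("Using local AI configuration:", validatedConfig);

// After (beta): merging object first, message second; each key becomes a
// top-level field on the emitted log line.
logger.info({ LOCAL_SMALL_MODEL: "text/eliza-1-mobile-1_7b-32k.gguf" }, "Using local AI configuration:");
```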
+ // structured-output.ts
+ import {
+ defineChatSessionFunction,
+ LlamaGrammar,
+ LlamaJsonSchemaGrammar
+ } from "node-llama-cpp";
+ function toGbnfJsonSchema(schema) {
+ if (schema == null)
+ return;
+ if (typeof schema !== "object") {
+ throw new Error("[plugin-local-ai] JSON schema must be an object");
+ }
+ return schema;
+ }
+ function buildLlamaFunctions(tools) {
+ const out = {};
+ for (const tool of tools) {
+ if (!tool?.name)
+ continue;
+ out[tool.name] = defineChatSessionFunction({
+ description: tool.description,
+ params: toGbnfJsonSchema(tool.parameters),
+ handler: () => "[deferred to runtime]"
+ });
+ }
+ return out;
+ }
+ function extractToolCalls(response) {
+ const calls = [];
+ let i = 0;
+ for (const entry of response) {
+ if (entry && typeof entry === "object" && entry.type === "functionCall") {
+ const fc = entry;
+ calls.push({
+ id: `call_${i++}`,
+ name: fc.name,
+ arguments: fc.params ?? {},
+ type: "function"
+ });
+ }
+ }
+ return calls;
+ }
+ function buildJsonSchemaGrammar(llama, schema) {
+ const gbnf = toGbnfJsonSchema(schema);
+ if (gbnf == null) {
+ throw new Error("[plugin-local-ai] responseSchema is required to build a JSON schema grammar");
+ }
+ return new LlamaJsonSchemaGrammar(llama, gbnf);
+ }
+ async function buildGenericJsonGrammar(llama) {
+ return await LlamaGrammar.getFor(llama, "json");
+ }
+ async function planStructuredRequest(ctx, params) {
+ if (params.tools && params.tools.length > 0) {
+ return { kind: "tools", functions: buildLlamaFunctions(params.tools) };
+ }
+ if (params.responseSchema) {
+ const grammar = buildJsonSchemaGrammar(ctx.llama, params.responseSchema);
+ return { kind: "schema", grammar };
+ }
+ if (params.responseFormat && typeof params.responseFormat === "object" && params.responseFormat.type === "json_object") {
+ const grammar = await buildGenericJsonGrammar(ctx.llama);
+ return { kind: "json_object", grammar };
+ }
+ return { kind: "text" };
+ }
+
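The new structured-output.ts resolves each request into one of four plans, in strict priority order: tools (chat-session functions), responseSchema (JSON-schema grammar), responseFormat of type json_object (generic JSON grammar), then plain text. A hedged usage sketch, assuming a node-llama-cpp getLlama() handle and an illustrative schema:

```ts
import { getLlama } from "node-llama-cpp";

const llama = await getLlama();
const plan = await planStructuredRequest({ llama }, {
  responseSchema: {
    type: "object",
    properties: { city: { type: "string" }, tempC: { type: "number" } },
    required: ["city", "tempC"],
  },
});
// plan.kind === "schema"; plan.grammar is a LlamaJsonSchemaGrammar that the
// generateText path further down passes to the chat session as { grammar },
// forcing the model to emit JSON matching the schema.
```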
  // types.ts
  var MODEL_SPECS = {
  small: {
- name: "DeepHermes-3-Llama-3-3B-Preview-q4.gguf",
- repo: "NousResearch/DeepHermes-3-Llama-3-3B-Preview-GGUF",
- size: "3B",
- quantization: "Q4_0",
- contextSize: 8192,
+ name: "text/eliza-1-mobile-1_7b-32k.gguf",
+ repo: "elizaos/eliza-1-mobile-1_7b",
+ size: "1.7B",
+ quantization: "fused GGUF",
+ contextSize: 32768,
  tokenizer: {
- name: "NousResearch/DeepHermes-3-Llama-3-3B-Preview",
- type: "llama"
+ name: "elizaos/eliza-1-mobile-1_7b",
+ type: "eliza1"
  }
  },
  medium: {
- name: "DeepHermes-3-Llama-3-8B-q4.gguf",
- repo: "NousResearch/DeepHermes-3-Llama-3-8B-Preview-GGUF",
- size: "8B",
- quantization: "Q4_0",
- contextSize: 8192,
+ name: "text/eliza-1-desktop-9b-64k.gguf",
+ repo: "elizaos/eliza-1-desktop-9b",
+ size: "9B",
+ quantization: "fused GGUF",
+ contextSize: 65536,
  tokenizer: {
- name: "NousResearch/DeepHermes-3-Llama-3-8B-Preview",
- type: "llama"
+ name: "elizaos/eliza-1-desktop-9b",
+ type: "eliza1"
  }
  },
  embedding: {
- name: "bge-small-en-v1.5.Q4_K_M.gguf",
- repo: "ChristianAzinn/bge-small-en-v1.5-gguf",
- size: "133 MB",
- quantization: "Q4_K_M",
- contextSize: 512,
- dimensions: 384,
+ name: "text/eliza-1-lite-0_6b-32k.gguf",
+ repo: "elizaos/eliza-1-lite-0_6b",
+ size: "512 MB",
+ quantization: "fused GGUF",
+ contextSize: 32768,
+ dimensions: 1024,
  tokenizer: {
- name: "ChristianAzinn/bge-small-en-v1.5-gguf",
- type: "llama"
+ name: "elizaos/eliza-1-lite-0_6b",
+ type: "eliza1"
  }
  },
  vision: {
@@ -139,12 +187,12 @@ var MODEL_SPECS = {
  ]
  },
  visionvl: {
- name: "Qwen2.5-VL-3B-Instruct",
- repo: "Qwen/Qwen2.5-VL-3B-Instruct",
- size: "3B",
- modelId: "Qwen/Qwen2.5-VL-3B-Instruct",
- contextSize: 32768,
- maxTokens: 1024,
+ name: "Florence-2-base-ft",
+ repo: "onnx-community/Florence-2-base-ft",
+ size: "0.23B",
+ modelId: "onnx-community/Florence-2-base-ft",
+ contextSize: 1024,
+ maxTokens: 256,
  tasks: [
  "CAPTION",
  "DETAILED_CAPTION",
@@ -188,7 +236,7 @@ class DownloadManager {
  return DownloadManager.instance;
  }
  ensureCacheDirectory() {
- if (!this.cacheDir || this.cacheDir.trim() === "") {
+ if (this.cacheDir.trim() === "") {
  throw new Error("Cache directory path cannot be empty");
  }
  if (!fs.existsSync(this.cacheDir)) {
@@ -197,7 +245,7 @@ class DownloadManager {
  }
  }
  ensureModelsDirectory() {
- if (!this.modelsDir || this.modelsDir.trim() === "") {
+ if (this.modelsDir.trim() === "") {
  throw new Error("Models directory path cannot be empty");
  }
  logger2.debug("Ensuring models directory exists:", this.modelsDir);
@@ -484,7 +532,7 @@ class PlatformManager {
  logger3.info("Initializing platform detection...");
  this.capabilities = await this.detectSystemCapabilities();
  } catch (error) {
- logger3.error("Platform detection failed", { error });
+ logger3.error({ error }, "Platform detection failed");
  throw error;
  }
  }
@@ -531,7 +579,7 @@ class PlatformManager {
  return null;
  }
  } catch (error) {
- logger3.error("GPU detection failed", { error });
+ logger3.error({ error }, "GPU detection failed");
  return null;
  }
  }
@@ -554,7 +602,7 @@ class PlatformManager {
  isAppleSilicon: false
  };
  } catch (error) {
- logger3.error("Mac GPU detection failed", { error });
+ logger3.error({ error }, "Mac GPU detection failed");
  return {
  name: "Unknown Mac GPU",
  type: "metal",
@@ -562,28 +610,49 @@ class PlatformManager {
  };
  }
  }
- async detectWindowsGPU() {
+ async queryWindowsGpuName() {
+ const psCmd = "powershell -NoProfile -NonInteractive -Command " + '"Get-CimInstance Win32_VideoController | Select-Object -ExpandProperty Name"';
+ try {
+ const { stdout } = await execAsync(psCmd);
+ const first = stdout.split(/\r?\n/).map((line) => line.trim()).filter(Boolean)[0];
+ if (first)
+ return first;
+ } catch {}
  try {
  const { stdout } = await execAsync("wmic path win32_VideoController get name");
- const gpuName = stdout.split(`
- `)[1].trim();
+ const lines = stdout.split(/\r?\n/).map((line) => line.trim()).filter((line) => line && line.toLowerCase() !== "name");
+ return lines[0] ?? null;
+ } catch {
+ return null;
+ }
+ }
+ async detectWindowsGPU() {
+ try {
+ const gpuName = await this.queryWindowsGpuName();
+ if (!gpuName)
+ return null;
  if (gpuName.toLowerCase().includes("nvidia")) {
- const { stdout: nvidiaInfo } = await execAsync("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader");
- const [name, memoryStr] = nvidiaInfo.split(",").map((s) => s.trim());
- const memory = Number.parseInt(memoryStr, 10);
- return {
- name,
- memory,
- type: "cuda",
- version: await this.getNvidiaDriverVersion()
- };
+ try {
+ const { stdout: nvidiaInfo } = await execAsync("nvidia-smi --query-gpu=name,memory.total --format=csv,noheader");
+ const firstLine = nvidiaInfo.split(/\r?\n/)[0] ?? "";
+ const [name, memoryStr] = firstLine.split(",").map((s) => s.trim());
+ const memory = Number.parseInt(memoryStr, 10);
+ return {
+ name: name || gpuName,
+ memory: Number.isFinite(memory) ? memory : undefined,
+ type: "cuda",
+ version: await this.getNvidiaDriverVersion()
+ };
+ } catch {
+ return { name: gpuName, type: "cuda" };
+ }
  }
  return {
  name: gpuName,
  type: "directml"
  };
  } catch (error) {
- logger3.error("Windows GPU detection failed", { error });
+ logger3.error({ error }, "Windows GPU detection failed");
  return null;
  }
  }
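Windows GPU detection is rebuilt around a PowerShell CIM query, with the deprecated wmic as the fallback, and nvidia-smi failures now degrade to a name-only CUDA entry instead of failing detection outright. A standalone sketch of the same probe chain (command strings copied from the diff above):

```ts
import { exec } from "node:child_process";
import { promisify } from "node:util";

const execAsync = promisify(exec);

async function windowsGpuName(): Promise<string | null> {
  try {
    const { stdout } = await execAsync(
      'powershell -NoProfile -NonInteractive -Command "Get-CimInstance Win32_VideoController | Select-Object -ExpandProperty Name"'
    );
    const first = stdout.split(/\r?\n/).map((l) => l.trim()).filter(Boolean)[0];
    if (first) return first;
  } catch {} // PowerShell unavailable or CIM query blocked: fall through to wmic
  try {
    // wmic is deprecated on current Windows builds, hence it is only the fallback
    const { stdout } = await execAsync("wmic path win32_VideoController get name");
    return (
      stdout
        .split(/\r?\n/)
        .map((l) => l.trim())
        .filter((l) => l && l.toLowerCase() !== "name")[0] ?? null
    );
  } catch {
    return null;
  }
}
```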
@@ -608,7 +677,7 @@ class PlatformManager {
  type: "none"
  };
  } catch (error) {
- logger3.error("Linux GPU detection failed", { error });
+ logger3.error({ error }, "Linux GPU detection failed");
  return null;
  }
  }
@@ -721,15 +790,15 @@ class TokenizerManager {
  async loadTokenizer(modelConfig) {
  try {
  const tokenizerKey = `${modelConfig.tokenizer.type}-${modelConfig.tokenizer.name}`;
- logger4.info("Loading tokenizer:", {
+ logger4.info({
  key: tokenizerKey,
  name: modelConfig.tokenizer.name,
  type: modelConfig.tokenizer.type,
  modelsDir: this.modelsDir,
  cacheDir: this.cacheDir
- });
+ }, "Loading tokenizer:");
  if (this.tokenizers.has(tokenizerKey)) {
- logger4.info("Using cached tokenizer:", { key: tokenizerKey });
+ logger4.info({ key: tokenizerKey }, "Using cached tokenizer:");
  const cachedTokenizer = this.tokenizers.get(tokenizerKey);
  if (!cachedTokenizer) {
  throw new Error(`Tokenizer ${tokenizerKey} exists in map but returned undefined`);
@@ -748,90 +817,88 @@ class TokenizerManager {
  local_files_only: false
  });
  this.tokenizers.set(tokenizerKey, tokenizer);
- logger4.success("Tokenizer loaded successfully:", { key: tokenizerKey });
+ logger4.success({ key: tokenizerKey }, "Tokenizer loaded successfully:");
  return tokenizer;
  } catch (tokenizeError) {
- logger4.error("Failed to load tokenizer from HuggingFace:", {
+ logger4.error({
  error: tokenizeError instanceof Error ? tokenizeError.message : String(tokenizeError),
  stack: tokenizeError instanceof Error ? tokenizeError.stack : undefined,
  tokenizer: modelConfig.tokenizer.name,
  modelsDir: this.modelsDir
- });
+ }, "Failed to load tokenizer from HuggingFace:");
  logger4.info("Retrying tokenizer loading...");
  const tokenizer = await AutoTokenizer.from_pretrained(modelConfig.tokenizer.name, {
  cache_dir: this.modelsDir,
  local_files_only: false
  });
  this.tokenizers.set(tokenizerKey, tokenizer);
- logger4.success("Tokenizer loaded successfully on retry:", {
- key: tokenizerKey
- });
+ logger4.success({ key: tokenizerKey }, "Tokenizer loaded successfully on retry:");
  return tokenizer;
  }
  } catch (error) {
- logger4.error("Failed to load tokenizer:", {
+ logger4.error({
  error: error instanceof Error ? error.message : String(error),
  stack: error instanceof Error ? error.stack : undefined,
  model: modelConfig.name,
  tokenizer: modelConfig.tokenizer.name,
  modelsDir: this.modelsDir
- });
+ }, "Failed to load tokenizer:");
  throw error;
  }
  }
  async encode(text, modelConfig) {
  try {
- logger4.info("Encoding text with tokenizer:", {
+ logger4.info({
  length: text.length,
  tokenizer: modelConfig.tokenizer.name
- });
+ }, "Encoding text with tokenizer:");
  const tokenizer = await this.loadTokenizer(modelConfig);
  logger4.info("Tokenizer loaded, encoding text...");
  const encoded = await tokenizer.encode(text, {
  add_special_tokens: true,
  return_token_type_ids: false
  });
- logger4.info("Text encoded successfully:", {
+ logger4.info({
  tokenCount: encoded.length,
  tokenizer: modelConfig.tokenizer.name
- });
+ }, "Text encoded successfully:");
  return encoded;
  } catch (error) {
- logger4.error("Text encoding failed:", {
+ logger4.error({
  error: error instanceof Error ? error.message : String(error),
  stack: error instanceof Error ? error.stack : undefined,
  textLength: text.length,
  tokenizer: modelConfig.tokenizer.name,
  modelsDir: this.modelsDir
- });
+ }, "Text encoding failed:");
  throw error;
  }
  }
  async decode(tokens, modelConfig) {
  try {
- logger4.info("Decoding tokens with tokenizer:", {
+ logger4.info({
  count: tokens.length,
  tokenizer: modelConfig.tokenizer.name
- });
+ }, "Decoding tokens with tokenizer:");
  const tokenizer = await this.loadTokenizer(modelConfig);
  logger4.info("Tokenizer loaded, decoding tokens...");
  const decoded = await tokenizer.decode(tokens, {
  skip_special_tokens: true,
  clean_up_tokenization_spaces: true
  });
- logger4.info("Tokens decoded successfully:", {
+ logger4.info({
  textLength: decoded.length,
  tokenizer: modelConfig.tokenizer.name
- });
+ }, "Tokens decoded successfully:");
  return decoded;
  } catch (error) {
- logger4.error("Token decoding failed:", {
+ logger4.error({
  error: error instanceof Error ? error.message : String(error),
  stack: error instanceof Error ? error.stack : undefined,
  tokenCount: tokens.length,
  tokenizer: modelConfig.tokenizer.name,
  modelsDir: this.modelsDir
- });
+ }, "Token decoding failed:");
  throw error;
  }
  }
@@ -862,10 +929,10 @@ class TranscribeManager {
  ffmpegInitialized = false;
  constructor(cacheDir) {
  this.cacheDir = path2.join(cacheDir, "whisper");
- logger5.debug("Initializing TranscribeManager", {
+ logger5.debug({
  cacheDir: this.cacheDir,
  timestamp: new Date().toISOString()
- });
+ }, "Initializing TranscribeManager");
  this.ensureCacheDirectory();
  }
  async ensureFFmpeg() {
@@ -874,11 +941,11 @@ class TranscribeManager {
  await this.initializeFFmpeg();
  this.ffmpegInitialized = true;
  } catch (error) {
- logger5.error("FFmpeg initialization failed:", {
+ logger5.error({
  error: error instanceof Error ? error.message : String(error),
  stack: error instanceof Error ? error.stack : undefined,
  timestamp: new Date().toISOString()
- });
+ }, "FFmpeg initialization failed:");
  return false;
  }
  }
@@ -898,16 +965,16 @@ class TranscribeManager {
  const { stdout } = await execAsync2("ffmpeg -version");
  this.ffmpegVersion = stdout.split(`
  `)[0];
- logger5.info("FFmpeg version:", {
+ logger5.info({
  version: this.ffmpegVersion,
  timestamp: new Date().toISOString()
- });
+ }, "FFmpeg version:");
  } catch (error) {
  this.ffmpegVersion = null;
- logger5.error("Failed to get FFmpeg version:", {
+ logger5.error({
  error: error instanceof Error ? error.message : String(error),
  timestamp: new Date().toISOString()
- });
+ }, "Failed to get FFmpeg version:");
  }
  }
  async initializeFFmpeg() {
@@ -916,21 +983,21 @@ class TranscribeManager {
  if (this.ffmpegAvailable) {
  await this.fetchFFmpegVersion();
  await this.verifyFFmpegCapabilities();
- logger5.success("FFmpeg initialized successfully", {
+ logger5.success({
  version: this.ffmpegVersion,
  path: this.ffmpegPath,
  timestamp: new Date().toISOString()
- });
+ }, "FFmpeg initialized successfully");
  } else {
  this.logFFmpegInstallInstructions();
  }
  } catch (error) {
  this.ffmpegAvailable = false;
- logger5.error("FFmpeg initialization failed:", {
+ logger5.error({
  error: error instanceof Error ? error.message : String(error),
  stack: error instanceof Error ? error.stack : undefined,
  timestamp: new Date().toISOString()
- });
+ }, "FFmpeg initialization failed:");
  this.logFFmpegInstallInstructions();
  }
  }
@@ -939,19 +1006,19 @@ class TranscribeManager {
  const { stdout, stderr } = await execAsync2("which ffmpeg || where ffmpeg");
  this.ffmpegPath = stdout.trim();
  this.ffmpegAvailable = true;
- logger5.info("FFmpeg found at:", {
+ logger5.info({
  path: this.ffmpegPath,
  stderr: stderr ? stderr.trim() : undefined,
  timestamp: new Date().toISOString()
- });
+ }, "FFmpeg found at:");
  } catch (error) {
  this.ffmpegAvailable = false;
  this.ffmpegPath = null;
- logger5.error("FFmpeg not found in PATH:", {
+ logger5.error({
  error: error instanceof Error ? error.message : String(error),
- stderr: error instanceof Error && "stderr" in error ? error.stderr : undefined,
+ stderr: error instanceof Error && "stderr" in error ? String(error.stderr) : undefined,
  timestamp: new Date().toISOString()
- });
+ }, "FFmpeg not found in PATH:");
  }
  }
  async verifyFFmpegCapabilities() {
  async verifyFFmpegCapabilities() {
@@ -962,15 +1029,15 @@ class TranscribeManager {
962
1029
  throw new Error("FFmpeg installation missing required codecs (pcm_s16le, wav)");
963
1030
  }
964
1031
  } catch (error) {
965
- logger5.error("FFmpeg capabilities verification failed:", {
1032
+ logger5.error({
966
1033
  error: error instanceof Error ? error.message : String(error),
967
1034
  timestamp: new Date().toISOString()
968
- });
1035
+ }, "FFmpeg capabilities verification failed:");
969
1036
  throw error;
970
1037
  }
971
1038
  }
972
1039
  logFFmpegInstallInstructions() {
973
- logger5.warn("FFmpeg is required but not properly installed. Please install FFmpeg:", {
1040
+ logger5.warn({
974
1041
  instructions: {
975
1042
  mac: "brew install ffmpeg",
976
1043
  ubuntu: "sudo apt-get install ffmpeg",
@@ -980,7 +1047,7 @@ class TranscribeManager {
980
1047
  requiredVersion: "4.0 or later",
981
1048
  requiredCodecs: ["pcm_s16le", "wav"],
982
1049
  timestamp: new Date().toISOString()
983
- });
1050
+ }, "FFmpeg is required but not properly installed. Please install FFmpeg:");
984
1051
  }
985
1052
  static getInstance(cacheDir) {
986
1053
  if (!TranscribeManager.instance) {
@@ -1000,18 +1067,18 @@ class TranscribeManager {
  try {
  const { stderr } = await execAsync2(`ffmpeg -y -loglevel error -i "${inputPath}" -acodec pcm_s16le -ar 16000 -ac 1 "${outputPath}"`);
  if (stderr) {
- logger5.warn("FFmpeg conversion error:", {
+ logger5.warn({
  stderr,
  inputPath,
  outputPath,
  timestamp: new Date().toISOString()
- });
+ }, "FFmpeg conversion error:");
  }
  if (!fs2.existsSync(outputPath)) {
  throw new Error("WAV file was not created successfully");
  }
  } catch (error) {
- logger5.error("Audio conversion failed:", {
+ logger5.error({
  error: error instanceof Error ? error.message : String(error),
  stack: error instanceof Error ? error.stack : undefined,
  command: `ffmpeg -y -loglevel error -i "${inputPath}" -acodec pcm_s16le -ar 16000 -ac 1 "${outputPath}"`,
@@ -1019,7 +1086,7 @@ class TranscribeManager {
  ffmpegVersion: this.ffmpegVersion,
  ffmpegPath: this.ffmpegPath,
  timestamp: new Date().toISOString()
- });
+ }, "Audio conversion failed:");
  throw new Error(`Failed to convert audio to WAV format: ${error instanceof Error ? error.message : String(error)}`);
  }
  }
@@ -1043,7 +1110,7 @@ class TranscribeManager {
  return tempWavFile;
  }
  } catch (probeError) {
- logger5.debug("FFprobe failed, continuing with conversion:", probeError);
+ logger5.debug({ error: probeError instanceof Error ? probeError.message : String(probeError) }, "FFprobe failed, continuing with conversion:");
  }
  }
  await this.convertToWav(tempInputFile, tempWavFile);
@@ -1052,12 +1119,12 @@ class TranscribeManager {
  }
  return tempWavFile;
  } catch (error) {
- logger5.error("Audio preprocessing failed:", {
+ logger5.error({
  error: error instanceof Error ? error.message : String(error),
  stack: error instanceof Error ? error.stack : undefined,
  ffmpegAvailable: this.ffmpegAvailable,
  timestamp: new Date().toISOString()
- });
+ }, "Audio preprocessing failed:");
  throw new Error(`Failed to preprocess audio: ${error instanceof Error ? error.message : String(error)}`);
  }
  }
@@ -1086,7 +1153,7 @@ class TranscribeManager {
  logger5.error("Whisper model not found. Please run: npx whisper-node download");
  throw new Error("Whisper model not found. Please install it with: npx whisper-node download");
  }
- logger5.error("Whisper transcription error:", whisperError);
+ logger5.error({ error: whisperError instanceof Error ? whisperError.message : String(whisperError) }, "Whisper transcription error:");
  throw whisperError;
  }
  if (fs2.existsSync(wavFile)) {
@@ -1102,18 +1169,18 @@ class TranscribeManager {
  return { text: "" };
  }
  const cleanText = segments.map((segment) => segment.speech?.trim() || "").filter((text) => text).join(" ");
- logger5.success("Transcription complete:", {
+ logger5.success({
  textLength: cleanText.length,
  segmentCount: segments.length,
  timestamp: new Date().toISOString()
- });
+ }, "Transcription complete:");
  return { text: cleanText };
  } catch (error) {
- logger5.error("Transcription failed:", {
+ logger5.error({
  error: error instanceof Error ? error.message : String(error),
  stack: error instanceof Error ? error.stack : undefined,
  ffmpegAvailable: this.ffmpegAvailable
- });
+ }, "Transcription failed:");
  throw error;
  }
  }
@@ -1125,7 +1192,6 @@ import path3 from "node:path";
  import { PassThrough, Readable } from "node:stream";
  import { logger as logger6 } from "@elizaos/core";
  import { pipeline } from "@huggingface/transformers";
- import { fetch as fetch2 } from "undici";
  function getWavHeader(audioLength, sampleRate, channelCount = 1, bitsPerSample = 16) {
  const wavHeader = Buffer.alloc(44);
  wavHeader.write("RIFF", 0);
@@ -1213,7 +1279,7 @@ class TTSManager {
  logger6.success("Default speaker embedding loaded from cache.");
  } else {
  logger6.info(`Downloading default speaker embedding from: ${speakerEmbeddingUrl}`);
- const response = await fetch2(speakerEmbeddingUrl);
+ const response = await fetch(speakerEmbeddingUrl);
  if (!response.ok) {
  throw new Error(`Failed to download speaker embedding: ${response.statusText}`);
  }
@@ -1232,10 +1298,10 @@ class TTSManager {
  logger6.success("TTS initialization complete (Transformers.js)");
  this.initialized = true;
  } catch (error) {
- logger6.error("TTS (Transformers.js) initialization failed:", {
+ logger6.error({
  error: error instanceof Error ? error.message : String(error),
  stack: error instanceof Error ? error.stack : undefined
- });
+ }, "TTS (Transformers.js) initialization failed:");
  this.initialized = false;
  this.synthesizer = null;
  this.defaultSpeakerEmbedding = null;
@@ -1252,9 +1318,7 @@ class TTSManager {
  if (!this.synthesizer) {
  throw new Error("TTS Manager not properly initialized.");
  }
- logger6.info("Starting speech generation with Transformers.js for text:", {
- text: `${text.substring(0, 50)}...`
- });
+ logger6.info({ text: `${text.substring(0, 50)}...` }, "Starting speech generation with Transformers.js for text:");
  const output = await this.synthesizer(text, {
  ...this.defaultSpeakerEmbedding && {
  speaker_embeddings: this.defaultSpeakerEmbedding
@@ -1262,10 +1326,7 @@ class TTSManager {
  });
  const audioFloat32 = output.audio;
  const samplingRate = output.sampling_rate;
- logger6.info("Raw audio data received from pipeline:", {
- samplingRate,
- length: audioFloat32.length
- });
+ logger6.info({ samplingRate, length: audioFloat32.length }, "Raw audio data received from pipeline:");
  if (!audioFloat32 || audioFloat32.length === 0) {
  throw new Error("TTS pipeline generated empty audio output.");
  }
@@ -1275,18 +1336,16 @@ class TTSManager {
  pcmData[i] = s < 0 ? s * 32768 : s * 32767;
  }
  const audioBuffer = Buffer.from(pcmData.buffer);
- logger6.info("Audio data converted to 16-bit PCM Buffer:", {
- byteLength: audioBuffer.length
- });
+ logger6.info({ byteLength: audioBuffer.length }, "Audio data converted to 16-bit PCM Buffer:");
  const audioStream = prependWavHeader(Readable.from(audioBuffer), audioBuffer.length, samplingRate, 1, 16);
  logger6.success("Speech generation complete (Transformers.js)");
  return audioStream;
  } catch (error) {
- logger6.error("Transformers.js speech generation failed:", {
+ logger6.error({
  error: error instanceof Error ? error.message : String(error),
  text: `${text.substring(0, 50)}...`,
  stack: error instanceof Error ? error.stack : undefined
- });
+ }, "Transformers.js speech generation failed:");
  throw error;
  }
  }
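The TTS path above converts the pipeline's Float32Array samples to 16-bit PCM with asymmetric scaling, 32768 for negative samples and 32767 for positive ones, so both ends of [-1, 1] land exactly on the int16 limits. A minimal sketch of that step in isolation (the clamp is an added safety net, not present in the bundle):

```ts
function floatToPcm16(audio: Float32Array): Int16Array {
  const pcm = new Int16Array(audio.length);
  for (let i = 0; i < audio.length; i++) {
    const s = Math.max(-1, Math.min(1, audio[i])); // clamp: assumption, see lead-in
    pcm[i] = s < 0 ? s * 32768 : s * 32767; // Int16Array assignment truncates the float
  }
  return pcm;
}

// floatToPcm16(new Float32Array([-1, 0, 1])) -> Int16Array [-32768, 0, 32767]
```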
@@ -1333,7 +1392,7 @@ class VisionManager {
  dtype: "fp32",
  useOnnx: true
  };
- if (platform === "darwin" && (arch === "arm64" || arch === "aarch64")) {
+ if (platform === "darwin" && arch === "arm64") {
  config = {
  device: "gpu",
  dtype: "fp16",
@@ -1414,11 +1473,11 @@ class VisionManager {
  this.model = model;
  logger7.success("Florence2 model loaded successfully");
  } catch (error) {
- logger7.error("Failed to load Florence2 model:", {
+ logger7.error({
  error: error instanceof Error ? error.message : String(error),
  stack: error instanceof Error ? error.stack : undefined,
  modelId: modelSpec.modelId
- });
+ }, "Failed to load Florence2 model:");
  throw error;
  }
  logger7.info("Loading vision tokenizer...");
@@ -1446,11 +1505,11 @@ class VisionManager {
  });
  logger7.success("Vision tokenizer loaded successfully");
  } catch (error) {
- logger7.error("Failed to load tokenizer:", {
+ logger7.error({
  error: error instanceof Error ? error.message : String(error),
  stack: error instanceof Error ? error.stack : undefined,
  modelId: modelSpec.modelId
- });
+ }, "Failed to load tokenizer:");
  throw error;
  }
  logger7.info("Loading vision processor...");
@@ -1589,6 +1648,7 @@ class VisionManager {
  }
 
  // index.ts
+ var DEFAULT_LOCAL_SYSTEM_PROMPT = "You are a helpful AI assistant. Respond to the current request only.";
  var wordsToPunish = [
  " please",
  " feel",
@@ -1639,6 +1699,84 @@ var wordsToPunish = [
  " Notably",
  " Therefore"
  ];
+ function estimateTokenCount(text) {
+ return text.length === 0 ? 0 : Math.ceil(text.length / 4);
+ }
+ function estimateUsage(prompt, response) {
+ const responseText = typeof response === "string" ? response : (() => {
+ try {
+ return JSON.stringify(response);
+ } catch {
+ return String(response);
+ }
+ })();
+ const promptTokens = estimateTokenCount(prompt);
+ const completionTokens = estimateTokenCount(responseText);
+ return {
+ promptTokens,
+ completionTokens,
+ totalTokens: promptTokens + completionTokens,
+ estimated: true
+ };
+ }
+ function estimateEmbeddingUsage(text) {
+ const promptTokens = estimateTokenCount(text);
+ return {
+ promptTokens,
+ completionTokens: 0,
+ totalTokens: promptTokens,
+ estimated: true
+ };
+ }
+ function stripThinkTags(text) {
+ return text.includes("<think>") ? text.replace(/<think>[\s\S]*?<\/think>\n?/g, "") : text;
+ }
+ function wantsNativeShape(params) {
+ if (params.tools && params.tools.length > 0)
+ return true;
+ if (params.responseSchema)
+ return true;
+ if (params.toolChoice)
+ return true;
+ if (params.responseFormat && typeof params.responseFormat === "object" && params.responseFormat.type === "json_object") {
+ return true;
+ }
+ return false;
+ }
+ function buildNativeResult(result) {
+ return {
+ text: result.text,
+ toolCalls: result.toolCalls,
+ ...result.finishReason ? { finishReason: result.finishReason } : {}
+ };
+ }
+ function getLocalModelLabel(runtime, type) {
+ const config = validateConfig();
+ if (type === ModelType.TEXT_EMBEDDING) {
+ return String(runtime.getSetting("LOCAL_EMBEDDING_MODEL") || config.LOCAL_EMBEDDING_MODEL);
+ }
+ if (type === ModelType.TEXT_LARGE) {
+ return String(runtime.getSetting("LOCAL_LARGE_MODEL") || config.LOCAL_LARGE_MODEL);
+ }
+ return String(runtime.getSetting("LOCAL_SMALL_MODEL") || config.LOCAL_SMALL_MODEL);
+ }
+ function emitModelUsed(runtime, type, model, usage) {
+ runtime.emitEvent(EventType.MODEL_USED, {
+ runtime,
+ source: "local-ai",
+ provider: "local-ai",
+ type,
+ model,
+ modelName: model,
+ tokens: {
+ prompt: usage.promptTokens,
+ completion: usage.completionTokens,
+ total: usage.totalTokens,
+ ...usage.estimated ? { estimated: true } : {}
+ },
+ ...usage.estimated ? { usageEstimated: true } : {}
+ });
+ }
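These helpers approximate token usage at roughly four characters per token, rounding up, since this llama.cpp path does not surface exact tokenizer counts to the handlers. A worked example of the heuristic:

```ts
// A 100-char prompt and a 230-char completion estimate to 25 and 58 tokens:
// Math.ceil(100 / 4) === 25, Math.ceil(230 / 4) === 58.
const usage = estimateUsage("x".repeat(100), "y".repeat(230));
// -> { promptTokens: 25, completionTokens: 58, totalTokens: 83, estimated: true }
// The `estimated` flag is forwarded by emitModelUsed so consumers of the
// MODEL_USED event can tell this heuristic apart from exact tokenizer counts.
```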
 
  class LocalAIManager {
  static instance = null;
@@ -1647,9 +1785,7 @@ class LocalAIManager {
  mediumModel;
  embeddingModel;
  embeddingContext;
- ctx;
- sequence;
- chatSession;
+ chatSessions = new Map;
  modelPath;
  mediumModelPath;
  embeddingModelPath;
@@ -1876,80 +2012,120 @@ class LocalAIManager {
  }
  await this.embeddingInitializingPromise;
  }
- async generateText(params) {
- if (this.ctx) {
- this.ctx.dispose();
- this.ctx = undefined;
+ async getOrCreateChatSession(modelType, systemPrompt) {
+ const existing = this.chatSessions.get(modelType);
+ if (existing && existing.systemPrompt === systemPrompt) {
+ return existing;
  }
- await this.initializeEnvironment();
- logger8.info("Generating text with model:", params.modelType);
- if (params.modelType === ModelType.TEXT_LARGE) {
+ if (existing) {
+ try {
+ existing.context.dispose();
+ } catch (err) {
+ logger8.warn("[plugin-local-ai] Failed disposing stale context:", err);
+ }
+ this.chatSessions.delete(modelType);
+ }
+ let model;
+ let contextSize;
+ if (modelType === ModelType.TEXT_LARGE) {
  await this.lazyInitMediumModel();
- if (!this.mediumModel) {
+ if (!this.mediumModel)
  throw new Error("Medium model initialization failed");
- }
+ model = this.mediumModel;
+ contextSize = MODEL_SPECS.medium.contextSize;
  this.activeModelConfig = MODEL_SPECS.medium;
- const mediumModel = this.mediumModel;
- this.ctx = await mediumModel.createContext({
- contextSize: MODEL_SPECS.medium.contextSize
- });
  } else {
  await this.lazyInitSmallModel();
- if (!this.smallModel) {
+ if (!this.smallModel)
  throw new Error("Small model initialization failed");
- }
+ model = this.smallModel;
+ contextSize = MODEL_SPECS.small.contextSize;
  this.activeModelConfig = MODEL_SPECS.small;
- const smallModel = this.smallModel;
- this.ctx = await smallModel.createContext({
- contextSize: MODEL_SPECS.small.contextSize
- });
  }
- if (!this.ctx) {
- throw new Error("Failed to create prompt");
- }
- this.sequence = this.ctx.getSequence();
- this.chatSession = new LlamaChatSession({
- contextSequence: this.sequence
+ const context = await model.createContext({ contextSize });
+ const sequence = context.getSequence();
+ const session = new LlamaChatSession({
+ contextSequence: sequence,
+ systemPrompt
+ });
+ const entry = { context, session, systemPrompt };
+ this.chatSessions.set(modelType, entry);
+ logger8.info("[plugin-local-ai] Created new chat session", {
+ modelType,
+ contextSize,
+ systemPromptLength: systemPrompt.length
  });
- if (!this.chatSession) {
- throw new Error("Failed to create chat session");
- }
- logger8.info("Created new chat session for model:", params.modelType);
- logger8.info("Incoming prompt structure:", {
- contextLength: params.prompt.length,
- hasAction: params.prompt.includes("action"),
- runtime: !!params.runtime,
- stopSequences: params.stopSequences
+ return entry;
+ }
+ async generateText(params) {
+ await this.initializeEnvironment();
+ const modelType = params.modelType ?? ModelType.TEXT_SMALL;
+ const systemPrompt = params.system?.trim() || DEFAULT_LOCAL_SYSTEM_PROMPT;
+ const entry = await this.getOrCreateChatSession(modelType, systemPrompt);
+ const prompt = params.prompt ?? "";
+ if (!this.llama)
+ throw new Error("[plugin-local-ai] Llama runtime not initialized");
+ const plan = await planStructuredRequest({ llama: this.llama }, {
+ tools: params.tools,
+ responseSchema: params.responseSchema,
+ responseFormat: params.responseFormat
  });
- const tokens = await this.tokenizerManager.encode(params.prompt, this.activeModelConfig);
- logger8.info("Input tokens:", { count: tokens.length });
- const systemMessage = "You are a helpful AI assistant. Respond to the current request only.";
- await this.chatSession.prompt(systemMessage, {
- maxTokens: 1,
- temperature: 0
+ const usedTokensBefore = entry.session.sequence?.contextTokens?.length ?? 0;
+ logger8.info("[plugin-local-ai] generateText", {
+ modelType,
+ kind: plan.kind,
+ promptLength: prompt.length,
+ cachedPrefixTokens: usedTokensBefore
  });
- let response = await this.chatSession.prompt(params.prompt, {
- maxTokens: 8192,
- temperature: 0.7,
- topP: 0.9,
+ const punishModel = modelType === ModelType.TEXT_LARGE ? this.mediumModel : this.smallModel;
+ const baseOptions = {
+ maxTokens: params.maxTokens ?? 8192,
+ temperature: params.temperature ?? 0.7,
+ topP: params.topP ?? 0.9,
  repeatPenalty: {
- punishTokensFilter: () => this.smallModel ? this.smallModel.tokenize(wordsToPunish.join(" ")) : [],
+ punishTokensFilter: () => punishModel ? punishModel.tokenize(wordsToPunish.join(" ")) : [],
  penalty: 1.2,
  frequencyPenalty: 0.7,
  presencePenalty: 0.7
  }
- });
- logger8.info("Raw response structure:", {
- responseLength: response.length,
- hasAction: response.includes("action"),
- hasThinkTag: response.includes("<think>")
- });
- if (response.includes("<think>")) {
- logger8.info("Cleaning think tags from response");
- response = response.replace(/<think>[\s\S]*?<\/think>\n?/g, "");
- logger8.info("Think tags removed from response");
+ };
+ if (plan.kind === "tools") {
+ const meta = await entry.session.promptWithMeta(prompt, {
+ ...baseOptions,
+ functions: plan.functions
+ });
+ const toolCalls = extractToolCalls(meta.response);
+ const text2 = stripThinkTags(meta.responseText);
+ const usedTokensAfter2 = entry.session.sequence?.contextTokens?.length ?? 0;
+ logger8.info("[plugin-local-ai] tool-call response", {
+ toolCallCount: toolCalls.length,
+ textLength: text2.length,
+ cacheGrewBy: usedTokensAfter2 - usedTokensBefore
+ });
+ return { text: text2, toolCalls, finishReason: meta.stopReason };
  }
- return response;
+ if (plan.kind === "schema" || plan.kind === "json_object") {
+ const meta = await entry.session.promptWithMeta(prompt, {
+ ...baseOptions,
+ grammar: plan.grammar
+ });
+ const text2 = stripThinkTags(meta.responseText);
+ const usedTokensAfter2 = entry.session.sequence?.contextTokens?.length ?? 0;
+ logger8.info("[plugin-local-ai] structured response", {
+ kind: plan.kind,
+ textLength: text2.length,
+ cacheGrewBy: usedTokensAfter2 - usedTokensBefore
+ });
+ return { text: text2, toolCalls: [], finishReason: meta.stopReason };
+ }
+ const responseText = await entry.session.prompt(prompt, baseOptions);
+ const text = stripThinkTags(responseText);
+ const usedTokensAfter = entry.session.sequence?.contextTokens?.length ?? 0;
+ logger8.info("[plugin-local-ai] text response", {
+ textLength: text.length,
+ cacheGrewBy: usedTokensAfter - usedTokensBefore
+ });
+ return { text, toolCalls: [], finishReason: undefined };
  }
  async describeImage(imageData, mimeType) {
  await this.lazyInitVision();
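generateText now keys one cached chat session per model type and reuses it while the system prompt is unchanged, so successive calls share the session's KV-cache prefix; callers always get a `{ text, toolCalls, finishReason }` object rather than a bare string. A hedged sketch of that contract (names mirror the bundle; runtime wiring omitted):

```ts
const first = await localAIManager.generateText({
  modelType: ModelType.TEXT_SMALL,
  system: "You are a terse assistant.",
  prompt: "Name one prime number.",
});
// first.text holds the reply; first.toolCalls is [] for a plain-text plan.

const second = await localAIManager.generateText({
  modelType: ModelType.TEXT_SMALL,
  system: "You are a terse assistant.", // same system prompt, so the cached
  prompt: "Name another.",              // session and its KV prefix are reused
});
// A different `system` value disposes the stale context and creates a fresh
// session, which is why cachedPrefixTokens in the logs drops back to 0 then.
```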
@@ -1995,11 +2171,6 @@ class LocalAIManager {
  vocabOnly: false
  });
  this.smallModel = smallModel;
- const ctx = await smallModel.createContext({
- contextSize: MODEL_SPECS.small.contextSize
- });
- this.ctx = ctx;
- this.sequence = undefined;
  this.smallModelInitialized = true;
  logger8.info("Small model initialized successfully");
  })();
@@ -2089,7 +2260,7 @@ class LocalAIManager {
  var localAIManager = LocalAIManager.getInstance();
  var localAiPlugin = {
  name: "local-ai",
- description: "Local AI plugin using LLaMA models",
+ description: "Local AI plugin using Eliza-1 GGUF models",
  async init(_config, _runtime) {
  logger8.info("\uD83D\uDE80 Initializing Local AI plugin...");
  await localAIManager.initializeEnvironment();
@@ -2100,7 +2271,7 @@ var localAiPlugin = {
  logger8.warn("- LOCAL_SMALL_MODEL: Path to small language model file");
  logger8.warn("- LOCAL_LARGE_MODEL: Path to large language model file");
  logger8.warn("- LOCAL_EMBEDDING_MODEL: Path to embedding model file");
- logger8.warn("Example: LOCAL_SMALL_MODEL=llama-3.2-1b-instruct-q8_0.gguf");
+ logger8.warn("Example: LOCAL_SMALL_MODEL=text/eliza-1-mobile-1_7b-32k.gguf");
  }
  const modelsDir = config.MODELS_DIR || path5.join(os3.homedir(), ".eliza", "models");
  if (!fs5.existsSync(modelsDir)) {
@@ -2139,247 +2310,33 @@ var localAiPlugin = {
  logger8.info("\uD83D\uDCA1 Models will be loaded on-demand when first used");
  },
  models: {
- [ModelType.TEXT_SMALL]: async (runtime, { prompt, stopSequences = [] }) => {
+ [ModelType.TEXT_SMALL]: async (runtime, params) => {
  await localAIManager.initializeEnvironment();
- return await localAIManager.generateText({
- prompt,
- stopSequences,
- runtime,
+ const result = await localAIManager.generateText({
+ ...params,
  modelType: ModelType.TEXT_SMALL
  });
+ emitModelUsed(runtime, ModelType.TEXT_SMALL, getLocalModelLabel(runtime, ModelType.TEXT_SMALL), estimateUsage(params.prompt ?? "", result.text));
+ return wantsNativeShape(params) ? buildNativeResult(result) : result.text;
  },
- [ModelType.TEXT_LARGE]: async (runtime, { prompt, stopSequences = [] }) => {
+ [ModelType.TEXT_LARGE]: async (runtime, params) => {
  await localAIManager.initializeEnvironment();
- return await localAIManager.generateText({
- prompt,
- stopSequences,
- runtime,
+ const result = await localAIManager.generateText({
+ ...params,
  modelType: ModelType.TEXT_LARGE
  });
+ emitModelUsed(runtime, ModelType.TEXT_LARGE, getLocalModelLabel(runtime, ModelType.TEXT_LARGE), estimateUsage(params.prompt ?? "", result.text));
+ return wantsNativeShape(params) ? buildNativeResult(result) : result.text;
  },
- [ModelType.TEXT_EMBEDDING]: async (_runtime, params) => {
+ [ModelType.TEXT_EMBEDDING]: async (runtime, params) => {
  const text = params?.text;
  if (!text) {
  logger8.debug("Null or empty text input for embedding, returning zero vector");
- return new Array(384).fill(0);
- }
- return await localAIManager.generateEmbedding(text);
- },
- [ModelType.OBJECT_SMALL]: async (runtime, params) => {
- await localAIManager.initializeEnvironment();
- logger8.info("OBJECT_SMALL handler - Processing request:", {
- prompt: params.prompt,
- hasSchema: !!params.schema,
- temperature: params.temperature
- });
- let schemaHint = "";
- if (params.schema) {
- const schemaKeys = Object.keys(params.schema);
- schemaHint = schemaKeys.map((key) => `<${key}>value</${key}>`).join(`
- `);
- }
- const xmlPrompt = `${params.prompt}
-
- Respond using XML format wrapped in <response> tags. ${schemaHint ? `Include these fields:
- ${schemaHint}` : ""}
-
- IMPORTANT: If your response contains code, wrap code blocks in CDATA sections like this:
- <code><![CDATA[
- your code here
- ]]></code>
-
- Example response format:
- <response>
- <thought>Your reasoning here</thought>
- <text>Your response text here</text>
- </response>`;
- const textResponse = await localAIManager.generateText({
- prompt: xmlPrompt,
- stopSequences: params.stopSequences,
- runtime,
- modelType: ModelType.TEXT_SMALL
- });
- try {
- logger8.debug("Raw model response:", textResponse.substring(0, 500));
- const parsedXml = parseKeyValueXml(textResponse);
- if (parsedXml) {
- logger8.debug("Parsed XML result:", parsedXml);
- if (params.schema) {
- for (const key of Object.keys(params.schema)) {
- if (!(key in parsedXml)) {
- parsedXml[key] = null;
- }
- }
- }
- return parsedXml;
- }
- logger8.warn("parseKeyValueXml returned null, attempting manual extraction");
- const result = {};
- const extractTag = (text2, tagName) => {
- const cdataPattern = new RegExp(`<${tagName}>\\s*<!\\[CDATA\\[([\\s\\S]*?)\\]\\]>\\s*</${tagName}>`, "i");
- const cdataMatch = text2.match(cdataPattern);
- if (cdataMatch) {
- return cdataMatch[1];
- }
- const startTag = `<${tagName}>`;
- const endTag = `</${tagName}>`;
- const startIdx = text2.indexOf(startTag);
- if (startIdx === -1)
- return null;
- let depth = 1;
- let searchStart = startIdx + startTag.length;
- while (depth > 0 && searchStart < text2.length) {
- const nextOpen = text2.indexOf(startTag, searchStart);
- const nextClose = text2.indexOf(endTag, searchStart);
- if (nextClose === -1)
- break;
- if (nextOpen !== -1 && nextOpen < nextClose) {
- depth++;
- searchStart = nextOpen + startTag.length;
- } else {
- depth--;
- if (depth === 0) {
- return text2.slice(startIdx + startTag.length, nextClose).trim();
- }
- searchStart = nextClose + endTag.length;
- }
- }
- return null;
- };
- const thought = extractTag(textResponse, "thought");
- const text = extractTag(textResponse, "text");
- const code = extractTag(textResponse, "code");
- if (thought)
- result.thought = thought;
- if (text)
- result.text = text;
- if (code)
- result.code = code;
- if (params.schema) {
- for (const key of Object.keys(params.schema)) {
- if (!(key in result)) {
- const value = extractTag(textResponse, key);
- result[key] = value;
- }
- }
- }
- if (Object.keys(result).length > 0) {
- return result;
- }
- throw new Error("Could not parse XML response");
- } catch (parseError) {
- logger8.error("Failed to parse XML:", parseError);
- logger8.error("Raw response:", textResponse);
- throw new Error("Invalid XML returned from model");
- }
- },
- [ModelType.OBJECT_LARGE]: async (runtime, params) => {
- await localAIManager.initializeEnvironment();
- logger8.info("OBJECT_LARGE handler - Processing request:", {
- prompt: params.prompt,
- hasSchema: !!params.schema,
- temperature: params.temperature
- });
- let schemaHint = "";
- if (params.schema) {
- const schemaKeys = Object.keys(params.schema);
- schemaHint = schemaKeys.map((key) => `<${key}>value</${key}>`).join(`
- `);
- }
- const xmlPrompt = `${params.prompt}
-
- Respond using XML format wrapped in <response> tags. ${schemaHint ? `Include these fields:
- ${schemaHint}` : ""}
-
- IMPORTANT: If your response contains code, wrap code blocks in CDATA sections like this:
- <code><![CDATA[
- your code here
- ]]></code>
-
- Example response format:
- <response>
- <thought>Your reasoning here</thought>
- <text>Your response text here</text>
- </response>`;
- const textResponse = await localAIManager.generateText({
- prompt: xmlPrompt,
- stopSequences: params.stopSequences,
- runtime,
- modelType: ModelType.TEXT_LARGE
- });
- try {
- logger8.debug("Raw model response:", textResponse.substring(0, 500));
- const parsedXml = parseKeyValueXml(textResponse);
- if (parsedXml) {
- logger8.debug("Parsed XML result:", parsedXml);
- if (params.schema) {
- for (const key of Object.keys(params.schema)) {
- if (!(key in parsedXml)) {
- parsedXml[key] = null;
- }
- }
- }
- return parsedXml;
- }
- logger8.warn("parseKeyValueXml returned null, attempting manual extraction");
- const result = {};
- const extractTag = (text2, tagName) => {
- const cdataPattern = new RegExp(`<${tagName}>\\s*<!\\[CDATA\\[([\\s\\S]*?)\\]\\]>\\s*</${tagName}>`, "i");
- const cdataMatch = text2.match(cdataPattern);
- if (cdataMatch) {
- return cdataMatch[1];
- }
- const startTag = `<${tagName}>`;
- const endTag = `</${tagName}>`;
- const startIdx = text2.indexOf(startTag);
- if (startIdx === -1)
- return null;
- let depth = 1;
- let searchStart = startIdx + startTag.length;
- while (depth > 0 && searchStart < text2.length) {
- const nextOpen = text2.indexOf(startTag, searchStart);
- const nextClose = text2.indexOf(endTag, searchStart);
- if (nextClose === -1)
- break;
- if (nextOpen !== -1 && nextOpen < nextClose) {
- depth++;
- searchStart = nextOpen + startTag.length;
- } else {
- depth--;
- if (depth === 0) {
- return text2.slice(startIdx + startTag.length, nextClose).trim();
- }
- searchStart = nextClose + endTag.length;
- }
- }
- return null;
- };
- const thought = extractTag(textResponse, "thought");
- const text = extractTag(textResponse, "text");
- const code = extractTag(textResponse, "code");
- if (thought)
- result.thought = thought;
- if (text)
- result.text = text;
- if (code)
- result.code = code;
- if (params.schema) {
- for (const key of Object.keys(params.schema)) {
- if (!(key in result)) {
- const value = extractTag(textResponse, key);
- result[key] = value;
- }
- }
- }
- if (Object.keys(result).length > 0) {
- return result;
- }
- throw new Error("Could not parse XML response");
- } catch (parseError) {
- logger8.error("Failed to parse XML:", parseError);
- logger8.error("Raw response:", textResponse);
- throw new Error("Invalid XML returned from model");
+ return new Array(1024).fill(0);
  }
+ const embedding = await localAIManager.generateEmbedding(text);
+ emitModelUsed(runtime, ModelType.TEXT_EMBEDDING, getLocalModelLabel(runtime, ModelType.TEXT_EMBEDDING), estimateEmbeddingUsage(text));
+ return embedding;
  },
  [ModelType.TEXT_TOKENIZER_ENCODE]: async (_runtime, { text }) => {
  const manager = localAIManager.getTokenizerManager();
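The OBJECT_SMALL and OBJECT_LARGE handlers, with their XML prompt-and-parse fallbacks, are removed outright: structured output now rides through the TEXT handlers, where wantsNativeShape() decides between the native `{ text, toolCalls, finishReason }` result and a plain string. A hedged migration sketch (runtime.useModel is elizaOS's model dispatch, assumed here; the schema is illustrative):

```ts
// What previously went through OBJECT_SMALL's XML parsing is now a
// grammar-constrained TEXT_SMALL call via responseSchema.
const result = await runtime.useModel(ModelType.TEXT_SMALL, {
  prompt: "Summarize the weather in Paris as JSON.",
  responseSchema: {
    type: "object",
    properties: { thought: { type: "string" }, text: { type: "string" } },
    required: ["text"],
  },
});
// responseSchema makes wantsNativeShape() true, so `result` is the native
// shape and result.text is schema-valid JSON enforced by the GBNF grammar.
const parsed = JSON.parse(result.text);
```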
@@ -2666,10 +2623,9 @@ Example response format:
  }
  ]
  };
- var typescript_default = localAiPlugin;
  export {
  localAiPlugin,
- typescript_default as default
+ default2 as default
  };
 
- //# debugId=47689E660851F59964756E2164756E21
+ //# debugId=B1A63AD720108B0264756E2164756E21