cactus-react-native 0.2.2 → 0.2.4

Files changed (80)
  1. package/README.md +1 -1
  2. package/android/src/main/java/com/cactus/Cactus.java +35 -0
  3. package/android/src/main/java/com/cactus/LlamaContext.java +18 -1
  4. package/android/src/main/jni.cpp +11 -0
  5. package/android/src/main/jniLibs/arm64-v8a/libcactus.so +0 -0
  6. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8.so +0 -0
  7. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2.so +0 -0
  8. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod.so +0 -0
  9. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_dotprod_i8mm.so +0 -0
  10. package/android/src/main/jniLibs/arm64-v8a/libcactus_v8_2_i8mm.so +0 -0
  11. package/android/src/newarch/java/com/cactus/CactusModule.java +5 -0
  12. package/android/src/oldarch/java/com/cactus/CactusModule.java +5 -0
  13. package/ios/Cactus.mm +21 -0
  14. package/ios/CactusContext.h +1 -0
  15. package/ios/CactusContext.mm +4 -0
  16. package/ios/cactus.xcframework/ios-arm64/cactus.framework/Headers/cactus_ffi.h +0 -12
  17. package/ios/cactus.xcframework/ios-arm64/cactus.framework/cactus +0 -0
  18. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +0 -12
  19. package/ios/cactus.xcframework/ios-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  20. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/Headers/cactus_ffi.h +0 -12
  21. package/ios/cactus.xcframework/tvos-arm64/cactus.framework/cactus +0 -0
  22. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/Headers/cactus_ffi.h +0 -12
  23. package/ios/cactus.xcframework/tvos-arm64_x86_64-simulator/cactus.framework/cactus +0 -0
  24. package/lib/commonjs/NativeCactus.js +0 -1
  25. package/lib/commonjs/NativeCactus.js.map +1 -1
  26. package/lib/commonjs/chat.js +33 -0
  27. package/lib/commonjs/chat.js.map +1 -1
  28. package/lib/commonjs/index.js +0 -23
  29. package/lib/commonjs/index.js.map +1 -1
  30. package/lib/commonjs/lm.js +69 -32
  31. package/lib/commonjs/lm.js.map +1 -1
  32. package/lib/commonjs/tools.js +0 -7
  33. package/lib/commonjs/tools.js.map +1 -1
  34. package/lib/commonjs/tts.js +1 -4
  35. package/lib/commonjs/tts.js.map +1 -1
  36. package/lib/commonjs/vlm.js +25 -7
  37. package/lib/commonjs/vlm.js.map +1 -1
  38. package/lib/module/NativeCactus.js +0 -3
  39. package/lib/module/NativeCactus.js.map +1 -1
  40. package/lib/module/chat.js +31 -0
  41. package/lib/module/chat.js.map +1 -1
  42. package/lib/module/index.js +1 -10
  43. package/lib/module/index.js.map +1 -1
  44. package/lib/module/lm.js +70 -32
  45. package/lib/module/lm.js.map +1 -1
  46. package/lib/module/tools.js +0 -7
  47. package/lib/module/tools.js.map +1 -1
  48. package/lib/module/tts.js +1 -4
  49. package/lib/module/tts.js.map +1 -1
  50. package/lib/module/vlm.js +25 -7
  51. package/lib/module/vlm.js.map +1 -1
  52. package/lib/typescript/NativeCactus.d.ts +1 -142
  53. package/lib/typescript/NativeCactus.d.ts.map +1 -1
  54. package/lib/typescript/chat.d.ts +10 -0
  55. package/lib/typescript/chat.d.ts.map +1 -1
  56. package/lib/typescript/index.d.ts +2 -4
  57. package/lib/typescript/index.d.ts.map +1 -1
  58. package/lib/typescript/lm.d.ts +13 -7
  59. package/lib/typescript/lm.d.ts.map +1 -1
  60. package/lib/typescript/tools.d.ts.map +1 -1
  61. package/lib/typescript/tts.d.ts.map +1 -1
  62. package/lib/typescript/vlm.d.ts +3 -1
  63. package/lib/typescript/vlm.d.ts.map +1 -1
  64. package/package.json +1 -1
  65. package/src/NativeCactus.ts +6 -175
  66. package/src/chat.ts +42 -1
  67. package/src/index.ts +6 -17
  68. package/src/lm.ts +81 -26
  69. package/src/tools.ts +0 -5
  70. package/src/tts.ts +1 -4
  71. package/src/vlm.ts +35 -13
  72. package/android/src/main/jniLibs/x86_64/libcactus.so +0 -0
  73. package/android/src/main/jniLibs/x86_64/libcactus_x86_64.so +0 -0
  74. package/lib/commonjs/grammar.js +0 -560
  75. package/lib/commonjs/grammar.js.map +0 -1
  76. package/lib/module/grammar.js +0 -553
  77. package/lib/module/grammar.js.map +0 -1
  78. package/lib/typescript/grammar.d.ts +0 -37
  79. package/lib/typescript/grammar.d.ts.map +0 -1
  80. package/src/grammar.ts +0 -854
@@ -7,68 +7,28 @@ export type NativeEmbeddingParams = {
 
 export type NativeContextParams = {
   model: string
-  /**
-   * Chat template to override the default one from the model.
-   */
   chat_template?: string
-
   reasoning_format?: string
-
   is_model_asset?: boolean
   use_progress_callback?: boolean
-
   n_ctx?: number
   n_batch?: number
   n_ubatch?: number
-
   n_threads?: number
-
-  /**
-   * Number of layers to store in VRAM (Currently only for iOS)
-   */
   n_gpu_layers?: number
-  /**
-   * Skip GPU devices (iOS only)
-   */
   no_gpu_devices?: boolean
-
-  /**
-   * Enable flash attention, only recommended in GPU device (Experimental in llama.cpp)
-   */
   flash_attn?: boolean
-
-  /**
-   * KV cache data type for the K (Experimental in llama.cpp)
-   */
   cache_type_k?: string
-  /**
-   * KV cache data type for the V (Experimental in llama.cpp)
-   */
   cache_type_v?: string
-
   use_mlock?: boolean
   use_mmap?: boolean
   vocab_only?: boolean
-
-  /**
-   * Single LoRA adapter path
-   */
   lora?: string
-  /**
-   * Single LoRA adapter scale
-   */
   lora_scaled?: number
-  /**
-   * LoRA adapter list
-   */
   lora_list?: Array<{ path: string; scaled?: number }>
-
   rope_freq_base?: number
   rope_freq_scale?: number
-
   pooling_type?: number
-
-  // Embedding params
   embedding?: boolean
   embd_normalize?: number
 }
@@ -76,22 +36,9 @@ export type NativeContextParams = {
 export type NativeCompletionParams = {
   prompt: string
   n_threads?: number
-  /**
-   * JSON schema for convert to grammar for structured JSON output.
-   * It will be override by grammar if both are set.
-   */
   json_schema?: string
-  /**
-   * Set grammar for grammar-based sampling. Default: no grammar
-   */
   grammar?: string
-  /**
-   * Lazy grammar sampling, trigger by grammar_triggers. Default: false
-   */
   grammar_lazy?: boolean
-  /**
-   * Lazy grammar triggers. Default: []
-   */
   grammar_triggers?: Array<{
     type: number
     value: string
@@ -99,121 +46,32 @@ export type NativeCompletionParams = {
   }>
   preserved_tokens?: Array<string>
   chat_format?: number
-  /**
-   * Specify a JSON array of stopping strings.
-   * These words will not be included in the completion, so make sure to add them to the prompt for the next iteration. Default: `[]`
-   */
   stop?: Array<string>
-  /**
-   * Set the maximum number of tokens to predict when generating text.
-   * **Note:** May exceed the set limit slightly if the last token is a partial multibyte character.
-   * When 0,no tokens will be generated but the prompt is evaluated into the cache. Default: `-1`, where `-1` is infinity.
-   */
   n_predict?: number
-  /**
-   * If greater than 0, the response also contains the probabilities of top N tokens for each generated token given the sampling settings.
-   * Note that for temperature < 0 the tokens are sampled greedily but token probabilities are still being calculated via a simple softmax of the logits without considering any other sampler settings.
-   * Default: `0`
-   */
   n_probs?: number
-  /**
-   * Limit the next token selection to the K most probable tokens. Default: `40`
-   */
   top_k?: number
-  /**
-   * Limit the next token selection to a subset of tokens with a cumulative probability above a threshold P. Default: `0.95`
-   */
   top_p?: number
-  /**
-   * The minimum probability for a token to be considered, relative to the probability of the most likely token. Default: `0.05`
-   */
   min_p?: number
-  /**
-   * Set the chance for token removal via XTC sampler. Default: `0.0`, which is disabled.
-   */
   xtc_probability?: number
-  /**
-   * Set a minimum probability threshold for tokens to be removed via XTC sampler. Default: `0.1` (> `0.5` disables XTC)
-   */
   xtc_threshold?: number
-  /**
-   * Enable locally typical sampling with parameter p. Default: `1.0`, which is disabled.
-   */
   typical_p?: number
-  /**
-   * Adjust the randomness of the generated text. Default: `0.8`
-   */
   temperature?: number
-  /**
-   * Last n tokens to consider for penalizing repetition. Default: `64`, where `0` is disabled and `-1` is ctx-size.
-   */
   penalty_last_n?: number
-  /**
-   * Control the repetition of token sequences in the generated text. Default: `1.0`
-   */
   penalty_repeat?: number
-  /**
-   * Repeat alpha frequency penalty. Default: `0.0`, which is disabled.
-   */
   penalty_freq?: number
-  /**
-   * Repeat alpha presence penalty. Default: `0.0`, which is disabled.
-   */
   penalty_present?: number
-  /**
-   * Enable Mirostat sampling, controlling perplexity during text generation. Default: `0`, where `0` is disabled, `1` is Mirostat, and `2` is Mirostat 2.0.
-   */
   mirostat?: number
-  /**
-   * Set the Mirostat target entropy, parameter tau. Default: `5.0`
-   */
   mirostat_tau?: number
-  /**
-   * Set the Mirostat learning rate, parameter eta. Default: `0.1`
-   */
   mirostat_eta?: number
-  /**
-   * Set the DRY (Don't Repeat Yourself) repetition penalty multiplier. Default: `0.0`, which is disabled.
-   */
   dry_multiplier?: number
-  /**
-   * Set the DRY repetition penalty base value. Default: `1.75`
-   */
   dry_base?: number
-  /**
-   * Tokens that extend repetition beyond this receive exponentially increasing penalty: multiplier * base ^ (length of repeating sequence before token - allowed length). Default: `2`
-   */
   dry_allowed_length?: number
-  /**
-   * How many tokens to scan for repetitions. Default: `-1`, where `0` is disabled and `-1` is context size.
-   */
   dry_penalty_last_n?: number
-  /**
-   * Specify an array of sequence breakers for DRY sampling. Only a JSON array of strings is accepted. Default: `['\n', ':', '"', '*']`
-   */
   dry_sequence_breakers?: Array<string>
-  /**
-   * Top n sigma sampling as described in academic paper "Top-nσ: Not All Logits Are You Need" https://arxiv.org/pdf/2411.07641. Default: `-1.0` (Disabled)
-   */
   top_n_sigma?: number
-
-  /**
-   * Ignore end of stream token and continue generating. Default: `false`
-   */
   ignore_eos?: boolean
-  /**
-   * Modify the likelihood of a token appearing in the generated text completion.
-   * For example, use `"logit_bias": [[15043,1.0]]` to increase the likelihood of the token 'Hello', or `"logit_bias": [[15043,-1.0]]` to decrease its likelihood.
-   * Setting the value to false, `"logit_bias": [[15043,false]]` ensures that the token `Hello` is never produced. The tokens can also be represented as strings,
-   * e.g.`[["Hello, World!",-0.5]]` will reduce the likelihood of all the individual tokens that represent the string `Hello, World!`, just like the `presence_penalty` does.
-   * Default: `[]`
-   */
   logit_bias?: Array<Array<number>>
-  /**
-   * Set the random number generator (RNG) seed. Default: `-1`, which is a random seed.
-   */
   seed?: number
-
   emit_partial_completion: boolean
 }
 
@@ -239,17 +97,8 @@ export type NativeCompletionResultTimings = {
 }
 
 export type NativeCompletionResult = {
-  /**
-   * Original text (Ignored reasoning_content / tool_calls)
-   */
   text: string
-  /**
-   * Reasoning content (parsed for reasoning model)
-   */
   reasoning_content: string
-  /**
-   * Tool calls
-   */
   tool_calls: Array<{
     type: 'function'
     function: {
@@ -258,11 +107,7 @@ export type NativeCompletionResult = {
     }
     id?: string
   }>
-  /**
-   * Content text (Filtered text by reasoning_content / tool_calls)
-   */
   content: string
-
   tokens_predicted: number
   tokens_evaluated: number
   truncated: boolean
@@ -272,13 +117,11 @@ export type NativeCompletionResult = {
   stopping_word: string
   tokens_cached: number
   timings: NativeCompletionResultTimings
-
   completion_probabilities?: Array<NativeCompletionTokenProb>
 }
 
 export type NativeTokenizeResult = {
   tokens: Array<number>
-  // New multimodal support
   has_media?: boolean
   bitmap_hashes?: Array<string>
   chunk_pos?: Array<number>
@@ -289,9 +132,8 @@ export type NativeEmbeddingResult = {
   embedding: Array<number>
 }
 
-// New TTS/Audio types
 export type NativeTTSType = {
-  type: number // TTS_UNKNOWN = -1, TTS_OUTETTS_V0_2 = 1, TTS_OUTETTS_V0_3 = 2
+  type: number
 }
 
 export type NativeAudioCompletionResult = {
@@ -303,7 +145,7 @@ export type NativeAudioTokensResult = {
 }
 
 export type NativeAudioDecodeResult = {
-  audio_data: Array<number> // Float array of audio samples
+  audio_data: Array<number>
 }
 
 export type NativeDeviceInfo = {
@@ -312,6 +154,7 @@ export type NativeDeviceInfo = {
   make: string
   os: string
 }
+
 export type NativeLlamaContext = {
   contextId: number
   model: {
@@ -320,9 +163,8 @@ export type NativeLlamaContext = {
     nEmbd: number
     nParams: number
     chatTemplates: {
-      llamaChat: boolean // Chat template in llama-chat.cpp
+      llamaChat: boolean
       minja: {
-        // Chat template supported by minja.hpp
        default: boolean
        defaultCaps: {
          tools: boolean
@@ -344,11 +186,8 @@ export type NativeLlamaContext = {
      }
    }
    metadata: Object
-    isChatTemplateSupported: boolean // Deprecated
+    isChatTemplateSupported: boolean
  }
-  /**
-   * Loaded library name for Android
-   */
  androidLib?: string
  gpu: boolean
  reasonNoGPU: string
@@ -381,13 +220,11 @@ export type JinjaFormattedChatResult = {
 export interface Spec extends TurboModule {
   toggleNativeLog(enabled: boolean): Promise<void>
   setContextLimit(limit: number): Promise<void>
-
   modelInfo(path: string, skip?: string[]): Promise<Object>
   initContext(
     contextId: number,
     params: NativeContextParams,
   ): Promise<NativeLlamaContext>
-
   getFormattedChat(
     contextId: number,
     messages: string,
@@ -434,7 +271,6 @@ export interface Spec extends TurboModule {
     pl: number,
     nr: number,
   ): Promise<string>
-
   applyLoraAdapters(
     contextId: number,
     loraAdapters: Array<{ path: string; scaled?: number }>,
@@ -443,8 +279,6 @@ export interface Spec extends TurboModule {
   getLoadedLoraAdapters(
     contextId: number,
   ): Promise<Array<{ path: string; scaled?: number }>>
-
-  // New Multimodal Methods
   initMultimodal(
     contextId: number,
     mmprojPath: string,
@@ -454,8 +288,6 @@ export interface Spec extends TurboModule {
   isMultimodalSupportVision(contextId: number): Promise<boolean>
   isMultimodalSupportAudio(contextId: number): Promise<boolean>
   releaseMultimodal(contextId: number): Promise<void>
-
-  // New TTS/Vocoder Methods
   initVocoder(
     contextId: number,
     vocoderModelPath: string,
@@ -477,9 +309,8 @@ export interface Spec extends TurboModule {
   ): Promise<NativeAudioDecodeResult>
   getDeviceInfo(contextId: number): Promise<NativeDeviceInfo>
   releaseVocoder(contextId: number): Promise<void>
-
+  rewind(contextId: number): Promise<void>
   releaseContext(contextId: number): Promise<void>
-
   releaseAllContexts(): Promise<void>
 }
 
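Note: nearly everything removed in these hunks is inline JSDoc on NativeContextParams and NativeCompletionParams; the fields themselves are unchanged, and the notable functional addition is rewind(contextId) on the Spec interface. Below is a minimal, illustrative sketch of the surviving sampling fields, with defaults quoted from the deleted doc comments; it assumes NativeCompletionParams is still re-exported from the package root, which this diff does not show.

import type { NativeCompletionParams } from 'cactus-react-native'

// Defaults per the removed JSDoc: n_predict -1 (infinite), temperature 0.8,
// top_k 40, top_p 0.95, min_p 0.05, penalty_last_n 64, seed -1 (random).
const sampling: Partial<NativeCompletionParams> = {
  prompt: 'Hello',                // placeholder prompt
  n_predict: 128,
  temperature: 0.8,
  top_k: 40,
  top_p: 0.95,
  stop: ['</s>'],                 // illustrative stop string
  emit_partial_completion: false,
}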
package/src/chat.ts CHANGED
@@ -6,7 +6,7 @@ export type CactusMessagePart = {
 
 export type CactusOAICompatibleMessage = {
   role: string
-  content?: string | CactusMessagePart[] | any // any for check invalid content type
+  content?: string | CactusMessagePart[] | any
 }
 
 export function formatChat(
@@ -42,3 +42,44 @@ export function formatChat(
   })
   return chat
 }
+
+export interface ProcessedMessages {
+  newMessages: CactusOAICompatibleMessage[];
+  requiresReset: boolean;
+}
+
+export class ConversationHistoryManager {
+  private history: CactusOAICompatibleMessage[] = [];
+
+  public processNewMessages(
+    fullMessageHistory: CactusOAICompatibleMessage[]
+  ): ProcessedMessages {
+    let divergent = fullMessageHistory.length < this.history.length;
+    if (!divergent) {
+      for (let i = 0; i < this.history.length; i++) {
+        if (JSON.stringify(this.history[i]) !== JSON.stringify(fullMessageHistory[i])) {
+          divergent = true;
+          break;
+        }
+      }
+    }
+
+    if (divergent) {
+      return { newMessages: fullMessageHistory, requiresReset: true };
+    }
+
+    const newMessages = fullMessageHistory.slice(this.history.length);
+    return { newMessages, requiresReset: false };
+  }
+
+  public update(
+    newMessages: CactusOAICompatibleMessage[],
+    assistantResponse: CactusOAICompatibleMessage
+  ) {
+    this.history.push(...newMessages, assistantResponse);
+  }
+
+  public reset() {
+    this.history = [];
+  }
+}
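The new ConversationHistoryManager compares the full incoming message history against what it has already recorded and returns only the unseen suffix, or flags a reset when the two diverge. A short usage sketch (imported here via the in-package chat module; a root re-export is not shown in this diff, and CactusLM wires this up internally):

import { ConversationHistoryManager } from './chat'

const history = new ConversationHistoryManager()

// First turn: nothing recorded yet, so the whole history counts as new.
const turn1 = [{ role: 'user', content: 'Hi' }]
const { newMessages, requiresReset } = history.processNewMessages(turn1)
// newMessages === turn1, requiresReset === false

history.update(newMessages, { role: 'assistant', content: 'Hello!' })

// A history that no longer matches the recorded prefix flags a reset, which
// CactusLM translates into a native rewind() followed by history.reset().
const edited = history.processNewMessages([{ role: 'user', content: 'Hi there' }])
// edited.requiresReset === true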
package/src/index.ts CHANGED
@@ -1,6 +1,7 @@
 import { NativeEventEmitter, DeviceEventEmitter, Platform } from 'react-native'
 import type { DeviceEventEmitterStatic } from 'react-native'
 import Cactus from './NativeCactus'
+
 import type {
   NativeContextParams,
   NativeLlamaContext,
@@ -20,15 +21,13 @@ import type {
   NativeAudioDecodeResult,
   NativeDeviceInfo,
 } from './NativeCactus'
-import type {
-  SchemaGrammarConverterPropOrder,
-  SchemaGrammarConverterBuiltinRule,
-} from './grammar'
-import { SchemaGrammarConverter, convertJsonSchemaToGrammar } from './grammar'
+
+
 import type { CactusMessagePart, CactusOAICompatibleMessage } from './chat'
 import { formatChat } from './chat'
 import { Tools, parseAndExecuteTool } from './tools'
 import { Telemetry, type TelemetryParams } from './telemetry'
+
 export type {
   NativeContextParams,
   NativeLlamaContext,
@@ -45,13 +44,9 @@ export type {
   CactusOAICompatibleMessage,
   JinjaFormattedChatResult,
   NativeAudioDecodeResult,
-
-  // Deprecated
-  SchemaGrammarConverterPropOrder,
-  SchemaGrammarConverterBuiltinRule,
 }
 
-export { SchemaGrammarConverter, convertJsonSchemaToGrammar, Tools }
+export {Tools }
 export * from './remote'
 
 const EVENT_ON_INIT_CONTEXT_PROGRESS = '@Cactus_onInitContextProgress'
@@ -254,7 +249,6 @@ export class LlamaContext {
       return this.completion(params, callback);
     }
     if (recursionCount >= recursionLimit) {
-      // console.log(`Recursion limit reached (${recursionCount}/${recursionLimit}), returning default completion`)
       return this.completion({
         ...params,
         jinja: true,
@@ -264,14 +258,12 @@ export class LlamaContext {
 
     const messages = [...params.messages]; // avoid mutating the original messages
 
-    // console.log('Calling completion...')
     const result = await this.completion({
       ...params,
       messages: messages,
       jinja: true,
       tools: params.tools.getSchemas()
     }, callback);
-    // console.log('Completion result:', result);
 
     const {toolCalled, toolName, toolInput, toolOutput} =
       await parseAndExecuteTool(result, params.tools);
@@ -294,8 +286,6 @@ export class LlamaContext {
 
     messages.push(toolMessage);
 
-    // console.log('Messages being sent to next completion:', JSON.stringify(messages, null, 2));
-
     return await this.completionWithTools(
       {...params, messages: messages},
       callback,
@@ -471,8 +461,7 @@ export class LlamaContext {
   }
 
   async rewind(): Promise<void> {
-    // @ts-ignore
-    return (Cactus as any).rewind(this.id)
+    return Cactus.rewind(this.id)
   }
 }
 
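Two user-visible changes in this file: the grammar helpers (SchemaGrammarConverter, convertJsonSchemaToGrammar) are no longer exported, and LlamaContext.rewind() now calls the typed Cactus.rewind native method directly. A hedged migration sketch; it assumes the wrapper's completion params still forward json_schema to the native layer, where that field remains defined:

import type { LlamaContext } from 'cactus-react-native'

declare const context: LlamaContext // obtained from initLlama(...) elsewhere

// Structured output without the removed grammar helpers: pass the JSON schema
// through json_schema instead of pre-converting it to a GBNF grammar.
const result = await context.completion({
  messages: [{ role: 'user', content: 'List three fruits as a JSON array of strings.' }],
  json_schema: JSON.stringify({ type: 'array', items: { type: 'string' } }),
})

// New in 0.2.4: drop the cached conversation state without releasing the context.
await context.rewind()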
package/src/lm.ts CHANGED
@@ -9,8 +9,10 @@ import type {
   EmbeddingParams,
   NativeEmbeddingResult,
 } from './index'
+
 import { Telemetry } from './telemetry'
 import { setCactusToken, getVertexAIEmbedding } from './remote'
+import { ConversationHistoryManager } from './chat'
 
 interface CactusLMReturn {
   lm: CactusLM | null
@@ -18,51 +20,105 @@ interface CactusLMReturn {
 }
 
 export class CactusLM {
-  private context: LlamaContext
+  protected context: LlamaContext
+  protected conversationHistoryManager: ConversationHistoryManager
 
-  private constructor(context: LlamaContext) {
+  protected constructor(context: LlamaContext) {
     this.context = context
+    this.conversationHistoryManager = new ConversationHistoryManager()
   }
 
   static async init(
     params: ContextParams,
     onProgress?: (progress: number) => void,
     cactusToken?: string,
+    retryOptions?: { maxRetries?: number; delayMs?: number },
   ): Promise<CactusLMReturn> {
     if (cactusToken) {
       setCactusToken(cactusToken);
     }
 
-    // Avoid two back-to-back loads on devices where GPU off-load is unsupported (Android).
-    const needGpuAttempt = Platform.OS !== 'android' && (params.n_gpu_layers ?? 0) > 0
-    const configs = needGpuAttempt
-      ? [params, { ...params, n_gpu_layers: 0 }]
-      : [{ ...params, n_gpu_layers: 0 }]
+    const maxRetries = retryOptions?.maxRetries ?? 3;
+    const delayMs = retryOptions?.delayMs ?? 1000;
+
+    const configs = [
+      params,
+      { ...params, n_gpu_layers: 0 }
+    ];
+
+    const sleep = (ms: number): Promise<void> => {
+      return new Promise(resolve => {
+        const start = Date.now();
+        const wait = () => {
+          if (Date.now() - start >= ms) {
+            resolve();
+          } else {
+            Promise.resolve().then(wait);
+          }
+        };
+        wait();
+      });
+    };
 
     for (const config of configs) {
-      try {
-        const context = await initLlama(config, onProgress);
-        return { lm: new CactusLM(context), error: null };
-      } catch (e) {
-        Telemetry.error(e as Error, {
-          n_gpu_layers: config.n_gpu_layers ?? null,
-          n_ctx: config.n_ctx ?? null,
-          model: config.model ?? null,
-        });
-        if (configs.indexOf(config) === configs.length - 1) {
-          return { lm: null, error: e as Error };
+      let lastError: Error | null = null;
+
+      for (let attempt = 1; attempt <= maxRetries; attempt++) {
+        try {
+          const context = await initLlama(config, onProgress);
+          return { lm: new CactusLM(context), error: null };
+        } catch (e) {
+          lastError = e as Error;
+          const isLastConfig = configs.indexOf(config) === configs.length - 1;
+          const isLastAttempt = attempt === maxRetries;
+
+          Telemetry.error(e as Error, {
+            n_gpu_layers: config.n_gpu_layers ?? null,
+            n_ctx: config.n_ctx ?? null,
+            model: config.model ?? null,
+          });
+
+          if (!isLastAttempt) {
+            const delay = delayMs * Math.pow(2, attempt - 1);
+            await sleep(delay);
+          } else if (!isLastConfig) {
+            break;
+          }
         }
       }
+
+      if (configs.indexOf(config) === configs.length - 1 && lastError) {
+        return { lm: null, error: lastError };
+      }
     }
-    return { lm: null, error: new Error('Failed to initialize CactusLM') };
+    return { lm: null, error: new Error('Failed to initialize CactusLM after all retries') };
   }
 
-  async completion(
+  completion = async (
     messages: CactusOAICompatibleMessage[],
     params: CompletionParams = {},
     callback?: (data: any) => void,
-  ): Promise<NativeCompletionResult> {
-    return await this.context.completion({ messages, ...params }, callback);
+  ): Promise<NativeCompletionResult> => {
+    const { newMessages, requiresReset } =
+      this.conversationHistoryManager.processNewMessages(messages);
+
+    if (requiresReset) {
+      this.context?.rewind();
+      this.conversationHistoryManager.reset();
+    }
+
+    if (newMessages.length === 0) {
+      console.warn('No messages to complete!');
+    }
+
+    const result = await this.context.completion({ messages: newMessages, ...params }, callback);
+
+    this.conversationHistoryManager.update(newMessages, {
+      role: 'assistant',
+      content: result.content,
+    });
+
+    return result;
   }
 
   async embedding(
@@ -105,19 +161,18 @@ export class CactusLM {
     return result;
   }
 
-  private async _handleLocalEmbedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult> {
+  protected async _handleLocalEmbedding(text: string, params?: EmbeddingParams): Promise<NativeEmbeddingResult> {
     return this.context.embedding(text, params);
   }
 
-  private async _handleRemoteEmbedding(text: string): Promise<NativeEmbeddingResult> {
+  protected async _handleRemoteEmbedding(text: string): Promise<NativeEmbeddingResult> {
     const embeddingValues = await getVertexAIEmbedding(text);
     return {
       embedding: embeddingValues,
     };
   }
 
-  async rewind(): Promise<void> {
-    // @ts-ignore
+  rewind = async (): Promise<void> => {
     return this.context?.rewind()
   }
 
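CactusLM.init gains an optional fourth retryOptions argument (defaults: 3 attempts per config, 1000 ms base delay with exponential backoff), and completion is now stateful: it expects the full message history on every call and forwards only the previously unseen suffix to the native context. A usage sketch with placeholder model path and messages, assuming the root re-export of CactusLM as in earlier releases:

import { CactusLM } from 'cactus-react-native'

const { lm, error } = await CactusLM.init(
  { model: '/data/local/tmp/model.gguf', n_ctx: 2048 }, // placeholder path
  (progress) => console.log('load progress', progress),
  undefined,                        // cactusToken, unchanged third argument
  { maxRetries: 2, delayMs: 500 },  // new in 0.2.4
)
if (error || !lm) throw error ?? new Error('init failed')

// First turn: the whole history is new, so everything is sent.
const first = await lm.completion([{ role: 'user', content: 'Hi!' }])

// Second turn: resend the full history; only the final user message is new,
// so only it reaches the native context (the rest is tracked internally).
const second = await lm.completion([
  { role: 'user', content: 'Hi!' },
  { role: 'assistant', content: first.content },
  { role: 'user', content: 'And in one word?' },
])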
package/src/tools.ts CHANGED
@@ -56,22 +56,18 @@ export class Tools {
 
 export async function parseAndExecuteTool(result: NativeCompletionResult, tools: Tools): Promise<{toolCalled: boolean, toolName?: string, toolInput?: any, toolOutput?: any}> {
   if (!result.tool_calls || result.tool_calls.length === 0) {
-    // console.log('No tool calls found');
     return {toolCalled: false};
   }
 
   try {
     const toolCall = result.tool_calls[0];
     if (!toolCall) {
-      // console.log('No tool call found');
       return {toolCalled: false};
     }
     const toolName = toolCall.function.name;
     const toolInput = JSON.parse(toolCall.function.arguments);
 
-    // console.log('Calling tool:', toolName, toolInput);
     const toolOutput = await tools.execute(toolName, toolInput);
-    // console.log('Tool called result:', toolOutput);
 
     return {
       toolCalled: true,
@@ -80,7 +76,6 @@ export async function parseAndExecuteTool(result: NativeCompletionResult, tools:
       toolOutput
     };
   } catch (error) {
-    // console.error('Error parsing tool call:', error);
     return {toolCalled: false};
   }
 }
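Only debug logging is removed here, so failures in parseAndExecuteTool now surface solely through its return value. A small consumption sketch mirroring what completionWithTools does internally (in-package import paths; result and tools are assumed to come from earlier code):

import { parseAndExecuteTool, Tools } from './tools'
import type { NativeCompletionResult } from './NativeCactus'

declare const result: NativeCompletionResult // from a prior completion call
declare const tools: Tools                   // the registered tool set

const { toolCalled, toolName, toolInput, toolOutput } =
  await parseAndExecuteTool(result, tools)

if (toolCalled) {
  console.log(`tool ${toolName} called with`, toolInput, '->', toolOutput)
} else {
  // Previously this branch also logged to the console; it is now silent.
}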
package/src/tts.ts CHANGED
@@ -31,10 +31,7 @@ export class CactusTTS {
       speakerJsonStr,
       textToSpeak,
     )
-    // This part is simplified. In a real scenario, the tokens from
-    // the main model would be generated and passed to decodeAudioTokens.
-    // For now, we are assuming a direct path which may not be fully functional
-    // without the main model's token output for TTS.
+    // To-DO: Fix
     const tokens = (await this.context.tokenize(formatted_prompt)).tokens
     return decodeAudioTokens(this.context.id, tokens)
   }