npm - react-native-executorch - Versions diffs - 0.5.6 → 0.5.8 - Mend

react-native-executorch 0.5.6 → 0.5.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (237) hide show

package/common/rnexecutorch/threads/HighPerformanceThreadPool.h ADDED Viewed

@@ -0,0 +1,364 @@
+// HighPerformanceThreadPool.h
+#pragma once
+#include <algorithm>
+#include <atomic>
+#include <chrono>
+#include <condition_variable>
+#include <fstream>
+#include <functional>
+#include <future>
+#include <memory>
+#include <mutex>
+#include <pthread.h>
+#include <queue>
+#include <ranges>
+#include <sched.h>
+#include <sys/resource.h>
+#include <thread>
+#include <unistd.h>
+#include <vector>
+#include <executorch/extension/threadpool/cpuinfo_utils.h>
+#include <rnexecutorch/Log.h>
+#ifdef __APPLE__
+#include <sys/syscall.h>
+#endif
+#ifdef __ANDROID__
+#include <sys/types.h>
+#endif
+namespace rnexecutorch::threads {
+// Scheduling priority of a queued task. The pool's queue serves larger
+// enumerators first (REALTIME before HIGH before NORMAL before LOW).
+enum class Priority { LOW, NORMAL, HIGH, REALTIME };
+// Options applied to every worker thread of a HighPerformanceThreadPool.
+struct ThreadConfig {
+  // When true, workers are pinned to the detected performance cores. Only
+  // acted upon in the __ANDROID__ build and only if such cores were found.
+  bool pinToPerformanceCores{true};
+  // Each worker thread is named namePrefix followed by its worker index.
+  std::string namePrefix{"RN_ET_Worker"};
+};
+/// Fixed-size pool of worker threads that executes submitted callables in
+/// priority order (Priority first, then oldest enqueue time first).
+///
+/// On Android the constructor classifies CPU cores by maximum frequency and,
+/// when ThreadConfig::pinToPerformanceCores is set, pins every worker to the
+/// cores classified as performance cores.
+///
+/// Shutdown: after shutdown() no new task is accepted. Each worker finishes
+/// the task it is running, may pick up at most one further queued task, and
+/// then exits; tasks that are still queued afterwards are not executed.
+class HighPerformanceThreadPool {
+public:
+  /// Starts the worker threads.
+  /// @param numThreads requested worker count. On Android it is capped at the
+  ///        number of detected performance cores.
+  /// @param cfg thread naming and pinning options.
+  explicit HighPerformanceThreadPool(size_t numThreads = 1,
+                                     ThreadConfig cfg = ThreadConfig())
+      : config(std::move(cfg)) {
+#ifdef __ANDROID__
+    detectCPUTopology();
+    // Cap only when detection produced at least one performance core.
+    // Capping against an empty list would start zero workers, so submitted
+    // tasks would never run and execute() would block forever.
+    if (!performanceCores.empty()) {
+      numThreads = std::min(numThreads, performanceCores.size());
+    }
+#endif
+    for (size_t i = 0; i < numThreads; i++) {
+      workers.emplace_back(&HighPerformanceThreadPool::workerThread, this, i);
+    }
+    log(LOG_LEVEL::Debug, "Thread pool initialized with", numThreads,
+        "workers.");
+  }
+  /// Stops and joins all workers (see shutdown()).
+  ~HighPerformanceThreadPool() { shutdown(); }
+  /// Queues a task with Priority::NORMAL.
+  /// @return future holding the task's result or the exception it threw.
+  /// @throws std::runtime_error if the pool is shutting down.
+  template <typename Func, typename... Args>
+  auto submit(Func &&func, Args &&...args)
+      -> std::future<decltype(func(args...))> {
+    return submitWithPriority(Priority::NORMAL, std::forward<Func>(func),
+                              std::forward<Args>(args)...);
+  }
+  /// Queues a task with an explicit priority.
+  /// The callable and its arguments are bound with std::bind, i.e. stored by
+  /// value inside the task until it runs.
+  /// @return future holding the task's result or the exception it threw.
+  /// @throws std::runtime_error if the pool is shutting down.
+  template <typename Func, typename... Args>
+  auto submitWithPriority(Priority priority, Func &&func, Args &&...args)
+      -> std::future<decltype(func(args...))> {
+    using ReturnType = decltype(func(args...));
+    auto boundFunc =
+        std::bind(std::forward<Func>(func), std::forward<Args>(args)...);
+    auto task = std::make_unique<Task<decltype(boundFunc), ReturnType>>(
+        std::move(boundFunc));
+    // Take the future before ownership of the task moves into the queue.
+    auto future = task->getFuture();
+    {
+      std::scoped_lock lock(queueMutex);
+      if (!running) {
+        throw std::runtime_error("Thread pool is shutting down");
+      }
+      WorkItem item(std::move(task), priority,
+                    std::chrono::steady_clock::now());
+      taskQueue.push(std::move(item));
+    }
+    // Notify after releasing the lock so the woken worker can acquire it.
+    condition.notify_one();
+    return future;
+  }
+  /// Queues a NORMAL-priority task and blocks until it has finished.
+  /// @return the task's result; rethrows the exception the task threw.
+  template <typename Func, typename... Args>
+  auto execute(Func &&func, Args &&...args) -> decltype(func(args...)) {
+    auto future = submit(std::forward<Func>(func), std::forward<Args>(args)...);
+    return future.get();
+  }
+  /// Queues a NORMAL-priority task and discards its future; a result or an
+  /// exception produced by the task is therefore dropped.
+  template <typename Func, typename... Args>
+  void submitDetached(Func &&func, Args &&...args) {
+    submit(std::forward<Func>(func), std::forward<Args>(args)...);
+  }
+  /// Stops accepting tasks, wakes every worker and joins them. Calling it
+  /// again after the first call has no effect.
+  void shutdown() {
+    {
+      // `running` is part of the predicate the workers wait on, so it must be
+      // changed while holding queueMutex. Otherwise a worker that has just
+      // evaluated the predicate but is not blocked yet misses the
+      // notification below and the join() never returns.
+      std::scoped_lock lock(queueMutex);
+      if (!running.exchange(false)) {
+        return;
+      }
+    }
+    condition.notify_all();
+    for (auto &worker : workers) {
+      if (worker.joinable()) {
+        worker.join();
+      }
+    }
+  }
+private:
+  /// Type-erased unit of work stored in the queue.
+  class ITask {
+  public:
+    virtual ~ITask() = default;
+    virtual void execute() = 0;
+  };
+  /// Couples a callable with the promise that delivers its outcome.
+  template <typename Func, typename Result> class Task : public ITask {
+  public:
+    // Func is a class template parameter, so `Func &&` is a plain rvalue
+    // reference here and std::move (not std::forward) is the right cast.
+    explicit Task(Func &&f) : func(std::move(f)) {}
+    /// Runs the callable; its return value or exception ends up in the
+    /// promise, so nothing escapes to the worker thread.
+    void execute() override {
+      try {
+        if constexpr (std::is_void_v<Result>) {
+          func();
+          promise.set_value();
+        } else {
+          promise.set_value(func());
+        }
+      } catch (...) {
+        promise.set_exception(std::current_exception());
+      }
+    }
+    std::future<Result> getFuture() { return promise.get_future(); }
+  private:
+    Func func;
+    std::promise<Result> promise;
+  };
+  /// Queue entry: the task plus the keys used to order it.
+  class WorkItem {
+  public:
+    WorkItem() = default;
+    WorkItem(std::unique_ptr<ITask> task, Priority priority,
+             std::chrono::steady_clock::time_point enqueueTime)
+        : task(std::move(task)), priority(priority), enqueueTime(enqueueTime) {}
+    std::unique_ptr<ITask> task;
+    /// Ordering for std::priority_queue (a max-heap): a higher priority wins;
+    /// among equal priorities the item enqueued earlier wins.
+    bool operator<(const WorkItem &other) const {
+      return priority != other.priority ? priority < other.priority
+                                        : enqueueTime > other.enqueueTime;
+    }
+  private:
+    // Initialised so a default-constructed item never holds indeterminate
+    // values.
+    Priority priority{Priority::NORMAL};
+    std::chrono::steady_clock::time_point enqueueTime{};
+  };
+  // Thread pool state
+  std::vector<std::thread> workers;
+  std::priority_queue<WorkItem> taskQueue;
+  std::mutex queueMutex; // guards taskQueue and changes of `running`
+  std::condition_variable condition;
+  std::atomic<bool> running{true};
+  std::atomic<size_t> activeWorkers{0};
+  std::atomic<size_t> totalTasksProcessed{0};
+#ifdef __ANDROID__
+  // Core ids split by detectCPUTopology()
+  std::vector<int32_t> performanceCores;
+  std::vector<int32_t> efficiencyCores;
+#endif
+  // Configuration
+  ThreadConfig config;
+  /// Android only: reads every core's cpuinfo_max_freq from sysfs, sorts the
+  /// cores by that frequency (highest first) and files the first
+  /// get_num_performant_cores() of them under performanceCores, the rest
+  /// under efficiencyCores. Leaves both lists empty if nothing was readable.
+  void detectCPUTopology() {
+#ifdef __ANDROID__
+    struct CoreInfo {
+      int32_t id;
+      int64_t maxFreq;
+    };
+    std::vector<CoreInfo> cores;
+    const auto numOfCores = std::thread::hardware_concurrency();
+    for (int32_t i = 0; std::cmp_less(i, numOfCores); ++i) {
+      std::string path = "/sys/devices/system/cpu/cpu" + std::to_string(i) +
+                         "/cpufreq/cpuinfo_max_freq";
+      std::ifstream file(path);
+      // Stop at the first core whose frequency file cannot be opened.
+      if (!file.good()) {
+        break;
+      }
+      CoreInfo info{i, 0};
+      file >> info.maxFreq;
+      cores.push_back(info);
+    }
+    if (cores.empty()) {
+      log(LOG_LEVEL::Debug, "Could not detect CPU topology");
+      return;
+    }
+    // Sort by frequency, fastest first
+    std::ranges::sort(cores, [](const CoreInfo &a, const CoreInfo &b) {
+      return a.maxFreq > b.maxFreq;
+    });
+    // Classify cores
+    const auto numOfPerfCores =
+        ::executorch::extension::cpuinfo::get_num_performant_cores();
+    // cpuinfo_max_freq is reported in kHz
+    constexpr float kKiloToGigaRatio = 1e6;
+    // size_t index plus std::cmp_less avoids mixed signed/unsigned comparisons.
+    for (size_t i = 0; i < cores.size(); ++i) {
+      if (std::cmp_less(i, numOfPerfCores)) {
+        performanceCores.push_back(cores[i].id);
+        log(LOG_LEVEL::Debug, "Performance core:", cores[i].id, "(",
+            cores[i].maxFreq / kKiloToGigaRatio, "GHz)");
+      } else {
+        efficiencyCores.push_back(cores[i].id);
+        log(LOG_LEVEL::Debug, "Efficiency core:", cores[i].id, "(",
+            cores[i].maxFreq / kKiloToGigaRatio, "GHz)");
+      }
+    }
+#endif
+  }
+#ifdef __ANDROID__
+  inline uint64_t getCurrentThreadId() { return gettid(); }
+#endif
+  /// Names the calling thread; a no-op on platforms other than Android/Apple.
+  inline void setCurrentThreadName(const std::string &name) {
+#ifdef __ANDROID__
+    pthread_setname_np(pthread_self(), name.c_str());
+#elif defined(__APPLE__)
+    pthread_setname_np(name.c_str());
+#endif
+  }
+  /// Per-worker setup executed on the worker thread itself: name, optional
+  /// core pinning (Android) and priority.
+  void configureThread(uint32_t workerIndex) {
+    const std::string threadName =
+        config.namePrefix + std::to_string(workerIndex);
+    setCurrentThreadName(threadName);
+#ifdef __ANDROID__
+    if (config.pinToPerformanceCores && !performanceCores.empty()) {
+      setCPUAffinity();
+    }
+#endif
+    setThreadPriority();
+    log(LOG_LEVEL::Debug, "Worker", workerIndex,
+        "configured:", threadName.c_str());
+  }
+  /// Android only: restricts the calling thread to the performance cores.
+  /// A failure is logged and otherwise ignored.
+  void setCPUAffinity() {
+    // AFAIK it is not possible on iOS
+#ifdef __ANDROID__
+    if (performanceCores.empty()) {
+      log(LOG_LEVEL::Error, "No cores specified for affinity setting");
+      return;
+    }
+    cpu_set_t cpuset;
+    CPU_ZERO(&cpuset);
+    for (int32_t core : performanceCores) {
+      CPU_SET(core, &cpuset);
+    }
+    pid_t tid = getCurrentThreadId();
+    log(LOG_LEVEL::Debug, "Thread id", tid);
+    if (sched_setaffinity(tid, sizeof(cpuset), &cpuset) == 0) {
+      log(LOG_LEVEL::Debug, "Thread pinned to cores:", performanceCores);
+    } else {
+      log(LOG_LEVEL::Debug, "Failed to set CPU affinity (error:", errno,
+          "). Continuing without affinity.");
+    }
+#endif
+  }
+  /// Sets the nice value to 0 via setpriority(PRIO_PROCESS, 0, ...).
+  /// Authors' note: pthread_setschedparam is not usable on Android for
+  /// permission reasons and brought no visible improvement on iOS, so the
+  /// nice value is used instead.
+  void setThreadPriority() {
+    constexpr int nice_value = 0;
+    if (setpriority(PRIO_PROCESS, 0, nice_value) != 0) {
+      log(LOG_LEVEL::Debug, "Failed to set nice value");
+    } else {
+      log(LOG_LEVEL::Debug, "Set nice value", nice_value);
+    }
+  }
+  /// Runs one queue entry and maintains the activity counters.
+  /// Exceptions are logged and swallowed: this runs directly on the worker's
+  /// entry function, where an escaping exception calls std::terminate.
+  /// totalTasksProcessed only counts executions that did not throw here.
+  void processTask(const WorkItem &item) {
+    activeWorkers++;
+    bool succeeded = false;
+    try {
+      item.task->execute();
+      succeeded = true;
+    } catch (const std::exception &e) {
+      log(LOG_LEVEL::Error, "Task failed:", e.what());
+    } catch (...) {
+      log(LOG_LEVEL::Error, "Task failed: unknown exception");
+    }
+    activeWorkers--;
+    if (succeeded) {
+      totalTasksProcessed++;
+    }
+  }
+  /// Worker entry point: configures the thread, then pops and runs tasks
+  /// until the pool stops running.
+  void workerThread(int workerIndex) {
+    configureThread(workerIndex);
+    while (running) {
+      WorkItem item;
+      {
+        std::unique_lock<std::mutex> lock(queueMutex);
+        condition.wait(lock, [this] { return !taskQueue.empty() || !running; });
+        // The wait only returns with an empty queue when the pool is
+        // shutting down (`running` never switches back to true).
+        if (taskQueue.empty()) {
+          break;
+        }
+        // priority_queue::top() is const; the cast allows moving the
+        // move-only item out right before it is popped.
+        item = std::move(const_cast<WorkItem &>(taskQueue.top()));
+        taskQueue.pop();
+      }
+      // Run the task outside the lock so other workers can dequeue.
+      processTask(item);
+    }
+    log(LOG_LEVEL::Debug, "Worker", workerIndex, "shutting down");
+  }
+};
+} // namespace rnexecutorch::threads

package/common/rnexecutorch/threads/utils/ThreadUtils.h ADDED Viewed

@@ -0,0 +1,29 @@
+#pragma once
+#include <executorch/extension/threadpool/cpuinfo_utils.h>
+#include <executorch/extension/threadpool/threadpool.h>
+#include <rnexecutorch/Log.h>
+namespace rnexecutorch::threads::utils {
+/// Resizes ExecuTorch's global threadpool.
+///
+/// @param num_of_cores thread count to configure; 0 (the default) selects
+///        floor(performant_cores / 2) + 1.
+///
+/// Rationale for the default (authors' benchmarks): where possible at least
+/// two performant cores are left for other work, because using more of them
+/// lowered throughput. This resolves to 3 on older devices (e.g. Samsung S22)
+/// and to 4 on newer ones (e.g. OnePlus 12), which benchmarked best. iPhones
+/// usually have 2 performance cores.
+///
+/// NOTE(review): this calls `_unsafe_reset_threadpool`; presumably it must not
+/// run while the threadpool is in use - confirm against the ExecuTorch docs.
+///
+/// `inline` is required because the function is defined in a header: without
+/// it, including the header from more than one translation unit produces
+/// several external definitions and the link fails.
+inline void unsafeSetupThreadPool(uint32_t num_of_cores = 0) {
+  const auto num_of_perf_cores =
+      ::executorch::extension::cpuinfo::get_num_performant_cores();
+  log(LOG_LEVEL::Info, "Detected ", num_of_perf_cores, " performant cores");
+  const auto _num_of_cores =
+      num_of_cores ? num_of_cores
+                   : static_cast<uint32_t>(num_of_perf_cores / 2) + 1;
+  const auto threadpool = ::executorch::extension::threadpool::get_threadpool();
+  // Defensive: do not dereference a missing threadpool handle.
+  if (!threadpool) {
+    log(LOG_LEVEL::Error,
+        "ExecuTorch threadpool is unavailable, skipping configuration");
+    return;
+  }
+  threadpool->_unsafe_reset_threadpool(_num_of_cores);
+  log(LOG_LEVEL::Info, "Configuring xnnpack for",
+      threadpool->get_thread_count(), "threads");
+}
+} // namespace rnexecutorch::threads::utils

package/common/runner/runner.cpp CHANGED Viewed

@@ -222,9 +222,6 @@ Error Runner::generate(const std::string &prompt,
   RUNNER_ET_LOG(warmup, "RSS after prompt prefill: %f MiB (0 if unsupported)",
                 llm::get_rss_bytes() / 1024.0 / 1024.0);
-  if (cur_decoded != "�") {
-    wrapped_callback(cur_decoded);
-  }
   // start the main loop
   prompt_tokens_uint64.push_back(cur_token);
   int64_t num_generated_tokens = ET_UNWRAP(text_token_generator_->generate(
@@ -275,4 +272,13 @@ void Runner::stop() {
     ET_LOG(Error, "Token generator is not loaded, cannot stop");
   }
 }
+// Forwards the token-count interval to the token generator. When the
+// generator has not been created yet the call is logged and ignored instead
+// of dereferencing a null pointer.
+void Runner::set_count_interval(size_t count_interval) {
+  if (!text_token_generator_) {
+    ET_LOG(Error, "Token generator is not loaded, cannot set count interval");
+    return;
+  }
+  text_token_generator_->set_count_interval(count_interval);
+}
+// Forwards the time interval to the token generator. When the generator has
+// not been created yet the call is logged and ignored instead of
+// dereferencing a null pointer.
+void Runner::set_time_interval(size_t time_interval) {
+  if (!text_token_generator_) {
+    ET_LOG(Error, "Token generator is not loaded, cannot set time interval");
+    return;
+  }
+  text_token_generator_->set_time_interval(time_interval);
+}
 } // namespace example

package/common/runner/runner.h CHANGED Viewed

@@ -43,8 +43,12 @@ public:
                stats_callback = {},
            bool echo = true, bool warming = false);
   ::executorch::runtime::Error warmup(const std::string &prompt);
+  void set_count_interval(size_t count_interval);
+  void set_time_interval(size_t time_interval);
   void stop();
+  ::executorch::extension::llm::Stats stats_;
 private:
   float temperature_;
   bool shouldStop_{false};
@@ -59,9 +63,6 @@ private:
   std::unique_ptr<::executorch::extension::llm::TextPrefiller> text_prefiller_;
   std::unique_ptr<::executorch::extension::llm::TextTokenGenerator>
       text_token_generator_;
-  // stats
-  ::executorch::extension::llm::Stats stats_;
 };
 } // namespace example

package/common/runner/text_token_generator.h CHANGED Viewed

@@ -11,6 +11,7 @@
 #include "stats.h"
 #include "text_decoder_runner.h"
+#include <chrono>
 #include <executorch/extension/tensor/tensor.h>
 #include <iostream>
 #include <tokenizers-cpp/tokenizers_cpp.h>
@@ -27,7 +28,7 @@ public:
                      Stats *stats)
       : tokenizer_(tokenizer), text_decoder_runner_(text_decoder_runner),
         eos_ids_(std::move(eos_ids)), use_kv_cache_(use_kv_cache),
-        stats_(stats) {}
+        stats_(stats), timestamp_(std::chrono::high_resolution_clock::now()) {}
   /**
    * Token generation loop.
@@ -55,12 +56,8 @@ public:
     uint64_t prev_token;
     // cache to keep tokens if they were decoded into illegal character
     std::vector<int32_t> token_cache;
-    // if first token after prefill was part of multi-token character we need to
-    // add this to cache here
-    if (tokenizer_->Decode(
-            std::vector<int32_t>{static_cast<int32_t>(cur_token)}) == "�") {
-      token_cache.push_back(static_cast<int32_t>(cur_token));
-    }
+    // add first token after prefill to cache here
+    token_cache.push_back(static_cast<int32_t>(cur_token));
     if (use_kv_cache_) {
       // hard code these to size 1 as kv cache is locked to static size right
@@ -79,7 +76,7 @@ public:
         from_blob(&pos, {1}, executorch::aten::ScalarType::Long);
     should_stop_ = false;
+    timestamp_ = std::chrono::high_resolution_clock::now();
     // Generate our tokens
     while (pos < seq_len - 1) {
       // Run the model
@@ -112,9 +109,19 @@ public:
       token_cache.push_back(static_cast<int32_t>(cur_token));
       const std::string cache_decoded = tokenizer_->Decode(token_cache);
-      if (cache_decoded != "�" && cache_decoded != " �") {
+      const auto timeIntervalElapsed =
+          std::chrono::duration_cast<std::chrono::milliseconds>(
+              std::chrono::high_resolution_clock::now() - timestamp_) >
+          time_interval_;
+      const auto countIntervalElapsed = token_cache.size() > count_interval_;
+      const auto eos_reached = eos_ids_->contains(cur_token);
+      if (!cache_decoded.ends_with("�") &&
+          (countIntervalElapsed || timeIntervalElapsed || should_stop_ ||
+           eos_reached)) {
         token_callback(cache_decoded);
         token_cache.clear();
+        timestamp_ = std::chrono::high_resolution_clock::now();
       }
       if (should_stop_) {
@@ -122,7 +129,7 @@ public:
       }
       // data-dependent terminating condition: we have n_eos_ number of EOS
-      if (eos_ids_->find(cur_token) != eos_ids_->end()) {
+      if (eos_reached) {
         printf("\n");
         ET_LOG(Info, "\nReached to the end of generation");
         break;
@@ -136,11 +143,22 @@ public:
    */
   inline void stop() { should_stop_ = true; }
+  // Sets the token-count threshold: the generation loop treats the count
+  // interval as elapsed once the token cache holds more than this many tokens,
+  // which is one of the conditions for invoking the token callback.
+  void set_count_interval(size_t count_interval) {
+    count_interval_ = count_interval;
+  }
+  // Sets the time threshold, given in milliseconds: the generation loop
+  // treats the time interval as elapsed once more than this much time has
+  // passed since the stored timestamp, which is one of the conditions for
+  // invoking the token callback.
+  void set_time_interval(size_t time_interval) {
+    time_interval_ = std::chrono::milliseconds(time_interval);
+  }
 private:
   tokenizers::Tokenizer *tokenizer_;
   TextDecoderRunner *text_decoder_runner_;
   std::unique_ptr<std::unordered_set<uint64_t>> eos_ids_;
   bool use_kv_cache_;
+  size_t count_interval_{10};
+  std::chrono::milliseconds time_interval_{120};
+  std::chrono::high_resolution_clock::time_point timestamp_;
   // state machine
   bool should_stop_ = false;

package/lib/Error.js ADDED Viewed

@@ -0,0 +1,53 @@
+// Error-code registry with the layout of a compiled TypeScript numeric enum:
+// every entry is reachable by name (ETError.Ok === 0) and by code
+// (ETError[0] === "Ok").
+export var ETError;
+(function (registry) {
+    const entries = [
+        // React-native-ExecuTorch errors
+        ['UndefinedError', 101],
+        ['ModuleNotLoaded', 102],
+        ['FileWriteFailed', 103],
+        ['ModelGenerating', 104],
+        ['LanguageNotSupported', 105],
+        ['InvalidModelSource', 255],
+        // SpeechToText errors
+        ['MultilingualConfiguration', 160],
+        ['MissingDataChunk', 161],
+        ['StreamingNotStarted', 162],
+        // ExecuTorch mapped errors
+        // Based on: https://github.com/pytorch/executorch/blob/main/runtime/core/error.h
+        // System errors
+        ['Ok', 0],
+        ['Internal', 1],
+        ['InvalidState', 2],
+        ['EndOfMethod', 3],
+        // Logical errors
+        ['NotSupported', 16],
+        ['NotImplemented', 17],
+        ['InvalidArgument', 18],
+        ['InvalidType', 19],
+        ['OperatorMissing', 20],
+        // Resource errors
+        ['NotFound', 32],
+        ['MemoryAllocationFailed', 33],
+        ['AccessFailed', 34],
+        ['InvalidProgram', 35],
+        ['InvalidExternalData', 36],
+        ['OutOfResources', 37],
+        // Delegate errors
+        ['DelegateInvalidCompatibility', 48],
+        ['DelegateMemoryAllocationFailed', 49],
+        ['DelegateInvalidHandle', 50],
+    ];
+    // Register name -> code first, then code -> name, entry by entry.
+    for (const [name, code] of entries) {
+        registry[name] = code;
+        registry[code] = name;
+    }
+})(ETError || (ETError = {}));
+/**
+ * Turns an error code, or a thrown value whose message may start with an
+ * error code, into a readable string.
+ *
+ * - number: the matching ETError name, or "UndefinedError" if unknown.
+ * - anything else: the message (`e.message`, or `e` itself when a string was
+ *   thrown) is inspected for a leading integer. When one is present it is
+ *   replaced by the matching ETError name; otherwise "UndefinedError" is put
+ *   in front of the message. Name and remaining text are separated by a space.
+ *
+ * The leading-integer heuristic can misfire on messages that merely start
+ * with a number.
+ */
+export const getError = (e) => {
+    const undefinedName = ETError[ETError.UndefinedError];
+    if (typeof e === 'number') {
+        return e in ETError ? ETError[e] : undefinedName;
+    }
+    // Thrown values are not guaranteed to be Error objects: never dereference
+    // null/undefined and never append the literal text "undefined".
+    let rawMessage = '';
+    if (typeof e === 'string') {
+        rawMessage = e;
+    }
+    else if (e !== null && e !== undefined && e.message !== null && e.message !== undefined) {
+        rawMessage = String(e.message);
+    }
+    // Exactly the prefix parseInt(..., 10) consumes (whitespace, sign, digits),
+    // so the right number of characters is stripped even for " 18" or "007".
+    const codePrefix = /^\s*[+-]?\d+/.exec(rawMessage);
+    if (codePrefix === null) {
+        return rawMessage === '' ? undefinedName : undefinedName + ' ' + rawMessage;
+    }
+    const errorCode = parseInt(codePrefix[0], 10);
+    const name = errorCode in ETError ? ETError[errorCode] : undefinedName;
+    const rest = rawMessage.slice(codePrefix[0].length).trimStart();
+    return name + ' ' + rest;
+};

package/lib/ThreadPool.d.ts ADDED Viewed

@@ -0,0 +1,10 @@
+/** JavaScript wrapper around the host object installed as `global.NativeThreadPool`. */
+declare class NativeThreadPool {
+    /** Throws if `global.NativeThreadPool` has not been installed. */
+    constructor();
+    /** Calls the named native function; `args` is passed on JSON-serialized. */
+    executeNative(functionName: any, args: any): Promise<any>;
+    /** Shorthand for `executeNative('runInference', { prompt, ...options })`. */
+    runInference(prompt: any, options?: {}): Promise<any>;
+    /** Shorthand for `executeNative('processImage', { imagePath, ...options })`. */
+    processImage(imagePath: any, options?: {}): Promise<any>;
+    /** Shorthand for `executeNative('heavyComputation', data)`. */
+    heavyComputation(data: any): Promise<any>;
+    /** Returns whatever the native pool's `getStats()` returns. */
+    getStats(): any;
+}
+/** Shared instance created when the module is first imported. */
+declare const _default: NativeThreadPool;
+export default _default;

package/lib/ThreadPool.js ADDED Viewed

@@ -0,0 +1,28 @@
+// ThreadPool.js
+/**
+ * Thin wrapper around the host object exposed as `global.NativeThreadPool`.
+ * Construction fails when that object has not been installed.
+ */
+class NativeThreadPool {
+    constructor() {
+        if (!global.NativeThreadPool) {
+            throw new Error('NativeThreadPool not installed');
+        }
+        const nativePool = global.NativeThreadPool;
+        this.pool = nativePool;
+    }
+    /** Invokes a native function by name; the arguments travel as JSON text. */
+    async executeNative(functionName, args) {
+        const serializedArgs = JSON.stringify(args);
+        const result = await this.pool.executeNative(functionName, serializedArgs);
+        return result;
+    }
+    /** Convenience wrapper for the native 'runInference' function. */
+    async runInference(prompt, options = {}) {
+        const payload = { prompt, ...options };
+        const result = await this.executeNative('runInference', payload);
+        return result;
+    }
+    /** Convenience wrapper for the native 'processImage' function. */
+    async processImage(imagePath, options = {}) {
+        const payload = { imagePath, ...options };
+        const result = await this.executeNative('processImage', payload);
+        return result;
+    }
+    /** Convenience wrapper for the native 'heavyComputation' function. */
+    async heavyComputation(data) {
+        const result = await this.executeNative('heavyComputation', data);
+        return result;
+    }
+    /** Returns the statistics object reported by the native pool. */
+    getStats() {
+        const { pool } = this;
+        return pool.getStats();
+    }
+}
+export default new NativeThreadPool();

package/lib/common/Logger.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+/** Static logging helpers; see Logger.js for the implementation. */
+export declare class Logger {
+    /** Tag placed in front of every logged message. */
+    private static readonly PREFIX;
+    static log(...data: any[]): void;
+    static debug(...data: any[]): void;
+    static info(...data: any[]): void;
+    static warn(...data: any[]): void;
+    static error(...data: any[]): void;
+}

package/lib/common/Logger.js ADDED Viewed

@@ -0,0 +1,19 @@
+/* eslint-disable no-console */
+/**
+ * Console logger that puts the library tag in front of every message.
+ * Each method forwards to the `console` method of the same name.
+ */
+export class Logger {
+    // Tag passed as the first argument of every console call.
+    static PREFIX = '[React Native ExecuTorch]';
+    static log(...data) {
+        console.log(Logger.PREFIX, ...data);
+    }
+    static debug(...data) {
+        console.debug(Logger.PREFIX, ...data);
+    }
+    static info(...data) {
+        console.info(Logger.PREFIX, ...data);
+    }
+    static warn(...data) {
+        console.warn(Logger.PREFIX, ...data);
+    }
+    static error(...data) {
+        console.error(Logger.PREFIX, ...data);
+    }
+}

package/lib/constants/directories.js ADDED Viewed

@@ -0,0 +1,2 @@
+import { documentDirectory } from 'expo-file-system';
+export const RNEDirectory = `${documentDirectory}react-native-executorch/`;

package/lib/constants/llmDefaults.d.ts ADDED Viewed

@@ -0,0 +1,6 @@
+import { ChatConfig, Message } from '../types/llm';
+export declare const DEFAULT_SYSTEM_PROMPT = "You are a knowledgeable, efficient, and direct AI assistant. Provide concise answers, focusing on the key information needed. Offer suggestions tactfully when appropriate to improve outcomes. Engage in productive collaboration with the user. Don't return too much text.";
+export declare const DEFAULT_STRUCTURED_OUTPUT_PROMPT: (structuredOutputSchema: string) => string;
+export declare const DEFAULT_MESSAGE_HISTORY: Message[];
+export declare const DEFAULT_CONTEXT_WINDOW_LENGTH = 5;
+export declare const DEFAULT_CHAT_CONFIG: ChatConfig;

package/lib/constants/llmDefaults.js ADDED Viewed

@@ -0,0 +1,16 @@
+export const DEFAULT_SYSTEM_PROMPT = "You are a knowledgeable, efficient, and direct AI assistant. Provide concise answers, focusing on the key information needed. Offer suggestions tactfully when appropriate to improve outcomes. Engage in productive collaboration with the user. Don't return too much text.";
+export const DEFAULT_STRUCTURED_OUTPUT_PROMPT = (structuredOutputSchema) => `The output should be formatted as a JSON instance that conforms to the JSON schema below.
+As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
+the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.
+Here is the output schema:
+${structuredOutputSchema}
+`;
+// Default initial message history: empty.
+export const DEFAULT_MESSAGE_HISTORY = [];
+// Default context window length.
+// NOTE(review): the unit (messages vs. tokens) is not visible here - confirm.
+export const DEFAULT_CONTEXT_WINDOW_LENGTH = 5;
+// Default chat configuration built from the default constants of this module.
+export const DEFAULT_CHAT_CONFIG = {
+    systemPrompt: DEFAULT_SYSTEM_PROMPT,
+    initialMessageHistory: DEFAULT_MESSAGE_HISTORY,
+    contextWindowLength: DEFAULT_CONTEXT_WINDOW_LENGTH,
+};