@use-solace/openllm 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/elysia.ts ADDED
@@ -0,0 +1,101 @@
+ import type { Elysia } from "elysia";
+ import type {
+   APIConfig,
+   InferenceBackend,
+   InferenceRequest,
+   LatencyProfile,
+   ModelRegistryEntry,
+   ModelRegistryImpl,
+   RegisterModelRequest,
+ } from "./types.js";
+ import { createOpenLLMClient } from "./client.js";
+
+ export function openllm(config: APIConfig = {}) {
+   const plugin = (app: Elysia) => {
+     const enginePort = config.engine ?? 8080;
+     const prefix = config.prefix ?? "/openllm";
+     const enableRouter = config.modelrouter ?? false;
+     const registry = config.registry as ModelRegistryImpl | undefined;
+
+     const client = createOpenLLMClient({ engine: enginePort });
+
+     app.get(`${prefix}/health`, async () => {
+       return await client.health();
+     });
+
+     app.get(`${prefix}/models`, async () => {
+       const result = await client.listModels();
+       return result.models;
+     });
+
+     app.post(`${prefix}/models/register`, async ({ body }) => {
+       const req = body as RegisterModelRequest;
+       return await client.registerModel(req);
+     });
+
+     app.post(`${prefix}/models/load`, async ({ body }) => {
+       const req = body as { model_id: string };
+       return await client.loadModel(req);
+     });
+
+     app.post(`${prefix}/models/unload/:modelId`, async ({ params }) => {
+       const modelId = params.modelId as string;
+       return await client.unloadModel(modelId);
+     });
+
+     app.post(`${prefix}/inference`, async ({ body }) => {
+       const req = body as InferenceRequest;
+       return await client.inference(req);
+     });
+
+     // Constraint-based routing: resolve a chat model from the registry, then delegate to the engine.
+     if (enableRouter && registry) {
+       app.post(`${prefix}/router/chat`, async ({ body }) => {
+         const request = body as {
+           prompt: string;
+           options?: {
+             model?: string;
+             latency?: string;
+             inference?: string;
+             minContext?: number;
+             max_tokens?: number;
+             temperature?: number;
+           };
+         };
+         const options = request.options ?? {};
+
+         let model: ModelRegistryEntry | undefined;
+         if (options.model) {
+           model = registry.get(options.model);
+           if (!model) {
+             throw new Error(`Model '${options.model}' not found in registry`);
+           }
+         } else {
+           model = registry.findOne({
+             capability: "chat",
+             latency: options.latency as LatencyProfile | undefined,
+             inference: options.inference as InferenceBackend | undefined,
+             minContext: options.minContext,
+           });
+
+           if (!model) {
+             throw new Error(
+               "No suitable model found for the given constraints",
+             );
+           }
+         }
+
+         return await client.inference({
+           model_id: model.id,
+           prompt: request.prompt,
+           max_tokens: options.max_tokens,
+           temperature: options.temperature,
+         });
+       });
+     }
+
+     return app;
+   };
+
+   return plugin;
+ }
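
The plugin above mounts engine proxy routes (health, model management, inference) under a configurable prefix, plus an optional constraint-based router. For reference, a minimal mounting sketch, assuming the public exports declared in index.ts and an Elysia 1.x app; the registry key, model id, and field values are hypothetical:

import { Elysia } from "elysia";
import { openllm, ModelRegistry } from "@use-solace/openllm";

// Hypothetical entry; the fields follow RegistryEntryInput in types.ts.
const registry = ModelRegistry({
  entries: {
    "llama3-8b": {
      id: "llama3:8b-instruct",
      inference: "ollama",
      context: 8192,
      capabilities: ["chat"],
      latency: "fast",
    },
  },
});

new Elysia()
  .use(openllm({ engine: 8080, modelrouter: true, registry }))
  .listen(3000);

With modelrouter enabled and a registry supplied, a POST to /openllm/router/chat with { "prompt": "...", "options": { "latency": "fast" } } resolves a model from the registry before delegating to the engine.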
package/src/index.ts ADDED
@@ -0,0 +1,59 @@
+ export type {
+   InferenceBackend,
+   ModelCapability,
+   LatencyProfile,
+   ModelRegistryEntry,
+   RegistryEntryInput,
+   ModelRegistryConfig,
+   HealthResponse,
+   ModelListResponse,
+   RegisterModelRequest,
+   RegisterModelResponse,
+   LoadModelRequest,
+   LoadModelResponse,
+   UnloadModelResponse,
+   InferenceRequest,
+   InferenceResponse,
+   StreamToken,
+   StreamCallback,
+   StreamCompleteCallback,
+   StreamErrorCallback,
+   StreamOptions,
+   OpenLLMConfig,
+   FindModelOptions,
+   APIConfig,
+   ModelRegistryInstance,
+   ModelRegistryImpl,
+ } from "./types.js";
+
+ export {
+   OpenLLMError,
+   ModelNotFoundError,
+   ModelNotLoadedError,
+   InferenceError,
+ } from "./types.js";
+
+ export {
+   ModelRegistry,
+ } from "./registry.js";
+
+ export type { ModelRegistryImpl as ModelRegistryType } from "./registry.js";
+
+ export {
+   OpenLLMClient,
+   createOpenLLMClient,
+ } from "./client.js";
+
+ export { openllm } from "./elysia.js";
+
+ export const openllmAPI = {
+   start: (config: import("./types.js").APIConfig = {}) => {
+     const port = config.engine ?? 4292;
+     return {
+       start: (apiPort: number) => {
+         console.log(`Starting OpenLLM API on port ${apiPort}, connected to engine on port ${port}`);
+         console.log("Note: This is a client library. To start the actual server, use the Elysia plugin.");
+       },
+     };
+   },
+ };
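
index.ts re-exports the client, but client.ts itself is not part of this diff, so the sketch below assumes only the client methods the Elysia plugin calls and the response shapes declared in types.ts; the model id is hypothetical:

import { createOpenLLMClient } from "@use-solace/openllm";

// "engine" is the port (or host string) of the inference engine.
const client = createOpenLLMClient({ engine: 8080 });

const health = await client.health();         // HealthResponse
const { models } = await client.listModels(); // ModelListResponse
console.log(health.status, models.length);

const reply = await client.inference({
  model_id: "llama3-8b",
  prompt: "Say hello.",
  max_tokens: 128,
});
console.log(reply.text, reply.finish_reason);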
package/src/registry.ts ADDED
@@ -0,0 +1,123 @@
+ import type {
+   FindModelOptions,
+   ModelRegistryConfig,
+   ModelRegistryEntry,
+   RegistryEntryInput,
+ } from "./types.js";
+
+ export class ModelRegistryImpl {
+   private entries: Map<string, ModelRegistryEntry> = new Map();
+
+   constructor(config: ModelRegistryConfig) {
+     const entries = config.entries ?? {};
+     for (const [id, entry] of Object.entries(entries)) {
+       this.registerEntry(id, entry);
+     }
+   }
+
+   private registerEntry(id: string, entry: RegistryEntryInput): ModelRegistryEntry {
+     const model: ModelRegistryEntry = {
+       id,
+       name: entry.id,
+       inference: entry.inference,
+       context: entry.context,
+       quant: entry.quant,
+       capabilities: entry.capabilities,
+       latency: entry.latency,
+       size_bytes: 4_000_000_000, // placeholder: RegistryEntryInput carries no size information
+       loaded: false,
+       loaded_at: undefined,
+     };
+     this.entries.set(id, model);
+     return model;
+   }
+
+   list(): ModelRegistryEntry[] {
+     return Array.from(this.entries.values());
+   }
+
+   get(id: string): ModelRegistryEntry | undefined {
+     return this.entries.get(id);
+   }
+
+   find(options: FindModelOptions = {}): ModelRegistryEntry[] {
+     const results = this.list().filter((model) => {
+       if (options.capability && !model.capabilities.includes(options.capability)) {
+         return false;
+       }
+       if (options.latency && model.latency !== options.latency) {
+         return false;
+       }
+       if (options.inference && model.inference !== options.inference) {
+         return false;
+       }
+       if (options.minContext && model.context < options.minContext) {
+         return false;
+       }
+       if (options.loaded !== undefined && model.loaded !== options.loaded) {
+         return false;
+       }
+       return true;
+     });
+     return results;
+   }
+
+   findOne(options: FindModelOptions = {}): ModelRegistryEntry | undefined {
+     return this.find(options)[0];
+   }
+
+   has(id: string): boolean {
+     return this.entries.has(id);
+   }
+
+   count(): number {
+     return this.entries.size;
+   }
+
+   add(id: string, entry: RegistryEntryInput): ModelRegistryEntry {
+     if (this.entries.has(id)) {
+       throw new Error(`Model with id '${id}' already exists`);
+     }
+     return this.registerEntry(id, entry);
+   }
+
+   update(id: string, updates: Partial<RegistryEntryInput>): ModelRegistryEntry {
+     const existing = this.entries.get(id);
+     if (!existing) {
+       throw new Error(`Model with id '${id}' not found`);
+     }
+
+     const updated: ModelRegistryEntry = {
+       ...existing,
+       ...updates,
+       id,
+     };
+     this.entries.set(id, updated);
+     return updated;
+   }
+
+   remove(id: string): boolean {
+     return this.entries.delete(id);
+   }
+
+   clear(): void {
+     this.entries.clear();
+   }
+
+   toObject(): Record<string, ModelRegistryEntry> {
+     return Object.fromEntries(this.entries);
+   }
+
+   fromObject(obj: Record<string, ModelRegistryEntry>): void {
+     this.entries.clear();
+     for (const [id, entry] of Object.entries(obj)) {
+       this.entries.set(id, entry);
+     }
+   }
+ }
+
+ export function ModelRegistry(
+   config: ModelRegistryConfig,
+ ): ModelRegistryImpl {
+   return new ModelRegistryImpl(config);
+ }
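
The registry is an in-memory Map with filter-based lookup: find() drops any entry that fails a supplied constraint, and findOne() returns the first survivor in insertion order. No scoring is applied, so callers who care about preference should order their entries accordingly. A usage sketch with hypothetical model ids:

import { ModelRegistry } from "@use-solace/openllm";

const registry = ModelRegistry({ entries: {} });

registry.add("phi3-mini", {
  id: "phi3:mini",
  inference: "ollama",
  context: 4096,
  capabilities: ["chat", "completion"],
  latency: "extreme",
});

// Every chat-capable entry with at least 4k context.
const candidates = registry.find({ capability: "chat", minContext: 4096 });

// First match in insertion order, or undefined if nothing qualifies.
const fastest = registry.findOne({ capability: "chat", latency: "extreme" });
console.log(candidates.length, fastest?.name);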
package/src/types.ts ADDED
@@ -0,0 +1,164 @@
+ export type InferenceBackend = "ollama" | "llama" | "huggingface" | "openai";
+
+ export type ModelCapability = "chat" | "vision" | "embedding" | "completion";
+
+ export type LatencyProfile = "extreme" | "fast" | "slow";
+
+ export interface ModelRegistryEntry {
+   id: string;
+   name: string;
+   inference: InferenceBackend;
+   context: number;
+   quant?: string;
+   capabilities: ModelCapability[];
+   latency?: LatencyProfile;
+   size_bytes: number;
+   loaded: boolean;
+   loaded_at?: string;
+ }
+
+ export interface RegistryEntryInput {
+   id: string;
+   inference: InferenceBackend;
+   context: number;
+   quant?: string;
+   capabilities: ModelCapability[];
+   latency?: LatencyProfile;
+ }
+
+ export interface ModelRegistryConfig {
+   entries: Record<string, RegistryEntryInput>;
+ }
+
+ export interface HealthResponse {
+   status: string;
+   timestamp: string;
+   models_loaded: number;
+ }
+
+ export interface ModelListResponse {
+   models: ModelRegistryEntry[];
+ }
+
+ export interface RegisterModelRequest {
+   id: string;
+   name: string;
+   inference: InferenceBackend;
+   context: number;
+   quant?: string;
+   capabilities: ModelCapability[];
+   latency?: LatencyProfile;
+   size_bytes?: number;
+ }
+
+ export interface RegisterModelResponse {
+   success: boolean;
+   model: ModelRegistryEntry;
+   message: string;
+ }
+
+ export interface LoadModelRequest {
+   model_id: string;
+ }
+
+ export interface LoadModelResponse {
+   success: boolean;
+   model_id: string;
+   message: string;
+ }
+
+ export interface UnloadModelResponse {
+   success: boolean;
+   model_id: string;
+   message: string;
+ }
+
+ export interface InferenceRequest {
+   model_id: string;
+   prompt: string;
+   max_tokens?: number;
+   temperature?: number;
+ }
+
+ export interface InferenceResponse {
+   model_id: string;
+   text: string;
+   tokens_generated: number;
+   finish_reason: string;
+ }
+
+ export interface StreamToken {
+   token: string;
+   token_id: number;
+   complete: boolean;
+ }
+
+ export type StreamCallback = (token: StreamToken) => void;
+ export type StreamCompleteCallback = (response: InferenceResponse) => void;
+ export type StreamErrorCallback = (error: Error) => void;
+
+ export interface StreamOptions {
+   onToken: StreamCallback;
+   onComplete?: StreamCompleteCallback;
+   onError?: StreamErrorCallback;
+ }
+
+ export interface OpenLLMConfig {
+   engine?: string | number;
+   timeout?: number;
+ }
+
+ export interface FindModelOptions {
+   capability?: ModelCapability;
+   latency?: LatencyProfile;
+   inference?: InferenceBackend;
+   minContext?: number;
+   loaded?: boolean;
+ }
+
+ export interface APIConfig {
+   modelrouter?: boolean;
+   registry?: ModelRegistryInstance | string;
+   engine?: string | number;
+   prefix?: string;
+ }
+
+ export class OpenLLMError extends Error {
+   constructor(
+     message: string,
+     public code?: string,
+     public statusCode?: number,
+   ) {
+     super(message);
+     this.name = "OpenLLMError";
+   }
+ }
+
+ export class ModelNotFoundError extends OpenLLMError {
+   constructor(modelId: string) {
+     super(`Model '${modelId}' not found`, "MODEL_NOT_FOUND", 404);
+     this.name = "ModelNotFoundError";
+   }
+ }
+
+ export class ModelNotLoadedError extends OpenLLMError {
+   constructor(modelId: string) {
+     super(`Model '${modelId}' is not loaded`, "MODEL_NOT_LOADED", 412);
+     this.name = "ModelNotLoadedError";
+   }
+ }
+
+ export class InferenceError extends OpenLLMError {
+   constructor(message: string) {
+     super(message, "INFERENCE_ERROR", 502);
+     this.name = "InferenceError";
+   }
+ }
+
+ export interface ModelRegistryInstance {
+   list(): ModelRegistryEntry[];
+   get(id: string): ModelRegistryEntry | undefined;
+   findOne(options: FindModelOptions): ModelRegistryEntry | undefined;
+ }
+
+ export type ModelRegistryImpl = ModelRegistryInstance;
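
The error classes carry a machine-readable code and an HTTP-style statusCode, and they are ordinary Error subclasses, so instanceof narrowing works. Whether the client actually throws these types is not visible in this diff (client.ts is absent), so the following is a sketch of the intended use, not confirmed behavior:

import {
  createOpenLLMClient,
  ModelNotFoundError,
  OpenLLMError,
} from "@use-solace/openllm";

const client = createOpenLLMClient({ engine: 8080 });

try {
  await client.inference({ model_id: "missing-model", prompt: "hi" });
} catch (err) {
  if (err instanceof ModelNotFoundError) {
    console.error(err.statusCode, err.code); // 404 "MODEL_NOT_FOUND"
  } else if (err instanceof OpenLLMError) {
    console.error(err.code ?? "UNKNOWN", err.message);
  } else {
    throw err; // not an OpenLLM error
  }
}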
package/tsconfig.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "compilerOptions": {
+     "target": "ES2022",
+     "module": "ES2022",
+     "lib": ["ES2022"],
+     "moduleResolution": "bundler",
+     "resolveJsonModule": true,
+     "allowSyntheticDefaultImports": true,
+     "esModuleInterop": true,
+     "strict": true,
+     "skipLibCheck": true,
+     "declaration": true,
+     "declarationMap": true,
+     "outDir": "./dist",
+     "rootDir": "./src",
+     "sourceMap": true,
+     "forceConsistentCasingInFileNames": true,
+     "noUnusedLocals": true,
+     "noUnusedParameters": true,
+     "noImplicitReturns": true,
+     "noFallthroughCasesInSwitch": true
+   },
+   "include": ["src/**/*"],
+   "exclude": ["node_modules", "dist"]
+ }