npm - @infersec/conduit - Versions diffs - 1.71.0 → 1.72.0 - Mend

@infersec/conduit 1.71.0 → 1.72.0

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (4)

package/dist/benchmark/output/writer.d.ts +1 -0
package/dist/benchmark/types.d.ts +1 -0
package/dist/cli.js +147 -1
package/package.json +1 -1

package/dist/benchmark/output/writer.d.ts CHANGED Viewed

@@ -4,6 +4,7 @@ export declare function writeOutput(options: {
     hardware: HardwareConfig;
     metadata?: ModelMetadata;
     modelMetadata: {
+        fileSizeBytes: number | null;
         format: string;
         parameterCount: string;
         quantization: string | null;

package/dist/benchmark/types.d.ts CHANGED Viewed

@@ -95,6 +95,7 @@ export interface ModelMetadata {
 export interface ModelOutput {
     metadata?: ModelMetadata;
     model: {
+        fileSizeBytes: number | null;
         format: string;
         parameterCount: string;
         quantization: string | null;

package/dist/cli.js CHANGED Viewed

@@ -20915,8 +20915,85 @@ object({
 object({
     id: ULIDSchema
 });
+const EngineOutputSchema = object({
+    created: string$1(),
+    id: ULIDSchema,
+    llamacppBatchSize: number$1().nullable(),
+    llamacppCacheTypeK: string$1().nullable(),
+    llamacppCacheTypeV: string$1().nullable(),
+    llamacppExtraArgs: array(string$1()),
+    llamacppFlashAttn: boolean$1(),
+    llamacppGpuLayers: number$1(),
+    llamacppMainGpu: number$1().nullable(),
+    llamacppParallelism: number$1(),
+    llamacppTensorSplit: string$1().nullable(),
+    llamacppUbatchSize: number$1().nullable(),
+    name: string$1(),
+    type: LLMEngineSchema,
+    updated: string$1(),
+    vllmDevice: string$1().nullable(),
+    vllmDtype: string$1().nullable(),
+    vllmExtraArgs: array(string$1()),
+    vllmTensorParallelSize: number$1()
+});
+object({
+    llamacppBatchSize: number$1().int().positive().nullable().optional(),
+    llamacppCacheTypeK: string$1().nullable().optional(),
+    llamacppCacheTypeV: string$1().nullable().optional(),
+    llamacppExtraArgs: array(string$1()).optional(),
+    llamacppFlashAttn: boolean$1().optional(),
+    llamacppGpuLayers: number$1().int().min(0).optional(),
+    llamacppMainGpu: number$1().int().min(0).nullable().optional(),
+    llamacppParallelism: number$1().int().positive().optional(),
+    llamacppTensorSplit: string$1().nullable().optional(),
+    llamacppUbatchSize: number$1().int().positive().nullable().optional(),
+    name: ResourceNameSchema,
+    type: LLMEngineSchema,
+    vllmDevice: string$1().nullable().optional(),
+    vllmDtype: string$1().nullable().optional(),
+    vllmExtraArgs: array(string$1()).optional(),
+    vllmTensorParallelSize: number$1().int().positive().optional()
+});
+object({
+    llamacppBatchSize: number$1().int().positive().nullable().optional(),
+    llamacppCacheTypeK: string$1().nullable().optional(),
+    llamacppCacheTypeV: string$1().nullable().optional(),
+    llamacppExtraArgs: array(string$1()).optional(),
+    llamacppFlashAttn: boolean$1().optional(),
+    llamacppGpuLayers: number$1().int().min(0).optional(),
+    llamacppMainGpu: number$1().int().min(0).nullable().optional(),
+    llamacppParallelism: number$1().int().positive().optional(),
+    llamacppTensorSplit: string$1().nullable().optional(),
+    llamacppUbatchSize: number$1().int().positive().nullable().optional(),
+    name: ResourceNameSchema.optional(),
+    type: LLMEngineSchema.optional(),
+    vllmDevice: string$1().nullable().optional(),
+    vllmDtype: string$1().nullable().optional(),
+    vllmExtraArgs: array(string$1()).optional(),
+    vllmTensorParallelSize: number$1().int().positive().optional()
+});
+object({
+    results: array(EngineOutputSchema)
+});
+object({
+    id: ULIDSchema
+});
 ({
+    "/api/v1/engines/:engineID": {
+        DELETE: {
+            parameters: {
+                engineID: ULIDSchema.describe("Engine identifier")
+            }},
+        GET: {
+            parameters: {
+                engineID: ULIDSchema.describe("Engine identifier")
+            }},
+        PATCH: {
+            parameters: {
+                engineID: ULIDSchema.describe("Engine identifier")
+            }}
+    },
     "/api/v1/endpoints/:endpointID": {
         DELETE: {
             parameters: {
@@ -127658,6 +127735,55 @@ class HuggingFaceClient {
             modelType: data.config?.model_type ?? data.gguf?.architecture
         };
     }
+    async fetchQuantizationFileSize(modelSlug, quantization) {
+        if (!quantization)
+            return null;
+        const listParams = {
+            accessToken: this.apiKey || undefined,
+            recursive: true,
+            repo: modelSlug
+        };
+        const qPattern = quantization.toLowerCase().replace(/[_-]/g, "");
+        const processed = new Set();
+        let found = false;
+        let totalBytes = 0;
+        async function* listGgufFiles() {
+            let lastError;
+            for (let attempt = 0; attempt < 3; attempt++) {
+                try {
+                    for await (const file of listFiles(listParams)) {
+                        yield file;
+                    }
+                    return;
+                }
+                catch (error) {
+                    lastError = error instanceof Error ? error : new Error(String(error));
+                    if (attempt < 2) {
+                        const delay = 1000 * Math.pow(2, attempt);
+                        await new Promise(resolve => setTimeout(resolve, Math.min(delay, 10000)));
+                    }
+                }
+            }
+            if (lastError) {
+                throw lastError;
+            }
+        }
+        for await (const file of listGgufFiles()) {
+            if (processed.has(file.path))
+                continue;
+            processed.add(file.path);
+            if (!file.path.toLowerCase().endsWith(".gguf"))
+                continue;
+            const filePattern = file.path.toLowerCase().replace(/[_-]/g, "");
+            if (!filePattern.includes(qPattern))
+                continue;
+            if (typeof file.size === "number") {
+                totalBytes += file.size;
+                found = true;
+            }
+        }
+        return found ? totalBytes : null;
+    }
     async listModelQuantizations(modelSlug) {
         if (!modelSlug) {
             return [];
@@ -153586,10 +153712,25 @@ async function provisionResources(options) {
     }
     const modelData = (await modelRes.json());
     const modelID = modelData.id;
+    const enginesRes = await fetch(`${apiUrl}/api/v1/engines`, {
+        headers,
+        method: "GET",
+        signal: AbortSignal.timeout(60_000)
+    });
+    if (!enginesRes.ok) {
+        const text = await enginesRes.text();
+        throw new Error(`Failed to list engines: ${enginesRes.status} ${text}`);
+    }
+    const enginesData = (await enginesRes.json());
+    const engine = enginesData.results.find(e => e.type === options.engine);
+    if (!engine) {
+        throw new Error(`No engine of type "${options.engine}" found for account`);
+    }
+    const engineId = engine.id;
     const sourceIDs = [];
     for (let i = 0; i < parallelism; i++) {
         const sourceBody = {
-            engine: options.engine,
+            engineId,
             modelID,
             name: `inftest_src_${i}_${shortSlug}`
         };
@@ -153807,6 +153948,7 @@ async function runBenchmark(options) {
 }
 async function runSingleBenchmark(options) {
     const { apiKey, apiUrl, bfclData, concurrency, entry, hardware, humanevalplusProblems, logger, mbppplusProblems, outputDir, parallelism } = options;
+    let fileSizeBytes = null;
     let metadata;
     if (entry.source?.type === "huggingface") {
         console.log("  → Fetching model metadata...");
@@ -153814,6 +153956,9 @@ async function runSingleBenchmark(options) {
             const hfClient = new HuggingFaceClient(process.env.HF_TOKEN);
             const info = await hfClient.fetchModelInfo(entry.slug);
             metadata = { ...info, source: entry.source };
+            if (entry.quantization) {
+                fileSizeBytes = await hfClient.fetchQuantizationFileSize(entry.slug, entry.quantization);
+            }
         }
         catch (err) {
             console.error(`  ⚠ Failed to fetch metadata: ${asError(err).message}`);
@@ -153993,6 +154138,7 @@ async function runSingleBenchmark(options) {
             hardware,
             metadata,
             modelMetadata: {
+                fileSizeBytes,
                 format: entry.format,
                 parameterCount: entry.parameterCount,
                 quantization: entry.quantization

package/package.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "name": "@infersec/conduit",
   "description": "End user conduit agent for connecting local LLMs to the cloud.",
-  "version": "1.71.0",
+  "version": "1.72.0",
   "bin": {
     "infersec-conduit": "./dist/cli.js"
   },