@infersec/conduit 1.71.0 → 1.72.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,6 +4,7 @@ export declare function writeOutput(options: {
4
4
  hardware: HardwareConfig;
5
5
  metadata?: ModelMetadata;
6
6
  modelMetadata: {
7
+ fileSizeBytes: number | null;
7
8
  format: string;
8
9
  parameterCount: string;
9
10
  quantization: string | null;
@@ -95,6 +95,7 @@ export interface ModelMetadata {
95
95
  export interface ModelOutput {
96
96
  metadata?: ModelMetadata;
97
97
  model: {
98
+ fileSizeBytes: number | null;
98
99
  format: string;
99
100
  parameterCount: string;
100
101
  quantization: string | null;
package/dist/cli.js CHANGED
@@ -20915,8 +20915,85 @@ object({
20915
20915
  object({
20916
20916
  id: ULIDSchema
20917
20917
  });
20918
+ const EngineOutputSchema = object({
20919
+ created: string$1(),
20920
+ id: ULIDSchema,
20921
+ llamacppBatchSize: number$1().nullable(),
20922
+ llamacppCacheTypeK: string$1().nullable(),
20923
+ llamacppCacheTypeV: string$1().nullable(),
20924
+ llamacppExtraArgs: array(string$1()),
20925
+ llamacppFlashAttn: boolean$1(),
20926
+ llamacppGpuLayers: number$1(),
20927
+ llamacppMainGpu: number$1().nullable(),
20928
+ llamacppParallelism: number$1(),
20929
+ llamacppTensorSplit: string$1().nullable(),
20930
+ llamacppUbatchSize: number$1().nullable(),
20931
+ name: string$1(),
20932
+ type: LLMEngineSchema,
20933
+ updated: string$1(),
20934
+ vllmDevice: string$1().nullable(),
20935
+ vllmDtype: string$1().nullable(),
20936
+ vllmExtraArgs: array(string$1()),
20937
+ vllmTensorParallelSize: number$1()
20938
+ });
20939
+ object({
20940
+ llamacppBatchSize: number$1().int().positive().nullable().optional(),
20941
+ llamacppCacheTypeK: string$1().nullable().optional(),
20942
+ llamacppCacheTypeV: string$1().nullable().optional(),
20943
+ llamacppExtraArgs: array(string$1()).optional(),
20944
+ llamacppFlashAttn: boolean$1().optional(),
20945
+ llamacppGpuLayers: number$1().int().min(0).optional(),
20946
+ llamacppMainGpu: number$1().int().min(0).nullable().optional(),
20947
+ llamacppParallelism: number$1().int().positive().optional(),
20948
+ llamacppTensorSplit: string$1().nullable().optional(),
20949
+ llamacppUbatchSize: number$1().int().positive().nullable().optional(),
20950
+ name: ResourceNameSchema,
20951
+ type: LLMEngineSchema,
20952
+ vllmDevice: string$1().nullable().optional(),
20953
+ vllmDtype: string$1().nullable().optional(),
20954
+ vllmExtraArgs: array(string$1()).optional(),
20955
+ vllmTensorParallelSize: number$1().int().positive().optional()
20956
+ });
20957
+ object({
20958
+ llamacppBatchSize: number$1().int().positive().nullable().optional(),
20959
+ llamacppCacheTypeK: string$1().nullable().optional(),
20960
+ llamacppCacheTypeV: string$1().nullable().optional(),
20961
+ llamacppExtraArgs: array(string$1()).optional(),
20962
+ llamacppFlashAttn: boolean$1().optional(),
20963
+ llamacppGpuLayers: number$1().int().min(0).optional(),
20964
+ llamacppMainGpu: number$1().int().min(0).nullable().optional(),
20965
+ llamacppParallelism: number$1().int().positive().optional(),
20966
+ llamacppTensorSplit: string$1().nullable().optional(),
20967
+ llamacppUbatchSize: number$1().int().positive().nullable().optional(),
20968
+ name: ResourceNameSchema.optional(),
20969
+ type: LLMEngineSchema.optional(),
20970
+ vllmDevice: string$1().nullable().optional(),
20971
+ vllmDtype: string$1().nullable().optional(),
20972
+ vllmExtraArgs: array(string$1()).optional(),
20973
+ vllmTensorParallelSize: number$1().int().positive().optional()
20974
+ });
20975
+ object({
20976
+ results: array(EngineOutputSchema)
20977
+ });
20978
+ object({
20979
+ id: ULIDSchema
20980
+ });
20918
20981
 
20919
20982
  ({
20983
+ "/api/v1/engines/:engineID": {
20984
+ DELETE: {
20985
+ parameters: {
20986
+ engineID: ULIDSchema.describe("Engine identifier")
20987
+ }},
20988
+ GET: {
20989
+ parameters: {
20990
+ engineID: ULIDSchema.describe("Engine identifier")
20991
+ }},
20992
+ PATCH: {
20993
+ parameters: {
20994
+ engineID: ULIDSchema.describe("Engine identifier")
20995
+ }}
20996
+ },
20920
20997
  "/api/v1/endpoints/:endpointID": {
20921
20998
  DELETE: {
20922
20999
  parameters: {
@@ -127658,6 +127735,55 @@ class HuggingFaceClient {
127658
127735
  modelType: data.config?.model_type ?? data.gguf?.architecture
127659
127736
  };
127660
127737
  }
127738
+ async fetchQuantizationFileSize(modelSlug, quantization) {
127739
+ if (!quantization)
127740
+ return null;
127741
+ const listParams = {
127742
+ accessToken: this.apiKey || undefined,
127743
+ recursive: true,
127744
+ repo: modelSlug
127745
+ };
127746
+ const qPattern = quantization.toLowerCase().replace(/[_-]/g, "");
127747
+ const processed = new Set();
127748
+ let found = false;
127749
+ let totalBytes = 0;
127750
+ async function* listGgufFiles() {
127751
+ let lastError;
127752
+ for (let attempt = 0; attempt < 3; attempt++) {
127753
+ try {
127754
+ for await (const file of listFiles(listParams)) {
127755
+ yield file;
127756
+ }
127757
+ return;
127758
+ }
127759
+ catch (error) {
127760
+ lastError = error instanceof Error ? error : new Error(String(error));
127761
+ if (attempt < 2) {
127762
+ const delay = 1000 * Math.pow(2, attempt);
127763
+ await new Promise(resolve => setTimeout(resolve, Math.min(delay, 10000)));
127764
+ }
127765
+ }
127766
+ }
127767
+ if (lastError) {
127768
+ throw lastError;
127769
+ }
127770
+ }
127771
+ for await (const file of listGgufFiles()) {
127772
+ if (processed.has(file.path))
127773
+ continue;
127774
+ processed.add(file.path);
127775
+ if (!file.path.toLowerCase().endsWith(".gguf"))
127776
+ continue;
127777
+ const filePattern = file.path.toLowerCase().replace(/[_-]/g, "");
127778
+ if (!filePattern.includes(qPattern))
127779
+ continue;
127780
+ if (typeof file.size === "number") {
127781
+ totalBytes += file.size;
127782
+ found = true;
127783
+ }
127784
+ }
127785
+ return found ? totalBytes : null;
127786
+ }
127661
127787
  async listModelQuantizations(modelSlug) {
127662
127788
  if (!modelSlug) {
127663
127789
  return [];
@@ -153586,10 +153712,25 @@ async function provisionResources(options) {
153586
153712
  }
153587
153713
  const modelData = (await modelRes.json());
153588
153714
  const modelID = modelData.id;
153715
+ const enginesRes = await fetch(`${apiUrl}/api/v1/engines`, {
153716
+ headers,
153717
+ method: "GET",
153718
+ signal: AbortSignal.timeout(60_000)
153719
+ });
153720
+ if (!enginesRes.ok) {
153721
+ const text = await enginesRes.text();
153722
+ throw new Error(`Failed to list engines: ${enginesRes.status} ${text}`);
153723
+ }
153724
+ const enginesData = (await enginesRes.json());
153725
+ const engine = enginesData.results.find(e => e.type === options.engine);
153726
+ if (!engine) {
153727
+ throw new Error(`No engine of type "${options.engine}" found for account`);
153728
+ }
153729
+ const engineId = engine.id;
153589
153730
  const sourceIDs = [];
153590
153731
  for (let i = 0; i < parallelism; i++) {
153591
153732
  const sourceBody = {
153592
- engine: options.engine,
153733
+ engineId,
153593
153734
  modelID,
153594
153735
  name: `inftest_src_${i}_${shortSlug}`
153595
153736
  };
@@ -153807,6 +153948,7 @@ async function runBenchmark(options) {
153807
153948
  }
153808
153949
  async function runSingleBenchmark(options) {
153809
153950
  const { apiKey, apiUrl, bfclData, concurrency, entry, hardware, humanevalplusProblems, logger, mbppplusProblems, outputDir, parallelism } = options;
153951
+ let fileSizeBytes = null;
153810
153952
  let metadata;
153811
153953
  if (entry.source?.type === "huggingface") {
153812
153954
  console.log(" → Fetching model metadata...");
@@ -153814,6 +153956,9 @@ async function runSingleBenchmark(options) {
153814
153956
  const hfClient = new HuggingFaceClient(process.env.HF_TOKEN);
153815
153957
  const info = await hfClient.fetchModelInfo(entry.slug);
153816
153958
  metadata = { ...info, source: entry.source };
153959
+ if (entry.quantization) {
153960
+ fileSizeBytes = await hfClient.fetchQuantizationFileSize(entry.slug, entry.quantization);
153961
+ }
153817
153962
  }
153818
153963
  catch (err) {
153819
153964
  console.error(` ⚠ Failed to fetch metadata: ${asError(err).message}`);
@@ -153993,6 +154138,7 @@ async function runSingleBenchmark(options) {
153993
154138
  hardware,
153994
154139
  metadata,
153995
154140
  modelMetadata: {
154141
+ fileSizeBytes,
153996
154142
  format: entry.format,
153997
154143
  parameterCount: entry.parameterCount,
153998
154144
  quantization: entry.quantization
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "@infersec/conduit",
3
3
  "description": "End user conduit agent for connecting local LLMs to the cloud.",
4
- "version": "1.71.0",
4
+ "version": "1.72.0",
5
5
  "bin": {
6
6
  "infersec-conduit": "./dist/cli.js"
7
7
  },