@infersec/conduit 1.70.0 → 1.72.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/benchmark/output/writer.d.ts +1 -0
- package/dist/benchmark/types.d.ts +1 -0
- package/dist/cli.js +147 -1
- package/package.json +1 -1
package/dist/cli.js
CHANGED
|
@@ -20915,8 +20915,85 @@ object({
|
|
|
20915
20915
|
object({
|
|
20916
20916
|
id: ULIDSchema
|
|
20917
20917
|
});
|
|
20918
|
+
const EngineOutputSchema = object({
|
|
20919
|
+
created: string$1(),
|
|
20920
|
+
id: ULIDSchema,
|
|
20921
|
+
llamacppBatchSize: number$1().nullable(),
|
|
20922
|
+
llamacppCacheTypeK: string$1().nullable(),
|
|
20923
|
+
llamacppCacheTypeV: string$1().nullable(),
|
|
20924
|
+
llamacppExtraArgs: array(string$1()),
|
|
20925
|
+
llamacppFlashAttn: boolean$1(),
|
|
20926
|
+
llamacppGpuLayers: number$1(),
|
|
20927
|
+
llamacppMainGpu: number$1().nullable(),
|
|
20928
|
+
llamacppParallelism: number$1(),
|
|
20929
|
+
llamacppTensorSplit: string$1().nullable(),
|
|
20930
|
+
llamacppUbatchSize: number$1().nullable(),
|
|
20931
|
+
name: string$1(),
|
|
20932
|
+
type: LLMEngineSchema,
|
|
20933
|
+
updated: string$1(),
|
|
20934
|
+
vllmDevice: string$1().nullable(),
|
|
20935
|
+
vllmDtype: string$1().nullable(),
|
|
20936
|
+
vllmExtraArgs: array(string$1()),
|
|
20937
|
+
vllmTensorParallelSize: number$1()
|
|
20938
|
+
});
|
|
20939
|
+
object({
|
|
20940
|
+
llamacppBatchSize: number$1().int().positive().nullable().optional(),
|
|
20941
|
+
llamacppCacheTypeK: string$1().nullable().optional(),
|
|
20942
|
+
llamacppCacheTypeV: string$1().nullable().optional(),
|
|
20943
|
+
llamacppExtraArgs: array(string$1()).optional(),
|
|
20944
|
+
llamacppFlashAttn: boolean$1().optional(),
|
|
20945
|
+
llamacppGpuLayers: number$1().int().min(0).optional(),
|
|
20946
|
+
llamacppMainGpu: number$1().int().min(0).nullable().optional(),
|
|
20947
|
+
llamacppParallelism: number$1().int().positive().optional(),
|
|
20948
|
+
llamacppTensorSplit: string$1().nullable().optional(),
|
|
20949
|
+
llamacppUbatchSize: number$1().int().positive().nullable().optional(),
|
|
20950
|
+
name: ResourceNameSchema,
|
|
20951
|
+
type: LLMEngineSchema,
|
|
20952
|
+
vllmDevice: string$1().nullable().optional(),
|
|
20953
|
+
vllmDtype: string$1().nullable().optional(),
|
|
20954
|
+
vllmExtraArgs: array(string$1()).optional(),
|
|
20955
|
+
vllmTensorParallelSize: number$1().int().positive().optional()
|
|
20956
|
+
});
|
|
20957
|
+
object({
|
|
20958
|
+
llamacppBatchSize: number$1().int().positive().nullable().optional(),
|
|
20959
|
+
llamacppCacheTypeK: string$1().nullable().optional(),
|
|
20960
|
+
llamacppCacheTypeV: string$1().nullable().optional(),
|
|
20961
|
+
llamacppExtraArgs: array(string$1()).optional(),
|
|
20962
|
+
llamacppFlashAttn: boolean$1().optional(),
|
|
20963
|
+
llamacppGpuLayers: number$1().int().min(0).optional(),
|
|
20964
|
+
llamacppMainGpu: number$1().int().min(0).nullable().optional(),
|
|
20965
|
+
llamacppParallelism: number$1().int().positive().optional(),
|
|
20966
|
+
llamacppTensorSplit: string$1().nullable().optional(),
|
|
20967
|
+
llamacppUbatchSize: number$1().int().positive().nullable().optional(),
|
|
20968
|
+
name: ResourceNameSchema.optional(),
|
|
20969
|
+
type: LLMEngineSchema.optional(),
|
|
20970
|
+
vllmDevice: string$1().nullable().optional(),
|
|
20971
|
+
vllmDtype: string$1().nullable().optional(),
|
|
20972
|
+
vllmExtraArgs: array(string$1()).optional(),
|
|
20973
|
+
vllmTensorParallelSize: number$1().int().positive().optional()
|
|
20974
|
+
});
|
|
20975
|
+
object({
|
|
20976
|
+
results: array(EngineOutputSchema)
|
|
20977
|
+
});
|
|
20978
|
+
object({
|
|
20979
|
+
id: ULIDSchema
|
|
20980
|
+
});
|
|
20918
20981
|
|
|
20919
20982
|
({
|
|
20983
|
+
"/api/v1/engines/:engineID": {
|
|
20984
|
+
DELETE: {
|
|
20985
|
+
parameters: {
|
|
20986
|
+
engineID: ULIDSchema.describe("Engine identifier")
|
|
20987
|
+
}},
|
|
20988
|
+
GET: {
|
|
20989
|
+
parameters: {
|
|
20990
|
+
engineID: ULIDSchema.describe("Engine identifier")
|
|
20991
|
+
}},
|
|
20992
|
+
PATCH: {
|
|
20993
|
+
parameters: {
|
|
20994
|
+
engineID: ULIDSchema.describe("Engine identifier")
|
|
20995
|
+
}}
|
|
20996
|
+
},
|
|
20920
20997
|
"/api/v1/endpoints/:endpointID": {
|
|
20921
20998
|
DELETE: {
|
|
20922
20999
|
parameters: {
|
|
@@ -127658,6 +127735,55 @@ class HuggingFaceClient {
|
|
|
127658
127735
|
modelType: data.config?.model_type ?? data.gguf?.architecture
|
|
127659
127736
|
};
|
|
127660
127737
|
}
|
|
127738
|
+
async fetchQuantizationFileSize(modelSlug, quantization) {
|
|
127739
|
+
if (!quantization)
|
|
127740
|
+
return null;
|
|
127741
|
+
const listParams = {
|
|
127742
|
+
accessToken: this.apiKey || undefined,
|
|
127743
|
+
recursive: true,
|
|
127744
|
+
repo: modelSlug
|
|
127745
|
+
};
|
|
127746
|
+
const qPattern = quantization.toLowerCase().replace(/[_-]/g, "");
|
|
127747
|
+
const processed = new Set();
|
|
127748
|
+
let found = false;
|
|
127749
|
+
let totalBytes = 0;
|
|
127750
|
+
async function* listGgufFiles() {
|
|
127751
|
+
let lastError;
|
|
127752
|
+
for (let attempt = 0; attempt < 3; attempt++) {
|
|
127753
|
+
try {
|
|
127754
|
+
for await (const file of listFiles(listParams)) {
|
|
127755
|
+
yield file;
|
|
127756
|
+
}
|
|
127757
|
+
return;
|
|
127758
|
+
}
|
|
127759
|
+
catch (error) {
|
|
127760
|
+
lastError = error instanceof Error ? error : new Error(String(error));
|
|
127761
|
+
if (attempt < 2) {
|
|
127762
|
+
const delay = 1000 * Math.pow(2, attempt);
|
|
127763
|
+
await new Promise(resolve => setTimeout(resolve, Math.min(delay, 10000)));
|
|
127764
|
+
}
|
|
127765
|
+
}
|
|
127766
|
+
}
|
|
127767
|
+
if (lastError) {
|
|
127768
|
+
throw lastError;
|
|
127769
|
+
}
|
|
127770
|
+
}
|
|
127771
|
+
for await (const file of listGgufFiles()) {
|
|
127772
|
+
if (processed.has(file.path))
|
|
127773
|
+
continue;
|
|
127774
|
+
processed.add(file.path);
|
|
127775
|
+
if (!file.path.toLowerCase().endsWith(".gguf"))
|
|
127776
|
+
continue;
|
|
127777
|
+
const filePattern = file.path.toLowerCase().replace(/[_-]/g, "");
|
|
127778
|
+
if (!filePattern.includes(qPattern))
|
|
127779
|
+
continue;
|
|
127780
|
+
if (typeof file.size === "number") {
|
|
127781
|
+
totalBytes += file.size;
|
|
127782
|
+
found = true;
|
|
127783
|
+
}
|
|
127784
|
+
}
|
|
127785
|
+
return found ? totalBytes : null;
|
|
127786
|
+
}
|
|
127661
127787
|
async listModelQuantizations(modelSlug) {
|
|
127662
127788
|
if (!modelSlug) {
|
|
127663
127789
|
return [];
|
|
@@ -153586,10 +153712,25 @@ async function provisionResources(options) {
|
|
|
153586
153712
|
}
|
|
153587
153713
|
const modelData = (await modelRes.json());
|
|
153588
153714
|
const modelID = modelData.id;
|
|
153715
|
+
const enginesRes = await fetch(`${apiUrl}/api/v1/engines`, {
|
|
153716
|
+
headers,
|
|
153717
|
+
method: "GET",
|
|
153718
|
+
signal: AbortSignal.timeout(60_000)
|
|
153719
|
+
});
|
|
153720
|
+
if (!enginesRes.ok) {
|
|
153721
|
+
const text = await enginesRes.text();
|
|
153722
|
+
throw new Error(`Failed to list engines: ${enginesRes.status} ${text}`);
|
|
153723
|
+
}
|
|
153724
|
+
const enginesData = (await enginesRes.json());
|
|
153725
|
+
const engine = enginesData.results.find(e => e.type === options.engine);
|
|
153726
|
+
if (!engine) {
|
|
153727
|
+
throw new Error(`No engine of type "${options.engine}" found for account`);
|
|
153728
|
+
}
|
|
153729
|
+
const engineId = engine.id;
|
|
153589
153730
|
const sourceIDs = [];
|
|
153590
153731
|
for (let i = 0; i < parallelism; i++) {
|
|
153591
153732
|
const sourceBody = {
|
|
153592
|
-
|
|
153733
|
+
engineId,
|
|
153593
153734
|
modelID,
|
|
153594
153735
|
name: `inftest_src_${i}_${shortSlug}`
|
|
153595
153736
|
};
|
|
@@ -153807,6 +153948,7 @@ async function runBenchmark(options) {
|
|
|
153807
153948
|
}
|
|
153808
153949
|
async function runSingleBenchmark(options) {
|
|
153809
153950
|
const { apiKey, apiUrl, bfclData, concurrency, entry, hardware, humanevalplusProblems, logger, mbppplusProblems, outputDir, parallelism } = options;
|
|
153951
|
+
let fileSizeBytes = null;
|
|
153810
153952
|
let metadata;
|
|
153811
153953
|
if (entry.source?.type === "huggingface") {
|
|
153812
153954
|
console.log(" → Fetching model metadata...");
|
|
@@ -153814,6 +153956,9 @@ async function runSingleBenchmark(options) {
|
|
|
153814
153956
|
const hfClient = new HuggingFaceClient(process.env.HF_TOKEN);
|
|
153815
153957
|
const info = await hfClient.fetchModelInfo(entry.slug);
|
|
153816
153958
|
metadata = { ...info, source: entry.source };
|
|
153959
|
+
if (entry.quantization) {
|
|
153960
|
+
fileSizeBytes = await hfClient.fetchQuantizationFileSize(entry.slug, entry.quantization);
|
|
153961
|
+
}
|
|
153817
153962
|
}
|
|
153818
153963
|
catch (err) {
|
|
153819
153964
|
console.error(` ⚠ Failed to fetch metadata: ${asError(err).message}`);
|
|
@@ -153993,6 +154138,7 @@ async function runSingleBenchmark(options) {
|
|
|
153993
154138
|
hardware,
|
|
153994
154139
|
metadata,
|
|
153995
154140
|
modelMetadata: {
|
|
154141
|
+
fileSizeBytes,
|
|
153996
154142
|
format: entry.format,
|
|
153997
154143
|
parameterCount: entry.parameterCount,
|
|
153998
154144
|
quantization: entry.quantization
|