npm - @huggingface/transformers - Versions diffs - 3.0.0-alpha.21 → 3.0.0-alpha.22 - Mend

@huggingface/transformers 3.0.0-alpha.21 → 3.0.0-alpha.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

package/README.md +2 -2
package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
package/dist/transformers.cjs +31 -12
package/dist/transformers.cjs.map +1 -1
package/dist/transformers.js +399 -306
package/dist/transformers.js.map +1 -1
package/dist/transformers.min.cjs +3 -3
package/dist/transformers.min.cjs.map +1 -1
package/dist/transformers.min.js +6 -6
package/dist/transformers.min.js.map +1 -1
package/dist/transformers.min.mjs +3 -3
package/dist/transformers.min.mjs.map +1 -1
package/dist/transformers.mjs +31 -12
package/dist/transformers.mjs.map +1 -1
package/package.json +2 -2
package/src/backends/onnx.js +6 -3
package/src/configs.js +1 -1
package/src/env.js +1 -1
package/src/models.js +23 -7
package/types/backends/onnx.d.ts +5 -2
package/types/backends/onnx.d.ts.map +1 -1
package/types/configs.d.ts +1 -1
package/types/configs.d.ts.map +1 -1
package/types/models.d.ts.map +1 -1

package/dist/transformers.mjs CHANGED Viewed

@@ -3930,9 +3930,10 @@ let wasmInitPromise = null;
  * Create an ONNX inference session.
  * @param {Uint8Array} buffer The ONNX model buffer.
  * @param {import('onnxruntime-common').InferenceSession.SessionOptions} session_options ONNX inference session options.
- * @returns {Promise<import('onnxruntime-common').InferenceSession>} The ONNX inference session.
+ * @param {Object} session_config ONNX inference session configuration.
+ * @returns {Promise<import('onnxruntime-common').InferenceSession & { config: Object}>} The ONNX inference session.
  */
-async function createInferenceSession(buffer, session_options) {
+async function createInferenceSession(buffer, session_options, session_config) {
     if (wasmInitPromise) {
         // A previous session has already initialized the WASM runtime
         // so we wait for it to resolve before creating this new session.
@@ -3941,7 +3942,9 @@ async function createInferenceSession(buffer, session_options) {
     const sessionPromise = InferenceSession.create(buffer, session_options);
     wasmInitPromise ??= sessionPromise;
-    return await sessionPromise;
+    const session = await sessionPromise;
+    session.config = session_config;
+    return session;
 }
 /**
@@ -4376,7 +4379,7 @@ class AutoConfig {
 /**
  * Transformers.js-specific configuration, possibly present in config.json under the key `transformers.js_config`.
  * @typedef {Object} TransformersJSConfig
- * @property {import('./utils/tensor.js').DataType} [kv_cache_dtype] The data type of the key-value cache.
+ * @property {import('./utils/tensor.js').DataType|Record<import('./utils/dtypes.js').DataType, import('./utils/tensor.js').DataType>} [kv_cache_dtype] The data type of the key-value cache.
  * @property {Record<string, number>} [free_dimension_overrides] Override the free dimensions of the model.
  * See https://onnxruntime.ai/docs/tutorials/web/env-flags-and-session-options.html#freedimensionoverrides
  * for more information.
@@ -4430,7 +4433,7 @@ __webpack_require__.r(__webpack_exports__);
-const VERSION = '3.0.0-alpha.21';
+const VERSION = '3.0.0-alpha.22';
 // Check if various APIs are available (depends on environment)
 const IS_BROWSER_ENV = typeof self !== 'undefined';
@@ -6877,7 +6880,7 @@ const MODEL_CLASS_TO_NAME_MAPPING = new Map();
  * @param {string} pretrained_model_name_or_path The path to the directory containing the model file.
  * @param {string} fileName The name of the model file.
  * @param {import('./utils/hub.js').PretrainedModelOptions} options Additional options for loading the model.
- * @returns {Promise<{buffer: Uint8Array, session_options: Object}>} A Promise that resolves to the data needed to create an InferenceSession object.
+ * @returns {Promise<{buffer: Uint8Array, session_options: Object, session_config: Object}>} A Promise that resolves to the data needed to create an InferenceSession object.
  * @private
  */
 async function getSession(pretrained_model_name_or_path, fileName, options) {
@@ -6918,6 +6921,22 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
         throw new Error(`The device (${selectedDevice}) does not support fp16.`);
     }
+    // Only valid for models with a decoder
+    const kv_cache_dtype = custom_config.kv_cache_dtype
+        ? (typeof custom_config.kv_cache_dtype === 'string'
+            ? custom_config.kv_cache_dtype
+            : custom_config.kv_cache_dtype[selectedDtype] ?? 'float32')
+        : undefined;
+    if (kv_cache_dtype && !['float32', 'float16'].includes(kv_cache_dtype)) {
+        throw new Error(`Invalid kv_cache_dtype: ${kv_cache_dtype}. Should be one of: float32, float16`);
+    }
+    const session_config = {
+        dtype: selectedDtype,
+        kv_cache_dtype,
+    }
     // Construct the model file name
     const suffix = _utils_dtypes_js__WEBPACK_IMPORTED_MODULE_2__.DEFAULT_DTYPE_SUFFIX_MAPPING[selectedDtype];
     const modelFileName = `${options.subfolder ?? ''}/${fileName}${suffix}.onnx`;
@@ -6993,7 +7012,8 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
     }
     const buffer = await bufferPromise;
-    return { buffer, session_options };
+    return { buffer, session_options, session_config };
 }
 /**
@@ -7008,8 +7028,8 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
 async function constructSessions(pretrained_model_name_or_path, names, options) {
     return Object.fromEntries(await Promise.all(
         Object.keys(names).map(async (name) => {
-            const { buffer, session_options } = await getSession(pretrained_model_name_or_path, names[name], options);
-            const session = await (0,_backends_onnx_js__WEBPACK_IMPORTED_MODULE_1__.createInferenceSession)(buffer, session_options);
+            const { buffer, session_options, session_config } = await getSession(pretrained_model_name_or_path, names[name], options);
+            const session = await (0,_backends_onnx_js__WEBPACK_IMPORTED_MODULE_1__.createInferenceSession)(buffer, session_options, session_config);
             return [name, session];
         })
     ));
@@ -8366,9 +8386,8 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
         if (pastKeyValues) {
             Object.assign(decoderFeeds, pastKeyValues)
         } else {
-            /** @type {import('./transformers.js').DataType} */
-            const dtype = this.custom_config.kv_cache_dtype ?? 'float32';
+            const session = this.sessions['decoder_model_merged'] ?? this.sessions['model'];
+            const dtype = session?.config?.kv_cache_dtype ?? 'float32';
             const empty = (dtype === 'float16') ? new Uint16Array() : [];
             const shapes = (0,_configs_js__WEBPACK_IMPORTED_MODULE_0__.getKeyValueShapes)(this.config);