@huggingface/transformers 3.0.0-alpha.15 → 3.0.0-alpha.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -101,7 +101,7 @@ npm i @huggingface/transformers
101
101
  Alternatively, you can use it in vanilla JS, without any bundler, by using a CDN or static hosting. For example, using [ES Modules](https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Modules), you can import the library with:
102
102
  ```html
103
103
  <script type="module">
104
- import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.15';
104
+ import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.17';
105
105
  </script>
106
106
  ```
107
107
 
@@ -134,7 +134,7 @@ Check out the Transformers.js [template](https://huggingface.co/new-space?templa
134
134
 
135
135
 
136
136
 
137
- By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.15/dist/), which should work out-of-the-box. You can customize this as follows:
137
+ By default, Transformers.js uses [hosted pretrained models](https://huggingface.co/models?library=transformers.js) and [precompiled WASM binaries](https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0-alpha.17/dist/), which should work out-of-the-box. You can customize this as follows:
138
138
 
139
139
  ### Settings
140
140
 
@@ -4327,16 +4327,23 @@ function getKeyValueShapes(config, {
4327
4327
  class PretrainedConfig {
4328
4328
  // NOTE: Typo in original
4329
4329
 
4330
+ /** @type {string|null} */
4331
+ model_type = null;
4332
+
4333
+ /** @type {boolean} */
4334
+ is_encoder_decoder = false;
4335
+
4336
+ /** @type {number} */
4330
4337
  max_position_embeddings;
4331
4338
 
4339
+ /** @type {TransformersJSConfig} */
4340
+ 'transformers.js_config';
4341
+
4332
4342
  /**
4333
4343
  * Create a new PretrainedConfig instance.
4334
4344
  * @param {Object} configJSON The JSON of the config.
4335
4345
  */
4336
4346
  constructor(configJSON) {
4337
- this.model_type = null;
4338
- this.is_encoder_decoder = false;
4339
-
4340
4347
  Object.assign(this, configJSON);
4341
4348
  this.normalized_config = getNormalizedConfig(this);
4342
4349
  }
@@ -4388,7 +4395,12 @@ class AutoConfig {
4388
4395
  /**
4389
4396
  * Transformers.js-specific configuration, possibly present in config.json under the key `transformers.js_config`.
4390
4397
  * @typedef {Object} TransformersJSConfig
4391
- * @property {import('./transformers.js').DataType} [kv_cache_dtype]
4398
+ * @property {import('./utils/tensor.js').DataType} [kv_cache_dtype] The data type of the key-value cache.
4399
+ * @property {Record<string, number>} [free_dimension_overrides] Override the free dimensions of the model.
4400
+ * See https://onnxruntime.ai/docs/tutorials/web/env-flags-and-session-options.html#freedimensionoverrides
4401
+ * for more information.
4402
+ * @property {import('./utils/devices.js').DeviceType} [device] The default device to use for the model.
4403
+ * @property {import('./utils/dtypes.js').DataType} [dtype] The default data type to use for the model.
4392
4404
  */
4393
4405
 
4394
4406
 
@@ -4437,7 +4449,7 @@ __webpack_require__.r(__webpack_exports__);
4437
4449
 
4438
4450
 
4439
4451
 
4440
- const VERSION = '3.0.0-alpha.15';
4452
+ const VERSION = '3.0.0-alpha.17';
4441
4453
 
4442
4454
  // Check if various APIs are available (depends on environment)
4443
4455
  const IS_BROWSER_ENV = typeof self !== 'undefined';
@@ -6889,7 +6901,8 @@ const MODEL_CLASS_TO_NAME_MAPPING = new Map();
6889
6901
  * @private
6890
6902
  */
6891
6903
  async function getSession(pretrained_model_name_or_path, fileName, options) {
6892
- let device = options.device;
6904
+ const custom_config = options.config?.['transformers.js_config'] ?? {};
6905
+ let device = options.device ?? custom_config.device;
6893
6906
  if (device && typeof device !== 'string') {
6894
6907
  if (device.hasOwnProperty(fileName)) {
6895
6908
  device = device[fileName];
@@ -6907,7 +6920,7 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
6907
6920
 
6908
6921
  // If options.dtype is specified, we use it to choose the suffix for the model file.
6909
6922
  // Otherwise, we use the default dtype for the device.
6910
- let dtype = options.dtype;
6923
+ let dtype = options.dtype ?? custom_config.dtype;
6911
6924
  if (typeof dtype !== 'string') {
6912
6925
  if (dtype && dtype.hasOwnProperty(fileName)) {
6913
6926
  dtype = dtype[fileName];
@@ -6934,6 +6947,16 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
6934
6947
  // Overwrite `executionProviders` if not specified
6935
6948
  session_options.executionProviders ??= executionProviders;
6936
6949
 
6950
+ // Overwrite `freeDimensionOverrides` if specified in config and not set in session options
6951
+ const free_dimension_overrides = custom_config.free_dimension_overrides;
6952
+ if (free_dimension_overrides) {
6953
+ session_options.freeDimensionOverrides ??= free_dimension_overrides;
6954
+ } else if (selectedDevice.startsWith('webnn') && !session_options.freeDimensionOverrides) {
6955
+ console.warn(
6956
+ 'WebNN does not currently support dynamic shapes and requires `free_dimension_overrides` to be set in config.json as a field within "transformers.js_config". ' +
6957
+ 'When `free_dimension_overrides` is not set, you may experience significant performance degradation.'
6958
+ );
6959
+ }
6937
6960
 
6938
6961
  const bufferPromise = (0,_utils_hub_js__WEBPACK_IMPORTED_MODULE_5__.getModelFile)(pretrained_model_name_or_path, modelFileName, true, options);
6939
6962
 
@@ -6982,6 +7005,9 @@ async function getSession(pretrained_model_name_or_path, fileName, options) {
6982
7005
  /** @type {Record<string, import('onnxruntime-common').Tensor.DataLocation>} */
6983
7006
  const preferredOutputLocation = {};
6984
7007
  for (const key in shapes) {
7008
+ // TODO: For now, we keep encoder outputs on the CPU
7009
+ // (otherwise, this causes a memory leak or throws an error "Error: previous buffer is not registered")
7010
+ if (key.includes('encoder')) continue;
6985
7011
  preferredOutputLocation[key] = 'gpu-buffer';
6986
7012
  }
6987
7013
  session_options.preferredOutputLocation = preferredOutputLocation;
@@ -7137,37 +7163,6 @@ function toI64Tensor(items) {
7137
7163
  }
7138
7164
  }
7139
7165
 
7140
- /**
7141
- * Prepares an attention mask for a sequence of tokens based on configuration options.
7142
- * @param {Object} self The calling object instance.
7143
- * @param {Tensor} tokens The input tokens.
7144
- * @returns {Tensor} The attention mask tensor.
7145
- * @private
7146
- */
7147
- function prepareAttentionMask(self, tokens) {
7148
-
7149
- // Prepare attention mask
7150
- let pad_token_id = self.config.pad_token_id ?? null;
7151
- let eos_token_id = self.config.eos_token_id ?? null;
7152
- if ((0,_utils_core_js__WEBPACK_IMPORTED_MODULE_4__.isIntegralNumber)(eos_token_id)) {
7153
- eos_token_id = [eos_token_id];
7154
- }
7155
-
7156
- let is_pad_token_in_inputs = tokens.indexOf(pad_token_id) !== -1;
7157
- let is_pad_token_not_equal_to_eos_token_id = (eos_token_id === null) || !eos_token_id.includes(pad_token_id)
7158
-
7159
- if (is_pad_token_in_inputs && is_pad_token_not_equal_to_eos_token_id) {
7160
- let data = BigInt64Array.from(
7161
- // Note: != so that int matches bigint
7162
- // @ts-ignore
7163
- tokens.data.map(x => x != pad_token_id)
7164
- )
7165
- return new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_8__.Tensor('int64', data, tokens.dims)
7166
- } else {
7167
- return (0,_utils_tensor_js__WEBPACK_IMPORTED_MODULE_8__.ones_like)(tokens);
7168
- }
7169
- }
7170
-
7171
7166
  /**
7172
7167
  * Creates a boolean tensor with a single value.
7173
7168
  * @param {boolean} value The value of the tensor.
@@ -7438,8 +7433,8 @@ function image_text_to_text_prepare_inputs_for_generation(self, ...args) {
7438
7433
  } else {
7439
7434
  return decoder_prepare_inputs_for_generation(self, ...args);
7440
7435
  }
7441
-
7442
7436
  }
7437
+
7443
7438
  //////////////////////////////////////////////////
7444
7439
 
7445
7440
  //////////////////////////////////////////////////
@@ -8202,13 +8197,12 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
8202
8197
  // - GenerationMode.BEAM_SEARCH
8203
8198
  // - GenerationMode.BEAM_SAMPLE
8204
8199
  ////////////////////////////////////////////////////
8205
- let past_key_values = null;
8200
+ let outputs;
8206
8201
  let attentions = {};
8207
8202
  while (true) {
8208
8203
  // prepare model inputs
8209
8204
  model_inputs = this.prepare_inputs_for_generation(all_input_ids, model_inputs, generation_config);
8210
-
8211
- const outputs = await this.forward(model_inputs);
8205
+ outputs = await this.forward(model_inputs);
8212
8206
 
8213
8207
  if (generation_config.output_attentions && generation_config.return_dict_in_generate) {
8214
8208
  // Get attentions if they are present
@@ -8255,10 +8249,6 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
8255
8249
 
8256
8250
  const stop = prepared_stopping_criteria(all_input_ids);
8257
8251
  if (stop.every(x => x)) {
8258
- if (generation_config.return_dict_in_generate) {
8259
- // Get past key values without disposing buffers
8260
- past_key_values = this.getPastKeyValues(outputs, model_inputs.past_key_values, false);
8261
- }
8262
8252
  break;
8263
8253
  }
8264
8254
 
@@ -8271,6 +8261,9 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
8271
8261
  streamer.end();
8272
8262
  }
8273
8263
 
8264
+ // Retrieve and dispose all final past key values (including encoder attentions)
8265
+ const past_key_values = this.getPastKeyValues(outputs, model_inputs.past_key_values, true);
8266
+
8274
8267
  // TODO: ensure all_input_ids is padded correctly...
8275
8268
  const sequences = new _utils_tensor_js__WEBPACK_IMPORTED_MODULE_8__.Tensor('int64', all_input_ids.flat(), [all_input_ids.length, all_input_ids[0].length]);
8276
8269
 
@@ -8284,6 +8277,12 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
8284
8277
  // logits,
8285
8278
  }
8286
8279
  } else {
8280
+ // Dispose all remaining tensors
8281
+ for (const tensor of Object.values(outputs)) {
8282
+ if (tensor.location === 'gpu-buffer') {
8283
+ tensor.dispose();
8284
+ }
8285
+ }
8287
8286
  return sequences;
8288
8287
  }
8289
8288
  }
@@ -8293,31 +8292,32 @@ class PreTrainedModel extends _utils_generic_js__WEBPACK_IMPORTED_MODULE_3__.Cal
8293
8292
  *
8294
8293
  * @param {Object} decoderResults The decoder results object.
8295
8294
  * @param {Object} pastKeyValues The previous past key values.
8296
- * @param {boolean} [dispose=true] Whether to dispose of the old gpu buffer.
8297
8295
  * @returns {Object} An object containing past key values.
8298
8296
  */
8299
- getPastKeyValues(decoderResults, pastKeyValues, dispose = true) {
8297
+ getPastKeyValues(decoderResults, pastKeyValues, disposeEncoderPKVs = false) {
8300
8298
  const pkvs = Object.create(null);
8301
8299
 
8302
8300
  for (const name in decoderResults) {
8303
8301
  if (name.startsWith('present')) {
8304
8302
  const newName = name.replace('present', 'past_key_values');
8305
-
8306
- if (pastKeyValues && name.includes('encoder')) {
8307
- // Optimization introduced by optimum to reuse past key values. So, we just replace the constant
8308
- // outputs with the previous past key values.
8303
+ const is_encoder_pkv = name.includes('encoder');
8304
+ if (is_encoder_pkv && pastKeyValues) {
8305
+ // Optimization introduced by optimum to reuse past key values.
8306
+ // So, we just replace the constant outputs (`decoderResults[name]`) with the previous past key values.
8309
8307
  // https://github.com/huggingface/optimum/blob/0bf2c05fb7e1182b52d21b703cfc95fd9e4ea3dc/optimum/onnxruntime/base.py#L677-L704
8310
8308
  pkvs[newName] = pastKeyValues[newName];
8311
- } else {
8312
- if (dispose && pastKeyValues) {
8313
- // Free old gpu buffer
8314
- const t = pastKeyValues[newName];
8315
- if (t.location === 'gpu-buffer') {
8316
- t.dispose();
8317
- }
8318
- }
8309
+ } else { // decoder or using first encoder PKVs
8319
8310
  pkvs[newName] = decoderResults[name];
8320
8311
  }
8312
+
8313
+ if (pastKeyValues && (!is_encoder_pkv || disposeEncoderPKVs)) {
8314
+ // - Always dispose decoder PKVs
8315
+ // - Only dispose encoder past key values when requested (after generation)
8316
+ const t = pastKeyValues[newName];
8317
+ if (t.location === 'gpu-buffer') {
8318
+ t.dispose();
8319
+ }
8320
+ }
8321
8321
  }
8322
8322
  }
8323
8323
  return pkvs;
@@ -20639,7 +20639,7 @@ function clean_up_tokenization(text) {
20639
20639
  * @returns {string} The text with accents removed.
20640
20640
  */
20641
20641
  function remove_accents(text) {
20642
- return text.replace(/[\u0300-\u036f]/g, '');
20642
+ return text.replace(/\p{M}/gu, '');
20643
20643
  }
20644
20644
 
20645
20645
  /**
@@ -20985,18 +20985,18 @@ class Unigram extends TokenizerModel {
20985
20985
  this.unk_token = this.vocab[config.unk_id];
20986
20986
 
20987
20987
  this.tokens_to_ids = new Map(this.vocab.map((x, i) => [x, i]));
20988
- this.bosToken = ' '; // beginning of a sentence token
20988
+ this.bos_token = ' '; // beginning of a sentence token
20989
20989
 
20990
- this.bosTokenId = this.tokens_to_ids.get(this.bosToken); // NOTE: may be undefined
20991
- this.eosToken = moreConfig.eos_token;
20990
+ this.bos_token_id = this.tokens_to_ids.get(this.bos_token); // NOTE: may be undefined
20991
+ this.eos_token = moreConfig.eos_token;
20992
20992
 
20993
- this.eosTokenId = this.tokens_to_ids.get(this.eosToken);
20994
- this.unkToken = this.vocab[this.unk_token_id];
20993
+ this.eos_token_id = this.tokens_to_ids.get(this.eos_token);
20994
+ this.unk_token = this.vocab[this.unk_token_id];
20995
20995
 
20996
20996
  this.minScore = (0,_utils_maths_js__WEBPACK_IMPORTED_MODULE_3__.min)(this.scores)[0];
20997
20997
 
20998
- this.unkScore = this.minScore - 10.0;
20999
- this.scores[this.unk_token_id] = this.unkScore;
20998
+ this.unk_score = this.minScore - 10.0;
20999
+ this.scores[this.unk_token_id] = this.unk_score;
21000
21000
 
21001
21001
  this.trie = new _utils_data_structures_js__WEBPACK_IMPORTED_MODULE_5__.CharTrie();
21002
21002
  this.trie.extend(this.vocab);
@@ -21011,26 +21011,27 @@ class Unigram extends TokenizerModel {
21011
21011
  * @param {TokenLattice} lattice The token lattice to populate with nodes.
21012
21012
  */
21013
21013
  populateNodes(lattice) {
21014
- const sentence = lattice.sentence;
21015
- const len = sentence.length;
21014
+ const chars = lattice.chars;
21015
+ const mblen = 1;
21016
21016
  let beginPos = 0;
21017
- while (beginPos < len) {
21018
- const mblen = 1;
21017
+ while (beginPos < chars.length) {
21019
21018
  let hasSingleNode = false;
21020
- const tokens = [];
21021
21019
 
21022
- for (let token of this.trie.commonPrefixSearch(sentence.slice(beginPos))) {
21020
+ const tokens = [];
21021
+ const sliced = chars.slice(beginPos).join('');
21022
+ const prefixedTokens = this.trie.commonPrefixSearch(sliced);
21023
+ for (const token of prefixedTokens) {
21023
21024
  tokens.push(token);
21024
21025
  const tokenId = this.tokens_to_ids.get(token);
21025
21026
  const tokenScore = this.scores[tokenId];
21026
- const n = token.length;
21027
+ const n = (0,_utils_core_js__WEBPACK_IMPORTED_MODULE_1__.len)(token);
21027
21028
  lattice.insert(beginPos, n, tokenScore, tokenId);
21028
21029
  if (!hasSingleNode && n === mblen) {
21029
21030
  hasSingleNode = true;
21030
21031
  }
21031
21032
  }
21032
21033
  if (!hasSingleNode) {
21033
- lattice.insert(beginPos, mblen, this.unkScore, this.unk_token_id);
21034
+ lattice.insert(beginPos, mblen, this.unk_score, this.unk_token_id);
21034
21035
  }
21035
21036
  beginPos += mblen;
21036
21037
  }
@@ -21043,7 +21044,7 @@ class Unigram extends TokenizerModel {
21043
21044
  * @returns {string[]} An array of subtokens obtained by encoding the input tokens using the unigram model.
21044
21045
  */
21045
21046
  tokenize(normalized) {
21046
- const lattice = new _utils_data_structures_js__WEBPACK_IMPORTED_MODULE_5__.TokenLattice(normalized, this.bosTokenId, this.eosTokenId);
21047
+ const lattice = new _utils_data_structures_js__WEBPACK_IMPORTED_MODULE_5__.TokenLattice(normalized, this.bos_token_id, this.eos_token_id);
21047
21048
  this.populateNodes(lattice);
21048
21049
  return lattice.tokens();
21049
21050
  }
@@ -21658,7 +21659,8 @@ class BertNormalizer extends Normalizer {
21658
21659
  * @returns {string} The text with accents removed.
21659
21660
  */
21660
21661
  stripAccents(text) {
21661
- return text.normalize('NFD').replace(/[\u0300-\u036f]/g, '');
21662
+ // "Mark, Nonspacing" (Mn)
21663
+ return text.normalize('NFD').replace(/\p{Mn}/gu, '');
21662
21664
  }
21663
21665
 
21664
21666
 
@@ -22776,7 +22778,7 @@ class Precompiled extends Normalizer {
22776
22778
  // TODO: detect when a different `this.charsmap` is used.
22777
22779
 
22778
22780
  text = text.replace(/[\u0001-\u0008\u000B\u000E-\u001F\u007F\u008F\u009F]/gm, ''); // Remove control characters
22779
- text = text.replace(/[\u0009\u000A\u000C\u000D\u1680\u200B\u200C\u200E\u200F\u2028\u2029\u2581\uFEFF\uFFFD]/gm, '\u0020'); // Replace certain characters with a space
22781
+ text = text.replace(/[\u0009\u000A\u000C\u000D\u00A0\u1680\u2000-\u200F\u2028\u2029\u202F\u205F\u2581\u3000\uFEFF\uFFFD]/gm, '\u0020'); // Replace certain characters with a space
22780
22782
 
22781
22783
  if (text.includes('\uFF5E')) {
22782
22784
  // To match the sentencepiece implementation 100%, we must handle a very strange edge-case.
@@ -25632,6 +25634,7 @@ __webpack_require__.r(__webpack_exports__);
25632
25634
  /* harmony export */ escapeRegExp: () => (/* binding */ escapeRegExp),
25633
25635
  /* harmony export */ isIntegralNumber: () => (/* binding */ isIntegralNumber),
25634
25636
  /* harmony export */ isTypedArray: () => (/* binding */ isTypedArray),
25637
+ /* harmony export */ len: () => (/* binding */ len),
25635
25638
  /* harmony export */ mergeArrays: () => (/* binding */ mergeArrays),
25636
25639
  /* harmony export */ pick: () => (/* binding */ pick),
25637
25640
  /* harmony export */ pop: () => (/* binding */ pop),
@@ -25788,6 +25791,18 @@ function pick(o, props) {
25788
25791
  );
25789
25792
  }
25790
25793
 
25794
+ /**
25795
+ * Calculate the length of a string in Unicode code points, so that a character stored as a UTF-16 surrogate pair counts as one.
25796
+ * This mimics the behavior of Python's `len` function.
25797
+ * @param {string} s The string to calculate the length of.
25798
+ * @returns {number} The length of the string.
25799
+ */
25800
+ function len(s) {
25801
+ let length = 0;
25802
+ for (const c of s) ++length;
25803
+ return length;
25804
+ }
25805
+
25791
25806
 
25792
25807
  /***/ }),
25793
25808
 
@@ -26035,7 +26050,7 @@ class CharTrie {
26035
26050
  * @param {string[]} texts The strings to add to the trie.
26036
26051
  */
26037
26052
  extend(texts) {
26038
- for (let text of texts) {
26053
+ for (const text of texts) {
26039
26054
  this.push(text);
26040
26055
  }
26041
26056
  }
@@ -26046,7 +26061,7 @@ class CharTrie {
26046
26061
  */
26047
26062
  push(text) {
26048
26063
  let node = this.root;
26049
- for (let ch of text) {
26064
+ for (const ch of text) {
26050
26065
  let child = node.children.get(ch);
26051
26066
  if (child === undefined) {
26052
26067
  child = CharTrieNode.default();
@@ -26064,12 +26079,14 @@ class CharTrie {
26064
26079
  */
26065
26080
  *commonPrefixSearch(text) {
26066
26081
  let node = this.root;
26082
+ if (node === undefined) return;
26083
+
26067
26084
  let prefix = "";
26068
- for (let i = 0; i < text.length && node !== undefined; ++i) {
26069
- const ch = text[i];
26085
+ for (const ch of text) {
26070
26086
  prefix += ch;
26071
26087
  node = node.children.get(ch);
26072
- if (node !== undefined && node.isLeaf) {
26088
+ if (node === undefined) return;
26089
+ if (node.isLeaf) {
26073
26090
  yield prefix;
26074
26091
  }
26075
26092
  }
@@ -26111,8 +26128,8 @@ class TokenLattice {
26111
26128
  * @param {number} eosTokenId The end-of-sequence token ID.
26112
26129
  */
26113
26130
  constructor(sentence, bosTokenId, eosTokenId) {
26114
- this.sentence = sentence;
26115
- this.len = sentence.length;
26131
+ this.chars = Array.from(sentence);
26132
+ this.len = this.chars.length;
26116
26133
  this.bosTokenId = bosTokenId;
26117
26134
  this.eosTokenId = eosTokenId;
26118
26135
  this.nodes = [];
@@ -26146,7 +26163,7 @@ class TokenLattice {
26146
26163
  /**
26147
26164
  * Implements the Viterbi algorithm to compute the most likely sequence of tokens.
26148
26165
  *
26149
- * @returns {TokenLatticeNode[]} The array of nodes representing the most likely sequence of tokens.
26166
+ * @returns {TokenLatticeNode[]} The most likely sequence of tokens.
26150
26167
  */
26151
26168
  viterbi() {
26152
26169
  const len = this.len;
@@ -26200,11 +26217,11 @@ class TokenLattice {
26200
26217
  * @returns {string} The piece of the sentence covered by the node, i.e. its `node.length` characters starting at `node.pos`.
26201
26218
  */
26202
26219
  piece(node) {
26203
- return this.sentence.slice(node.pos, node.pos + node.length);
26220
+ return this.chars.slice(node.pos, node.pos + node.length).join('');
26204
26221
  }
26205
26222
 
26206
26223
  /**
26207
- * @returns {Array} The array of nodes representing the most likely sequence of tokens.
26224
+ * @returns {string[]} The most likely sequence of tokens.
26208
26225
  */
26209
26226
  tokens() {
26210
26227
  const nodes = this.viterbi();
@@ -26212,7 +26229,7 @@ class TokenLattice {
26212
26229
  }
26213
26230
 
26214
26231
  /**
26215
- * @returns {Array} The array of nodes representing the most likely sequence of tokens.
26232
+ * @returns {number[]} The most likely sequence of token ids.
26216
26233
  */
26217
26234
  tokenIds() {
26218
26235
  const nodes = this.viterbi();
@@ -26453,7 +26470,7 @@ __webpack_require__.r(__webpack_exports__);
26453
26470
  /**
26454
26471
  * @typedef {Object} PretrainedOptions Options for loading a pretrained model.
26455
26472
  * @property {function} [progress_callback=null] If specified, this function will be called during model construction, to provide the user with progress updates.
26456
- * @property {Object} [config=null] Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:
26473
+ * @property {import('../configs.js').PretrainedConfig} [config=null] Configuration for the model to use instead of an automatically loaded configuration. Configuration can be automatically loaded when:
26457
26474
  * - The model is a model provided by the library (loaded with the *model id* string of a pretrained model).
26458
26475
  * - The model is loaded by supplying a local directory as `pretrained_model_name_or_path` and a configuration JSON file named *config.json* is found in the directory.
26459
26476
  * @property {string} [cache_dir=null] Path to a directory in which a downloaded pretrained model configuration should be cached if the standard cache should not be used.