@huggingface/transformers 3.2.2 → 3.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. package/README.md +3 -2
  2. package/dist/transformers.cjs +252 -113
  3. package/dist/transformers.cjs.map +1 -1
  4. package/dist/transformers.js +256 -114
  5. package/dist/transformers.js.map +1 -1
  6. package/dist/transformers.min.cjs +1 -1
  7. package/dist/transformers.min.cjs.map +1 -1
  8. package/dist/transformers.min.js +1 -1
  9. package/dist/transformers.min.js.map +1 -1
  10. package/dist/transformers.min.mjs +1 -1
  11. package/dist/transformers.min.mjs.map +1 -1
  12. package/dist/transformers.mjs +256 -114
  13. package/dist/transformers.mjs.map +1 -1
  14. package/package.json +2 -2
  15. package/src/base/feature_extraction_utils.js +9 -9
  16. package/src/base/image_processors_utils.js +11 -0
  17. package/src/base/processing_utils.js +13 -3
  18. package/src/configs.js +5 -0
  19. package/src/env.js +1 -1
  20. package/src/models/auto/feature_extraction_auto.js +0 -16
  21. package/src/models/auto/processing_auto.js +0 -16
  22. package/src/models/convnext/image_processing_convnext.js +1 -0
  23. package/src/models/efficientnet/image_processing_efficientnet.js +1 -0
  24. package/src/models/florence2/processing_florence2.js +3 -0
  25. package/src/models/idefics3/image_processing_idefics3.js +2 -0
  26. package/src/models/janus/image_processing_janus.js +1 -0
  27. package/src/models/mgp_str/processing_mgp_str.js +2 -0
  28. package/src/models/paligemma/processing_paligemma.js +1 -0
  29. package/src/models/phi3_v/processing_phi3_v.js +1 -1
  30. package/src/models/pyannote/feature_extraction_pyannote.js +1 -0
  31. package/src/models/qwen2_vl/processing_qwen2_vl.js +1 -0
  32. package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js +2 -2
  33. package/src/models/whisper/feature_extraction_whisper.js +1 -1
  34. package/src/models.js +93 -36
  35. package/src/ops/registry.js +10 -0
  36. package/src/pipelines.js +34 -7
  37. package/src/tokenizers.js +4 -7
  38. package/src/utils/dtypes.js +2 -0
  39. package/src/utils/hub.js +1 -1
  40. package/src/utils/maths.js +8 -6
  41. package/src/utils/tensor.js +42 -10
  42. package/types/base/feature_extraction_utils.d.ts +7 -7
  43. package/types/base/image_processors_utils.d.ts.map +1 -1
  44. package/types/base/processing_utils.d.ts +17 -19
  45. package/types/base/processing_utils.d.ts.map +1 -1
  46. package/types/configs.d.ts.map +1 -1
  47. package/types/generation/parameters.d.ts +1 -1
  48. package/types/models/auto/feature_extraction_auto.d.ts.map +1 -1
  49. package/types/models/auto/image_processing_auto.d.ts.map +1 -1
  50. package/types/models/auto/processing_auto.d.ts.map +1 -1
  51. package/types/models/convnext/image_processing_convnext.d.ts.map +1 -1
  52. package/types/models/efficientnet/image_processing_efficientnet.d.ts.map +1 -1
  53. package/types/models/florence2/processing_florence2.d.ts.map +1 -1
  54. package/types/models/idefics3/image_processing_idefics3.d.ts.map +1 -1
  55. package/types/models/janus/image_processing_janus.d.ts.map +1 -1
  56. package/types/models/mgp_str/processing_mgp_str.d.ts.map +1 -1
  57. package/types/models/paligemma/processing_paligemma.d.ts.map +1 -1
  58. package/types/models/phi3_v/processing_phi3_v.d.ts +6 -2
  59. package/types/models/phi3_v/processing_phi3_v.d.ts.map +1 -1
  60. package/types/models/pyannote/feature_extraction_pyannote.d.ts.map +1 -1
  61. package/types/models/qwen2_vl/processing_qwen2_vl.d.ts.map +1 -1
  62. package/types/models/sapiens/image_processing_sapiens.d.ts +10 -0
  63. package/types/models/sapiens/image_processing_sapiens.d.ts.map +1 -0
  64. package/types/models/whisper/generation_whisper.d.ts +1 -1
  65. package/types/models/whisper/generation_whisper.d.ts.map +1 -1
  66. package/types/models.d.ts +48 -17
  67. package/types/models.d.ts.map +1 -1
  68. package/types/ops/registry.d.ts +1 -0
  69. package/types/ops/registry.d.ts.map +1 -1
  70. package/types/pipelines.d.ts +2 -2
  71. package/types/pipelines.d.ts.map +1 -1
  72. package/types/tokenizers.d.ts.map +1 -1
  73. package/types/tsconfig.tsbuildinfo +1 -0
  74. package/types/utils/dtypes.d.ts.map +1 -1
  75. package/types/utils/hub.d.ts +1 -1
  76. package/types/utils/hub.d.ts.map +1 -1
  77. package/types/utils/image.d.ts +3 -2
  78. package/types/utils/image.d.ts.map +1 -1
  79. package/types/utils/maths.d.ts +8 -6
  80. package/types/utils/maths.d.ts.map +1 -1
  81. package/types/utils/tensor.d.ts +8 -4
  82. package/types/utils/tensor.d.ts.map +1 -1
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@huggingface/transformers",
3
- "version": "3.2.2",
3
+ "version": "3.2.4",
4
4
  "description": "State-of-the-art Machine Learning for the web. Run 🤗 Transformers directly in your browser, with no need for a server!",
5
5
  "main": "./src/transformers.js",
6
6
  "types": "./types/transformers.d.ts",
@@ -24,7 +24,7 @@
24
24
  "scripts": {
25
25
  "format": "prettier --write .",
26
26
  "format:check": "prettier --check .",
27
- "typegen": "tsc ./src/transformers.js --allowJs --declaration --emitDeclarationOnly --declarationMap --outDir types",
27
+ "typegen": "tsc --build",
28
28
  "dev": "webpack serve --no-client-overlay",
29
29
  "build": "webpack && npm run typegen",
30
30
  "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --verbose",
package/src/base/feature_extraction_utils.js CHANGED
@@ -17,23 +17,23 @@ export class FeatureExtractor extends Callable {
17
17
  }
18
18
 
19
19
  /**
20
- * Instantiate one of the processor classes of the library from a pretrained model.
20
+ * Instantiate one of the feature extractor classes of the library from a pretrained model.
21
21
  *
22
- * The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
23
- * property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
22
+ * The feature extractor class to instantiate is selected based on the `feature_extractor_type` property of
23
+ * the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
24
24
  *
25
25
  * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
26
- * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
26
+ * - A string, the *model id* of a pretrained feature_extractor hosted inside a model repo on huggingface.co.
27
27
  * Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
28
28
  * user or organization name, like `dbmdz/bert-base-german-cased`.
29
- * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
30
- * @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
29
+ * - A path to a *directory* containing feature_extractor files, e.g., `./my_model_directory/`.
30
+ * @param {import('../utils/hub.js').PretrainedOptions} options Additional options for loading the feature_extractor.
31
31
  *
32
- * @returns {Promise<FeatureExtractor>} A new instance of the Processor class.
32
+ * @returns {Promise<FeatureExtractor>} A new instance of the Feature Extractor class.
33
33
  */
34
34
  static async from_pretrained(pretrained_model_name_or_path, options) {
35
- const preprocessorConfig = await getModelJSON(pretrained_model_name_or_path, FEATURE_EXTRACTOR_NAME, true, options);
36
- return new this(preprocessorConfig);
35
+ const config = await getModelJSON(pretrained_model_name_or_path, FEATURE_EXTRACTOR_NAME, true, options);
36
+ return new this(config);
37
37
  }
38
38
  }
39
39
 
package/src/base/image_processors_utils.js CHANGED
@@ -604,14 +604,20 @@ export class ImageProcessor extends Callable {
604
604
  this.do_thumbnail = config.do_thumbnail;
605
605
  this.size = config.size ?? config.image_size;
606
606
  this.do_resize = config.do_resize ?? (this.size !== undefined);
607
+ // @ts-expect-error TS2339
607
608
  this.size_divisibility = config.size_divisibility ?? config.size_divisor;
608
609
 
609
610
  this.do_center_crop = config.do_center_crop;
611
+ // @ts-expect-error TS2339
610
612
  this.crop_size = config.crop_size;
613
+ // @ts-expect-error TS2339
611
614
  this.do_convert_rgb = config.do_convert_rgb ?? true;
615
+ // @ts-expect-error TS2339
612
616
  this.do_crop_margin = config.do_crop_margin;
613
617
 
618
+ // @ts-expect-error TS2339
614
619
  this.pad_size = config.pad_size;
620
+ // @ts-expect-error TS2339
615
621
  this.do_pad = config.do_pad;
616
622
 
617
623
  if (this.do_pad && !this.pad_size && this.size && this.size.width !== undefined && this.size.height !== undefined) {
@@ -820,6 +826,7 @@ export class ImageProcessor extends Callable {
820
826
  // Support both formats for backwards compatibility
821
827
  else if (Number.isInteger(size)) {
822
828
  shortest_edge = size;
829
+ // @ts-expect-error TS2339
823
830
  longest_edge = this.config.max_size ?? shortest_edge;
824
831
 
825
832
  } else if (size !== undefined) {
@@ -888,6 +895,7 @@ export class ImageProcessor extends Callable {
888
895
  } else if (size.min_pixels !== undefined && size.max_pixels !== undefined) {
889
896
  // Custom resize logic for Qwen2-VL models
890
897
  const { min_pixels, max_pixels } = size;
898
+ // @ts-expect-error TS2339
891
899
  const factor = this.config.patch_size * this.config.merge_size;
892
900
  return smart_resize(srcHeight, srcWidth, factor, min_pixels, max_pixels);
893
901
  } else {
@@ -903,6 +911,7 @@ export class ImageProcessor extends Callable {
903
911
  async resize(image) {
904
912
  const [newWidth, newHeight] = this.get_resize_output_image_size(image, this.size);
905
913
  return await image.resize(newWidth, newHeight, {
914
+ // @ts-expect-error TS2322
906
915
  resample: this.resample,
907
916
  });
908
917
  }
@@ -953,6 +962,7 @@ export class ImageProcessor extends Callable {
953
962
 
954
963
  // Resize the image using thumbnail method.
955
964
  if (this.do_thumbnail) {
965
+ // @ts-expect-error TS2345
956
966
  image = await this.thumbnail(image, this.size, this.resample);
957
967
  }
958
968
 
@@ -977,6 +987,7 @@ export class ImageProcessor extends Callable {
977
987
  // NOTE: All pixel-level manipulation (i.e., modifying `pixelData`)
978
988
  // occurs with data in the hwc format (height, width, channels),
979
989
  // to emulate the behavior of the original Python code (w/ numpy).
990
+ /** @type {Float32Array} */
980
991
  let pixelData = Float32Array.from(image.data);
981
992
  let imgDims = [image.height, image.width, image.channels];
982
993
 
package/src/base/processing_utils.js CHANGED
@@ -28,6 +28,7 @@ import { getModelJSON } from '../utils/hub.js';
28
28
  /**
29
29
  * @typedef {Object} ProcessorProperties Additional processor-specific properties.
30
30
  * @typedef {import('../utils/hub.js').PretrainedOptions & ProcessorProperties} PretrainedProcessorOptions
31
+ * @typedef {import('../tokenizers.js').PreTrainedTokenizer} PreTrainedTokenizer
31
32
  */
32
33
 
33
34
 
@@ -61,7 +62,7 @@ export class Processor extends Callable {
61
62
  }
62
63
 
63
64
  /**
64
- * @returns {import('../tokenizers.js').PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
65
+ * @returns {PreTrainedTokenizer|undefined} The tokenizer of the processor, if it exists.
65
66
  */
66
67
  get tokenizer() {
67
68
  return this.components.tokenizer;
@@ -74,6 +75,11 @@ export class Processor extends Callable {
74
75
  return this.components.feature_extractor;
75
76
  }
76
77
 
78
+ /**
79
+ * @param {Parameters<PreTrainedTokenizer['apply_chat_template']>[0]} messages
80
+ * @param {Parameters<PreTrainedTokenizer['apply_chat_template']>[1]} options
81
+ * @returns {ReturnType<PreTrainedTokenizer['apply_chat_template']>}
82
+ */
77
83
  apply_chat_template(messages, options = {}) {
78
84
  if (!this.tokenizer) {
79
85
  throw new Error('Unable to apply chat template without a tokenizer.');
@@ -84,6 +90,10 @@ export class Processor extends Callable {
84
90
  });
85
91
  }
86
92
 
93
+ /**
94
+ * @param {Parameters<PreTrainedTokenizer['batch_decode']>} args
95
+ * @returns {ReturnType<PreTrainedTokenizer['batch_decode']>}
96
+ */
87
97
  batch_decode(...args) {
88
98
  if (!this.tokenizer) {
89
99
  throw new Error('Unable to decode without a tokenizer.');
@@ -111,8 +121,8 @@ export class Processor extends Callable {
111
121
  /**
112
122
  * Instantiate one of the processor classes of the library from a pretrained model.
113
123
  *
114
- * The processor class to instantiate is selected based on the `feature_extractor_type` property of the config object
115
- * (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
124
+ * The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
125
+ * property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
116
126
  *
117
127
  * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
118
128
  * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
package/src/configs.js CHANGED
@@ -70,15 +70,19 @@ function getNormalizedConfig(config) {
70
70
  case 'florence2':
71
71
  case 'llava_onevision':
72
72
  case 'idefics3':
73
+ // @ts-expect-error TS2339
73
74
  init_normalized_config = getNormalizedConfig(config.text_config);
74
75
  break;
75
76
  case 'moondream1':
77
+ // @ts-expect-error TS2339
76
78
  init_normalized_config = getNormalizedConfig(config.phi_config);
77
79
  break;
78
80
  case 'musicgen':
81
+ // @ts-expect-error TS2339
79
82
  init_normalized_config = getNormalizedConfig(config.decoder);
80
83
  break;
81
84
  case 'multi_modality':
85
+ // @ts-expect-error TS2339
82
86
  init_normalized_config = getNormalizedConfig(config.language_config);
83
87
  break;
84
88
 
@@ -199,6 +203,7 @@ function getNormalizedConfig(config) {
199
203
  break;
200
204
 
201
205
  case 'vision-encoder-decoder':
206
+ // @ts-expect-error TS2339
202
207
  const decoderConfig = getNormalizedConfig(config.decoder);
203
208
 
204
209
  const add_encoder_pkv = 'num_decoder_layers' in decoderConfig;
package/src/env.js CHANGED
@@ -26,7 +26,7 @@ import fs from 'fs';
26
26
  import path from 'path';
27
27
  import url from 'url';
28
28
 
29
- const VERSION = '3.2.2';
29
+ const VERSION = '3.2.4';
30
30
 
31
31
  // Check if various APIs are available (depends on environment)
32
32
  const IS_BROWSER_ENV = typeof window !== "undefined" && typeof window.document !== "undefined";
package/src/models/auto/feature_extraction_auto.js CHANGED
@@ -6,22 +6,6 @@ import * as AllFeatureExtractors from '../feature_extractors.js';
6
6
 
7
7
  export class AutoFeatureExtractor {
8
8
 
9
- /**
10
- * Instantiate one of the feature extractor classes of the library from a pretrained model.
11
- *
12
- * The processor class to instantiate is selected based on the `feature_extractor_type` property of
13
- * the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
14
- *
15
- * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
16
- * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
17
- * Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
18
- * user or organization name, like `dbmdz/bert-base-german-cased`.
19
- * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
20
- * @param {import('../../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
21
- *
22
- * @returns {Promise<AllFeatureExtractors.ImageProcessor>} A new instance of the Processor class.
23
- */
24
-
25
9
  /** @type {typeof FeatureExtractor.from_pretrained} */
26
10
  static async from_pretrained(pretrained_model_name_or_path, options={}) {
27
11
 
package/src/models/auto/processing_auto.js CHANGED
@@ -40,22 +40,6 @@ import * as AllFeatureExtractors from '../feature_extractors.js';
40
40
  */
41
41
  export class AutoProcessor {
42
42
 
43
- /**
44
- * Instantiate one of the processor classes of the library from a pretrained model.
45
- *
46
- * The processor class to instantiate is selected based on the `image_processor_type` (or `feature_extractor_type`; legacy)
47
- * property of the config object (either passed as an argument or loaded from `pretrained_model_name_or_path` if possible)
48
- *
49
- * @param {string} pretrained_model_name_or_path The name or path of the pretrained model. Can be either:
50
- * - A string, the *model id* of a pretrained processor hosted inside a model repo on huggingface.co.
51
- * Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
52
- * user or organization name, like `dbmdz/bert-base-german-cased`.
53
- * - A path to a *directory* containing processor files, e.g., `./my_model_directory/`.
54
- * @param {import('../../utils/hub.js').PretrainedOptions} options Additional options for loading the processor.
55
- *
56
- * @returns {Promise<Processor>} A new instance of the Processor class.
57
- */
58
-
59
43
  /** @type {typeof Processor.from_pretrained} */
60
44
  static async from_pretrained(pretrained_model_name_or_path, options={}) {
61
45
 
package/src/models/convnext/image_processing_convnext.js CHANGED
@@ -9,6 +9,7 @@ export class ConvNextImageProcessor extends ImageProcessor {
9
9
  /**
10
10
  * Percentage of the image to crop. Only has an effect if this.size < 384.
11
11
  */
12
+ // @ts-expect-error TS2339
12
13
  this.crop_pct = this.config.crop_pct ?? (224 / 256);
13
14
  }
14
15
 
package/src/models/efficientnet/image_processing_efficientnet.js CHANGED
@@ -5,6 +5,7 @@ import {
5
5
  export class EfficientNetImageProcessor extends ImageProcessor {
6
6
  constructor(config) {
7
7
  super(config);
8
+ // @ts-expect-error TS2339
8
9
  this.include_top = this.config.include_top ?? true;
9
10
  if (this.include_top) {
10
11
  this.image_std = this.image_std.map(x => x * x);
package/src/models/florence2/processing_florence2.js CHANGED
@@ -10,8 +10,11 @@ export class Florence2Processor extends Processor {
10
10
  super(config, components);
11
11
 
12
12
  const {
13
+ // @ts-expect-error TS2339
13
14
  tasks_answer_post_processing_type,
15
+ // @ts-expect-error TS2339
14
16
  task_prompts_without_inputs,
17
+ // @ts-expect-error TS2339
15
18
  task_prompts_with_input,
16
19
  } = this.image_processor.config;
17
20
 
package/src/models/idefics3/image_processing_idefics3.js CHANGED
@@ -146,6 +146,8 @@ export class Idefics3ImageProcessor extends ImageProcessor {
146
146
 
147
147
  const start_offset = i * pixel_attention_mask_stride + num_patches * h * w;
148
148
  const end_offset = (i + 1) * pixel_attention_mask_stride;
149
+
150
+ // @ts-expect-error
149
151
  pixel_attention_mask_data.fill(false, start_offset, end_offset);
150
152
  }
151
153
  }
package/src/models/janus/image_processing_janus.js CHANGED
@@ -13,6 +13,7 @@ export class VLMImageProcessor extends ImageProcessor {
13
13
  },
14
14
  ...config,
15
15
  });
16
+ // @ts-expect-error TS2339
16
17
  this.constant_values = this.config.background_color.map(x => x * this.rescale_factor)
17
18
  }
18
19
 
package/src/models/mgp_str/processing_mgp_str.js CHANGED
@@ -119,6 +119,8 @@ export class MgpstrProcessor extends Processor {
119
119
  * - bpe_preds: The list of BPE decoded sentences.
120
120
  * - wp_preds: The list of wp decoded sentences.
121
121
  */
122
+ // @ts-expect-error The type of this method is not compatible with the one
123
+ // in the base class. It might be a good idea to fix this.
122
124
  batch_decode([char_logits, bpe_logits, wp_logits]) {
123
125
  const [char_preds, char_scores] = this._decode_helper(char_logits, 'char');
124
126
  const [bpe_preds, bpe_scores] = this._decode_helper(bpe_logits, 'bpe');
package/src/models/paligemma/processing_paligemma.js CHANGED
@@ -41,6 +41,7 @@ export class PaliGemmaProcessor extends Processor {
41
41
  }
42
42
 
43
43
  const bos_token = this.tokenizer.bos_token;
44
+ // @ts-expect-error TS2339
44
45
  const image_seq_length = this.image_processor.config.image_seq_length;
45
46
  let input_strings;
46
47
  if (text.some((t) => t.includes(IMAGE_TOKEN))) {
package/src/models/phi3_v/processing_phi3_v.js CHANGED
@@ -14,7 +14,7 @@ export class Phi3VProcessor extends Processor {
14
14
  *
15
15
  * @param {string|string[]} text
16
16
  * @param {RawImage|RawImage[]} images
17
- * @param {...any} args
17
+ * @param { { padding?: boolean, truncation?: boolean, num_crops?: number } | undefined } options
18
18
  * @returns {Promise<any>}
19
19
  */
20
20
  async _call(text, images = null, {
package/src/models/pyannote/feature_extraction_pyannote.js CHANGED
@@ -52,6 +52,7 @@ export class PyAnnoteFeatureExtractor extends FeatureExtractor {
52
52
 
53
53
  let current_speaker = -1;
54
54
  for (let i = 0; i < scores.length; ++i) {
55
+ /** @type {number[]} */
55
56
  const probabilities = softmax(scores[i]);
56
57
  const [score, id] = max(probabilities);
57
58
  const [start, end] = [i, i + 1];
package/src/models/qwen2_vl/processing_qwen2_vl.js CHANGED
@@ -28,6 +28,7 @@ export class Qwen2VLProcessor extends Processor {
28
28
  }
29
29
 
30
30
  if (image_grid_thw) {
31
+ // @ts-expect-error TS2551
31
32
  let merge_length = this.image_processor.config.merge_size ** 2;
32
33
  let index = 0;
33
34
 
package/src/models/seamless_m4t/feature_extraction_seamless_m4t.js CHANGED
@@ -133,8 +133,8 @@ export class SeamlessM4TFeatureExtractor extends FeatureExtractor {
133
133
  'int64',
134
134
  new BigInt64Array(numPaddedFrames),
135
135
  [1, numPaddedFrames],
136
- )
137
- padded_attention_mask.data.fill(1n, 0, num_frames);
136
+ );
137
+ /** @type {BigInt64Array} */ (padded_attention_mask.data).fill(1n, 0, num_frames);
138
138
  }
139
139
  }
140
140
  }
package/src/models/whisper/feature_extraction_whisper.js CHANGED
@@ -44,7 +44,7 @@ export class WhisperFeatureExtractor extends FeatureExtractor {
44
44
  )
45
45
 
46
46
  const data = features.data;
47
- const maxValue = max(data)[0];
47
+ const maxValue = max(/** @type {Float32Array} */(data))[0];
48
48
 
49
49
  for (let i = 0; i < data.length; ++i) {
50
50
  data[i] = (Math.max(data[i], maxValue - 8.0) + 4.0) / 4.0;