@huggingface/transformers 3.0.0-alpha.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (96)
  1. package/LICENSE +202 -0
  2. package/README.md +376 -0
  3. package/dist/ort-wasm-simd-threaded.jsep.wasm +0 -0
  4. package/dist/transformers.cjs +30741 -0
  5. package/dist/transformers.cjs.map +1 -0
  6. package/dist/transformers.js +33858 -0
  7. package/dist/transformers.js.map +1 -0
  8. package/dist/transformers.min.cjs +173 -0
  9. package/dist/transformers.min.cjs.map +1 -0
  10. package/dist/transformers.min.js +231 -0
  11. package/dist/transformers.min.js.map +1 -0
  12. package/package.json +92 -0
  13. package/src/backends/onnx.js +151 -0
  14. package/src/configs.js +360 -0
  15. package/src/env.js +152 -0
  16. package/src/generation/configuration_utils.js +381 -0
  17. package/src/generation/logits_process.js +716 -0
  18. package/src/generation/logits_sampler.js +204 -0
  19. package/src/generation/parameters.js +35 -0
  20. package/src/generation/stopping_criteria.js +156 -0
  21. package/src/generation/streamers.js +212 -0
  22. package/src/models/whisper/common_whisper.js +151 -0
  23. package/src/models/whisper/generation_whisper.js +89 -0
  24. package/src/models.js +7028 -0
  25. package/src/ops/registry.js +92 -0
  26. package/src/pipelines.js +3341 -0
  27. package/src/processors.js +2614 -0
  28. package/src/tokenizers.js +4395 -0
  29. package/src/transformers.js +28 -0
  30. package/src/utils/audio.js +704 -0
  31. package/src/utils/constants.js +2 -0
  32. package/src/utils/core.js +149 -0
  33. package/src/utils/data-structures.js +445 -0
  34. package/src/utils/devices.js +11 -0
  35. package/src/utils/dtypes.js +62 -0
  36. package/src/utils/generic.js +35 -0
  37. package/src/utils/hub.js +671 -0
  38. package/src/utils/image.js +745 -0
  39. package/src/utils/maths.js +1050 -0
  40. package/src/utils/tensor.js +1378 -0
  41. package/types/backends/onnx.d.ts +26 -0
  42. package/types/backends/onnx.d.ts.map +1 -0
  43. package/types/configs.d.ts +59 -0
  44. package/types/configs.d.ts.map +1 -0
  45. package/types/env.d.ts +106 -0
  46. package/types/env.d.ts.map +1 -0
  47. package/types/generation/configuration_utils.d.ts +320 -0
  48. package/types/generation/configuration_utils.d.ts.map +1 -0
  49. package/types/generation/logits_process.d.ts +354 -0
  50. package/types/generation/logits_process.d.ts.map +1 -0
  51. package/types/generation/logits_sampler.d.ts +51 -0
  52. package/types/generation/logits_sampler.d.ts.map +1 -0
  53. package/types/generation/parameters.d.ts +47 -0
  54. package/types/generation/parameters.d.ts.map +1 -0
  55. package/types/generation/stopping_criteria.d.ts +81 -0
  56. package/types/generation/stopping_criteria.d.ts.map +1 -0
  57. package/types/generation/streamers.d.ts +81 -0
  58. package/types/generation/streamers.d.ts.map +1 -0
  59. package/types/models/whisper/common_whisper.d.ts +8 -0
  60. package/types/models/whisper/common_whisper.d.ts.map +1 -0
  61. package/types/models/whisper/generation_whisper.d.ts +76 -0
  62. package/types/models/whisper/generation_whisper.d.ts.map +1 -0
  63. package/types/models.d.ts +3845 -0
  64. package/types/models.d.ts.map +1 -0
  65. package/types/ops/registry.d.ts +11 -0
  66. package/types/ops/registry.d.ts.map +1 -0
  67. package/types/pipelines.d.ts +2403 -0
  68. package/types/pipelines.d.ts.map +1 -0
  69. package/types/processors.d.ts +917 -0
  70. package/types/processors.d.ts.map +1 -0
  71. package/types/tokenizers.d.ts +999 -0
  72. package/types/tokenizers.d.ts.map +1 -0
  73. package/types/transformers.d.ts +13 -0
  74. package/types/transformers.d.ts.map +1 -0
  75. package/types/utils/audio.d.ts +130 -0
  76. package/types/utils/audio.d.ts.map +1 -0
  77. package/types/utils/constants.d.ts +2 -0
  78. package/types/utils/constants.d.ts.map +1 -0
  79. package/types/utils/core.d.ts +91 -0
  80. package/types/utils/core.d.ts.map +1 -0
  81. package/types/utils/data-structures.d.ts +236 -0
  82. package/types/utils/data-structures.d.ts.map +1 -0
  83. package/types/utils/devices.d.ts +8 -0
  84. package/types/utils/devices.d.ts.map +1 -0
  85. package/types/utils/dtypes.d.ts +22 -0
  86. package/types/utils/dtypes.d.ts.map +1 -0
  87. package/types/utils/generic.d.ts +11 -0
  88. package/types/utils/generic.d.ts.map +1 -0
  89. package/types/utils/hub.d.ts +191 -0
  90. package/types/utils/hub.d.ts.map +1 -0
  91. package/types/utils/image.d.ts +119 -0
  92. package/types/utils/image.d.ts.map +1 -0
  93. package/types/utils/maths.d.ts +280 -0
  94. package/types/utils/maths.d.ts.map +1 -0
  95. package/types/utils/tensor.d.ts +392 -0
  96. package/types/utils/tensor.d.ts.map +1 -0
@@ -0,0 +1,151 @@
1
+
2
+
/**
 * Languages supported by Whisper, as `[language code, lowercase English name]` pairs.
 * The order of this list is the order in which supported values are reported in
 * the error thrown by `whisper_language_to_code`.
 */
const WHISPER_LANGUAGES = [
    ["en", "english"],
    ["zh", "chinese"],
    ["de", "german"],
    ["es", "spanish"],
    ["ru", "russian"],
    ["ko", "korean"],
    ["fr", "french"],
    ["ja", "japanese"],
    ["pt", "portuguese"],
    ["tr", "turkish"],
    ["pl", "polish"],
    ["ca", "catalan"],
    ["nl", "dutch"],
    ["ar", "arabic"],
    ["sv", "swedish"],
    ["it", "italian"],
    ["id", "indonesian"],
    ["hi", "hindi"],
    ["fi", "finnish"],
    ["vi", "vietnamese"],
    ["he", "hebrew"],
    ["uk", "ukrainian"],
    ["el", "greek"],
    ["ms", "malay"],
    ["cs", "czech"],
    ["ro", "romanian"],
    ["da", "danish"],
    ["hu", "hungarian"],
    ["ta", "tamil"],
    ["no", "norwegian"],
    ["th", "thai"],
    ["ur", "urdu"],
    ["hr", "croatian"],
    ["bg", "bulgarian"],
    ["lt", "lithuanian"],
    ["la", "latin"],
    ["mi", "maori"],
    ["ml", "malayalam"],
    ["cy", "welsh"],
    ["sk", "slovak"],
    ["te", "telugu"],
    ["fa", "persian"],
    ["lv", "latvian"],
    ["bn", "bengali"],
    ["sr", "serbian"],
    ["az", "azerbaijani"],
    ["sl", "slovenian"],
    ["kn", "kannada"],
    ["et", "estonian"],
    ["mk", "macedonian"],
    ["br", "breton"],
    ["eu", "basque"],
    ["is", "icelandic"],
    ["hy", "armenian"],
    ["ne", "nepali"],
    ["mn", "mongolian"],
    ["bs", "bosnian"],
    ["kk", "kazakh"],
    ["sq", "albanian"],
    ["sw", "swahili"],
    ["gl", "galician"],
    ["mr", "marathi"],
    ["pa", "punjabi"],
    ["si", "sinhala"],
    ["km", "khmer"],
    ["sn", "shona"],
    ["yo", "yoruba"],
    ["so", "somali"],
    ["af", "afrikaans"],
    ["oc", "occitan"],
    ["ka", "georgian"],
    ["be", "belarusian"],
    ["tg", "tajik"],
    ["sd", "sindhi"],
    ["gu", "gujarati"],
    ["am", "amharic"],
    ["yi", "yiddish"],
    ["lo", "lao"],
    ["uz", "uzbek"],
    ["fo", "faroese"],
    ["ht", "haitian creole"],
    ["ps", "pashto"],
    ["tk", "turkmen"],
    ["nn", "nynorsk"],
    ["mt", "maltese"],
    ["sa", "sanskrit"],
    ["lb", "luxembourgish"],
    ["my", "myanmar"],
    ["bo", "tibetan"],
    ["tl", "tagalog"],
    ["mg", "malagasy"],
    ["as", "assamese"],
    ["tt", "tatar"],
    ["haw", "hawaiian"],
    ["ln", "lingala"],
    ["ha", "hausa"],
    ["ba", "bashkir"],
    ["jw", "javanese"],
    ["su", "sundanese"],
]

/** Maps a language code (e.g. "en") to its lowercase language name (e.g. "english"). */
// @ts-ignore
export const WHISPER_LANGUAGE_MAPPING = new Map(WHISPER_LANGUAGES);

/**
 * Maps a lowercase language name to its language code. Besides the inverse of
 * `WHISPER_LANGUAGES`, it contains alternative names (e.g. "castilian" -> "es").
 */
// @ts-ignore
export const WHISPER_TO_LANGUAGE_CODE_MAPPING = new Map([
    ...WHISPER_LANGUAGES.map(([k, v]) => [v, k]),
    ...[
        ["burmese", "my"],
        ["valencian", "ca"],
        ["flemish", "nl"],
        ["haitian", "ht"],
        ["letzeburgesch", "lb"],
        ["pushto", "ps"],
        ["panjabi", "pa"],
        ["moldavian", "ro"],
        ["moldovan", "ro"],
        ["sinhalese", "si"],
        ["castilian", "es"],
    ]
]);

/**
 * Resolve a language name or language code to a language code. Matching is case-insensitive.
 *
 * @param {string} language The language name or code
 * @returns {string} The language code
 * @throws {Error} If `language` is neither a known language name nor a known language code.
 * The message lists the supported codes when the input is two characters long, and the
 * supported names otherwise.
 */
export function whisper_language_to_code(language) {
    const normalized = language.toLowerCase();

    // Map to code from user-friendly name (e.g., "english" -> "en")
    const code_from_name = WHISPER_TO_LANGUAGE_CODE_MAPPING.get(normalized);
    if (code_from_name !== undefined) {
        return code_from_name;
    }

    // User provided the language code directly (e.g., "en")
    if (WHISPER_LANGUAGE_MAPPING.has(normalized)) {
        return normalized;
    }

    // User provided something that is not a language code or name
    const is_language_code = normalized.length === 2;
    const langs = is_language_code ? WHISPER_LANGUAGE_MAPPING.keys() : WHISPER_LANGUAGE_MAPPING.values();

    // `keys()`/`values()` return iterators, which JSON.stringify serializes as "{}";
    // materialize them into an array so the supported values actually appear in the message.
    throw new Error(`Language "${normalized}" is not supported. Must be one of: ${JSON.stringify(Array.from(langs))}`);
}
@@ -0,0 +1,89 @@
1
+ import { GenerationConfig } from "../../generation/configuration_utils.js";
2
+
/**
 * Generation configuration for Whisper models. Extends `GenerationConfig` with the
 * Whisper-specific options declared below; every option except
 * `max_initial_timestamp_index` defaults to `null`.
 */
export class WhisperGenerationConfig extends GenerationConfig {

    /**
     * When set, timestamps are returned together with the text.
     * Turning this on enables the `WhisperTimestampsLogitsProcessor`.
     * @type {boolean}
     */
    return_timestamps = null;

    /**
     * When set, token-level timestamps are returned together with the text.
     * Works with or without `return_timestamps`. For word-level timestamps,
     * group the tokens into words with the tokenizer.
     * @type {boolean}
     */
    return_token_timestamps = null;

    /**
     * Number of audio frames available in this chunk.
     * Only used when generating word-level timestamps.
     * @type {number}
     */
    num_frames = null;

    /**
     * Alignment heads used to predict word-level timestamps: a list of [layer, head]
     * pairs selecting the cross-attention heads that correlate strongly with word-level timing.
     * @type {[number, number][]}
     */
    alignment_heads = null;

    /**
     * Generation task: either "translate" or "transcribe".
     * @type {string}
     */
    task = null;

    /**
     * Language token to generate with. Accepted forms are `<|en|>`, `en` or `english`.
     * The full set of language tokens is listed in the `model.generation_config.lang_to_id` dictionary.
     * @type {string}
     */
    language = null;

    /**
     * Token id of `"<|notimestamps|>"`.
     * @type {number}
     */
    no_timestamps_token_id = null;

    /**
     * Rank-1 list of token IDs, created by passing text to [`~WhisperProcessor.get_prompt_ids`],
     * that is supplied as a prompt to each chunk. Useful for giving the transcription some
     * context ("prompt-engineering"), e.g. custom vocabularies or proper nouns, so those words
     * are more likely to be predicted correctly. Cannot be combined with
     * `decoder_start_token_id`, because it overwrites that value.
     * @type {number[]}
     */
    prompt_ids = null;

    /**
     * Whether the model is multilingual.
     * @type {boolean}
     */
    is_multilingual = null;

    /**
     * (Optional) Mapping from language tokens to their IDs.
     * Only needed when the model is multilingual.
     * @type {Record<string, number>|null}
     */
    lang_to_id = null;

    /**
     * (Optional) Mapping from task tokens to their IDs.
     * @type {Record<string, number>|null}
     */
    task_to_id = null;

    /**
     * Upper bound for the initial timestamp, which keeps the model from predicting
     * timestamps that lie too far in the future.
     * @type {number}
     */
    max_initial_timestamp_index = 1;
}
+
87
+ /**
88
+ * @typedef {import('../../generation/parameters.js').GenerationFunctionParameters & {generation_config: WhisperGenerationConfig} & WhisperGenerationConfig} WhisperGenerationFunctionParameters
89
+ */