ctranslate2 4.6.3__cp313-cp313-win_amd64.whl → 4.7.0__cp313-cp313-win_amd64.whl

This diff shows the contents of publicly released package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.
ctranslate2/__init__.py CHANGED
@@ -21,6 +21,8 @@ if sys.platform == "win32":
    add_dll_directory = getattr(os, "add_dll_directory", None)
    if add_dll_directory is not None:
        add_dll_directory(package_dir)
+       add_dll_directory(f"{package_dir}/../_rocm_sdk_core/bin")
+       add_dll_directory(f"{package_dir}/../_rocm_sdk_libraries_custom/bin")

    for library in glob.glob(os.path.join(package_dir, "*.dll")):
        ctypes.CDLL(library)
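For context, a minimal sketch (not part of the wheel) of where the two new search paths resolve, assuming the ROCm helper packages are installed as siblings of ctranslate2 in site-packages; the install path below is made up:

import os

# Hypothetical install location of the ctranslate2 package on Windows.
package_dir = r"C:\Python313\Lib\site-packages\ctranslate2"

# The two relative paths added above point at sibling package directories.
rocm_core_bin = os.path.normpath(os.path.join(package_dir, "../_rocm_sdk_core/bin"))
rocm_libs_bin = os.path.normpath(os.path.join(package_dir, "../_rocm_sdk_libraries_custom/bin"))

# On Windows these resolve to:
print(rocm_core_bin)   # C:\Python313\Lib\site-packages\_rocm_sdk_core\bin
print(rocm_libs_bin)   # C:\Python313\Lib\site-packages\_rocm_sdk_libraries_custom\bin

# os.add_dll_directory() on these folders lets the Windows loader locate the ROCm
# runtime DLLs shipped by those packages when the ROCm-specific wheels are used.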
Binary file
ctranslate2/converters/eole_ct2.py CHANGED
@@ -3,7 +3,7 @@ import argparse
  from eole.config.run import PredictConfig
  from eole.constants import PositionEncodingType
  from eole.inputters.inputter import vocabs_to_dict
- from eole.models.model import BaseModel
+ from eole.models.model import get_model_class

  from ctranslate2.converters import utils
  from ctranslate2.converters.converter import Converter
@@ -164,7 +164,8 @@ class EoleConverter(Converter):

  config = PredictConfig(model_path=self._model_path, src="dummy")

- vocabs, model, model_config = BaseModel.load_test_model(config)
+ model_class = get_model_class(config.model)
+ model, vocabs, model_config = model_class.for_inference(config)
  vocabs_dict = vocabs_to_dict(vocabs)

  config.model = model_config
ctranslate2/converters/transformers.py CHANGED
@@ -253,6 +253,30 @@ class ModelLoader(abc.ABC):
          "No activation smoothing logic is defined for this model"
      )

+     def get_rotary_params(self, config, default_rope_theta):
+         rope_scaling = getattr(config, "rope_scaling", None)
+         if rope_scaling:
+             rope_type = rope_scaling.get("type") or rope_scaling.get("rope_type")
+
+             if rope_type == "default":
+                 rotary_scaling_type = None
+             else:
+                 rotary_scaling_type = _SUPPORTED_ROPE_SCALING.get(rope_type)
+                 if rotary_scaling_type is None:
+                     raise NotImplementedError(
+                         "RoPE scaling type '%s' is not yet implemented. "
+                         "The following RoPE scaling types are currently supported: %s"
+                         % (rope_type, ", ".join(_SUPPORTED_ROPE_SCALING.keys()))
+                     )
+             rotary_scaling_factor = rope_scaling.get("factor", 1)
+             rope_theta = rope_scaling.get("rope_theta", default_rope_theta)
+         else:
+             rotary_scaling_type = None
+             rotary_scaling_factor = 1
+             rope_theta = getattr(config, "rope_theta", default_rope_theta)
+
+         return rotary_scaling_type, rotary_scaling_factor, rope_theta
+

  @register_loader("BartConfig")
  class BartLoader(ModelLoader):
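As a rough illustration (not part of the diff) of how the consolidated helper behaves, its two branches can be exercised directly. The config objects and values below are made up, and "linear" is assumed to be one of the keys in _SUPPORTED_ROPE_SCALING; running it requires ctranslate2 4.7.0 with its converter dependencies installed:

from types import SimpleNamespace

from ctranslate2.converters.transformers import ModelLoader

# No rope_scaling on the config: type None, factor 1, base taken from
# config.rope_theta (or the provided default when that is also missing).
plain = SimpleNamespace(rope_theta=1_000_000)
print(ModelLoader.get_rotary_params(None, plain, 10_000))
# -> (None, 1, 1000000)

# With rope_scaling: the type is looked up in _SUPPORTED_ROPE_SCALING and the
# factor comes from the dict. Note that the base is now read from the
# rope_scaling dict itself (falling back to the default), not from rope_theta.
scaled = SimpleNamespace(rope_scaling={"rope_type": "linear", "factor": 2.0})
print(ModelLoader.get_rotary_params(None, scaled, 10_000))
# -> (<RotaryScalingType.Linear: ...>, 2.0, 10000)

The helper never touches self, so it is called unbound here purely for illustration.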
@@ -463,7 +487,7 @@ class M2M100Loader(BartLoader):
  if tokens[-1] == tokenizer.unk_token:
      tokens.insert(tokenizer.unk_token_id, tokens.pop())

- for token in tokenizer.additional_special_tokens:
+ for token in tokenizer.special_tokens_map.get("additional_special_tokens", []):
      if token not in tokens:
          tokens.append(token)

@@ -488,7 +512,7 @@ class MBartLoader(BartLoader):
  config.unk_token = tokenizer.unk_token

  # MBart-25 passes the language code as the decoder start token.
- if model.config.tokenizer_class in ("MBartTokenizer", None):
+ if getattr(model.config, "tokenizer_class", None) in ("MBartTokenizer", None):
      config.decoder_start_token = None
  else:
      config.decoder_start_token = tokenizer.eos_token
@@ -928,12 +952,14 @@ class WhisperLoader(BartLoader):
      "<|nocaptions|>",
      "<|notimestamps|>",
  ]
+
+ additional_tokens = getattr(tokenizer, "additional_special_tokens", [])
+ if not additional_tokens:
+     return []
+
  return [
-     token_id
-     for token_id, token in zip(
-         tokenizer.additional_special_tokens_ids,
-         tokenizer.additional_special_tokens,
-     )
+     tokenizer.convert_tokens_to_ids(token)
+     for token in additional_tokens
      if token not in non_lang_special_tokens
  ]

@@ -1674,21 +1700,9 @@ class LlamaLoader(ModelLoader):
  if num_heads_kv == num_heads:
      num_heads_kv = None

- rope_scaling = getattr(model.config, "rope_scaling", None)
- if rope_scaling:
-     rope_type = rope_scaling.get("type") or rope_scaling["rope_type"]
-     rotary_scaling_type = _SUPPORTED_ROPE_SCALING.get(rope_type)
-     rotary_scaling_factor = rope_scaling["factor"]
-
-     if rotary_scaling_type is None:
-         raise NotImplementedError(
-             "RoPE scaling type '%s' is not yet implemented. "
-             "The following RoPE scaling types are currently supported: %s"
-             % (rope_scaling["type"], ", ".join(_SUPPORTED_ROPE_SCALING.keys()))
-         )
- else:
-     rotary_scaling_type = None
-     rotary_scaling_factor = 1
+ rotary_scaling_type, rotary_scaling_factor, rope_theta = self.get_rotary_params(
+     model.config, 10_000
+ )

  quantization_config = getattr(model.config, "quantization_config", None)
  if quantization_config:
@@ -1722,7 +1736,7 @@ class LlamaLoader(ModelLoader):
      rotary_interleave=False,
      rotary_scaling_type=rotary_scaling_type,
      rotary_scaling_factor=rotary_scaling_factor,
-     rotary_base=getattr(model.config, "rope_theta", 10000),
+     rotary_base=rope_theta,
      num_heads_kv=num_heads_kv,
      quant_type=quant_type,
      quant_group_size=quant_group_size,
@@ -1733,6 +1747,7 @@ class LlamaLoader(ModelLoader):
  self.set_linear(spec.decoder.projection, model.lm_head)

  # set extra RoPE parameters for Llama-3.1
+ rope_scaling = getattr(model.config, "rope_scaling", None)
  if rotary_scaling_type == attention_spec.RotaryScalingType.Llama3:
      for layer in spec.decoder.layer:
          layer.self_attention.rotary_low_freq_factor = rope_scaling[
@@ -1859,8 +1874,12 @@ class Gemma3Loader(ModelLoader):
              "Quantization type '%s' is not yet implemented."
              % quantization_config.quant_method
          )
+     quant_group_size = quantization_config.group_size
+     quant_bits = quantization_config.bits
  else:
      quant_type = common_spec.Quantization.CT2
+     quant_group_size = None
+     quant_bits = None

  # Create base spec using from_config
  spec = transformer_spec.TransformerDecoderModelSpec.from_config(
@@ -1881,6 +1900,9 @@ class Gemma3Loader(ModelLoader):
      head_dim=head_dim,
      sliding_window=sliding_window,  # Default to local sliding window
      pre_post_layer_norm=True,
+     quant_type=quant_type,
+     quant_group_size=quant_group_size,
+     quant_bits=quant_bits,
      qk_norm=True,
  )

@@ -1933,7 +1955,8 @@ class Gemma3Loader(ModelLoader):
      config.eos_token = tokenizer.eos_token

  def set_layer_norm(self, spec, layer_norm):
-     spec.gamma = layer_norm.weight + 1.0
+     spec.gamma = layer_norm.weight
+     spec.layer_norm_use_residual = True

  def set_decoder(self, spec, module, quant_type=common_spec.Quantization.CT2):
      spec.scale_embeddings = True
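The set_layer_norm change above stops folding the Gemma-style +1.0 offset into the converted gamma and instead flags the spec with layer_norm_use_residual, leaving the offset to the runtime. A minimal numeric sketch of the intended equivalence, with made-up values (the runtime code that consumes the flag is not part of this diff):

import numpy as np

weight = np.array([0.10, -0.25, 0.40], dtype=np.float32)  # hypothetical norm weight

gamma_463 = weight + 1.0   # 4.6.3: offset baked into the stored gamma
gamma_470 = weight         # 4.7.0: raw weight stored, offset deferred to runtime

# With layer_norm_use_residual set, the runtime is expected to apply the same
# (1 + gamma) scaling, so the effective multiplier is unchanged.
assert np.allclose(gamma_463, gamma_470 + 1.0)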
@@ -2022,20 +2045,9 @@ class MistralLoader(ModelLoader):

  sliding_window = getattr(model.config, "sliding_window", 0)

- rope_scaling = getattr(model.config, "rope_scaling", None)
- if rope_scaling:
-     rotary_scaling_type = _SUPPORTED_ROPE_SCALING.get(rope_scaling["type"])
-     rotary_scaling_factor = rope_scaling["factor"]
-
-     if rotary_scaling_type is None:
-         raise NotImplementedError(
-             "RoPE scaling type '%s' is not yet implemented. "
-             "The following RoPE scaling types are currently supported: %s"
-             % (rope_scaling["type"], ", ".join(_SUPPORTED_ROPE_SCALING.keys()))
-         )
- else:
-     rotary_scaling_type = None
-     rotary_scaling_factor = 1
+ rotary_scaling_type, rotary_scaling_factor, rope_theta = self.get_rotary_params(
+     model.config, 10_000
+ )

  quantization_config = getattr(model.config, "quantization_config", None)
  if quantization_config:
@@ -2068,7 +2080,7 @@ class MistralLoader(ModelLoader):
      rotary_interleave=False,
      rotary_scaling_type=rotary_scaling_type,
      rotary_scaling_factor=rotary_scaling_factor,
-     rotary_base=getattr(model.config, "rope_theta", 10000),
+     rotary_base=rope_theta,
      num_heads_kv=num_heads_kv,
      sliding_window=sliding_window,
      quant_type=quant_type,
@@ -2167,21 +2179,9 @@ class Qwen2Loader(ModelLoader):
  if num_heads_kv == num_heads:
      num_heads_kv = None

- rope_scaling = getattr(model.config, "rope_scaling", None)
- if rope_scaling:
-     rope_type = rope_scaling.get("type") or rope_scaling["rope_type"]
-     rotary_scaling_type = _SUPPORTED_ROPE_SCALING.get(rope_type)
-     rotary_scaling_factor = rope_scaling["factor"]
-
-     if rotary_scaling_type is None:
-         raise NotImplementedError(
-             "RoPE scaling type '%s' is not yet implemented. "
-             "The following RoPE scaling types are currently supported: %s"
-             % (rope_scaling["type"], ", ".join(_SUPPORTED_ROPE_SCALING.keys()))
-         )
- else:
-     rotary_scaling_type = None
-     rotary_scaling_factor = 1
+ rotary_scaling_type, rotary_scaling_factor, rope_theta = self.get_rotary_params(
+     model.config, 10_000
+ )

  # Check for AWQ quantization config
  quantization_config = getattr(model.config, "quantization_config", None)
@@ -2216,7 +2216,7 @@ class Qwen2Loader(ModelLoader):
      rotary_interleave=False,
      rotary_scaling_type=rotary_scaling_type,
      rotary_scaling_factor=rotary_scaling_factor,
-     rotary_base=getattr(model.config, "rope_theta", 10000),
+     rotary_base=rope_theta,
      num_heads_kv=num_heads_kv,
      quant_type=quant_type,
      quant_group_size=quant_group_size,
@@ -2323,21 +2323,9 @@ class Qwen3Loader(ModelLoader):
  if num_heads_kv == num_heads:
      num_heads_kv = None

- rope_scaling = getattr(model.config, "rope_scaling", None)
- if rope_scaling:
-     rope_type = rope_scaling.get("type") or rope_scaling["rope_type"]
-     rotary_scaling_type = _SUPPORTED_ROPE_SCALING.get(rope_type)
-     rotary_scaling_factor = rope_scaling["factor"]
-     if rotary_scaling_type is None:
-         raise NotImplementedError(
-             "RoPE scaling type '%s' is not yet implemented. "
-             "The following RoPE scaling types are currently supported: %s"
-             % (rope_scaling["type"], ", ".join(_SUPPORTED_ROPE_SCALING.keys()))
-         )
- else:
-     rotary_scaling_type = None
-     rotary_scaling_factor = 1
-
+ rotary_scaling_type, rotary_scaling_factor, rope_theta = self.get_rotary_params(
+     model.config, 1_000_000
+ )
  # Check for AWQ quantization config
  quantization_config = getattr(model.config, "quantization_config", None)
  if quantization_config:
@@ -2371,7 +2359,7 @@ class Qwen3Loader(ModelLoader):
      rotary_interleave=False,
      rotary_scaling_type=rotary_scaling_type,
      rotary_scaling_factor=rotary_scaling_factor,
-     rotary_base=getattr(model.config, "rope_theta", 10000),
+     rotary_base=rope_theta,
      num_heads_kv=num_heads_kv,
      head_dim=head_dim,
      qk_norm=True,
Binary file
ctranslate2/specs/transformer_spec.py CHANGED
@@ -275,7 +275,7 @@ class TransformerDecoderSpec(model_spec.LayerSpec):
  self.project_in = common_spec.LinearSpec()
  self.project_out = common_spec.LinearSpec()

- if quant_type is not None:
+ if quant_type:
      self._config["quantization_type"] = quant_type
      self._config["quantization_bits"] = quant_bits
      self._config["quantization_group_size"] = quant_group_size
ctranslate2/version.py CHANGED
@@ -1,3 +1,3 @@
  """Version information."""

- __version__ = "4.6.3"
+ __version__ = "4.7.0"
ctranslate2-4.6.3.dist-info/METADATA → ctranslate2-4.7.0.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: ctranslate2
- Version: 4.6.3
+ Version: 4.7.0
  Summary: Fast inference engine for Transformer models
  Home-page: https://opennmt.net
  Author: OpenNMT
@@ -100,6 +100,8 @@ generator.generate_batch(start_tokens)

  See the [documentation](https://opennmt.net/CTranslate2) for more information and examples.

+ If you have an AMD ROCm GPU, we provide specific Python wheels on the [releases page](https://github.com/OpenNMT/CTranslate2/releases/).
+
  ## Benchmarks

  We translate the En->De test set *newstest2014* with multiple models:
ctranslate2-4.6.3.dist-info/RECORD → ctranslate2-4.7.0.dist-info/RECORD CHANGED
@@ -1,33 +1,33 @@
- ctranslate2/__init__.py,sha256=CGqShDaFxQ-u-aCtVq99T4HKuBdMB8b49l2KSxnQb8M,1735
- ctranslate2/_ext.cp313-win_amd64.pyd,sha256=pdiMfARxnnneWviebu7neLfOs6fLdU4HjfcGpSCtKuU,715776
- ctranslate2/ctranslate2.dll,sha256=9TRGHvoyNSzXe9eEc3jKJa5-21-AeSENgp3DXvhCZ4M,58590720
+ ctranslate2/__init__.py,sha256=LZy5gF-9vTRdcERSnTSP_RrCPDks9UDU7uzxw1-d0aU,1881
+ ctranslate2/_ext.cp313-win_amd64.pyd,sha256=bPfgej3CXraCad6brhOxuQFimiFviEjVksLxGSC7Oas,715776
+ ctranslate2/ctranslate2.dll,sha256=umrDDC_rg_IbXg2MOi-8jNasZZdjb9b_Io8CWS5_M_U,59823104
  ctranslate2/cudnn64_9.dll,sha256=ntvN_3OwrwcOsWCyzmbln-ygSqAXNR2O7cxejhSZZ9I,266288
  ctranslate2/extensions.py,sha256=kDNt0H9KvfNCc3PrRGzfkj9Fkvna84i2O5Y-rav6UkU,21940
  ctranslate2/libiomp5md.dll,sha256=mCIzNmsK_NoeD1WgsTQJfjW3eWE_VN22nmhebNBrdV8,1614192
  ctranslate2/logging.py,sha256=P9evHdxuMx_iHvwJjEASEq-j5062H64Pl5-fJjxEuHk,1221
- ctranslate2/version.py,sha256=TboXlbA67GNmSOm1v2u_U8AKgYh5iminMMLTvi3Xho4,53
+ ctranslate2/version.py,sha256=cWqiIzEeUIcvUfq82ZopTbW1pRWqZkZOW7b6pks8tz8,53
  ctranslate2/converters/__init__.py,sha256=ufYjcXf2sK4fiXAUU6tIJyWmNuLjKFf_KH3GWLXe4ls,507
  ctranslate2/converters/converter.py,sha256=Qkb8NGLLmgqMT6HZkFq61zwbxyq3NlWcaxLZ6Ap-YOQ,3601
- ctranslate2/converters/eole_ct2.py,sha256=RUcDJH_2AUt0jDs5oAqccE6tQPbO9LQ6JmVriC1DTy8,12564
+ ctranslate2/converters/eole_ct2.py,sha256=sRXvPark9V-4umXpMxPuJVQekMLstyNZ7xNjyAFthvg,12623
  ctranslate2/converters/fairseq.py,sha256=2vlBk4AVCHwXxKkwPHVmcjyfo1dAV0_DJS1i6q-44NE,12822
  ctranslate2/converters/marian.py,sha256=1_7P3EbIDPOdyJbtb_Lp-LCBPBb9A8E9OhzoyFwTb64,11274
  ctranslate2/converters/openai_gpt2.py,sha256=1rXKM2ZURZHWRv4XZ135fPkVWpM4rTG-q7VR7OD6d-A,3304
  ctranslate2/converters/opennmt_py.py,sha256=zex4TbHiiJMy0tkqQg39oNjxmSZKf8dnRLH3iQ1H4z0,13227
  ctranslate2/converters/opennmt_tf.py,sha256=uBRp2wz5xriSQcA_c0S0ekY7ws6RpRX_0EKeMRdM7-s,16222
  ctranslate2/converters/opus_mt.py,sha256=5KbPaTiBhhorPzMpTugIfIJ8SgcqHfJUbJrWKBN-Djs,1254
- ctranslate2/converters/transformers.py,sha256=VRal3vKSQrAOvcNPwewjVMtgvWskz0KD5bdIrpNrZNA,142380
+ ctranslate2/converters/transformers.py,sha256=41E9rMH6Qm77OIfswMVn7esp_NPZn3ZimiLTA6Be_50,141519
  ctranslate2/converters/utils.py,sha256=w7NG39lx-9dOdL57OqKVTdC__opkuP8RACg1TLlUJwM,3817
  ctranslate2/models/__init__.py,sha256=53p98uemtuvVPz8xK7_LbOhBiUJJu-c-NdmOHJgdXus,497
  ctranslate2/specs/__init__.py,sha256=9GabtSyczznYqiqUS6XvULi8pQ3_3RNRogXobGP0G80,653
  ctranslate2/specs/attention_spec.py,sha256=FnaSiQREWQw_cURgsCb9_aIpGOCxyVGTCpIOdd-08v8,3492
  ctranslate2/specs/common_spec.py,sha256=freTDhQMy5PYofBrij4_FDgrKokMYApWSPIpASZIlJc,1608
  ctranslate2/specs/model_spec.py,sha256=atCAYzDEIzyJ1TCayFGZVutHqSWa1ww-vbZ0OiIJqh8,25736
- ctranslate2/specs/transformer_spec.py,sha256=-GJ0oSjI3ns-Ei_-xXIM_P2GaZxt5Z-g03zJ0m_4ciU,34317
+ ctranslate2/specs/transformer_spec.py,sha256=s6mY6MMHneraXrWua_531Xjb5MVEJZCUTemUERO11GI,34305
  ctranslate2/specs/wav2vec2_spec.py,sha256=NITsuOuf2F5bU1-aXit8-WEtWV9fH2Eq7A7857UyYho,2106
  ctranslate2/specs/wav2vec2bert_spec.py,sha256=UgtsJWC9mMgJ7bn4T_xg1uXK0rqA4-9tT2KMGVgPKnw,3529
  ctranslate2/specs/whisper_spec.py,sha256=_vm1sc5yOowOJ4iyvcxMXrgt-UcLJrZT8OtPscUXcQQ,2447
- ctranslate2-4.6.3.dist-info/METADATA,sha256=awoc6t4JSxpv51lmfAG28ZG91FhGQ8DHspyLzLqLo_Q,10839
- ctranslate2-4.6.3.dist-info/WHEEL,sha256=qV0EIPljj1XC_vuSatRWjn02nZIz3N1t8jsZz7HBr2U,101
- ctranslate2-4.6.3.dist-info/entry_points.txt,sha256=ZHkojut_TmVRHl0bJIGm2b9wqr98GAJqxN9rlJtQshs,466
- ctranslate2-4.6.3.dist-info/top_level.txt,sha256=1hUaWzcFIuSo2BAIUHFA3Osgsu6S1giq0y6Rosv8HOQ,12
- ctranslate2-4.6.3.dist-info/RECORD,,
+ ctranslate2-4.7.0.dist-info/METADATA,sha256=Vm9SM5sybdzcJHc6HBek2PgP6nbuDiEHWQFZuJjWDvc,10979
+ ctranslate2-4.7.0.dist-info/WHEEL,sha256=-WvvtQtdhM1F5HMi-4hSXLQ_1Tg6qJRWO1HnLNr4mCU,102
+ ctranslate2-4.7.0.dist-info/entry_points.txt,sha256=ZHkojut_TmVRHl0bJIGm2b9wqr98GAJqxN9rlJtQshs,466
+ ctranslate2-4.7.0.dist-info/top_level.txt,sha256=1hUaWzcFIuSo2BAIUHFA3Osgsu6S1giq0y6Rosv8HOQ,12
+ ctranslate2-4.7.0.dist-info/RECORD,,
ctranslate2-4.6.3.dist-info/WHEEL → ctranslate2-4.7.0.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (80.9.0)
+ Generator: setuptools (80.10.2)
  Root-Is-Purelib: false
  Tag: cp313-cp313-win_amd64
