ctranslate2 4.6.1-cp311-cp311-win_amd64.whl → 4.6.2-cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ctranslate2/__init__.py +11 -3
- ctranslate2/_ext.cp311-win_amd64.pyd +0 -0
- ctranslate2/converters/transformers.py +303 -0
- ctranslate2/ctranslate2.dll +0 -0
- ctranslate2/specs/attention_spec.py +6 -0
- ctranslate2/specs/transformer_spec.py +7 -0
- ctranslate2/version.py +1 -1
- {ctranslate2-4.6.1.dist-info → ctranslate2-4.6.2.dist-info}/METADATA +1 -1
- {ctranslate2-4.6.1.dist-info → ctranslate2-4.6.2.dist-info}/RECORD +12 -12
- {ctranslate2-4.6.1.dist-info → ctranslate2-4.6.2.dist-info}/WHEEL +0 -0
- {ctranslate2-4.6.1.dist-info → ctranslate2-4.6.2.dist-info}/entry_points.txt +0 -0
- {ctranslate2-4.6.1.dist-info → ctranslate2-4.6.2.dist-info}/top_level.txt +0 -0
ctranslate2/__init__.py
CHANGED
@@ -5,10 +5,18 @@ if sys.platform == "win32":
     import glob
     import os
 
-    import pkg_resources
-
     module_name = sys.modules[__name__].__name__
-    package_dir = pkg_resources.resource_filename(module_name, "")
+
+    # Adressing python 3.9 < version
+    try:
+        from importlib.resources import files
+
+        # Fixed the pkg_resources depreciation
+        package_dir = str(files(module_name))
+    except ImportError:
+        import pkg_resources
+
+        package_dir = pkg_resources.resource_filename(module_name, "")
 
     add_dll_directory = getattr(os, "add_dll_directory", None)
     if add_dll_directory is not None:
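The hunk above swaps the deprecated pkg_resources lookup for importlib.resources when it is available (Python 3.9+). A standalone sketch of the same resolution pattern, assuming the ctranslate2 package is importable (illustrative only, not code shipped in the wheel):

import os
import sys

module_name = "ctranslate2"

try:
    # Python >= 3.9: importlib.resources.files avoids the pkg_resources deprecation.
    from importlib.resources import files

    package_dir = str(files(module_name))
except ImportError:
    # Older interpreters fall back to the legacy setuptools API.
    import pkg_resources

    package_dir = pkg_resources.resource_filename(module_name, "")

# On Windows, the package directory is then registered so the bundled DLLs can be found.
if sys.platform == "win32" and hasattr(os, "add_dll_directory"):
    os.add_dll_directory(package_dir)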
ctranslate2/_ext.cp311-win_amd64.pyd
CHANGED
Binary file
ctranslate2/converters/transformers.py
CHANGED
@@ -1819,6 +1819,192 @@ class LlamaLoader(ModelLoader):
             gc.collect()
 
 
+@register_loader("Gemma3TextConfig")
+@register_loader("Gemma3Config")
+class Gemma3Loader(ModelLoader):
+    @property
+    def architecture_name(self):
+        return "Gemma3ForCausalLM"
+
+    def get_model_spec(self, model):
+        num_layers = model.config.num_hidden_layers
+        num_heads = model.config.num_attention_heads
+        num_heads_kv = getattr(model.config, "num_key_value_heads", num_heads)
+        if num_heads_kv == num_heads:
+            num_heads_kv = None
+
+        head_dim = model.config.head_dim
+
+        activation_config = getattr(
+            model.config, "hidden_activation", "gelu_pytorch_tanh"
+        )
+
+        # Get RoPE parameters
+        rope_theta = getattr(model.config, "rope_theta", 1_000_000)  # Global: 1M
+        rope_local_base_freq = getattr(
+            model.config, "rope_local_base_freq", 10_000
+        )  # Local: 10k
+
+        # Get sliding window configuration
+        sliding_window = getattr(model.config, "sliding_window", 1024)
+        layer_types = getattr(model.config, "layer_types", None)
+
+        quantization_config = getattr(model.config, "quantization_config", None)
+        if quantization_config:
+            if quantization_config.quant_method == "awq":
+                quant_type = _SUPPORTED_QUANTIZATION.get(quantization_config.version)
+                if quant_type is None:
+                    raise NotImplementedError(
+                        "Quantization type '%s' is not yet implemented."
+                        % quantization_config.quant_method
+                    )
+        else:
+            quant_type = common_spec.Quantization.CT2
+
+        # Create base spec using from_config
+        spec = transformer_spec.TransformerDecoderModelSpec.from_config(
+            num_layers,
+            num_heads,
+            activation=(
+                common_spec.Activation.GELU
+                if activation_config == "gelu"
+                else common_spec.Activation.GELUTanh
+            ),
+            pre_norm=True,
+            ffn_glu=True,
+            rms_norm=True,
+            rotary_dim=head_dim,
+            rotary_interleave=False,
+            rotary_base=rope_local_base_freq,  # Default to local base freq
+            num_heads_kv=num_heads_kv,
+            head_dim=head_dim,
+            sliding_window=sliding_window,  # Default to local sliding window
+            pre_post_layer_norm=True,
+            qk_norm=True,
+        )
+
+        # Store layer_types for use in set_decoder
+        self._layer_types = layer_types
+
+        # Override per-layer settings for global vs local attention
+        for i, layer_type in enumerate(layer_types):
+            layer = spec.decoder.layer[i]
+            if layer_type == "full_attention":
+                layer.self_attention.rotary_base = np.dtype("float32").type(rope_theta)
+                layer.self_attention.sliding_window = np.dtype("int32").type(0)
+            elif layer_type == "sliding_attention":
+                layer.self_attention.rotary_base = np.dtype("float32").type(
+                    rope_local_base_freq
+                )
+                layer.self_attention.sliding_window = np.dtype("int32").type(
+                    sliding_window
+                )
+
+        self.set_decoder(spec.decoder, model.model, quant_type)
+        self.set_linear(spec.decoder.projection, model.lm_head)
+        return spec
+
+    def get_vocabulary(self, model, tokenizer):
+        tokens = super().get_vocabulary(model, tokenizer)
+
+        extra_ids = model.config.vocab_size - len(tokens)
+        for i in range(extra_ids):
+            tokens.append("<extra_id_%d>" % i)
+        if model.config.vocab_size < len(tokens):
+            tokens = tokens[: model.config.vocab_size]
+
+        return tokens
+
+    def set_vocabulary(self, spec, tokens):
+        spec.register_vocabulary(tokens)
+
+    def set_config(self, config, model, tokenizer):
+        config.bos_token = tokenizer.bos_token
+        config.unk_token = tokenizer.unk_token
+
+        if (
+            hasattr(tokenizer, "chat_template")
+            and isinstance(tokenizer.chat_template, str)
+            and tokenizer.chat_template.strip()
+        ):
+            config.eos_token = "<end_of_turn>"
+        else:
+            config.eos_token = tokenizer.eos_token
+
+    def set_layer_norm(self, spec, layer_norm):
+        spec.gamma = layer_norm.weight + 1.0
+
+    def set_decoder(self, spec, module, quant_type=common_spec.Quantization.CT2):
+        spec.scale_embeddings = True
+        spec.start_from_zero_embedding = False
+        self.set_embeddings(spec.embeddings, module.embed_tokens)  # Input
+        self.set_layer_norm(spec.layer_norm, module.norm)  # Output
+
+        for layer_spec, layer in zip(spec.layer, module.layers):
+            self.set_layer_norm(layer_spec.input_layer_norm, layer.input_layernorm)
+
+            self.set_layer_norm(
+                layer_spec.post_attention_layer_norm, layer.post_attention_layernorm
+            )
+
+            self.set_layer_norm(
+                layer_spec.pre_feedforward_layer_norm, layer.pre_feedforward_layernorm
+            )
+
+            self.set_layer_norm(
+                layer_spec.post_feedforward_layer_norm, layer.post_feedforward_layernorm
+            )
+
+            # Set QK-norm weights (Gemma 3 uses this instead of soft-capping)
+            self.set_layer_norm(
+                layer_spec.self_attention.q_norm, layer.self_attn.q_norm
+            )
+            self.set_layer_norm(
+                layer_spec.self_attention.k_norm, layer.self_attn.k_norm
+            )
+
+            # Set attention projections
+            split_layers = [common_spec.LinearSpec() for _ in range(3)]
+            self.set_linear(
+                split_layers[0], layer.self_attn.q_proj, quant_type=quant_type
+            )
+            self.set_linear(
+                split_layers[1], layer.self_attn.k_proj, quant_type=quant_type
+            )
+            self.set_linear(
+                split_layers[2], layer.self_attn.v_proj, quant_type=quant_type
+            )
+
+            if quant_type == common_spec.Quantization.CT2:
+                utils.fuse_linear(layer_spec.self_attention.linear[0], split_layers)
+            else:
+                cc_dim = 1 if quant_type == common_spec.Quantization.AWQ_GEMM else 0
+                utils.fuse_linear_prequant(
+                    layer_spec.self_attention.linear[0], split_layers, cc_dim
+                )
+
+            self.set_linear(
+                layer_spec.self_attention.linear[1],
+                layer.self_attn.o_proj,
+                quant_type=quant_type,
+            )
+
+            # Set FFN weights
+            self.set_linear(
+                layer_spec.ffn.linear_0, layer.mlp.gate_proj, quant_type=quant_type
+            )
+            self.set_linear(
+                layer_spec.ffn.linear_0_noact, layer.mlp.up_proj, quant_type=quant_type
+            )
+            self.set_linear(
+                layer_spec.ffn.linear_1, layer.mlp.down_proj, quant_type=quant_type
+            )
+
+            delattr(layer, "self_attn")
+            delattr(layer, "mlp")
+            gc.collect()
+
+
 @register_loader("MistralConfig")
 class MistralLoader(ModelLoader):
     @property
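The new Gemma3Loader above registers the Gemma3Config/Gemma3TextConfig architectures with the Transformers converter. A rough usage sketch, not code from the package: the model id and output directory are placeholder assumptions, and the ct2-transformers-converter entry point is the equivalent command-line route.

from ctranslate2.converters import TransformersConverter

# Assumed example checkpoint; any Hugging Face model using Gemma3Config should be
# accepted once this loader is available.
converter = TransformersConverter("google/gemma-3-1b-it")
converter.convert("gemma3_ct2", quantization="int8_float16", force=True)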
@@ -2074,6 +2260,123 @@ class Qwen2Loader(ModelLoader):
             gc.collect()
 
 
+@register_loader("Qwen3Config")
+class Qwen3Loader(ModelLoader):
+    @property
+    def architecture_name(self):
+        return "Qwen3ForCausalLM"
+
+    def get_model_spec(self, model):
+        num_layers = model.config.num_hidden_layers
+        num_heads = model.config.num_attention_heads
+        num_heads_kv = getattr(model.config, "num_key_value_heads", num_heads)
+        head_dim = getattr(
+            model.config, "head_dim", model.config.hidden_size // num_heads
+        )
+
+        if num_heads_kv == num_heads:
+            num_heads_kv = None
+
+        rope_scaling = getattr(model.config, "rope_scaling", None)
+        if rope_scaling:
+            rope_type = rope_scaling.get("type") or rope_scaling["rope_type"]
+            rotary_scaling_type = _SUPPORTED_ROPE_SCALING.get(rope_type)
+            rotary_scaling_factor = rope_scaling["factor"]
+            if rotary_scaling_type is None:
+                raise NotImplementedError(
+                    "RoPE scaling type '%s' is not yet implemented. "
+                    "The following RoPE scaling types are currently supported: %s"
+                    % (rope_scaling["type"], ", ".join(_SUPPORTED_ROPE_SCALING.keys()))
+                )
+        else:
+            rotary_scaling_type = None
+            rotary_scaling_factor = 1
+
+        spec = transformer_spec.TransformerDecoderModelSpec.from_config(
+            num_layers,
+            num_heads,
+            activation=common_spec.Activation.SWISH,
+            pre_norm=True,
+            ffn_glu=True,
+            rms_norm=True,
+            rotary_dim=model.config.head_dim,
+            rotary_interleave=False,
+            rotary_scaling_type=rotary_scaling_type,
+            rotary_scaling_factor=rotary_scaling_factor,
+            rotary_base=getattr(model.config, "rope_theta", 10000),
+            num_heads_kv=num_heads_kv,
+            head_dim=head_dim,
+            qk_norm=True,
+        )
+
+        self.set_decoder(spec.decoder, model.model)
+        self.set_linear(spec.decoder.projection, model.lm_head)
+        return spec
+
+    def get_vocabulary(self, model, tokenizer):
+        tokens = super().get_vocabulary(model, tokenizer)
+        extra_ids = model.config.vocab_size - len(tokens)
+        for i in range(extra_ids):
+            tokens.append("<extra_id_%d>" % i)
+        return tokens
+
+    def set_vocabulary(self, spec, tokens):
+        spec.register_vocabulary(tokens)
+
+    def set_config(self, config, model, tokenizer):
+        config.bos_token = (
+            tokenizer.bos_token
+            if tokenizer.bos_token is not None
+            else tokenizer.pad_token
+        )
+        config.eos_token = tokenizer.eos_token
+        config.unk_token = (
+            tokenizer.unk_token if tokenizer.unk_token is not None else ""
+        )
+        config.layer_norm_epsilon = model.config.rms_norm_eps
+
+    def set_layer_norm(self, spec, layer_norm):
+        spec.gamma = layer_norm.weight
+
+    def set_decoder(self, spec, module):
+        spec.scale_embeddings = False
+        self.set_embeddings(spec.embeddings, module.embed_tokens)
+        self.set_layer_norm(spec.layer_norm, module.norm)
+
+        for layer_idx, (layer_spec, layer) in enumerate(zip(spec.layer, module.layers)):
+            self.set_layer_norm(
+                layer_spec.self_attention.layer_norm, layer.input_layernorm
+            )
+            self.set_layer_norm(
+                layer_spec.ffn.layer_norm, layer.post_attention_layernorm
+            )
+
+            self.set_layer_norm(
+                layer_spec.self_attention.q_norm, layer.self_attn.q_norm
+            )
+            self.set_layer_norm(
+                layer_spec.self_attention.k_norm, layer.self_attn.k_norm
+            )
+
+            split_layers = [common_spec.LinearSpec() for _ in range(3)]
+            self.set_linear(split_layers[0], layer.self_attn.q_proj)
+            self.set_linear(split_layers[1], layer.self_attn.k_proj)
+            self.set_linear(split_layers[2], layer.self_attn.v_proj)
+            utils.fuse_linear(layer_spec.self_attention.linear[0], split_layers)
+
+            self.set_linear(
+                layer_spec.self_attention.linear[1],
+                layer.self_attn.o_proj,
+            )
+
+            self.set_linear(layer_spec.ffn.linear_0, layer.mlp.gate_proj)
+            self.set_linear(layer_spec.ffn.linear_0_noact, layer.mlp.up_proj)
+            self.set_linear(layer_spec.ffn.linear_1, layer.mlp.down_proj)
+
+            delattr(layer, "self_attn")
+            delattr(layer, "mlp")
+
+
 @register_loader("MixFormerSequentialConfig")
 class MixFormerSequentialLoader(ModelLoader):
     @property
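Similarly, the Qwen3Loader makes Qwen3ForCausalLM checkpoints convertible. A rough sketch of converting and then generating with the resulting model, with the model id, output directory, and prompt as placeholder assumptions (not code from the package):

import ctranslate2
import transformers

from ctranslate2.converters import TransformersConverter

model_id = "Qwen/Qwen3-0.6B"  # assumed example checkpoint
output_dir = "qwen3_ct2"

TransformersConverter(model_id).convert(output_dir, quantization="int8", force=True)

tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
generator = ctranslate2.Generator(output_dir, device="cpu")

start_tokens = tokenizer.convert_ids_to_tokens(tokenizer.encode("Hello"))
result = generator.generate_batch([start_tokens], max_length=64, sampling_topk=1)[0]
print(tokenizer.decode(result.sequences_ids[0]))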
ctranslate2/ctranslate2.dll
CHANGED
Binary file
ctranslate2/specs/attention_spec.py
CHANGED
@@ -32,6 +32,8 @@ class MultiHeadAttentionSpec(model_spec.LayerSpec):
         num_heads_kv=None,
         head_dim=None,
         sliding_window=None,
+        qk_norm=False,
+        qk_norm_rms=True,
     ):
         self.queries_scale = model_spec.OPTIONAL
 
@@ -40,6 +42,10 @@ class MultiHeadAttentionSpec(model_spec.LayerSpec):
             common_spec.LinearSpec() for _ in range(2 if self_attention else 3)
         ]
 
+        if qk_norm:
+            self.q_norm = common_spec.LayerNormSpec(rms_norm=qk_norm_rms)
+            self.k_norm = common_spec.LayerNormSpec(rms_norm=qk_norm_rms)
+
         if relative_position:
             self.relative_position_keys = None
             self.relative_position_values = None
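The new q_norm/k_norm sub-specs carry per-head RMSNorm weights applied to queries and keys before attention (the QK-norm used by Gemma 3 and Qwen3). A schematic NumPy version of the operation these weights parameterize, as an illustration of the technique rather than the CTranslate2 kernel:

import numpy as np

def rms_norm(x, gamma, eps=1e-6):
    # x: (..., head_dim), gamma: (head_dim,) learned gain (the q_norm/k_norm weight).
    variance = np.mean(x * x, axis=-1, keepdims=True)
    return x / np.sqrt(variance + eps) * gamma

head_dim = 8
queries = np.random.randn(2, 4, head_dim)  # (batch, heads, head_dim), toy values
gamma = np.ones(head_dim)
normalized_queries = rms_norm(queries, gamma)  # then rotary embeddings and attention
print(normalized_queries.shape)  # (2, 4, 8)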
ctranslate2/specs/transformer_spec.py
CHANGED
@@ -109,6 +109,7 @@ class TransformerDecoderSpec(model_spec.LayerSpec):
         quant_type: Optional[common_spec.Quantization] = None,
         quant_group_size: Optional[int] = None,
         quant_bits: Optional[int] = None,
+        qk_norm: Optional[bool] = False,
     ):
         """Initializes a Transformer decoder specification.
 
@@ -222,6 +223,7 @@ class TransformerDecoderSpec(model_spec.LayerSpec):
                 num_heads_kv=num_heads_kv,
                 head_dim=head_dim,
                 sliding_window=sliding_window,
+                qk_norm=qk_norm,
             )
             for _ in range(num_layers)
         ]
@@ -286,6 +288,7 @@ class TransformerDecoderLayerSpec(model_spec.LayerSpec):
         num_heads_kv=None,
         head_dim=None,
         sliding_window=None,
+        qk_norm=False,
     ):
         self.self_attention = attention_spec.MultiHeadAttentionSpec(
             self_attention=True,
@@ -302,6 +305,7 @@ class TransformerDecoderLayerSpec(model_spec.LayerSpec):
             num_heads_kv=num_heads_kv,
             head_dim=head_dim,
             sliding_window=sliding_window,
+            qk_norm=qk_norm,
         )
 
         if with_encoder_attention:
@@ -309,6 +313,7 @@ class TransformerDecoderLayerSpec(model_spec.LayerSpec):
             rms_norm=rms_norm,
             num_heads_kv=num_heads_kv,
             sliding_window=sliding_window,
+            qk_norm=qk_norm,
         )
 
         self.ffn = FeedForwardSpec(glu=ffn_glu, rms_norm=rms_norm)
@@ -557,6 +562,7 @@ class TransformerDecoderModelSpec(model_spec.LanguageModelSpec):
         quant_type: Optional[common_spec.Quantization] = None,
         quant_group_size: Optional[int] = None,
         quant_bits: Optional[int] = None,
+        qk_norm: Optional[bool] = False,
     ):
         """Creates a Transformer decoder model specification.
 
@@ -631,6 +637,7 @@ class TransformerDecoderModelSpec(model_spec.LanguageModelSpec):
             quant_type=quant_type,
             quant_group_size=quant_group_size,
             quant_bits=quant_bits,
+            qk_norm=qk_norm,
        )
 
        return cls(decoder)
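With qk_norm now threaded through TransformerDecoderModelSpec.from_config down to every layer's MultiHeadAttentionSpec, a converter can request QK-norm attention declaratively. A small sketch with arbitrary toy dimensions, assuming ctranslate2 4.6.2 is installed:

from ctranslate2.specs import common_spec, transformer_spec

spec = transformer_spec.TransformerDecoderModelSpec.from_config(
    2,  # num_layers, toy value
    4,  # num_heads, toy value
    activation=common_spec.Activation.SWISH,
    pre_norm=True,
    ffn_glu=True,
    rms_norm=True,
    qk_norm=True,
)

# When qk_norm=True, each layer's attention spec gains q_norm and k_norm entries.
attention = spec.decoder.layer[0].self_attention
print(hasattr(attention, "q_norm"), hasattr(attention, "k_norm"))  # True True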
ctranslate2/version.py
CHANGED
{ctranslate2-4.6.1.dist-info → ctranslate2-4.6.2.dist-info}/RECORD
CHANGED
@@ -1,11 +1,11 @@
-ctranslate2/__init__.py,sha256=
-ctranslate2/_ext.cp311-win_amd64.pyd,sha256=
-ctranslate2/ctranslate2.dll,sha256=
+ctranslate2/__init__.py,sha256=CGqShDaFxQ-u-aCtVq99T4HKuBdMB8b49l2KSxnQb8M,1735
+ctranslate2/_ext.cp311-win_amd64.pyd,sha256=x0g24hF5CHZc7Uwyw5v6LyfHT_Lwc7nihm-pm3362Jo,702464
+ctranslate2/ctranslate2.dll,sha256=9zIz4dY3yV1kTTKaipyQwjcGDwzZ3OzKiOkNpXdcQ1U,58389504
 ctranslate2/cudnn64_9.dll,sha256=wHzEfy-kpWZZPHr0qn5X7fCamFoP3dFMuNb0VuJSrwU,438840
 ctranslate2/extensions.py,sha256=axO2FI8ddiFmlko2AzQ6VcdtF-3hDA7VmPGnTIkrPkI,21782
 ctranslate2/libiomp5md.dll,sha256=mCIzNmsK_NoeD1WgsTQJfjW3eWE_VN22nmhebNBrdV8,1614192
 ctranslate2/logging.py,sha256=P9evHdxuMx_iHvwJjEASEq-j5062H64Pl5-fJjxEuHk,1221
-ctranslate2/version.py,sha256=
+ctranslate2/version.py,sha256=f2Hk9NHTYgXftujV8JVkeOzenykZ9QzbsZ-nIt9U1uc,53
 ctranslate2/converters/__init__.py,sha256=ufYjcXf2sK4fiXAUU6tIJyWmNuLjKFf_KH3GWLXe4ls,507
 ctranslate2/converters/converter.py,sha256=Qkb8NGLLmgqMT6HZkFq61zwbxyq3NlWcaxLZ6Ap-YOQ,3601
 ctranslate2/converters/eole_ct2.py,sha256=RUcDJH_2AUt0jDs5oAqccE6tQPbO9LQ6JmVriC1DTy8,12564
@@ -15,19 +15,19 @@ ctranslate2/converters/openai_gpt2.py,sha256=1rXKM2ZURZHWRv4XZ135fPkVWpM4rTG-q7V
 ctranslate2/converters/opennmt_py.py,sha256=Vva60az6tGqlQXs0UgC09r_fCD3u2u6wUJB-8V4OUFQ,13183
 ctranslate2/converters/opennmt_tf.py,sha256=uBRp2wz5xriSQcA_c0S0ekY7ws6RpRX_0EKeMRdM7-s,16222
 ctranslate2/converters/opus_mt.py,sha256=5KbPaTiBhhorPzMpTugIfIJ8SgcqHfJUbJrWKBN-Djs,1254
-ctranslate2/converters/transformers.py,sha256=
+ctranslate2/converters/transformers.py,sha256=zwqUFFFwLpam6z5lpBz2rgfYj065CbsdT9S_xVqPjCk,126110
 ctranslate2/converters/utils.py,sha256=w7NG39lx-9dOdL57OqKVTdC__opkuP8RACg1TLlUJwM,3817
 ctranslate2/models/__init__.py,sha256=53p98uemtuvVPz8xK7_LbOhBiUJJu-c-NdmOHJgdXus,497
 ctranslate2/specs/__init__.py,sha256=9GabtSyczznYqiqUS6XvULi8pQ3_3RNRogXobGP0G80,653
-ctranslate2/specs/attention_spec.py,sha256=
+ctranslate2/specs/attention_spec.py,sha256=0JhCBrbb20G07UFnUAYIUtfcqn4VtflJHYWGIunwKDw,3442
 ctranslate2/specs/common_spec.py,sha256=freTDhQMy5PYofBrij4_FDgrKokMYApWSPIpASZIlJc,1608
 ctranslate2/specs/model_spec.py,sha256=atCAYzDEIzyJ1TCayFGZVutHqSWa1ww-vbZ0OiIJqh8,25736
-ctranslate2/specs/transformer_spec.py,sha256=
+ctranslate2/specs/transformer_spec.py,sha256=43jOIvCSbAvqZJ1IyvRdGUa4f-zhdKhQBOXvp0T8YLE,30360
 ctranslate2/specs/wav2vec2_spec.py,sha256=NITsuOuf2F5bU1-aXit8-WEtWV9fH2Eq7A7857UyYho,2106
 ctranslate2/specs/wav2vec2bert_spec.py,sha256=UgtsJWC9mMgJ7bn4T_xg1uXK0rqA4-9tT2KMGVgPKnw,3529
 ctranslate2/specs/whisper_spec.py,sha256=_vm1sc5yOowOJ4iyvcxMXrgt-UcLJrZT8OtPscUXcQQ,2447
-ctranslate2-4.6.
-ctranslate2-4.6.
-ctranslate2-4.6.
-ctranslate2-4.6.
-ctranslate2-4.6.
+ctranslate2-4.6.2.dist-info/METADATA,sha256=r5HnmZE0BMI60j3N0GmDdM6l7Q7KW3w5nLLOX_AKCRY,10354
+ctranslate2-4.6.2.dist-info/WHEEL,sha256=JLOMsP7F5qtkAkINx5UnzbFguf8CqZeraV8o04b0I8I,101
+ctranslate2-4.6.2.dist-info/entry_points.txt,sha256=ZHkojut_TmVRHl0bJIGm2b9wqr98GAJqxN9rlJtQshs,466
+ctranslate2-4.6.2.dist-info/top_level.txt,sha256=1hUaWzcFIuSo2BAIUHFA3Osgsu6S1giq0y6Rosv8HOQ,12
+ctranslate2-4.6.2.dist-info/RECORD,,
{ctranslate2-4.6.1.dist-info → ctranslate2-4.6.2.dist-info}/WHEEL
File without changes

{ctranslate2-4.6.1.dist-info → ctranslate2-4.6.2.dist-info}/entry_points.txt
File without changes

{ctranslate2-4.6.1.dist-info → ctranslate2-4.6.2.dist-info}/top_level.txt
File without changes