ctranslate2-4.7.0-cp314-cp314-macosx_11_0_arm64.whl

This diff represents the content of a publicly available package version released to one of the supported registries. It is provided for informational purposes only and reflects the package content as it appears in the public registry.
@@ -0,0 +1,98 @@
+ import enum
+
+ import numpy as np
+
+ from ctranslate2.specs import common_spec, model_spec
+
+
+ # This enum should match the C++ equivalent in include/ctranslate2/layers/attention.h.
+ class RotaryScalingType(enum.IntEnum):
+     """RoPE scaling type."""
+
+     Linear = 0
+     Su = 1
+     Llama3 = 2
+
+
+ class MultiHeadAttentionSpec(model_spec.LayerSpec):
+     def __init__(
+         self,
+         self_attention=False,
+         relative_position=False,
+         relative_asymmetric_position=False,
+         relative_attention_bias=False,
+         rms_norm=False,
+         rotary_dim=None,
+         rotary_interleave=True,
+         rotary_scaling_type=None,
+         rotary_scaling_factor=1,
+         rotary_base=10000,
+         original_max_position_embeddings=0,
+         max_position_embeddings=0,
+         num_heads_kv=None,
+         head_dim=None,
+         sliding_window=None,
+         qk_norm=False,
+         qk_norm_rms=True,
+         has_norm=True,
+     ):
+         self.queries_scale = model_spec.OPTIONAL
+
+         if has_norm:
+             self.layer_norm = common_spec.LayerNormSpec(rms_norm=rms_norm)
+         self.linear = [
+             common_spec.LinearSpec() for _ in range(2 if self_attention else 3)
+         ]
+
+         if qk_norm:
+             self.q_norm = common_spec.LayerNormSpec(rms_norm=qk_norm_rms)
+             self.k_norm = common_spec.LayerNormSpec(rms_norm=qk_norm_rms)
+
+         if relative_position:
+             self.relative_position_keys = None
+             self.relative_position_values = None
+
+         if relative_attention_bias:
+             self.relative_attention_bias = None
+             self.relative_attention_max_distance = None
+
+         if relative_asymmetric_position:
+             self.relative_asymmetric_position_keys = None
+             self.relative_left_max_position = None
+             self.relative_right_max_position = None
+
+         if original_max_position_embeddings != 0:
+             self.original_max_position_embeddings = np.dtype("int32").type(
+                 original_max_position_embeddings
+             )
+         if max_position_embeddings != 0:
+             self.max_position_embeddings = np.dtype("int32").type(
+                 max_position_embeddings
+             )
+
+         if rotary_dim is not None:
+             self.rotary_dim = np.dtype("int32").type(rotary_dim)
+             self.rotary_interleave = rotary_interleave
+             self.rotary_base = np.dtype("float32").type(rotary_base)
+
+             if rotary_scaling_type is not None:
+                 self.rotary_scaling_type = np.dtype("int8").type(rotary_scaling_type)
+                 if rotary_scaling_type is RotaryScalingType.Linear:
+                     self.rotary_scaling_factor = np.dtype("float32").type(
+                         rotary_scaling_factor
+                     )
+                 elif rotary_scaling_type is RotaryScalingType.Su:
+                     self.rotary_scaling_long_factor = None
+                     self.rotary_scaling_short_factor = None
+                 elif rotary_scaling_type is RotaryScalingType.Llama3:
+                     self.rotary_low_freq_factor = None
+                     self.rotary_high_freq_factor = None
+
+         if num_heads_kv is not None:
+             self.num_heads_kv = np.dtype("int32").type(num_heads_kv)
+
+         if head_dim is not None:
+             self.head_dim = np.dtype("int32").type(head_dim)
+
+         if sliding_window is not None:
+             self.sliding_window = np.dtype("int32").type(sliding_window)
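
Editorial note, not part of the wheel: the hunk above is the attention layer spec module (it matches ctranslate2/specs/attention_spec.py in the upstream source tree, which is an assumption inferred from the imports and class names). The sketch below shows how a converter might instantiate MultiHeadAttentionSpec for a grouped-query-attention layer with linear RoPE scaling; all parameter values are illustrative only.

from ctranslate2.specs import attention_spec  # module name assumed from the hunk above

# Hypothetical layer: 8 KV heads, 128-dim heads, rotary embeddings over the
# full head dimension, linear RoPE scaling by a factor of 2.
spec = attention_spec.MultiHeadAttentionSpec(
    self_attention=True,
    rms_norm=True,
    rotary_dim=128,
    rotary_interleave=False,
    rotary_scaling_type=attention_spec.RotaryScalingType.Linear,
    rotary_scaling_factor=2.0,
    rotary_base=10000,
    num_heads_kv=8,
    head_dim=128,
)

# Scalar hyperparameters are stored as fixed-width NumPy scalars so they can be
# serialized alongside the weight tensors.
print(type(spec.num_heads_kv))        # <class 'numpy.int32'>
print(int(spec.rotary_scaling_type))  # 0 (RotaryScalingType.Linear)

Note that attributes guarded by if-branches in __init__ (for example rotary_scaling_long_factor for Su scaling) only exist on the spec when the corresponding option is selected, so the serialized model carries no unused variables.
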
@@ -0,0 +1,66 @@
+ import enum
+
+ from ctranslate2.specs import model_spec
+
+
+ # This enum should match the C++ equivalent in include/ctranslate2/ops/activation.h.
+ class Activation(enum.IntEnum):
+     """Activation type."""
+
+     RELU = 0
+     GELUTanh = 1
+     SWISH = 2
+     GELU = 3
+     GELUSigmoid = 4
+     Tanh = 5
+     Sigmoid = 6
+
+
+ # This enum should match the C++ equivalent in include/ctranslate2/layers/common.h.
+ class EmbeddingsMerge(enum.IntEnum):
+     """Merge strategy for factors embeddings."""
+
+     CONCAT = 0
+     ADD = 1
+
+
+ class Quantization(enum.IntEnum):
+     """Quantization type."""
+
+     CT2 = 0
+     AWQ_GEMM = 1
+     AWQ_GEMV = 2
+
+
+ class LayerNormSpec(model_spec.LayerSpec):
+     def __init__(self, rms_norm=False):
+         self.gamma = None
+         if not rms_norm:
+             self.beta = None
+         else:
+             self.layer_norm_use_residual = model_spec.OPTIONAL
+
+
+ class LinearSpec(model_spec.LayerSpec):
+     def __init__(self):
+         self.weight = None
+         self.weight_scale = model_spec.OPTIONAL
+         self.weight_zero = model_spec.OPTIONAL
+         self.bias = model_spec.OPTIONAL
+
+     def has_bias(self):
+         return not isinstance(self.bias, str)
+
+
+ class Conv1DSpec(model_spec.LayerSpec):
+     def __init__(self):
+         self.weight = None
+         self.weight_scale = model_spec.OPTIONAL
+         self.bias = model_spec.OPTIONAL
+
+
+ class EmbeddingsSpec(model_spec.LayerSpec):
+     def __init__(self):
+         self.weight = None
+         self.weight_scale = model_spec.OPTIONAL
+         self.multiply_by_sqrt_depth = model_spec.OPTIONAL
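
Editorial note, not part of the wheel: the hunk above defines the shared building-block specs (it matches ctranslate2/specs/common_spec.py in the upstream source tree, again an assumption inferred from the contents). The sketch below illustrates the declare-then-assign pattern these classes rely on: a spec only declares variable slots, a converter fills them with real arrays, and anything left at model_spec.OPTIONAL counts as absent. That OPTIONAL is a string sentinel is inferred from the isinstance check in has_bias above; the tensor shapes are hypothetical.

import numpy as np

from ctranslate2.specs import common_spec  # module name assumed from the hunk above

linear = common_spec.LinearSpec()
linear.weight = np.random.rand(1024, 1024).astype(np.float32)  # hypothetical weight tensor

# bias is still the OPTIONAL placeholder (a string, judging by the
# isinstance(self.bias, str) test in has_bias), so it reads as "no bias".
print(linear.has_bias())  # False

linear.bias = np.zeros(1024, dtype=np.float32)
print(linear.has_bias())  # True
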