grillyoptimum-0.1.0.tar.gz
This diff shows the content of a publicly available package version released to a supported registry. It is provided for informational purposes only and reflects the package as it appears in its public registry.
- grillyoptimum-0.1.0/LICENSE +21 -0
- grillyoptimum-0.1.0/PKG-INFO +63 -0
- grillyoptimum-0.1.0/README.md +46 -0
- grillyoptimum-0.1.0/grillyoptimum/__init__.py +22 -0
- grillyoptimum-0.1.0/grillyoptimum/configuration.py +43 -0
- grillyoptimum-0.1.0/grillyoptimum/modeling.py +160 -0
- grillyoptimum-0.1.0/grillyoptimum/pipeline.py +76 -0
- grillyoptimum-0.1.0/grillyoptimum.egg-info/PKG-INFO +63 -0
- grillyoptimum-0.1.0/grillyoptimum.egg-info/SOURCES.txt +14 -0
- grillyoptimum-0.1.0/grillyoptimum.egg-info/dependency_links.txt +1 -0
- grillyoptimum-0.1.0/grillyoptimum.egg-info/requires.txt +7 -0
- grillyoptimum-0.1.0/grillyoptimum.egg-info/top_level.txt +1 -0
- grillyoptimum-0.1.0/pyproject.toml +21 -0
- grillyoptimum-0.1.0/setup.cfg +4 -0
- grillyoptimum-0.1.0/tests/test_optimum.py +741 -0
- grillyoptimum-0.1.0/tests/test_optimum_gpu.py +772 -0
grillyoptimum-0.1.0/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2026 Nicolas Cloutier / Grillcheese AI
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

grillyoptimum-0.1.0/PKG-INFO
@@ -0,0 +1,63 @@
+Metadata-Version: 2.4
+Name: grillyoptimum
+Version: 0.1.0
+Summary: HuggingFace Optimum-compatible Vulkan backend — optional grilly extension
+Author-email: Nicolas Cloutier <ncloutier@grillcheeseai.com>
+License: MIT
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: grilly>=0.4.0
+Requires-Dist: grillyinference>=0.1.0
+Requires-Dist: numpy
+Requires-Dist: transformers
+Provides-Extra: optimum
+Requires-Dist: optimum; extra == "optimum"
+Dynamic: license-file
+
+# GrillyOptimum (Alpha not production ready)
+
+HuggingFace Optimum-compatible Vulkan backend — optional [grilly](https://github.com/grillcheese/grilly) extension.
+
+## Features
+
+- **from_pretrained()** — load any HF Llama model directly
+- **generate()** — HuggingFace-compatible generation interface
+- **Pipeline integration** — use with `transformers.pipeline("text-generation")`
+- **Vulkan acceleration** — native fp16 inference on RDNA2 GPUs
+
+## Quick Start
+
+```bash
+pip install grillyoptimum
+```
+
+```python
+from grillyoptimum import VulkanModelForCausalLM
+from transformers import AutoTokenizer
+
+model = VulkanModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
+
+# HF-style generation
+input_ids = tokenizer.encode("Hello, world!", return_tensors="np")
+output = model.generate(input_ids, max_new_tokens=100, temperature=0.7)
+print(tokenizer.decode(output[0]))
+
+# Pipeline
+from grillyoptimum.pipeline import create_text_generation_pipeline
+pipe = create_text_generation_pipeline("meta-llama/Llama-3.2-3B-Instruct")
+result = pipe("Explain quantum computing")
+print(result[0]["generated_text"])
+```
+
+## Requirements
+
+- Python 3.12+
+- grilly >= 0.4.0
+- grillyinference >= 0.1.0
+- transformers
+
+## License
+
+MIT

grillyoptimum-0.1.0/README.md
@@ -0,0 +1,46 @@
+# GrillyOptimum (Alpha not production ready)
+
+HuggingFace Optimum-compatible Vulkan backend — optional [grilly](https://github.com/grillcheese/grilly) extension.
+
+## Features
+
+- **from_pretrained()** — load any HF Llama model directly
+- **generate()** — HuggingFace-compatible generation interface
+- **Pipeline integration** — use with `transformers.pipeline("text-generation")`
+- **Vulkan acceleration** — native fp16 inference on RDNA2 GPUs
+
+## Quick Start
+
+```bash
+pip install grillyoptimum
+```
+
+```python
+from grillyoptimum import VulkanModelForCausalLM
+from transformers import AutoTokenizer
+
+model = VulkanModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
+
+# HF-style generation
+input_ids = tokenizer.encode("Hello, world!", return_tensors="np")
+output = model.generate(input_ids, max_new_tokens=100, temperature=0.7)
+print(tokenizer.decode(output[0]))
+
+# Pipeline
+from grillyoptimum.pipeline import create_text_generation_pipeline
+pipe = create_text_generation_pipeline("meta-llama/Llama-3.2-3B-Instruct")
+result = pipe("Explain quantum computing")
+print(result[0]["generated_text"])
+```
+
+## Requirements
+
+- Python 3.12+
+- grilly >= 0.4.0
+- grillyinference >= 0.1.0
+- transformers
+
+## License
+
+MIT

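An aside on the Quick Start above: the `generate()` interface (defined in grillyoptimum/modeling.py below) also accepts `do_sample=False` for deterministic greedy decoding. A minimal sketch, assuming only the public API shown in this diff:

```python
# Greedy (deterministic) decoding through the same interface; per modeling.py
# below, do_sample=False is mapped to temperature 0.0 internally.
from grillyoptimum import VulkanModelForCausalLM
from transformers import AutoTokenizer

model_id = "meta-llama/Llama-3.2-3B-Instruct"
model = VulkanModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

input_ids = tokenizer.encode("The capital of France is", return_tensors="np")
output = model.generate(input_ids, max_new_tokens=10, do_sample=False)
print(tokenizer.decode(output[0]))
```
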
grillyoptimum-0.1.0/grillyoptimum/__init__.py
@@ -0,0 +1,22 @@
+"""GrillyOptimum — HuggingFace Optimum-compatible Vulkan backend.
+
+Optional grilly extension providing:
+- VulkanModelForCausalLM: HF-compatible model with from_pretrained + generate
+- VulkanConfig: Backend configuration
+- Pipeline integration for HF transformers pipelines
+
+Usage:
+    from grillyoptimum import VulkanModelForCausalLM
+    model = VulkanModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
+    output = model.generate(input_ids, max_new_tokens=100)
+"""
+
+from .modeling import VulkanModelForCausalLM
+from .configuration import VulkanConfig
+
+__version__ = "0.1.0"
+
+__all__ = [
+    "VulkanModelForCausalLM",
+    "VulkanConfig",
+]

grillyoptimum-0.1.0/grillyoptimum/configuration.py
@@ -0,0 +1,43 @@
+"""Vulkan backend configuration for HF Optimum compatibility."""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+
+@dataclass
+class VulkanConfig:
+    """Configuration for the Vulkan inference backend.
+
+    Args:
+        dtype: Weight precision ("fp16" or "fp32").
+        use_vulkan: Whether to use Vulkan GPU acceleration.
+        page_size: KV-cache page size (default 256).
+        raw_window: Raw KV window (default 2048).
+        enable_h2o: Enable H2O eviction for extended context.
+        h2o_lambda: H2O decay rate.
+        enable_vsa: Enable VSA multi-scale summaries.
+        enable_quantization: Enable SmoothQuant INT8.
+        quantize_group_size: INT8 quantization group size.
+    """
+
+    dtype: str = "fp16"
+    use_vulkan: bool = True
+    page_size: int = 256
+    raw_window: int = 2048
+    enable_h2o: bool = False
+    h2o_lambda: float = 0.0002
+    enable_vsa: bool = False
+    enable_quantization: bool = False
+    quantize_group_size: int = 64
+    max_batch_size: int = 1
+    device: str = "vulkan"
+
+    def to_dict(self) -> dict:
+        from dataclasses import asdict
+        return asdict(self)
+
+    @classmethod
+    def from_dict(cls, d: dict) -> VulkanConfig:
+        valid_fields = {f.name for f in cls.__dataclass_fields__.values()}
+        return cls(**{k: v for k, v in d.items() if k in valid_fields})

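For illustration, a minimal sketch of how `VulkanConfig` round-trips through `to_dict()`/`from_dict()`; because `from_dict()` filters on the dataclass fields, unknown keys are dropped rather than raising. The extra key below is hypothetical:

```python
from grillyoptimum import VulkanConfig

cfg = VulkanConfig(enable_h2o=True, raw_window=4096)

d = cfg.to_dict()
d["future_option"] = True      # hypothetical key not on the dataclass
restored = VulkanConfig.from_dict(d)

# from_dict() keeps only known fields, so the extra key is ignored
assert restored == cfg
```
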
grillyoptimum-0.1.0/grillyoptimum/modeling.py
@@ -0,0 +1,160 @@
+"""VulkanModelForCausalLM — HuggingFace-compatible model interface.
+
+Wraps GrillyInference's LlamaForCausalLM with HF GenerationMixin patterns:
+- from_pretrained() loads weights and creates inference engine
+- generate() is compatible with HF generation kwargs
+- Can be used with transformers.pipeline("text-generation")
+"""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+
+from .configuration import VulkanConfig
+
+logger = logging.getLogger(__name__)
+
+
+class VulkanModelForCausalLM:
+    """HuggingFace-compatible Vulkan model for causal language modeling.
+
+    Compatible with:
+        from grillyoptimum import VulkanModelForCausalLM
+        model = VulkanModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
+        output = model.generate(input_ids, max_new_tokens=100)
+    """
+
+    def __init__(self, model, config, vulkan_config=None):
+        """Initialize with a GrillyInference model.
+
+        Args:
+            model: grillyinference.LlamaForCausalLM instance.
+            config: grillyinference.LlamaConfig instance.
+            vulkan_config: Optional VulkanConfig.
+        """
+        self._model = model
+        self.config = config
+        self.vulkan_config = vulkan_config or VulkanConfig()
+        self._kv_cache = None
+
+    @classmethod
+    def from_pretrained(
+        cls,
+        model_id_or_path: str,
+        dtype: str = "fp16",
+        vulkan_config: VulkanConfig | None = None,
+        **kwargs,
+    ) -> VulkanModelForCausalLM:
+        """Load a pretrained model, HuggingFace style.
+
+        Args:
+            model_id_or_path: HF model ID or local path.
+            dtype: Weight precision.
+            vulkan_config: Optional backend config.
+            **kwargs: Additional arguments (ignored for HF compat).
+
+        Returns:
+            VulkanModelForCausalLM instance.
+        """
+        from grillyinference import LlamaForCausalLM, LlamaConfig
+
+        if vulkan_config is None:
+            vulkan_config = VulkanConfig(dtype=dtype)
+
+        config = LlamaConfig.from_pretrained(model_id_or_path)
+        model = LlamaForCausalLM.from_pretrained(
+            model_id_or_path,
+            dtype=vulkan_config.dtype,
+        )
+
+        return cls(model, config, vulkan_config)
+
+    def generate(
+        self,
+        input_ids: np.ndarray | None = None,
+        max_new_tokens: int = 128,
+        max_length: int | None = None,
+        temperature: float = 1.0,
+        top_k: int = 50,
+        top_p: float = 1.0,
+        do_sample: bool = True,
+        num_return_sequences: int = 1,
+        **kwargs,
+    ) -> np.ndarray:
+        """Generate tokens, HuggingFace style.
+
+        Args:
+            input_ids: (batch, seq_len) int32 input token IDs.
+            max_new_tokens: Maximum tokens to generate.
+            max_length: Maximum total length (alternative to max_new_tokens).
+            temperature: Sampling temperature.
+            top_k: Top-k filtering.
+            top_p: Nucleus sampling.
+            do_sample: Whether to sample (True) or greedy decode (False).
+            **kwargs: Additional HF generation kwargs (ignored).
+
+        Returns:
+            (batch, seq_len + generated) int32 array of token IDs.
+        """
+        if input_ids is None:
+            raise ValueError("input_ids is required")
+
+        if isinstance(input_ids, list):
+            input_ids = np.array(input_ids, dtype=np.int32)
+        if input_ids.ndim == 1:
+            input_ids = input_ids[np.newaxis, :]
+
+        batch_size = input_ids.shape[0]
+        if batch_size != 1:
+            raise ValueError("Only batch_size=1 supported currently")
+
+        if max_length is not None:
+            max_new_tokens = max_length - input_ids.shape[1]
+
+        if not do_sample:
+            temperature = 0.0
+
+        # Import sampling function
+        from grillyinference.inference.generate import _sample_top_k_top_p, LLAMA3_STOP_TOKENS
+
+        # Prefill
+        from grillyinference import KVCache
+        kv_cache = KVCache(
+            self.config,
+            max_batch=1,
+            page_size=self.vulkan_config.page_size,
+            raw_window=self.vulkan_config.raw_window,
+            h2o_lambda=self.vulkan_config.h2o_lambda,
+            enable_vsa=self.vulkan_config.enable_vsa,
+        )
+
+        logits = self._model.forward(input_ids, kv_cache=kv_cache)
+        last_logits = logits[0, -1, :]
+
+        generated = list(input_ids[0])
+
+        for _ in range(max_new_tokens):
+            token_id = _sample_top_k_top_p(last_logits, temperature, top_k, top_p)
+            if token_id in LLAMA3_STOP_TOKENS:
+                break
+            generated.append(token_id)
+            token_array = np.array([[token_id]], dtype=np.int32)
+            logits = self._model.decode_step(token_array, kv_cache)
+            last_logits = logits[0, -1, :]
+
+        return np.array([generated], dtype=np.int32)
+
+    def __call__(self, input_ids, **kwargs):
+        """Make model callable for pipeline compatibility."""
+        return self.generate(input_ids, **kwargs)
+
+    @property
+    def device(self):
+        return self.vulkan_config.device
+
+    def memory_footprint(self) -> dict:
+        return self._model.memory_footprint()

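The decode loop above delegates token selection to `_sample_top_k_top_p`, which is private to grillyinference and whose body is not part of this diff. For orientation, a conventional top-k/top-p sampler of the same shape looks roughly like the sketch below (the semantics are an assumption, not the library's actual code); treating temperature 0.0 as greedy matches the `do_sample=False` branch in `generate()`:

```python
# Hedged sketch of a top-k/top-p sampler; grillyinference's private
# _sample_top_k_top_p may differ in detail.
import numpy as np

def sample_top_k_top_p(logits: np.ndarray, temperature: float,
                       top_k: int, top_p: float) -> int:
    if temperature <= 0.0:
        return int(np.argmax(logits))  # greedy: matches do_sample=False

    # Temperature-scaled softmax (numerically stable: subtract max logit).
    probs = np.exp((logits - logits.max()) / temperature)
    probs = probs / probs.sum()

    # Top-k: zero out everything outside the k most probable tokens.
    if 0 < top_k < probs.size:
        cutoff = np.sort(probs)[-top_k]
        probs = np.where(probs < cutoff, 0.0, probs)

    # Top-p (nucleus): keep the smallest probability-sorted prefix of
    # tokens whose cumulative mass reaches top_p.
    if top_p < 1.0:
        order = np.argsort(probs)[::-1]
        cumulative = np.cumsum(probs[order])
        keep = order[: np.searchsorted(cumulative, top_p) + 1]
        mask = np.zeros_like(probs)
        mask[keep] = 1.0
        probs = probs * mask

    probs = probs / probs.sum()
    return int(np.random.choice(probs.size, p=probs))
```
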
grillyoptimum-0.1.0/grillyoptimum/pipeline.py
@@ -0,0 +1,76 @@
+"""Pipeline integration for HuggingFace transformers.
+
+Registers GrillyOptimum as a backend so HF pipelines can use it:
+    from transformers import pipeline
+    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+
+def create_text_generation_pipeline(
+    model_id: str,
+    tokenizer=None,
+    dtype: str = "fp16",
+    **kwargs,
+):
+    """Create a text-generation pipeline using Vulkan backend.
+
+    Args:
+        model_id: HF model ID or local path.
+        tokenizer: Optional tokenizer (auto-loaded if None).
+        dtype: Weight precision.
+        **kwargs: Additional pipeline arguments.
+
+    Returns:
+        A callable pipeline object.
+    """
+    from grillyinference import TextGenerator, LlamaForCausalLM
+
+    if tokenizer is None:
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+
+    model = LlamaForCausalLM.from_pretrained(model_id, dtype=dtype)
+    generator = TextGenerator(model, tokenizer)
+
+    class VulkanTextGenerationPipeline:
+        """Simple pipeline wrapper for text generation."""
+
+        def __init__(self, generator, tokenizer):
+            self._generator = generator
+            self._tokenizer = tokenizer
+
+        def __call__(
+            self,
+            text: str | list[str],
+            max_new_tokens: int = 128,
+            temperature: float = 0.7,
+            top_k: int = 50,
+            top_p: float = 0.9,
+            **kwargs,
+        ) -> list[dict[str, Any]]:
+            if isinstance(text, str):
+                text = [text]
+
+            results = []
+            for prompt in text:
+                output = self._generator.generate(
+                    prompt,
+                    max_tokens=max_new_tokens,
+                    temperature=temperature,
+                    top_k=top_k,
+                    top_p=top_p,
+                )
+                results.append({
+                    "generated_text": output,
+                    "prompt": prompt,
+                })
+            return results
+
+    return VulkanTextGenerationPipeline(generator, tokenizer)

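A usage sketch for the factory above (model ID taken from the README): the wrapper accepts a single string or a list of strings and always returns a list of dicts keyed by "generated_text" and "prompt":

```python
from grillyoptimum.pipeline import create_text_generation_pipeline

pipe = create_text_generation_pipeline("meta-llama/Llama-3.2-3B-Instruct", dtype="fp16")

prompts = ["Explain quantum computing", "Write a haiku about GPUs"]
for r in pipe(prompts, max_new_tokens=64, temperature=0.7):
    print(r["prompt"], "->", r["generated_text"])
```
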
grillyoptimum-0.1.0/grillyoptimum.egg-info/PKG-INFO
@@ -0,0 +1,63 @@
+Metadata-Version: 2.4
+Name: grillyoptimum
+Version: 0.1.0
+Summary: HuggingFace Optimum-compatible Vulkan backend — optional grilly extension
+Author-email: Nicolas Cloutier <ncloutier@grillcheeseai.com>
+License: MIT
+Requires-Python: >=3.12
+Description-Content-Type: text/markdown
+License-File: LICENSE
+Requires-Dist: grilly>=0.4.0
+Requires-Dist: grillyinference>=0.1.0
+Requires-Dist: numpy
+Requires-Dist: transformers
+Provides-Extra: optimum
+Requires-Dist: optimum; extra == "optimum"
+Dynamic: license-file
+
+# GrillyOptimum (Alpha not production ready)
+
+HuggingFace Optimum-compatible Vulkan backend — optional [grilly](https://github.com/grillcheese/grilly) extension.
+
+## Features
+
+- **from_pretrained()** — load any HF Llama model directly
+- **generate()** — HuggingFace-compatible generation interface
+- **Pipeline integration** — use with `transformers.pipeline("text-generation")`
+- **Vulkan acceleration** — native fp16 inference on RDNA2 GPUs
+
+## Quick Start
+
+```bash
+pip install grillyoptimum
+```
+
+```python
+from grillyoptimum import VulkanModelForCausalLM
+from transformers import AutoTokenizer
+
+model = VulkanModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
+tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
+
+# HF-style generation
+input_ids = tokenizer.encode("Hello, world!", return_tensors="np")
+output = model.generate(input_ids, max_new_tokens=100, temperature=0.7)
+print(tokenizer.decode(output[0]))
+
+# Pipeline
+from grillyoptimum.pipeline import create_text_generation_pipeline
+pipe = create_text_generation_pipeline("meta-llama/Llama-3.2-3B-Instruct")
+result = pipe("Explain quantum computing")
+print(result[0]["generated_text"])
+```
+
+## Requirements
+
+- Python 3.12+
+- grilly >= 0.4.0
+- grillyinference >= 0.1.0
+- transformers
+
+## License
+
+MIT

grillyoptimum-0.1.0/grillyoptimum.egg-info/SOURCES.txt
@@ -0,0 +1,14 @@
+LICENSE
+README.md
+pyproject.toml
+grillyoptimum/__init__.py
+grillyoptimum/configuration.py
+grillyoptimum/modeling.py
+grillyoptimum/pipeline.py
+grillyoptimum.egg-info/PKG-INFO
+grillyoptimum.egg-info/SOURCES.txt
+grillyoptimum.egg-info/dependency_links.txt
+grillyoptimum.egg-info/requires.txt
+grillyoptimum.egg-info/top_level.txt
+tests/test_optimum.py
+tests/test_optimum_gpu.py

grillyoptimum-0.1.0/grillyoptimum.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+

grillyoptimum-0.1.0/grillyoptimum.egg-info/top_level.txt
@@ -0,0 +1 @@
+grillyoptimum

grillyoptimum-0.1.0/pyproject.toml
@@ -0,0 +1,21 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "grillyoptimum"
+version = "0.1.0"
+description = "HuggingFace Optimum-compatible Vulkan backend — optional grilly extension"
+readme = "README.md"
+license = {text = "MIT"}
+authors = [{name = "Nicolas Cloutier", email = "ncloutier@grillcheeseai.com"}]
+requires-python = ">=3.12"
+dependencies = [
+    "grilly>=0.4.0",
+    "grillyinference>=0.1.0",
+    "numpy",
+    "transformers",
+]
+
+[project.optional-dependencies]
+optimum = ["optimum"]