nanochat 0.1.0.pre
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +25 -0
- data/README.md +129 -0
- data/bin/nanochat-setup +186 -0
- data/bin/package-checkpoint +122 -0
- data/bin/speedrun.sh +32 -0
- data/bin/train-tiny-model +190 -0
- data/bin/train-with-python-nanochat.sh +167 -0
- data/lib/nanochat/checkpoint_manager.rb +40 -0
- data/lib/nanochat/common.rb +32 -0
- data/lib/nanochat/config.rb +49 -0
- data/lib/nanochat/engine.rb +152 -0
- data/lib/nanochat/gpt.rb +285 -0
- data/lib/nanochat/tokenizer.rb +119 -0
- data/lib/nanochat/version.rb +5 -0
- data/lib/nanochat.rb +27 -0
- metadata +91 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 28b30d23549c308bf1b45d404c0ecd6f2308b5be936249e8a00b3bea53438e40
|
|
4
|
+
data.tar.gz: b9d2246356b5a8e3c67ac5c0a6c6b2bdedb2146737cf243ad73d128afce83f54
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 7592265a945e4683317de282d941aa43dd24f36eda32e3e99c0d600d6f17229d7dad832db7a633c197f13c93ddf693bf3e07298cb1fb8088078d2157423ce1ac
|
|
7
|
+
data.tar.gz: 7d3eefcfa0a56dd1e1268dc85de9a1a5b7c161634b81f38fd759bca2b10313c001ba4805cda39906b59abd6766c1cd7c96304cabe179e861ab3496f2f13e169a
|
data/LICENSE
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Ruby implementation Copyright (c) 2025 Shannon Skipper
|
|
4
|
+
Original Python implementation Copyright (c) 2025 Andrej Karpathy
|
|
5
|
+
|
|
6
|
+
This Ruby port is based on nanochat by Andrej Karpathy
|
|
7
|
+
(https://github.com/karpathy/nanochat)
|
|
8
|
+
|
|
9
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
10
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
11
|
+
in the Software without restriction, including without limitation the rights
|
|
12
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
13
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
14
|
+
furnished to do so, subject to the following conditions:
|
|
15
|
+
|
|
16
|
+
The above copyright notice and this permission notice shall be included in all
|
|
17
|
+
copies or substantial portions of the Software.
|
|
18
|
+
|
|
19
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
20
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
21
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
22
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
23
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
24
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
25
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
# Nanochat Ruby
|
|
2
|
+
|
|
3
|
+
Ruby port of [nanochat](https://github.com/karpathy/nanochat) by Andrej Karpathy. Loads PyTorch checkpoints for inference and fine-tuning.
|
|
4
|
+
|
|
5
|
+
[Ruby](https://www.ruby-lang.org/)
|
|
6
|
+
[MIT License](LICENSE)
|
|
7
|
+
|
|
8
|
+
## Quick Start
|
|
9
|
+
|
|
10
|
+
Train and chat with your own language model:
|
|
11
|
+
|
|
12
|
+
```bash
|
|
13
|
+
# Train a demo model (~30 mins on CPU)
|
|
14
|
+
bash bin/speedrun.sh
|
|
15
|
+
|
|
16
|
+
# Install Ruby gem (prerelease)
|
|
17
|
+
gem install nanochat --pre
|
|
18
|
+
|
|
19
|
+
# Chat with your model
|
|
20
|
+
ruby examples/chat_cli.rb
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
The `bin/speedrun.sh` script trains a tiny checkpoint using [python-nanochat](https://github.com/karpathy/nanochat). Ruby nanochat loads it for inference.
|
|
24
|
+
|
|
25
|
+
## Fine-Tuning
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
ruby examples/finetune.rb \
|
|
29
|
+
--data my_training_data.txt \
|
|
30
|
+
--epochs 3 \
|
|
31
|
+
--output custom_model.pt
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
Uses AdamW with gradient clipping. Saves best checkpoint. See `examples/sample_training_data.txt` for data format.
|
|
35
|
+
|
|
36
|
+
## What Works
|
|
37
|
+
|
|
38
|
+
**Complete:**
|
|
39
|
+
- Inference: CLI chat, web UI, streaming, KV caching
|
|
40
|
+
- Fine-tuning on single GPU
|
|
41
|
+
- Tokenizer training
|
|
42
|
+
- PyTorch checkpoint loading
|
|
43
|
+
|
|
44
|
+
**Not implemented:**
|
|
45
|
+
- Multi-GPU training (not planned)
|
|
46
|
+
- Evaluation benchmarks (maybe later)
|
|
47
|
+
- Base training from scratch (maybe later)
|
|
48
|
+
- Reinforcement learning (maybe later)
|
|
49
|
+
|
|
50
|
+
## How It Works
|
|
51
|
+
|
|
52
|
+
GPT model with RoPE, GQA and causal attention. BPE tokenizer via HuggingFace. KV cache for fast inference. Temperature, top-k and top-p sampling. Auto-detects CUDA, MPS or CPU.
|
|
53
|
+
|
|
54
|
+
## Requirements
|
|
55
|
+
|
|
56
|
+
Ruby >= 3.4.0. LibTorch installs automatically via torch-rb.
|
|
57
|
+
|
|
58
|
+
## Tokenizer Training
|
|
59
|
+
|
|
60
|
+
```ruby
|
|
61
|
+
require 'nanochat'
|
|
62
|
+
|
|
63
|
+
tokenizer = Nanochat::Tokenizer.train_from_files(
|
|
64
|
+
['data/train.txt'],
|
|
65
|
+
vocab_size: 50_257
|
|
66
|
+
)
|
|
67
|
+
tokenizer.save('~/.cache/nanochat/my_tokenizer')
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
Tokenizers trained in Python work in Ruby (both save as `.json`). RustBPE `.pkl` files don't work.
|
|
71
|
+
|
|
72
|
+
## Benchmarks
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
ruby examples/benchmark.rb
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
Benchmarks tokenizer, forward pass, generation and sampling.
|
|
79
|
+
|
|
80
|
+
## Development
|
|
81
|
+
|
|
82
|
+
```bash
|
|
83
|
+
# Clone repository
|
|
84
|
+
git clone https://github.com/havenwood/nanochat
|
|
85
|
+
cd nanochat
|
|
86
|
+
|
|
87
|
+
# Install dependencies
|
|
88
|
+
bundle install
|
|
89
|
+
|
|
90
|
+
# Run tests
|
|
91
|
+
bundle exec rake test
|
|
92
|
+
|
|
93
|
+
# Run linter
|
|
94
|
+
bundle exec rubocop
|
|
95
|
+
|
|
96
|
+
# Run all checks
|
|
97
|
+
bundle exec rake
|
|
98
|
+
|
|
99
|
+
# Run performance benchmarks
|
|
100
|
+
ruby examples/benchmark.rb
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
## Training Models
|
|
104
|
+
|
|
105
|
+
`bin/speedrun.sh` trains a tiny d4 model (~30 mins on CPU).
|
|
106
|
+
|
|
107
|
+
For production d20 models, use [python-nanochat](https://github.com/karpathy/nanochat) (~4 hours on 8×H100):
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
git clone https://github.com/karpathy/nanochat
|
|
111
|
+
cd nanochat
|
|
112
|
+
bash speedrun.sh
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Ruby nanochat loads any checkpoint from python-nanochat.
|
|
116
|
+
|
|
117
|
+
## Contributing
|
|
118
|
+
|
|
119
|
+
Pull requests welcome. Priorities: evaluation benchmarks, base training, performance.
|
|
120
|
+
|
|
121
|
+
## Credits
|
|
122
|
+
|
|
123
|
+
Andrej Karpathy for nanochat. Andrew Kane for torch-rb and tokenizers-ruby.
|
|
124
|
+
|
|
125
|
+
## Links
|
|
126
|
+
|
|
127
|
+
- [python-nanochat](https://github.com/karpathy/nanochat)
|
|
128
|
+
- [torch-rb](https://github.com/ankane/torch-rb)
|
|
129
|
+
- [tokenizers-ruby](https://github.com/ankane/tokenizers-ruby)
|
data/bin/nanochat-setup
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Setup script for Ruby nanochat - downloads pretrained checkpoint
|
|
5
|
+
#
|
|
6
|
+
# Usage:
|
|
7
|
+
# nanochat-setup [--tiny|--url URL]
|
|
8
|
+
|
|
9
|
+
require 'fileutils'
|
|
10
|
+
require 'net/http'
|
|
11
|
+
require 'uri'
|
|
12
|
+
require 'optparse'
|
|
13
|
+
|
|
14
|
+
CACHE_DIR = File.expand_path('~/.cache/nanochat')
|
|
15
|
+
CHECKPOINT_URLS = {
|
|
16
|
+
tiny: 'https://github.com/havenwood/nanochat/releases/download/v0.1.0/nanochat-tiny-d4.tar.gz'
|
|
17
|
+
}.freeze
|
|
18
|
+
|
|
19
|
+
# Parses the nanochat-setup command-line flags out of ARGV.
#
# Recognised flags are removed from ARGV; anything else is left in place.
#
# @return [Hash] always has :model (defaults to :tiny); has :url only when
#   --url was given
def parse_options
  settings = {model: :tiny}

  parser = OptionParser.new
  parser.banner = 'Usage: nanochat-setup [options]'
  parser.separator ''
  parser.separator 'Download and setup a pretrained nanochat model'
  parser.separator ''

  parser.on('--tiny', 'Download tiny d4 model (default, ~30MB)') { settings[:model] = :tiny }
  parser.on('--url URL', 'Download from custom URL') { |custom_url| settings[:url] = custom_url }
  parser.on('-h', '--help', 'Show this help') do
    puts parser
    exit
  end

  parser.parse!
  settings
end
|
|
44
|
+
|
|
45
|
+
# Streams the resource at +url+ into +destination+, printing progress to stdout.
#
# Redirect responses are followed for at most +redirect_limit+ hops. A relative
# Location header is resolved against the URL that produced it, and the next
# request is only started once the current connection has been closed.
#
# @param url [String] HTTP or HTTPS URL to fetch
# @param destination [String] path of the file to (over)write in binary mode
# @param redirect_limit [Integer] number of further redirects that may be followed
# @return [void]
# @raise [RuntimeError] on a non-success response, a redirect that carries no
#   Location header, or when the redirect limit is exhausted
def download_file(url, destination, redirect_limit = 5)
  puts "Downloading from: #{url}"

  uri = URI(url)
  redirect_url = nil

  Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
    http.request(Net::HTTP::Get.new(uri)) do |response|
      case response
      when Net::HTTPSuccess
        write_response_body(response, destination)
      when Net::HTTPRedirection
        location = response['location']
        raise "Failed to download: #{response.code} redirect without Location header" unless location

        # Location may be relative; resolve it against the current URL.
        redirect_url = URI.join(url, location).to_s
      else
        raise "Failed to download: #{response.code} #{response.message}"
      end
    end
  end

  return unless redirect_url
  raise 'Failed to download: too many redirects' unless redirect_limit.positive?

  puts " Following redirect to: #{redirect_url}"
  download_file(redirect_url, destination, redirect_limit - 1)
end

# Writes the streamed body of +response+ to +destination+, reporting progress.
def write_response_body(response, destination)
  total_size = response['content-length']&.to_i
  downloaded = 0

  File.open(destination, 'wb') do |file|
    response.read_body do |chunk|
      file.write(chunk)
      downloaded += chunk.bytesize

      if total_size&.positive?
        percent = (downloaded.to_f / total_size * 100).round(1)
        print "\r Progress: #{percent}% (#{format_megabytes(downloaded)}MB / #{format_megabytes(total_size)}MB)"
      else
        print "\r Downloaded: #{format_megabytes(downloaded)}MB"
      end
    end
  end
  puts # Newline after progress
end

# Converts a byte count to mebibytes (1024 * 1024 bytes), rounded to one decimal.
def format_megabytes(bytes)
  (bytes / (1024.0 * 1024.0)).round(1)
end
|
|
83
|
+
|
|
84
|
+
# Unpacks a gzipped tarball into +destination+ using the system `tar`,
# dropping the archive's top-level directory (--strip-components=1).
#
# @param archive_path [String] path to the .tar.gz file
# @param destination [String] directory to extract into
# @raise [RuntimeError] when tar cannot be run or exits unsuccessfully
def extract_tarball(archive_path, destination)
  puts "Extracting to: #{destination}"

  # Use tar command for extraction
  tar_command = ['tar', '-xzf', archive_path, '-C', destination, '--strip-components=1']
  extracted = system(*tar_command)
  raise 'Failed to extract tarball' unless extracted

  puts 'Extraction complete'
end
|
|
93
|
+
|
|
94
|
+
# Checks that +cache_dir+ holds both files a checkpoint needs and reports
# the outcome on stdout.
#
# @param cache_dir [String] directory expected to contain the checkpoint
# @return [Boolean] true when model.pt and tokenizer/tokenizer.json both exist
def verify_checkpoint(cache_dir)
  required = {
    'model.pt' => File.join(cache_dir, 'model.pt'),
    'tokenizer/tokenizer.json' => File.join(cache_dir, 'tokenizer', 'tokenizer.json')
  }
  absent = required.reject { |_label, path| File.exist?(path) }.keys

  unless absent.empty?
    puts "Missing files: #{absent.join(', ')}"
    return false
  end

  puts 'Checkpoint verified:'
  puts " Model: #{required['model.pt']}"
  puts " Tokenizer: #{required['tokenizer/tokenizer.json']}"
  true
end
|
|
112
|
+
|
|
113
|
+
# Entry point for nanochat-setup.
#
# Asks before overwriting an existing checkpoint, downloads the selected
# archive into CACHE_DIR, extracts it there, and verifies the expected files.
# The temporary archive is removed whether the download or the extraction
# fails, so no partial download is left behind in the cache directory.
#
# Exits 0 when the user declines to overwrite, and 1 on a missing URL,
# download failure, extraction failure, or failed verification.
def main
  options = parse_options

  puts 'Nanochat Setup'
  puts '=' * 50
  puts

  # Check if already setup
  if File.exist?(File.join(CACHE_DIR, 'model.pt'))
    puts 'Checkpoint already exists at ~/.cache/nanochat/'
    print 'Overwrite? [y/N]: '
    response = $stdin.gets&.chomp&.downcase

    unless %w[y yes].include?(response)
      puts 'Setup cancelled'
      exit 0
    end
  end

  # Prepare cache directory
  FileUtils.mkdir_p(CACHE_DIR)

  # Determine download URL (an explicit --url wins over the named model)
  url = options[:url] || CHECKPOINT_URLS[options[:model]]

  unless url
    puts "No URL available for model: #{options[:model]}"
    exit 1
  end

  # Download
  archive_path = File.join(CACHE_DIR, 'download.tar.gz')

  begin
    download_file(url, archive_path)
  rescue StandardError => e
    # Drop whatever was written before the failure.
    FileUtils.rm_f(archive_path)
    puts
    puts "Download failed: #{e.message}"
    puts
    puts 'Alternative: Train your own model with python-nanochat'
    puts ' bash bin/speedrun.sh # ~30 mins on CPU'
    puts " Checkpoint will be at: #{CACHE_DIR}"
    exit 1
  end

  # Extract
  begin
    extract_tarball(archive_path, CACHE_DIR)
  rescue StandardError => e
    puts "Extraction failed: #{e.message}"
    exit 1
  ensure
    # Runs on success and on the exit above: the archive is only a temp file.
    FileUtils.rm_f(archive_path)
  end

  # Verify
  puts
  unless verify_checkpoint(CACHE_DIR)
    puts
    puts 'Setup incomplete - checkpoint verification failed'
    exit 1
  end

  puts
  puts '=' * 50
  puts 'Setup complete!'
  puts
  puts 'Try it out:'
  puts ' ruby examples/chat_cli.rb'
  puts ' ruby examples/chat_web.rb # Web UI on http://localhost:8000'
  puts
end
|
|
185
|
+
|
|
186
|
+
main if __FILE__ == $PROGRAM_NAME
|
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Package a trained nanochat checkpoint for distribution
|
|
5
|
+
#
|
|
6
|
+
# This creates a tarball suitable for uploading to GitHub releases.
|
|
7
|
+
# End users can then download it with `nanochat-setup`.
|
|
8
|
+
#
|
|
9
|
+
# Usage:
|
|
10
|
+
# bin/package-checkpoint [--output nanochat-tiny-d4.tar.gz]
|
|
11
|
+
|
|
12
|
+
require 'fileutils'
|
|
13
|
+
require 'optparse'
|
|
14
|
+
|
|
15
|
+
CACHE_DIR = File.expand_path('~/.cache/nanochat')
|
|
16
|
+
|
|
17
|
+
# Parses the package-checkpoint command-line flags out of ARGV.
#
# Recognised flags are removed from ARGV.
#
# @return [Hash] with :output, the tarball path
#   (defaults to 'nanochat-tiny-d4.tar.gz')
def parse_options
  settings = {output: 'nanochat-tiny-d4.tar.gz'}

  parser = OptionParser.new
  parser.banner = 'Usage: package-checkpoint [options]'
  parser.separator ''
  parser.separator 'Package a trained checkpoint for distribution'
  parser.separator ''

  parser.on('-o', '--output PATH', 'Output tarball path (default: nanochat-tiny-d4.tar.gz)') do |tarball|
    settings[:output] = tarball
  end
  parser.on('-h', '--help', 'Show this help') do
    puts parser
    exit
  end

  parser.parse!
  settings
end
|
|
38
|
+
|
|
39
|
+
# Checks that CACHE_DIR holds a complete checkpoint and reports on stdout.
# When files are missing it also prints how to train a checkpoint.
#
# @return [Boolean] true when model.pt and tokenizer/tokenizer.json both exist
def verify_checkpoint
  model_file = File.join(CACHE_DIR, 'model.pt')
  tokenizer_file = File.join(CACHE_DIR, 'tokenizer', 'tokenizer.json')

  absent = {'model.pt' => model_file, 'tokenizer/tokenizer.json' => tokenizer_file}
           .filter_map { |label, path| label unless File.exist?(path) }

  if absent.any?
    puts "Missing files: #{absent.join(', ')}"
    puts
    puts 'Train a checkpoint first:'
    puts ' bash bin/train-with-python-nanochat.sh'
    return false
  end

  puts "Checkpoint verified at: #{CACHE_DIR}"
  puts " Model: #{model_file}"
  puts " Tokenizer: #{tokenizer_file}"
  true
end
|
|
60
|
+
|
|
61
|
+
# Archives the whole CACHE_DIR directory into a gzipped tarball with the
# system `tar`. The archive's single top-level entry is CACHE_DIR's basename.
# Prints the resulting size, or exits with status 1 when tar fails.
#
# @param output_path [String] tarball path; expanded to an absolute path
def package_checkpoint(output_path)
  tarball = File.expand_path(output_path)
  parent_dir, dir_name = File.split(CACHE_DIR)

  puts
  puts 'Creating tarball...'
  puts " Source: #{CACHE_DIR}"
  puts " Output: #{tarball}"
  puts

  # Create tarball
  unless system('tar', '-czf', tarball, '-C', parent_dir, dir_name)
    puts 'Failed to create tarball'
    exit 1
  end

  # Get size
  megabytes = (File.size(tarball) / (1024.0 * 1024.0)).round(1)

  puts "Tarball created: #{tarball}"
  puts " Size: #{megabytes} MB"
  puts
end
|
|
91
|
+
|
|
92
|
+
# Prints follow-up instructions: how to unpack the tarball locally for a
# test run, and a reminder to upload it to GitHub releases.
#
# @param output_path [String] tarball path, echoed verbatim in the tar command
def show_next_steps(output_path)
  rule = '=' * 70

  # puts on an array writes one line per element; '' yields a blank line.
  puts [
    rule,
    'Next Steps',
    rule,
    '',
    'Test locally:',
    " tar -xzf #{output_path} -C #{File.dirname(CACHE_DIR)}",
    ' ruby examples/chat_cli.rb',
    '',
    'Upload to GitHub releases and users can download with nanochat-setup',
    ''
  ]
end
|
|
104
|
+
|
|
105
|
+
# Entry point for package-checkpoint: verifies the checkpoint in CACHE_DIR,
# packs it into the requested tarball, then prints what to do next.
# Exits with status 1 when the checkpoint is incomplete.
def main
  output_path = parse_options[:output]

  puts 'Package Nanochat Checkpoint'
  puts '=' * 70
  puts

  # Nothing to package without a complete checkpoint
  verify_checkpoint || exit(1)

  package_checkpoint(output_path)
  show_next_steps(output_path)
end
|
|
121
|
+
|
|
122
|
+
main if __FILE__ == $PROGRAM_NAME
|
data/bin/speedrun.sh
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
#!/bin/bash

# Train a tiny nanochat checkpoint for Ruby nanochat
#
# This is a simplified version adapted for CPU/single-GPU training.
# For the full $100 8×H100 training, use python-nanochat directly.
#
# This script trains a d4 model (4 layers, ~30 mins on CPU)
# The Python nanochat speedrun.sh trains a d20 model (~4 hours on 8×H100)
#
# Run it from the repository root: the training script is invoked through
# the relative path bin/train-with-python-nanochat.sh.

set -e

echo "======================================================================"
echo "🚀 Ruby Nanochat - Quick Training Demo"
echo "======================================================================"
echo ""
echo "This trains a tiny d4 model suitable for testing Ruby nanochat."
echo ""
echo "📊 Model specs:"
echo " • 4 layers (vs 20 in Python speedrun.sh)"
echo " • Runs on CPU or single GPU"
echo " • ~30 minutes training time"
echo " • Good for demos and development"
echo ""
echo "💡 For production models, use python-nanochat/speedrun.sh directly"
# The dollar sign is escaped: inside double quotes "$100" would expand the
# positional parameter $1 and print "~00" instead of the price.
echo " (requires 8×H100 GPUs, ~4 hours, ~\$100)"
echo ""
echo "======================================================================"
echo ""

# Run the actual training script
bash bin/train-with-python-nanochat.sh
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Train a tiny nanochat model using Python nanochat's CPU training script
|
|
5
|
+
#
|
|
6
|
+
# This wraps Python nanochat's dev/runcpu.sh to make it easy for Ruby users
|
|
7
|
+
# to train a minimal checkpoint without needing GPU clusters.
|
|
8
|
+
#
|
|
9
|
+
# Attribution: Uses training scripts from https://github.com/karpathy/nanochat
|
|
10
|
+
#
|
|
11
|
+
# Usage:
|
|
12
|
+
# bin/train-tiny-model [--quick|--standard]
|
|
13
|
+
|
|
14
|
+
require 'English'
|
|
15
|
+
require 'fileutils'
|
|
16
|
+
require 'optparse'
|
|
17
|
+
|
|
18
|
+
PYTHON_NANOCHAT_DIR = File.expand_path('python-nanochat')
|
|
19
|
+
CACHE_DIR = File.expand_path('~/.cache/nanochat')
|
|
20
|
+
|
|
21
|
+
# Parses the train-tiny-model command-line flags out of ARGV.
#
# Recognised flags are removed from ARGV. When both --quick and --standard
# are given, the one appearing last wins.
#
# @return [Hash] with :mode set to :quick (default) or :standard
def parse_options
  settings = {mode: :quick}

  parser = OptionParser.new
  parser.banner = 'Usage: train-tiny-model [options]'
  parser.separator ''
  parser.separator 'Train a tiny nanochat checkpoint using Python nanochat'
  parser.separator ''

  parser.on('--quick', 'Quick training (d4, 50 iterations, ~30 mins CPU, default)') do
    settings[:mode] = :quick
  end
  parser.on('--standard', 'Standard training (d4, 500 iterations, ~5 hours CPU)') do
    settings[:mode] = :standard
  end
  parser.on('-h', '--help', 'Show this help') do
    puts parser
    exit
  end

  parser.parse!
  settings
end
|
|
46
|
+
|
|
47
|
+
# Confirms that the python-nanochat checkout and its dev/runcpu.sh script
# exist under PYTHON_NANOCHAT_DIR. Prints guidance and exits with status 1
# when either is missing; otherwise prints where the checkout was found.
def check_python_nanochat
  unless Dir.exist?(PYTHON_NANOCHAT_DIR)
    # puts on an array writes one line per element; '' yields a blank line.
    puts [
      "Python nanochat not found at: #{PYTHON_NANOCHAT_DIR}",
      '',
      'Clone it first:',
      ' git clone https://github.com/karpathy/nanochat python-nanochat',
      ''
    ]
    exit 1
  end

  runner = File.join(PYTHON_NANOCHAT_DIR, 'dev', 'runcpu.sh')
  unless File.exist?(runner)
    puts "Training script not found: #{runner}"
    exit 1
  end

  puts "Python nanochat found at: #{PYTHON_NANOCHAT_DIR}"
end
|
|
65
|
+
|
|
66
|
+
# Runs python-nanochat's dev/runcpu.sh from inside PYTHON_NANOCHAT_DIR.
#
# In :quick mode the script is executed unchanged. In any other mode its
# `--num_iterations=50` and `--num_iterations=100` flags are scaled tenfold
# and the rewritten text is run with `bash -c`, so the script on disk is
# never modified and the child processes keep the caller's stdin.
#
# @param mode [Symbol] :quick, or anything else for the longer run
# @raise [RuntimeError] 'Training failed' when bash cannot be started or
#   exits unsuccessfully
def train_model(mode)
  puts
  puts 'Starting Training'
  puts '=' * 70
  puts
  puts "Mode: #{mode} (d4 model, 4 layers)"
  puts "Output: #{CACHE_DIR}"
  puts "Time: ~#{mode == :quick ? '30 minutes' : '5 hours'} on CPU"
  puts
  puts 'Attribution: Using training scripts from'
  puts 'https://github.com/karpathy/nanochat by Andrej Karpathy'
  puts
  puts '=' * 70
  puts

  # Change to python-nanochat directory
  Dir.chdir(PYTHON_NANOCHAT_DIR) do
    if mode == :quick
      # Use the existing dev/runcpu.sh as-is (50 iterations)
      puts 'Running: bash dev/runcpu.sh'
      puts
      system('bash', 'dev/runcpu.sh') || raise('Training failed')
    else
      # Run an in-memory copy with more iterations for better quality
      puts 'Running modified training (500 iterations)'
      puts

      modified_script = scale_iterations(File.read('dev/runcpu.sh'))
      system('bash', '-c', modified_script) || raise('Training failed')
    end
  end

  puts
  puts 'Training complete!'
end

# Returns +script+ with `--num_iterations=50` rewritten to 500 and
# `--num_iterations=100` rewritten to 1000. Only those exact counts match;
# a larger count such as 5000 is left alone. Warns on stderr when nothing
# matched, since the run would then use the script's own iteration counts.
def scale_iterations(script)
  scaled_counts = {'50' => '500', '100' => '1000'}
  scaled = script.gsub(/--num_iterations=(50|100)\b/) do
    "--num_iterations=#{scaled_counts.fetch(Regexp.last_match(1))}"
  end

  warn 'Warning: no --num_iterations flags found to scale; running dev/runcpu.sh unchanged' if scaled == script
  scaled
end
|
|
109
|
+
|
|
110
|
+
# Announces the check, then confirms that CACHE_DIR holds both checkpoint
# files, reporting the outcome on stdout.
#
# @return [Boolean] true when model.pt and tokenizer/tokenizer.json both exist
def verify_checkpoint
  puts
  puts 'Verifying checkpoint...'

  model_file = File.join(CACHE_DIR, 'model.pt')
  tokenizer_file = File.join(CACHE_DIR, 'tokenizer', 'tokenizer.json')

  not_found = [['model.pt', model_file], ['tokenizer/tokenizer.json', tokenizer_file]]
              .reject { |_name, path| File.exist?(path) }
              .map(&:first)

  if not_found.empty?
    puts 'Checkpoint verified:'
    puts " Model: #{model_file}"
    puts " Tokenizer: #{tokenizer_file}"
    return true
  end

  puts "Missing files: #{not_found.join(', ')}"
  false
end
|
|
131
|
+
|
|
132
|
+
# Prints the commands for trying out the freshly trained checkpoint.
def show_next_steps
  # puts on an array writes one line per element; '' yields a blank line.
  puts [
    '',
    '=' * 70,
    'Try it out:',
    ' ruby examples/chat_cli.rb',
    ' ruby examples/chat_web.rb # Web UI on http://localhost:8000',
    ''
  ]
end
|
|
140
|
+
|
|
141
|
+
# Entry point for train-tiny-model.
#
# Checks that python-nanochat is present, asks before replacing an existing
# checkpoint (deleting CACHE_DIR once confirmed), runs the training, and
# verifies the result. Exits 0 when the user declines, and 1 when training
# or verification fails.
def main
  mode = parse_options[:mode]

  puts 'Train Tiny Nanochat Model'
  puts '=' * 70
  puts

  # Check prerequisites
  check_python_nanochat

  # Warn about existing checkpoint
  if File.exist?(File.join(CACHE_DIR, 'model.pt'))
    puts
    puts 'Warning: Checkpoint already exists'
    puts "Location: #{CACHE_DIR}"
    print 'Overwrite with new training? [y/N]: '
    answer = $stdin.gets&.chomp&.downcase

    if answer == 'y' || answer == 'yes'
      FileUtils.rm_rf(CACHE_DIR)
    else
      puts 'Training cancelled'
      exit 0
    end
  end

  # Train
  begin
    train_model(mode)
  rescue StandardError => e
    puts
    puts "Training failed: #{e.message}"
    puts
    puts 'Make sure Python nanochat dependencies are installed'
    puts 'Check python-nanochat/README.md for setup instructions'
    exit 1
  end

  # Verify
  unless verify_checkpoint
    puts
    puts 'Training completed but checkpoint verification failed'
    exit 1
  end

  # Success!
  show_next_steps
end
|
|
189
|
+
|
|
190
|
+
main if __FILE__ == $PROGRAM_NAME
|