gptmed 0.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- gptmed-0.0.1/LICENSE +21 -0
- gptmed-0.0.1/MANIFEST.in +40 -0
- gptmed-0.0.1/PKG-INFO +325 -0
- gptmed-0.0.1/README.md +262 -0
- gptmed-0.0.1/gptmed/__init__.py +37 -0
- gptmed-0.0.1/gptmed/configs/__init__.py +1 -0
- gptmed-0.0.1/gptmed/configs/train_config.py +154 -0
- gptmed-0.0.1/gptmed/data/__init__.py +5 -0
- gptmed-0.0.1/gptmed/data/parsers/__init__.py +10 -0
- gptmed-0.0.1/gptmed/data/parsers/medquad_parser.py +257 -0
- gptmed-0.0.1/gptmed/data/parsers/text_formatter.py +148 -0
- gptmed-0.0.1/gptmed/inference/__init__.py +1 -0
- gptmed-0.0.1/gptmed/inference/decoding_utils.py +190 -0
- gptmed-0.0.1/gptmed/inference/generation_config.py +83 -0
- gptmed-0.0.1/gptmed/inference/generator.py +253 -0
- gptmed-0.0.1/gptmed/inference/sampling.py +261 -0
- gptmed-0.0.1/gptmed/model/__init__.py +9 -0
- gptmed-0.0.1/gptmed/model/architecture/__init__.py +35 -0
- gptmed-0.0.1/gptmed/model/architecture/attention.py +188 -0
- gptmed-0.0.1/gptmed/model/architecture/decoder_block.py +130 -0
- gptmed-0.0.1/gptmed/model/architecture/embeddings.py +146 -0
- gptmed-0.0.1/gptmed/model/architecture/feedforward.py +109 -0
- gptmed-0.0.1/gptmed/model/architecture/transformer.py +204 -0
- gptmed-0.0.1/gptmed/model/configs/__init__.py +17 -0
- gptmed-0.0.1/gptmed/model/configs/model_config.py +155 -0
- gptmed-0.0.1/gptmed/tokenizer/__init__.py +7 -0
- gptmed-0.0.1/gptmed/tokenizer/tokenize_data.py +286 -0
- gptmed-0.0.1/gptmed/tokenizer/train_tokenizer.py +218 -0
- gptmed-0.0.1/gptmed/training/__init__.py +1 -0
- gptmed-0.0.1/gptmed/training/dataset.py +183 -0
- gptmed-0.0.1/gptmed/training/train.py +272 -0
- gptmed-0.0.1/gptmed/training/trainer.py +331 -0
- gptmed-0.0.1/gptmed/training/utils.py +212 -0
- gptmed-0.0.1/gptmed/utils/__init__.py +1 -0
- gptmed-0.0.1/gptmed/utils/checkpoints.py +224 -0
- gptmed-0.0.1/gptmed/utils/logging.py +189 -0
- gptmed-0.0.1/gptmed.egg-info/PKG-INFO +325 -0
- gptmed-0.0.1/gptmed.egg-info/SOURCES.txt +44 -0
- gptmed-0.0.1/gptmed.egg-info/dependency_links.txt +1 -0
- gptmed-0.0.1/gptmed.egg-info/entry_points.txt +3 -0
- gptmed-0.0.1/gptmed.egg-info/requires.txt +14 -0
- gptmed-0.0.1/gptmed.egg-info/top_level.txt +1 -0
- gptmed-0.0.1/pyproject.toml +109 -0
- gptmed-0.0.1/requirements.txt +18 -0
- gptmed-0.0.1/setup.cfg +4 -0
- gptmed-0.0.1/setup.py +11 -0
gptmed-0.0.1/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Sanjog Sigdel
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
gptmed-0.0.1/MANIFEST.in
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# Include documentation
|
|
2
|
+
include README.md
|
|
3
|
+
include LICENSE
|
|
4
|
+
include requirements.txt
|
|
5
|
+
|
|
6
|
+
# Include configuration files
|
|
7
|
+
recursive-include configs *.py *.json *.yaml
|
|
8
|
+
|
|
9
|
+
# Include model configs
|
|
10
|
+
recursive-include model/configs *.py *.json
|
|
11
|
+
|
|
12
|
+
# Include tokenizer files (vocab and model files)
|
|
13
|
+
recursive-include tokenizer *.model *.vocab *.json
|
|
14
|
+
|
|
15
|
+
# Exclude unnecessary files
|
|
16
|
+
exclude .gitignore
|
|
17
|
+
exclude .env
|
|
18
|
+
exclude *.pyc
|
|
19
|
+
exclude */__pycache__/*
|
|
20
|
+
recursive-exclude * __pycache__
|
|
21
|
+
recursive-exclude * *.py[co]
|
|
22
|
+
recursive-exclude * .DS_Store
|
|
23
|
+
|
|
24
|
+
# Exclude datasets, logs, and checkpoints (too large for PyPI)
|
|
25
|
+
exclude dataset/*
|
|
26
|
+
exclude logs/*
|
|
27
|
+
exclude model/checkpoints/*
|
|
28
|
+
recursive-exclude dataset *
|
|
29
|
+
recursive-exclude logs *
|
|
30
|
+
recursive-exclude model/checkpoints *
|
|
31
|
+
|
|
32
|
+
# Exclude test files
|
|
33
|
+
recursive-exclude tests *
|
|
34
|
+
exclude test_*.py
|
|
35
|
+
|
|
36
|
+
# Exclude development/training scripts not needed for inference
|
|
37
|
+
exclude download_medquad.py
|
|
38
|
+
exclude monitor_training.py
|
|
39
|
+
exclude preprocess.py
|
|
40
|
+
exclude generate_sample.py
|
gptmed-0.0.1/PKG-INFO
ADDED
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: gptmed
|
|
3
|
+
Version: 0.0.1
|
|
4
|
+
Summary: A lightweight GPT-based language model framework for training custom question-answering models on any domain
|
|
5
|
+
Author-email: Sanjog Sigdel <sigdelsanjog@gmail.com>
|
|
6
|
+
Maintainer-email: Sanjog Sigdel <sigdelsanjog@gmail.com>
|
|
7
|
+
License: MIT License
|
|
8
|
+
|
|
9
|
+
Copyright (c) 2026 Sanjog Sigdel
|
|
10
|
+
|
|
11
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
12
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
13
|
+
in the Software without restriction, including without limitation the rights
|
|
14
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
15
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
16
|
+
furnished to do so, subject to the following conditions:
|
|
17
|
+
|
|
18
|
+
The above copyright notice and this permission notice shall be included in all
|
|
19
|
+
copies or substantial portions of the Software.
|
|
20
|
+
|
|
21
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
22
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
23
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
24
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
25
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
26
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
27
|
+
SOFTWARE.
|
|
28
|
+
|
|
29
|
+
Project-URL: Homepage, https://github.com/sigdelsanjog/gptmed
|
|
30
|
+
Project-URL: Documentation, https://github.com/sigdelsanjog/gptmed#readme
|
|
31
|
+
Project-URL: Repository, https://github.com/sigdelsanjog/gptmed
|
|
32
|
+
Project-URL: Issues, https://github.com/sigdelsanjog/gptmed/issues
|
|
33
|
+
Keywords: nlp,language-model,transformer,gpt,pytorch,qa,question-answering,training,deep-learning,custom-model
|
|
34
|
+
Classifier: Development Status :: 3 - Alpha
|
|
35
|
+
Classifier: Intended Audience :: Developers
|
|
36
|
+
Classifier: Intended Audience :: Science/Research
|
|
37
|
+
Classifier: Intended Audience :: Education
|
|
38
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
39
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
40
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
41
|
+
Classifier: Programming Language :: Python :: 3
|
|
42
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
43
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
44
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
45
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
46
|
+
Classifier: Operating System :: OS Independent
|
|
47
|
+
Requires-Python: >=3.8
|
|
48
|
+
Description-Content-Type: text/markdown
|
|
49
|
+
License-File: LICENSE
|
|
50
|
+
Requires-Dist: torch>=2.0.0
|
|
51
|
+
Requires-Dist: sentencepiece>=0.1.99
|
|
52
|
+
Requires-Dist: numpy>=1.24.0
|
|
53
|
+
Requires-Dist: tqdm>=4.65.0
|
|
54
|
+
Provides-Extra: dev
|
|
55
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
56
|
+
Requires-Dist: black>=22.0.0; extra == "dev"
|
|
57
|
+
Requires-Dist: flake8>=4.0.0; extra == "dev"
|
|
58
|
+
Requires-Dist: mypy>=0.950; extra == "dev"
|
|
59
|
+
Provides-Extra: training
|
|
60
|
+
Requires-Dist: tensorboard>=2.10.0; extra == "training"
|
|
61
|
+
Requires-Dist: wandb>=0.13.0; extra == "training"
|
|
62
|
+
Dynamic: license-file
|
|
63
|
+
|
|
64
|
+
# GptMed 🤖
|
|
65
|
+
|
|
66
|
+
A lightweight GPT-based language model framework for training custom question-answering models on any domain. This package provides a transformer-based GPT architecture that you can train on your own Q&A datasets - whether it's casual conversations, technical support, education, or any other domain.
|
|
67
|
+
|
|
68
|
+
[](https://badge.fury.io/py/gptmed)
|
|
69
|
+
[](https://www.python.org/downloads/)
|
|
70
|
+
[](https://opensource.org/licenses/MIT)
|
|
71
|
+
|
|
72
|
+
## Features
|
|
73
|
+
|
|
74
|
+
- 🧠 **Custom GPT Architecture**: Lightweight transformer model for any Q&A domain
|
|
75
|
+
- 🎯 **Domain-Agnostic**: Train on any question-answering dataset (casual chat, tech support, education, etc.)
|
|
76
|
+
- ⚡ **Fast Inference**: Optimized for quick question answering
|
|
77
|
+
- 🔧 **Flexible Training**: Easy to train on your own custom datasets
|
|
78
|
+
- 📦 **Lightweight**: Small model size suitable for edge deployment
|
|
79
|
+
- 🛠️ **Complete Toolkit**: Includes tokenizer training, model training, and inference utilities
|
|
80
|
+
|
|
81
|
+
## Installation
|
|
82
|
+
|
|
83
|
+
### From PyPI (Recommended)
|
|
84
|
+
|
|
85
|
+
```bash
|
|
86
|
+
pip install gptmed
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### From Source
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
git clone https://github.com/sigdelsanjog/gptmed.git
|
|
93
|
+
cd gptmed
|
|
94
|
+
pip install -e .
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### With Optional Dependencies
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
# For development
|
|
101
|
+
pip install gptmed[dev]
|
|
102
|
+
|
|
103
|
+
# For training
|
|
104
|
+
pip install gptmed[training]
|
|
105
|
+
|
|
106
|
+
# All dependencies
|
|
107
|
+
pip install gptmed[dev,training]
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Quick Start
|
|
111
|
+
|
|
112
|
+
### Inference (Generate Answers)
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from gptmed.inference.generator import TextGenerator
|
|
116
|
+
from gptmed.model.architecture import GPTTransformer
|
|
117
|
+
from gptmed.model.configs.model_config import get_small_config
|
|
118
|
+
|
|
119
|
+
# Load model
|
|
120
|
+
config = get_small_config()
|
|
121
|
+
model = GPTTransformer(config)
|
|
122
|
+
|
|
123
|
+
# Load your trained checkpoint
|
|
124
|
+
# model.load_state_dict(torch.load('path/to/checkpoint.pt'))
|
|
125
|
+
|
|
126
|
+
# Create generator
|
|
127
|
+
generator = TextGenerator(
|
|
128
|
+
model=model,
|
|
129
|
+
tokenizer_path='path/to/tokenizer.model'
|
|
130
|
+
)
|
|
131
|
+
|
|
132
|
+
# Generate answer
|
|
133
|
+
question = "What's your favorite programming language?"
|
|
134
|
+
answer = generator.generate(
|
|
135
|
+
prompt=question,
|
|
136
|
+
max_length=100,
|
|
137
|
+
temperature=0.7
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
print(f"Q: {question}")
|
|
141
|
+
print(f"A: {answer}")
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Using Command Line
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
# Generate answers
|
|
148
|
+
gptmed-generate --prompt "How do I train a custom model?" --max-length 100
|
|
149
|
+
|
|
150
|
+
# Train model
|
|
151
|
+
gptmed-train --model-size small --num-epochs 10 --batch-size 16
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
### Training Your Own Model
|
|
155
|
+
|
|
156
|
+
```python
|
|
157
|
+
from gptmed.training.train import main
|
|
158
|
+
from gptmed.configs.train_config import get_default_config
|
|
159
|
+
from gptmed.model.configs.model_config import get_small_config
|
|
160
|
+
|
|
161
|
+
# Configure training
|
|
162
|
+
train_config = get_default_config()
|
|
163
|
+
train_config.batch_size = 16
|
|
164
|
+
train_config.num_epochs = 10
|
|
165
|
+
train_config.learning_rate = 3e-4
|
|
166
|
+
|
|
167
|
+
# Start training
|
|
168
|
+
main()
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## Model Architecture
|
|
172
|
+
|
|
173
|
+
The model uses a custom GPT-based transformer architecture:
|
|
174
|
+
|
|
175
|
+
- **Embedding**: Token + positional embeddings
|
|
176
|
+
- **Transformer Blocks**: Multi-head self-attention + feed-forward networks
|
|
177
|
+
- **Parameters**: ~10M (small), ~50M (medium)
|
|
178
|
+
- **Context Length**: 512 tokens
|
|
179
|
+
- **Vocabulary**: Custom SentencePiece tokenizer trained on your data
|
|
180
|
+
|
|
181
|
+
## Configuration
|
|
182
|
+
|
|
183
|
+
### Model Sizes
|
|
184
|
+
|
|
185
|
+
```python
|
|
186
|
+
from gptmed.model.configs.model_config import (
|
|
187
|
+
get_tiny_config, # ~2M parameters - for testing
|
|
188
|
+
get_small_config, # ~10M parameters - recommended
|
|
189
|
+
get_medium_config # ~50M parameters - higher quality
|
|
190
|
+
)
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
### Training Configuration
|
|
194
|
+
|
|
195
|
+
```python
|
|
196
|
+
from gptmed.configs.train_config import TrainingConfig
|
|
197
|
+
|
|
198
|
+
config = TrainingConfig(
|
|
199
|
+
batch_size=16,
|
|
200
|
+
learning_rate=3e-4,
|
|
201
|
+
num_epochs=10,
|
|
202
|
+
warmup_steps=100,
|
|
203
|
+
grad_clip=1.0
|
|
204
|
+
)
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
## Project Structure
|
|
208
|
+
|
|
209
|
+
```
|
|
210
|
+
gptmed/
|
|
211
|
+
├── model/
|
|
212
|
+
│ ├── architecture/ # GPT transformer implementation
|
|
213
|
+
│ └── configs/ # Model configurations
|
|
214
|
+
├── inference/
|
|
215
|
+
│ ├── generator.py # Text generation
|
|
216
|
+
│ └── sampling.py # Sampling strategies
|
|
217
|
+
├── training/
|
|
218
|
+
│ ├── train.py # Training script
|
|
219
|
+
│ ├── trainer.py # Training loop
|
|
220
|
+
│ └── dataset.py # Data loading
|
|
221
|
+
├── tokenizer/
|
|
222
|
+
│ └── train_tokenizer.py # SentencePiece tokenizer
|
|
223
|
+
├── configs/
|
|
224
|
+
│ └── train_config.py # Training configurations
|
|
225
|
+
└── utils/
|
|
226
|
+
├── checkpoints.py # Model checkpointing
|
|
227
|
+
└── logging.py # Training logging
|
|
228
|
+
```
|
|
229
|
+
|
|
230
|
+
## Requirements
|
|
231
|
+
|
|
232
|
+
- Python >= 3.8
|
|
233
|
+
- PyTorch >= 2.0.0
|
|
234
|
+
- sentencepiece >= 0.1.99
|
|
235
|
+
- numpy >= 1.24.0
|
|
236
|
+
- tqdm >= 4.65.0
|
|
237
|
+
|
|
238
|
+
## Documentation
|
|
239
|
+
|
|
240
|
+
For detailed documentation, visit the [GitHub Repository](https://github.com/sigdelsanjog/gptmed).
|
|
241
|
+
|
|
242
|
+
### Key Guides
|
|
243
|
+
|
|
244
|
+
- [Training Guide](docs/training.md)
|
|
245
|
+
- [Inference Guide](docs/inference.md)
|
|
246
|
+
- [Model Architecture](docs/architecture.md)
|
|
247
|
+
- [API Reference](docs/api.md)
|
|
248
|
+
|
|
249
|
+
## Performance
|
|
250
|
+
|
|
251
|
+
| Model Size | Parameters | Training Time | Inference Speed |
|
|
252
|
+
| ---------- | ---------- | ------------- | --------------- |
|
|
253
|
+
| Tiny | ~2M | 2 hours | ~100 tokens/sec |
|
|
254
|
+
| Small | ~10M | 8 hours | ~80 tokens/sec |
|
|
255
|
+
| Medium | ~50M | 24 hours | ~50 tokens/sec |
|
|
256
|
+
|
|
257
|
+
_Tested on GTX 1080 8GB_
|
|
258
|
+
|
|
259
|
+
## Examples
|
|
260
|
+
|
|
261
|
+
### Medical Question Answering
|
|
262
|
+
|
|
263
|
+
```python
|
|
264
|
+
# Example 1: Symptoms inquiry
|
|
265
|
+
question = "What are the early signs of Alzheimer's disease?"
|
|
266
|
+
answer = generator.generate(question, temperature=0.7)
|
|
267
|
+
|
|
268
|
+
# Example 2: Treatment information
|
|
269
|
+
question = "How is Type 2 diabetes treated?"
|
|
270
|
+
answer = generator.generate(question, temperature=0.6)
|
|
271
|
+
|
|
272
|
+
# Example 3: Medical definitions
|
|
273
|
+
question = "What is hypertension?"
|
|
274
|
+
answer = generator.generate(question, temperature=0.5)
|
|
275
|
+
```
|
|
276
|
+
|
|
277
|
+
## Contributing
|
|
278
|
+
|
|
279
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
280
|
+
|
|
281
|
+
1. Fork the repository
|
|
282
|
+
2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
|
|
283
|
+
3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
|
|
284
|
+
4. Push to the branch (`git push origin feature/AmazingFeature`)
|
|
285
|
+
5. Open a Pull Request
|
|
286
|
+
|
|
287
|
+
## Citation
|
|
288
|
+
|
|
289
|
+
If you use this model in your research, please cite:
|
|
290
|
+
|
|
291
|
+
```bibtex
|
|
292
|
+
@software{llm_med_2026,
|
|
293
|
+
author = {Sanjog Sigdel},
|
|
294
|
+
title = {GptMed: A custom causal question answering general purpose GPT Transformer Architecture Model},
|
|
295
|
+
year = {2026},
|
|
296
|
+
url = {https://github.com/sigdelsanjog/gptmed}
|
|
297
|
+
}
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## License
|
|
301
|
+
|
|
302
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
303
|
+
|
|
304
|
+
## Acknowledgments
|
|
305
|
+
|
|
306
|
+
- MedQuAD dataset creators
|
|
307
|
+
- PyTorch team
|
|
308
|
+
|
|
309
|
+
## Disclaimer
|
|
310
|
+
|
|
311
|
+
⚠️ **Medical Disclaimer**: This model is for research and educational purposes only. It should NOT be used for actual medical diagnosis or treatment decisions. Always consult qualified healthcare professionals for medical advice.
|
|
312
|
+
|
|
313
|
+
## Support
|
|
314
|
+
|
|
315
|
+
- 📫 Issues: [GitHub Issues](https://github.com/sigdelsanjog/gptmed/issues)
|
|
316
|
+
- 💬 Discussions: [GitHub Discussions](https://github.com/sigdelsanjog/gptmed/discussions)
|
|
317
|
+
- 📧 Email: sanjog.sigdel@ku.edu.np
|
|
318
|
+
|
|
319
|
+
## Changelog
|
|
320
|
+
|
|
321
|
+
See [CHANGELOG.md](CHANGELOG.md) for version history.
|
|
322
|
+
|
|
323
|
+
---
|
|
324
|
+
|
|
325
|
+
Made with ❤️ for learning purpose
|
gptmed-0.0.1/README.md
ADDED
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
# GptMed 🤖
|
|
2
|
+
|
|
3
|
+
A lightweight GPT-based language model framework for training custom question-answering models on any domain. This package provides a transformer-based GPT architecture that you can train on your own Q&A datasets - whether it's casual conversations, technical support, education, or any other domain.
|
|
4
|
+
|
|
5
|
+
[](https://badge.fury.io/py/gptmed)
|
|
6
|
+
[](https://www.python.org/downloads/)
|
|
7
|
+
[](https://opensource.org/licenses/MIT)
|
|
8
|
+
|
|
9
|
+
## Features
|
|
10
|
+
|
|
11
|
+
- 🧠 **Custom GPT Architecture**: Lightweight transformer model for any Q&A domain
|
|
12
|
+
- 🎯 **Domain-Agnostic**: Train on any question-answering dataset (casual chat, tech support, education, etc.)
|
|
13
|
+
- ⚡ **Fast Inference**: Optimized for quick question answering
|
|
14
|
+
- 🔧 **Flexible Training**: Easy to train on your own custom datasets
|
|
15
|
+
- 📦 **Lightweight**: Small model size suitable for edge deployment
|
|
16
|
+
- 🛠️ **Complete Toolkit**: Includes tokenizer training, model training, and inference utilities
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
### From PyPI (Recommended)
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
pip install gptmed
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### From Source
|
|
27
|
+
|
|
28
|
+
```bash
|
|
29
|
+
git clone https://github.com/sigdelsanjog/gptmed.git
|
|
30
|
+
cd gptmed
|
|
31
|
+
pip install -e .
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
### With Optional Dependencies
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
# For development
|
|
38
|
+
pip install gptmed[dev]
|
|
39
|
+
|
|
40
|
+
# For training
|
|
41
|
+
pip install gptmed[training]
|
|
42
|
+
|
|
43
|
+
# All dependencies
|
|
44
|
+
pip install gptmed[dev,training]
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
## Quick Start
|
|
48
|
+
|
|
49
|
+
### Inference (Generate Answers)
|
|
50
|
+
|
|
51
|
+
```python
|
|
52
|
+
from gptmed.inference.generator import TextGenerator
|
|
53
|
+
from gptmed.model.architecture import GPTTransformer
|
|
54
|
+
from gptmed.model.configs.model_config import get_small_config
|
|
55
|
+
|
|
56
|
+
# Load model
|
|
57
|
+
config = get_small_config()
|
|
58
|
+
model = GPTTransformer(config)
|
|
59
|
+
|
|
60
|
+
# Load your trained checkpoint
|
|
61
|
+
# model.load_state_dict(torch.load('path/to/checkpoint.pt'))
|
|
62
|
+
|
|
63
|
+
# Create generator
|
|
64
|
+
generator = TextGenerator(
|
|
65
|
+
model=model,
|
|
66
|
+
tokenizer_path='path/to/tokenizer.model'
|
|
67
|
+
)
|
|
68
|
+
|
|
69
|
+
# Generate answer
|
|
70
|
+
question = "What's your favorite programming language?"
|
|
71
|
+
answer = generator.generate(
|
|
72
|
+
prompt=question,
|
|
73
|
+
max_length=100,
|
|
74
|
+
temperature=0.7
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
print(f"Q: {question}")
|
|
78
|
+
print(f"A: {answer}")
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Using Command Line
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
# Generate answers
|
|
85
|
+
gptmed-generate --prompt "How do I train a custom model?" --max-length 100
|
|
86
|
+
|
|
87
|
+
# Train model
|
|
88
|
+
gptmed-train --model-size small --num-epochs 10 --batch-size 16
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Training Your Own Model
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
from gptmed.training.train import main
|
|
95
|
+
from gptmed.configs.train_config import get_default_config
|
|
96
|
+
from gptmed.model.configs.model_config import get_small_config
|
|
97
|
+
|
|
98
|
+
# Configure training
|
|
99
|
+
train_config = get_default_config()
|
|
100
|
+
train_config.batch_size = 16
|
|
101
|
+
train_config.num_epochs = 10
|
|
102
|
+
train_config.learning_rate = 3e-4
|
|
103
|
+
|
|
104
|
+
# Start training
|
|
105
|
+
main()
|
|
106
|
+
```
|
|
107
|
+
|
|
108
|
+
## Model Architecture
|
|
109
|
+
|
|
110
|
+
The model uses a custom GPT-based transformer architecture:
|
|
111
|
+
|
|
112
|
+
- **Embedding**: Token + positional embeddings
|
|
113
|
+
- **Transformer Blocks**: Multi-head self-attention + feed-forward networks
|
|
114
|
+
- **Parameters**: ~10M (small), ~50M (medium)
|
|
115
|
+
- **Context Length**: 512 tokens
|
|
116
|
+
- **Vocabulary**: Custom SentencePiece tokenizer trained on your data
|
|
117
|
+
|
|
118
|
+
## Configuration
|
|
119
|
+
|
|
120
|
+
### Model Sizes
|
|
121
|
+
|
|
122
|
+
```python
|
|
123
|
+
from gptmed.model.configs.model_config import (
|
|
124
|
+
get_tiny_config, # ~2M parameters - for testing
|
|
125
|
+
get_small_config, # ~10M parameters - recommended
|
|
126
|
+
get_medium_config # ~50M parameters - higher quality
|
|
127
|
+
)
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Training Configuration
|
|
131
|
+
|
|
132
|
+
```python
|
|
133
|
+
from gptmed.configs.train_config import TrainingConfig
|
|
134
|
+
|
|
135
|
+
config = TrainingConfig(
|
|
136
|
+
batch_size=16,
|
|
137
|
+
learning_rate=3e-4,
|
|
138
|
+
num_epochs=10,
|
|
139
|
+
warmup_steps=100,
|
|
140
|
+
grad_clip=1.0
|
|
141
|
+
)
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
## Project Structure
|
|
145
|
+
|
|
146
|
+
```
|
|
147
|
+
gptmed/
|
|
148
|
+
├── model/
|
|
149
|
+
│ ├── architecture/ # GPT transformer implementation
|
|
150
|
+
│ └── configs/ # Model configurations
|
|
151
|
+
├── inference/
|
|
152
|
+
│ ├── generator.py # Text generation
|
|
153
|
+
│ └── sampling.py # Sampling strategies
|
|
154
|
+
├── training/
|
|
155
|
+
│ ├── train.py # Training script
|
|
156
|
+
│ ├── trainer.py # Training loop
|
|
157
|
+
│ └── dataset.py # Data loading
|
|
158
|
+
├── tokenizer/
|
|
159
|
+
│ └── train_tokenizer.py # SentencePiece tokenizer
|
|
160
|
+
├── configs/
|
|
161
|
+
│ └── train_config.py # Training configurations
|
|
162
|
+
└── utils/
|
|
163
|
+
├── checkpoints.py # Model checkpointing
|
|
164
|
+
└── logging.py # Training logging
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Requirements
|
|
168
|
+
|
|
169
|
+
- Python >= 3.8
|
|
170
|
+
- PyTorch >= 2.0.0
|
|
171
|
+
- sentencepiece >= 0.1.99
|
|
172
|
+
- numpy >= 1.24.0
|
|
173
|
+
- tqdm >= 4.65.0
|
|
174
|
+
|
|
175
|
+
## Documentation
|
|
176
|
+
|
|
177
|
+
For detailed documentation, visit the [GitHub Repository](https://github.com/sigdelsanjog/gptmed).
|
|
178
|
+
|
|
179
|
+
### Key Guides
|
|
180
|
+
|
|
181
|
+
- [Training Guide](docs/training.md)
|
|
182
|
+
- [Inference Guide](docs/inference.md)
|
|
183
|
+
- [Model Architecture](docs/architecture.md)
|
|
184
|
+
- [API Reference](docs/api.md)
|
|
185
|
+
|
|
186
|
+
## Performance
|
|
187
|
+
|
|
188
|
+
| Model Size | Parameters | Training Time | Inference Speed |
|
|
189
|
+
| ---------- | ---------- | ------------- | --------------- |
|
|
190
|
+
| Tiny | ~2M | 2 hours | ~100 tokens/sec |
|
|
191
|
+
| Small | ~10M | 8 hours | ~80 tokens/sec |
|
|
192
|
+
| Medium | ~50M | 24 hours | ~50 tokens/sec |
|
|
193
|
+
|
|
194
|
+
_Tested on GTX 1080 8GB_
|
|
195
|
+
|
|
196
|
+
## Examples
|
|
197
|
+
|
|
198
|
+
### Medical Question Answering
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
# Example 1: Symptoms inquiry
|
|
202
|
+
question = "What are the early signs of Alzheimer's disease?"
|
|
203
|
+
answer = generator.generate(question, temperature=0.7)
|
|
204
|
+
|
|
205
|
+
# Example 2: Treatment information
|
|
206
|
+
question = "How is Type 2 diabetes treated?"
|
|
207
|
+
answer = generator.generate(question, temperature=0.6)
|
|
208
|
+
|
|
209
|
+
# Example 3: Medical definitions
|
|
210
|
+
question = "What is hypertension?"
|
|
211
|
+
answer = generator.generate(question, temperature=0.5)
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
## Contributing
|
|
215
|
+
|
|
216
|
+
Contributions are welcome! Please feel free to submit a Pull Request.
|
|
217
|
+
|
|
218
|
+
1. Fork the repository
|
|
219
|
+
2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
|
|
220
|
+
3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
|
|
221
|
+
4. Push to the branch (`git push origin feature/AmazingFeature`)
|
|
222
|
+
5. Open a Pull Request
|
|
223
|
+
|
|
224
|
+
## Citation
|
|
225
|
+
|
|
226
|
+
If you use this model in your research, please cite:
|
|
227
|
+
|
|
228
|
+
```bibtex
|
|
229
|
+
@software{llm_med_2026,
|
|
230
|
+
author = {Sanjog Sigdel},
|
|
231
|
+
title = {GptMed: A custom causal question answering general purpose GPT Transformer Architecture Model},
|
|
232
|
+
year = {2026},
|
|
233
|
+
url = {https://github.com/sigdelsanjog/gptmed}
|
|
234
|
+
}
|
|
235
|
+
```
|
|
236
|
+
|
|
237
|
+
## License
|
|
238
|
+
|
|
239
|
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
|
240
|
+
|
|
241
|
+
## Acknowledgments
|
|
242
|
+
|
|
243
|
+
- MedQuAD dataset creators
|
|
244
|
+
- PyTorch team
|
|
245
|
+
|
|
246
|
+
## Disclaimer
|
|
247
|
+
|
|
248
|
+
⚠️ **Medical Disclaimer**: This model is for research and educational purposes only. It should NOT be used for actual medical diagnosis or treatment decisions. Always consult qualified healthcare professionals for medical advice.
|
|
249
|
+
|
|
250
|
+
## Support
|
|
251
|
+
|
|
252
|
+
- 📫 Issues: [GitHub Issues](https://github.com/sigdelsanjog/gptmed/issues)
|
|
253
|
+
- 💬 Discussions: [GitHub Discussions](https://github.com/sigdelsanjog/gptmed/discussions)
|
|
254
|
+
- 📧 Email: sanjog.sigdel@ku.edu.np
|
|
255
|
+
|
|
256
|
+
## Changelog
|
|
257
|
+
|
|
258
|
+
See [CHANGELOG.md](CHANGELOG.md) for version history.
|
|
259
|
+
|
|
260
|
+
---
|
|
261
|
+
|
|
262
|
+
Made with ❤️ for learning purpose
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
"""
|
|
2
|
+
llm-med: A lightweight medical question-answering language model
|
|
3
|
+
|
|
4
|
+
This package provides a GPT-based transformer architecture trained on the MedQuAD dataset
|
|
5
|
+
for medical domain question answering.
|
|
6
|
+
|
|
7
|
+
Main Components:
|
|
8
|
+
- model: GPT transformer architecture
|
|
9
|
+
- inference: Text generation and sampling
|
|
10
|
+
- training: Training loop and utilities
|
|
11
|
+
- tokenizer: SentencePiece tokenizer
|
|
12
|
+
- configs: Configuration management
|
|
13
|
+
- utils: Utility functions
|
|
14
|
+
|
|
15
|
+
Example:
|
|
16
|
+
>>> from llm_med.model.architecture import GPTTransformer
|
|
17
|
+
>>> from llm_med.model.configs.model_config import get_small_config
|
|
18
|
+
>>> from llm_med.inference.generator import TextGenerator
|
|
19
|
+
>>>
|
|
20
|
+
>>> config = get_small_config()
|
|
21
|
+
>>> model = GPTTransformer(config)
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
__version__ = "0.2.0"
|
|
25
|
+
__author__ = "Sanjog Sigdel"
|
|
26
|
+
__email__ = "sigdelsanjog@gmail.com"
|
|
27
|
+
|
|
28
|
+
# Expose main components at package level for convenience
|
|
29
|
+
from llm_med.model.architecture import GPTTransformer
|
|
30
|
+
from llm_med.model.configs.model_config import ModelConfig, get_small_config, get_tiny_config
|
|
31
|
+
|
|
32
|
+
__all__ = [
|
|
33
|
+
"GPTTransformer",
|
|
34
|
+
"ModelConfig",
|
|
35
|
+
"get_small_config",
|
|
36
|
+
"get_tiny_config",
|
|
37
|
+
]
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Configs package."""
|