gptmed 0.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. gptmed-0.0.1/LICENSE +21 -0
  2. gptmed-0.0.1/MANIFEST.in +40 -0
  3. gptmed-0.0.1/PKG-INFO +325 -0
  4. gptmed-0.0.1/README.md +262 -0
  5. gptmed-0.0.1/gptmed/__init__.py +37 -0
  6. gptmed-0.0.1/gptmed/configs/__init__.py +1 -0
  7. gptmed-0.0.1/gptmed/configs/train_config.py +154 -0
  8. gptmed-0.0.1/gptmed/data/__init__.py +5 -0
  9. gptmed-0.0.1/gptmed/data/parsers/__init__.py +10 -0
  10. gptmed-0.0.1/gptmed/data/parsers/medquad_parser.py +257 -0
  11. gptmed-0.0.1/gptmed/data/parsers/text_formatter.py +148 -0
  12. gptmed-0.0.1/gptmed/inference/__init__.py +1 -0
  13. gptmed-0.0.1/gptmed/inference/decoding_utils.py +190 -0
  14. gptmed-0.0.1/gptmed/inference/generation_config.py +83 -0
  15. gptmed-0.0.1/gptmed/inference/generator.py +253 -0
  16. gptmed-0.0.1/gptmed/inference/sampling.py +261 -0
  17. gptmed-0.0.1/gptmed/model/__init__.py +9 -0
  18. gptmed-0.0.1/gptmed/model/architecture/__init__.py +35 -0
  19. gptmed-0.0.1/gptmed/model/architecture/attention.py +188 -0
  20. gptmed-0.0.1/gptmed/model/architecture/decoder_block.py +130 -0
  21. gptmed-0.0.1/gptmed/model/architecture/embeddings.py +146 -0
  22. gptmed-0.0.1/gptmed/model/architecture/feedforward.py +109 -0
  23. gptmed-0.0.1/gptmed/model/architecture/transformer.py +204 -0
  24. gptmed-0.0.1/gptmed/model/configs/__init__.py +17 -0
  25. gptmed-0.0.1/gptmed/model/configs/model_config.py +155 -0
  26. gptmed-0.0.1/gptmed/tokenizer/__init__.py +7 -0
  27. gptmed-0.0.1/gptmed/tokenizer/tokenize_data.py +286 -0
  28. gptmed-0.0.1/gptmed/tokenizer/train_tokenizer.py +218 -0
  29. gptmed-0.0.1/gptmed/training/__init__.py +1 -0
  30. gptmed-0.0.1/gptmed/training/dataset.py +183 -0
  31. gptmed-0.0.1/gptmed/training/train.py +272 -0
  32. gptmed-0.0.1/gptmed/training/trainer.py +331 -0
  33. gptmed-0.0.1/gptmed/training/utils.py +212 -0
  34. gptmed-0.0.1/gptmed/utils/__init__.py +1 -0
  35. gptmed-0.0.1/gptmed/utils/checkpoints.py +224 -0
  36. gptmed-0.0.1/gptmed/utils/logging.py +189 -0
  37. gptmed-0.0.1/gptmed.egg-info/PKG-INFO +325 -0
  38. gptmed-0.0.1/gptmed.egg-info/SOURCES.txt +44 -0
  39. gptmed-0.0.1/gptmed.egg-info/dependency_links.txt +1 -0
  40. gptmed-0.0.1/gptmed.egg-info/entry_points.txt +3 -0
  41. gptmed-0.0.1/gptmed.egg-info/requires.txt +14 -0
  42. gptmed-0.0.1/gptmed.egg-info/top_level.txt +1 -0
  43. gptmed-0.0.1/pyproject.toml +109 -0
  44. gptmed-0.0.1/requirements.txt +18 -0
  45. gptmed-0.0.1/setup.cfg +4 -0
  46. gptmed-0.0.1/setup.py +11 -0
gptmed-0.0.1/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Sanjog Sigdel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,40 @@
1
+ # Include documentation
2
+ include README.md
3
+ include LICENSE
4
+ include requirements.txt
5
+
6
+ # Include configuration files
7
+ recursive-include configs *.py *.json *.yaml
8
+
9
+ # Include model configs
10
+ recursive-include model/configs *.py *.json
11
+
12
+ # Include tokenizer files (vocab and model files)
13
+ recursive-include tokenizer *.model *.vocab *.json
14
+
15
+ # Exclude unnecessary files
16
+ exclude .gitignore
17
+ exclude .env
18
+ exclude *.pyc
19
+ exclude */__pycache__/*
20
+ recursive-exclude * __pycache__
21
+ recursive-exclude * *.py[co]
22
+ recursive-exclude * .DS_Store
23
+
24
+ # Exclude datasets, logs, and checkpoints (too large for PyPI)
25
+ exclude dataset/*
26
+ exclude logs/*
27
+ exclude model/checkpoints/*
28
+ recursive-exclude dataset *
29
+ recursive-exclude logs *
30
+ recursive-exclude model/checkpoints *
31
+
32
+ # Exclude test files
33
+ recursive-exclude tests *
34
+ exclude test_*.py
35
+
36
+ # Exclude development/training scripts not needed for inference
37
+ exclude download_medquad.py
38
+ exclude monitor_training.py
39
+ exclude preprocess.py
40
+ exclude generate_sample.py
gptmed-0.0.1/PKG-INFO ADDED
@@ -0,0 +1,325 @@
1
+ Metadata-Version: 2.4
2
+ Name: gptmed
3
+ Version: 0.0.1
4
+ Summary: A lightweight GPT-based language model framework for training custom question-answering models on any domain
5
+ Author-email: Sanjog Sigdel <sigdelsanjog@gmail.com>
6
+ Maintainer-email: Sanjog Sigdel <sigdelsanjog@gmail.com>
7
+ License: MIT License
8
+
9
+ Copyright (c) 2026 Sanjog Sigdel
10
+
11
+ Permission is hereby granted, free of charge, to any person obtaining a copy
12
+ of this software and associated documentation files (the "Software"), to deal
13
+ in the Software without restriction, including without limitation the rights
14
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
15
+ copies of the Software, and to permit persons to whom the Software is
16
+ furnished to do so, subject to the following conditions:
17
+
18
+ The above copyright notice and this permission notice shall be included in all
19
+ copies or substantial portions of the Software.
20
+
21
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27
+ SOFTWARE.
28
+
29
+ Project-URL: Homepage, https://github.com/sigdelsanjog/gptmed
30
+ Project-URL: Documentation, https://github.com/sigdelsanjog/gptmed#readme
31
+ Project-URL: Repository, https://github.com/sigdelsanjog/gptmed
32
+ Project-URL: Issues, https://github.com/sigdelsanjog/gptmed/issues
33
+ Keywords: nlp,language-model,transformer,gpt,pytorch,qa,question-answering,training,deep-learning,custom-model
34
+ Classifier: Development Status :: 3 - Alpha
35
+ Classifier: Intended Audience :: Developers
36
+ Classifier: Intended Audience :: Science/Research
37
+ Classifier: Intended Audience :: Education
38
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
39
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
40
+ Classifier: License :: OSI Approved :: MIT License
41
+ Classifier: Programming Language :: Python :: 3
42
+ Classifier: Programming Language :: Python :: 3.8
43
+ Classifier: Programming Language :: Python :: 3.9
44
+ Classifier: Programming Language :: Python :: 3.10
45
+ Classifier: Programming Language :: Python :: 3.11
46
+ Classifier: Operating System :: OS Independent
47
+ Requires-Python: >=3.8
48
+ Description-Content-Type: text/markdown
49
+ License-File: LICENSE
50
+ Requires-Dist: torch>=2.0.0
51
+ Requires-Dist: sentencepiece>=0.1.99
52
+ Requires-Dist: numpy>=1.24.0
53
+ Requires-Dist: tqdm>=4.65.0
54
+ Provides-Extra: dev
55
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
56
+ Requires-Dist: black>=22.0.0; extra == "dev"
57
+ Requires-Dist: flake8>=4.0.0; extra == "dev"
58
+ Requires-Dist: mypy>=0.950; extra == "dev"
59
+ Provides-Extra: training
60
+ Requires-Dist: tensorboard>=2.10.0; extra == "training"
61
+ Requires-Dist: wandb>=0.13.0; extra == "training"
62
+ Dynamic: license-file
63
+
64
+ # GptMed 🤖
65
+
66
+ A lightweight GPT-based language model framework for training custom question-answering models on any domain. This package provides a transformer-based GPT architecture that you can train on your own Q&A datasets - whether it's casual conversations, technical support, education, or any other domain.
67
+
68
+ [![PyPI version](https://badge.fury.io/py/gptmed.svg)](https://badge.fury.io/py/gptmed)
69
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
70
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
71
+
72
+ ## Features
73
+
74
+ - 🧠 **Custom GPT Architecture**: Lightweight transformer model for any Q&A domain
75
+ - 🎯 **Domain-Agnostic**: Train on any question-answering dataset (casual chat, tech support, education, etc.)
76
+ - ⚡ **Fast Inference**: Optimized for quick question answering
77
+ - 🔧 **Flexible Training**: Easy to train on your own custom datasets
78
+ - 📦 **Lightweight**: Small model size suitable for edge deployment
79
+ - 🛠️ **Complete Toolkit**: Includes tokenizer training, model training, and inference utilities
80
+
81
+ ## Installation
82
+
83
+ ### From PyPI (Recommended)
84
+
85
+ ```bash
86
+ pip install gptmed
87
+ ```
88
+
89
+ ### From Source
90
+
91
+ ```bash
92
+ git clone https://github.com/sigdelsanjog/gptmed.git
93
+ cd gptmed
94
+ pip install -e .
95
+ ```
96
+
97
+ ### With Optional Dependencies
98
+
99
+ ```bash
100
+ # For development
101
+ pip install gptmed[dev]
102
+
103
+ # For training
104
+ pip install gptmed[training]
105
+
106
+ # All dependencies
107
+ pip install gptmed[dev,training]
108
+ ```
109
+
110
+ ## Quick Start
111
+
112
+ ### Inference (Generate Answers)
113
+
114
+ ```python
115
+ from gptmed.inference.generator import TextGenerator
116
+ from gptmed.model.architecture import GPTTransformer
117
+ from gptmed.model.configs.model_config import get_small_config
118
+
119
+ # Load model
120
+ config = get_small_config()
121
+ model = GPTTransformer(config)
122
+
123
+ # Load your trained checkpoint
124
+ # model.load_state_dict(torch.load('path/to/checkpoint.pt'))
125
+
126
+ # Create generator
127
+ generator = TextGenerator(
128
+ model=model,
129
+ tokenizer_path='path/to/tokenizer.model'
130
+ )
131
+
132
+ # Generate answer
133
+ question = "What's your favorite programming language?"
134
+ answer = generator.generate(
135
+ prompt=question,
136
+ max_length=100,
137
+ temperature=0.7
138
+ )
139
+
140
+ print(f"Q: {question}")
141
+ print(f"A: {answer}")
142
+ ```
143
+
144
+ ### Using Command Line
145
+
146
+ ```bash
147
+ # Generate answers
148
+ gptmed-generate --prompt "How do I train a custom model?" --max-length 100
149
+
150
+ # Train model
151
+ gptmed-train --model-size small --num-epochs 10 --batch-size 16
152
+ ```
153
+
154
+ ### Training Your Own Model
155
+
156
+ ```python
157
+ from gptmed.training.train import main
158
+ from gptmed.configs.train_config import get_default_config
159
+ from gptmed.model.configs.model_config import get_small_config
160
+
161
+ # Configure training
162
+ train_config = get_default_config()
163
+ train_config.batch_size = 16
164
+ train_config.num_epochs = 10
165
+ train_config.learning_rate = 3e-4
166
+
167
+ # Start training
168
+ main()
169
+ ```
170
+
171
+ ## Model Architecture
172
+
173
+ The model uses a custom GPT-based transformer architecture:
174
+
175
+ - **Embedding**: Token + positional embeddings
176
+ - **Transformer Blocks**: Multi-head self-attention + feed-forward networks
177
+ - **Parameters**: ~10M (small), ~50M (medium)
178
+ - **Context Length**: 512 tokens
179
+ - **Vocabulary**: Custom SentencePiece tokenizer trained on your data
180
+
181
+ ## Configuration
182
+
183
+ ### Model Sizes
184
+
185
+ ```python
186
+ from gptmed.model.configs.model_config import (
187
+ get_tiny_config, # ~2M parameters - for testing
188
+ get_small_config, # ~10M parameters - recommended
189
+ get_medium_config # ~50M parameters - higher quality
190
+ )
191
+ ```
192
+
193
+ ### Training Configuration
194
+
195
+ ```python
196
+ from gptmed.configs.train_config import TrainingConfig
197
+
198
+ config = TrainingConfig(
199
+ batch_size=16,
200
+ learning_rate=3e-4,
201
+ num_epochs=10,
202
+ warmup_steps=100,
203
+ grad_clip=1.0
204
+ )
205
+ ```
206
+
207
+ ## Project Structure
208
+
209
+ ```
210
+ gptmed/
211
+ ├── model/
212
+ │ ├── architecture/ # GPT transformer implementation
213
+ │ └── configs/ # Model configurations
214
+ ├── inference/
215
+ │ ├── generator.py # Text generation
216
+ │ └── sampling.py # Sampling strategies
217
+ ├── training/
218
+ │ ├── train.py # Training script
219
+ │ ├── trainer.py # Training loop
220
+ │ └── dataset.py # Data loading
221
+ ├── tokenizer/
222
+ │ └── train_tokenizer.py # SentencePiece tokenizer
223
+ ├── configs/
224
+ │ └── train_config.py # Training configurations
225
+ └── utils/
226
+ ├── checkpoints.py # Model checkpointing
227
+ └── logging.py # Training logging
228
+ ```
229
+
230
+ ## Requirements
231
+
232
+ - Python >= 3.8
233
+ - PyTorch >= 2.0.0
234
+ - sentencepiece >= 0.1.99
235
+ - numpy >= 1.24.0
236
+ - tqdm >= 4.65.0
237
+
238
+ ## Documentation
239
+
240
+ For detailed documentation, visit [GitHub Repository](https://github.com/sigdelsanjog/gptmed).
241
+
242
+ ### Key Guides
243
+
244
+ - [Training Guide](docs/training.md)
245
+ - [Inference Guide](docs/inference.md)
246
+ - [Model Architecture](docs/architecture.md)
247
+ - [API Reference](docs/api.md)
248
+
249
+ ## Performance
250
+
251
+ | Model Size | Parameters | Training Time | Inference Speed |
252
+ | ---------- | ---------- | ------------- | --------------- |
253
+ | Tiny | ~2M | 2 hours | ~100 tokens/sec |
254
+ | Small | ~10M | 8 hours | ~80 tokens/sec |
255
+ | Medium | ~50M | 24 hours | ~50 tokens/sec |
256
+
257
+ _Tested on GTX 1080 8GB_
258
+
259
+ ## Examples
260
+
261
+ ### Medical Question Answering
262
+
263
+ ```python
264
+ # Example 1: Symptoms inquiry
265
+ question = "What are the early signs of Alzheimer's disease?"
266
+ answer = generator.generate(question, temperature=0.7)
267
+
268
+ # Example 2: Treatment information
269
+ question = "How is Type 2 diabetes treated?"
270
+ answer = generator.generate(question, temperature=0.6)
271
+
272
+ # Example 3: Medical definitions
273
+ question = "What is hypertension?"
274
+ answer = generator.generate(question, temperature=0.5)
275
+ ```
276
+
277
+ ## Contributing
278
+
279
+ Contributions are welcome! Please feel free to submit a Pull Request.
280
+
281
+ 1. Fork the repository
282
+ 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
283
+ 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
284
+ 4. Push to the branch (`git push origin feature/AmazingFeature`)
285
+ 5. Open a Pull Request
286
+
287
+ ## Citation
288
+
289
+ If you use this model in your research, please cite:
290
+
291
+ ```bibtex
292
+ @software{llm_med_2026,
293
+ author = {Sanjog Sigdel},
294
+ title = {GptMed: A custom causal question answering general purpose GPT Transformer Architecture Model},
295
+ year = {2026},
296
+ url = {https://github.com/sigdelsanjog/gptmed}
297
+ }
298
+ ```
299
+
300
+ ## License
301
+
302
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
303
+
304
+ ## Acknowledgments
305
+
306
+ - MedQuAD dataset creators
307
+ - PyTorch team
308
+
309
+ ## Disclaimer
310
+
311
+ ⚠️ **Medical Disclaimer**: This model is for research and educational purposes only. It should NOT be used for actual medical diagnosis or treatment decisions. Always consult qualified healthcare professionals for medical advice.
312
+
313
+ ## Support
314
+
315
+ - 📫 Issues: [GitHub Issues](https://github.com/sigdelsanjog/gptmed/issues)
316
+ - 💬 Discussions: [GitHub Discussions](https://github.com/sigdelsanjog/gptmed/discussions)
317
+ - 📧 Email: sanjog.sigdel@ku.edu.np
318
+
319
+ ## Changelog
320
+
321
+ See [CHANGELOG.md](CHANGELOG.md) for version history.
322
+
323
+ ---
324
+
325
+ Made with ❤️ for learning purposes
gptmed-0.0.1/README.md ADDED
@@ -0,0 +1,262 @@
1
+ # GptMed 🤖
2
+
3
+ A lightweight GPT-based language model framework for training custom question-answering models on any domain. This package provides a transformer-based GPT architecture that you can train on your own Q&A datasets - whether it's casual conversations, technical support, education, or any other domain.
4
+
5
+ [![PyPI version](https://badge.fury.io/py/gptmed.svg)](https://badge.fury.io/py/gptmed)
6
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
7
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
8
+
9
+ ## Features
10
+
11
+ - 🧠 **Custom GPT Architecture**: Lightweight transformer model for any Q&A domain
12
+ - 🎯 **Domain-Agnostic**: Train on any question-answering dataset (casual chat, tech support, education, etc.)
13
+ - ⚡ **Fast Inference**: Optimized for quick question answering
14
+ - 🔧 **Flexible Training**: Easy to train on your own custom datasets
15
+ - 📦 **Lightweight**: Small model size suitable for edge deployment
16
+ - 🛠️ **Complete Toolkit**: Includes tokenizer training, model training, and inference utilities
17
+
18
+ ## Installation
19
+
20
+ ### From PyPI (Recommended)
21
+
22
+ ```bash
23
+ pip install gptmed
24
+ ```
25
+
26
+ ### From Source
27
+
28
+ ```bash
29
+ git clone https://github.com/sigdelsanjog/gptmed.git
30
+ cd gptmed
31
+ pip install -e .
32
+ ```
33
+
34
+ ### With Optional Dependencies
35
+
36
+ ```bash
37
+ # For development
38
+ pip install gptmed[dev]
39
+
40
+ # For training
41
+ pip install gptmed[training]
42
+
43
+ # All dependencies
44
+ pip install gptmed[dev,training]
45
+ ```
46
+
47
+ ## Quick Start
48
+
49
+ ### Inference (Generate Answers)
50
+
51
+ ```python
52
+ from gptmed.inference.generator import TextGenerator
53
+ from gptmed.model.architecture import GPTTransformer
54
+ from gptmed.model.configs.model_config import get_small_config
55
+
56
+ # Load model
57
+ config = get_small_config()
58
+ model = GPTTransformer(config)
59
+
60
+ # Load your trained checkpoint
61
+ # model.load_state_dict(torch.load('path/to/checkpoint.pt'))
62
+
63
+ # Create generator
64
+ generator = TextGenerator(
65
+ model=model,
66
+ tokenizer_path='path/to/tokenizer.model'
67
+ )
68
+
69
+ # Generate answer
70
+ question = "What's your favorite programming language?"
71
+ answer = generator.generate(
72
+ prompt=question,
73
+ max_length=100,
74
+ temperature=0.7
75
+ )
76
+
77
+ print(f"Q: {question}")
78
+ print(f"A: {answer}")
79
+ ```
80
+
81
+ ### Using Command Line
82
+
83
+ ```bash
84
+ # Generate answers
85
+ gptmed-generate --prompt "How do I train a custom model?" --max-length 100
86
+
87
+ # Train model
88
+ gptmed-train --model-size small --num-epochs 10 --batch-size 16
89
+ ```
90
+
91
+ ### Training Your Own Model
92
+
93
+ ```python
94
+ from gptmed.training.train import main
95
+ from gptmed.configs.train_config import get_default_config
96
+ from gptmed.model.configs.model_config import get_small_config
97
+
98
+ # Configure training
99
+ train_config = get_default_config()
100
+ train_config.batch_size = 16
101
+ train_config.num_epochs = 10
102
+ train_config.learning_rate = 3e-4
103
+
104
+ # Start training
105
+ main()
106
+ ```
107
+
108
+ ## Model Architecture
109
+
110
+ The model uses a custom GPT-based transformer architecture:
111
+
112
+ - **Embedding**: Token + positional embeddings
113
+ - **Transformer Blocks**: Multi-head self-attention + feed-forward networks
114
+ - **Parameters**: ~10M (small), ~50M (medium)
115
+ - **Context Length**: 512 tokens
116
+ - **Vocabulary**: Custom SentencePiece tokenizer trained on your data
117
+
118
+ ## Configuration
119
+
120
+ ### Model Sizes
121
+
122
+ ```python
123
+ from gptmed.model.configs.model_config import (
124
+ get_tiny_config, # ~2M parameters - for testing
125
+ get_small_config, # ~10M parameters - recommended
126
+ get_medium_config # ~50M parameters - higher quality
127
+ )
128
+ ```
129
+
130
+ ### Training Configuration
131
+
132
+ ```python
133
+ from gptmed.configs.train_config import TrainingConfig
134
+
135
+ config = TrainingConfig(
136
+ batch_size=16,
137
+ learning_rate=3e-4,
138
+ num_epochs=10,
139
+ warmup_steps=100,
140
+ grad_clip=1.0
141
+ )
142
+ ```
143
+
144
+ ## Project Structure
145
+
146
+ ```
147
+ gptmed/
148
+ ├── model/
149
+ │ ├── architecture/ # GPT transformer implementation
150
+ │ └── configs/ # Model configurations
151
+ ├── inference/
152
+ │ ├── generator.py # Text generation
153
+ │ └── sampling.py # Sampling strategies
154
+ ├── training/
155
+ │ ├── train.py # Training script
156
+ │ ├── trainer.py # Training loop
157
+ │ └── dataset.py # Data loading
158
+ ├── tokenizer/
159
+ │ └── train_tokenizer.py # SentencePiece tokenizer
160
+ ├── configs/
161
+ │ └── train_config.py # Training configurations
162
+ └── utils/
163
+ ├── checkpoints.py # Model checkpointing
164
+ └── logging.py # Training logging
165
+ ```
166
+
167
+ ## Requirements
168
+
169
+ - Python >= 3.8
170
+ - PyTorch >= 2.0.0
171
+ - sentencepiece >= 0.1.99
172
+ - numpy >= 1.24.0
173
+ - tqdm >= 4.65.0
174
+
175
+ ## Documentation
176
+
177
+ For detailed documentation, visit [GitHub Repository](https://github.com/sigdelsanjog/gptmed).
178
+
179
+ ### Key Guides
180
+
181
+ - [Training Guide](docs/training.md)
182
+ - [Inference Guide](docs/inference.md)
183
+ - [Model Architecture](docs/architecture.md)
184
+ - [API Reference](docs/api.md)
185
+
186
+ ## Performance
187
+
188
+ | Model Size | Parameters | Training Time | Inference Speed |
189
+ | ---------- | ---------- | ------------- | --------------- |
190
+ | Tiny | ~2M | 2 hours | ~100 tokens/sec |
191
+ | Small | ~10M | 8 hours | ~80 tokens/sec |
192
+ | Medium | ~50M | 24 hours | ~50 tokens/sec |
193
+
194
+ _Tested on GTX 1080 8GB_
195
+
196
+ ## Examples
197
+
198
+ ### Medical Question Answering
199
+
200
+ ```python
201
+ # Example 1: Symptoms inquiry
202
+ question = "What are the early signs of Alzheimer's disease?"
203
+ answer = generator.generate(question, temperature=0.7)
204
+
205
+ # Example 2: Treatment information
206
+ question = "How is Type 2 diabetes treated?"
207
+ answer = generator.generate(question, temperature=0.6)
208
+
209
+ # Example 3: Medical definitions
210
+ question = "What is hypertension?"
211
+ answer = generator.generate(question, temperature=0.5)
212
+ ```
213
+
214
+ ## Contributing
215
+
216
+ Contributions are welcome! Please feel free to submit a Pull Request.
217
+
218
+ 1. Fork the repository
219
+ 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
220
+ 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
221
+ 4. Push to the branch (`git push origin feature/AmazingFeature`)
222
+ 5. Open a Pull Request
223
+
224
+ ## Citation
225
+
226
+ If you use this model in your research, please cite:
227
+
228
+ ```bibtex
229
+ @software{llm_med_2026,
230
+ author = {Sanjog Sigdel},
231
+ title = {GptMed: A custom causal question answering general purpose GPT Transformer Architecture Model},
232
+ year = {2026},
233
+ url = {https://github.com/sigdelsanjog/gptmed}
234
+ }
235
+ ```
236
+
237
+ ## License
238
+
239
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
240
+
241
+ ## Acknowledgments
242
+
243
+ - MedQuAD dataset creators
244
+ - PyTorch team
245
+
246
+ ## Disclaimer
247
+
248
+ ⚠️ **Medical Disclaimer**: This model is for research and educational purposes only. It should NOT be used for actual medical diagnosis or treatment decisions. Always consult qualified healthcare professionals for medical advice.
249
+
250
+ ## Support
251
+
252
+ - 📫 Issues: [GitHub Issues](https://github.com/sigdelsanjog/gptmed/issues)
253
+ - 💬 Discussions: [GitHub Discussions](https://github.com/sigdelsanjog/gptmed/discussions)
254
+ - 📧 Email: sanjog.sigdel@ku.edu.np
255
+
256
+ ## Changelog
257
+
258
+ See [CHANGELOG.md](CHANGELOG.md) for version history.
259
+
260
+ ---
261
+
262
+ Made with ❤️ for learning purposes
@@ -0,0 +1,37 @@
1
+ """
2
+ gptmed: A lightweight medical question-answering language model
3
+
4
+ This package provides a GPT-based transformer architecture trained on the MedQuAD dataset
5
+ for medical domain question answering.
6
+
7
+ Main Components:
8
+ - model: GPT transformer architecture
9
+ - inference: Text generation and sampling
10
+ - training: Training loop and utilities
11
+ - tokenizer: SentencePiece tokenizer
12
+ - configs: Configuration management
13
+ - utils: Utility functions
14
+
15
+ Example:
16
+ >>> from gptmed.model.architecture import GPTTransformer
17
+ >>> from gptmed.model.configs.model_config import get_small_config
18
+ >>> from gptmed.inference.generator import TextGenerator
19
+ >>>
20
+ >>> config = get_small_config()
21
+ >>> model = GPTTransformer(config)
22
+ """
23
+
24
+ __version__ = "0.0.1"
25
+ __author__ = "Sanjog Sigdel"
26
+ __email__ = "sigdelsanjog@gmail.com"
27
+
28
+ # Expose main components at package level for convenience
29
+ from gptmed.model.architecture import GPTTransformer
30
+ from gptmed.model.configs.model_config import ModelConfig, get_small_config, get_tiny_config
31
+
32
+ __all__ = [
33
+ "GPTTransformer",
34
+ "ModelConfig",
35
+ "get_small_config",
36
+ "get_tiny_config",
37
+ ]
@@ -0,0 +1 @@
1
+ """Configs package."""