gptmed-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. gptmed/__init__.py +37 -0
  2. gptmed/configs/__init__.py +1 -0
  3. gptmed/configs/train_config.py +154 -0
  4. gptmed/data/__init__.py +5 -0
  5. gptmed/data/parsers/__init__.py +10 -0
  6. gptmed/data/parsers/medquad_parser.py +257 -0
  7. gptmed/data/parsers/text_formatter.py +148 -0
  8. gptmed/inference/__init__.py +1 -0
  9. gptmed/inference/decoding_utils.py +190 -0
  10. gptmed/inference/generation_config.py +83 -0
  11. gptmed/inference/generator.py +253 -0
  12. gptmed/inference/sampling.py +261 -0
  13. gptmed/model/__init__.py +9 -0
  14. gptmed/model/architecture/__init__.py +35 -0
  15. gptmed/model/architecture/attention.py +188 -0
  16. gptmed/model/architecture/decoder_block.py +130 -0
  17. gptmed/model/architecture/embeddings.py +146 -0
  18. gptmed/model/architecture/feedforward.py +109 -0
  19. gptmed/model/architecture/transformer.py +204 -0
  20. gptmed/model/configs/__init__.py +17 -0
  21. gptmed/model/configs/model_config.py +155 -0
  22. gptmed/tokenizer/__init__.py +7 -0
  23. gptmed/tokenizer/tokenize_data.py +286 -0
  24. gptmed/tokenizer/train_tokenizer.py +218 -0
  25. gptmed/training/__init__.py +1 -0
  26. gptmed/training/dataset.py +183 -0
  27. gptmed/training/train.py +272 -0
  28. gptmed/training/trainer.py +331 -0
  29. gptmed/training/utils.py +212 -0
  30. gptmed/utils/__init__.py +1 -0
  31. gptmed/utils/checkpoints.py +224 -0
  32. gptmed/utils/logging.py +189 -0
  33. gptmed-0.0.1.dist-info/METADATA +325 -0
  34. gptmed-0.0.1.dist-info/RECORD +38 -0
  35. gptmed-0.0.1.dist-info/WHEEL +5 -0
  36. gptmed-0.0.1.dist-info/entry_points.txt +3 -0
  37. gptmed-0.0.1.dist-info/licenses/LICENSE +21 -0
  38. gptmed-0.0.1.dist-info/top_level.txt +1 -0
gptmed-0.0.1.dist-info/METADATA
@@ -0,0 +1,325 @@
+ Metadata-Version: 2.4
+ Name: gptmed
+ Version: 0.0.1
+ Summary: A lightweight GPT-based language model framework for training custom question-answering models on any domain
+ Author-email: Sanjog Sigdel <sigdelsanjog@gmail.com>
+ Maintainer-email: Sanjog Sigdel <sigdelsanjog@gmail.com>
+ License: MIT License
+
+ Copyright (c) 2026 Sanjog Sigdel
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+ Project-URL: Homepage, https://github.com/sigdelsanjog/gptmed
+ Project-URL: Documentation, https://github.com/sigdelsanjog/gptmed#readme
+ Project-URL: Repository, https://github.com/sigdelsanjog/gptmed
+ Project-URL: Issues, https://github.com/sigdelsanjog/gptmed/issues
+ Keywords: nlp,language-model,transformer,gpt,pytorch,qa,question-answering,training,deep-learning,custom-model
+ Classifier: Development Status :: 3 - Alpha
+ Classifier: Intended Audience :: Developers
+ Classifier: Intended Audience :: Science/Research
+ Classifier: Intended Audience :: Education
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Programming Language :: Python :: 3.8
+ Classifier: Programming Language :: Python :: 3.9
+ Classifier: Programming Language :: Python :: 3.10
+ Classifier: Programming Language :: Python :: 3.11
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.8
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: torch>=2.0.0
+ Requires-Dist: sentencepiece>=0.1.99
+ Requires-Dist: numpy>=1.24.0
+ Requires-Dist: tqdm>=4.65.0
+ Provides-Extra: dev
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
+ Requires-Dist: black>=22.0.0; extra == "dev"
+ Requires-Dist: flake8>=4.0.0; extra == "dev"
+ Requires-Dist: mypy>=0.950; extra == "dev"
+ Provides-Extra: training
+ Requires-Dist: tensorboard>=2.10.0; extra == "training"
+ Requires-Dist: wandb>=0.13.0; extra == "training"
+ Dynamic: license-file
+
+ # GptMed 🤖
+
+ A lightweight GPT-based language model framework for training custom question-answering models on any domain. This package provides a transformer-based GPT architecture that you can train on your own Q&A datasets - whether it's casual conversations, technical support, education, or any other domain.
+
+ [![PyPI version](https://badge.fury.io/py/gptmed.svg)](https://badge.fury.io/py/gptmed)
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
+
+ ## Features
+
+ - 🧠 **Custom GPT Architecture**: Lightweight transformer model for any Q&A domain
+ - 🎯 **Domain-Agnostic**: Train on any question-answering dataset (casual chat, tech support, education, etc.)
+ - ⚡ **Fast Inference**: Optimized for quick question answering
+ - 🔧 **Flexible Training**: Easy to train on your own custom datasets
+ - 📦 **Lightweight**: Small model size suitable for edge deployment
+ - 🛠️ **Complete Toolkit**: Includes tokenizer training, model training, and inference utilities
+
+ ## Installation
+
+ ### From PyPI (Recommended)
+
+ ```bash
+ pip install gptmed
+ ```
+
+ ### From Source
+
+ ```bash
+ git clone https://github.com/sigdelsanjog/gptmed.git
+ cd gptmed
+ pip install -e .
+ ```
+
+ ### With Optional Dependencies
+
+ ```bash
+ # For development
+ pip install gptmed[dev]
+
+ # For training
+ pip install gptmed[training]
+
+ # All dependencies
+ pip install gptmed[dev,training]
+ ```
+
+ ## Quick Start
+
+ ### Inference (Generate Answers)
+
+ ```python
+ from gptmed.inference.generator import TextGenerator
+ from gptmed.model.architecture import GPTTransformer
+ from gptmed.model.configs.model_config import get_small_config
+
+ # Load model
+ config = get_small_config()
+ model = GPTTransformer(config)
+
+ # Load your trained checkpoint
+ # model.load_state_dict(torch.load('path/to/checkpoint.pt'))
+
+ # Create generator
+ generator = TextGenerator(
+     model=model,
+     tokenizer_path='path/to/tokenizer.model'
+ )
+
+ # Generate answer
+ question = "What's your favorite programming language?"
+ answer = generator.generate(
+     prompt=question,
+     max_length=100,
+     temperature=0.7
+ )
+
+ print(f"Q: {question}")
+ print(f"A: {answer}")
+ ```
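The checkpoint load above is left commented out. A hedged variant using standard PyTorch (not a package-specific API), assuming the file stores a bare state dict as the commented line implies; `map_location` lets a GPU-trained checkpoint load on a CPU-only machine:

```python
import torch

# 'path/to/checkpoint.pt' is a placeholder path, as in the README snippet.
state_dict = torch.load('path/to/checkpoint.pt', map_location='cpu')
model.load_state_dict(state_dict)
model.eval()  # disable dropout for deterministic inference
```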
+
+ ### Using Command Line
+
+ ```bash
+ # Generate answers
+ gptmed-generate --prompt "How do I train a custom model?" --max-length 100
+
+ # Train model
+ gptmed-train --model-size small --num-epochs 10 --batch-size 16
+ ```
+
+ ### Training Your Own Model
+
+ ```python
+ from gptmed.training.train import main
+ from gptmed.configs.train_config import get_default_config
+ from gptmed.model.configs.model_config import get_small_config
+
+ # Configure training
+ train_config = get_default_config()
+ train_config.batch_size = 16
+ train_config.num_epochs = 10
+ train_config.learning_rate = 3e-4
+
+ # Start training
+ main()
+ ```
+
+ ## Model Architecture
+
+ The model uses a custom GPT-based transformer architecture (see the sketch after this list):
+
+ - **Embedding**: Token + positional embeddings
+ - **Transformer Blocks**: Multi-head self-attention + feed-forward networks
+ - **Parameters**: ~10M (small), ~50M (medium)
+ - **Context Length**: 512 tokens
+ - **Vocabulary**: Custom SentencePiece tokenizer trained on your data
+
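A minimal sketch (ours, not from the package docs) that instantiates the small configuration from the Quick Start imports and checks the parameter count against the figures above:

```python
from gptmed.model.architecture import GPTTransformer
from gptmed.model.configs.model_config import get_small_config

config = get_small_config()
model = GPTTransformer(config)

# Sum element counts across all weight and bias tensors.
n_params = sum(p.numel() for p in model.parameters())
print(f"{n_params / 1e6:.1f}M parameters")  # expected: roughly 10M for "small"
```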
+ ## Configuration
+
+ ### Model Sizes
+
+ ```python
+ from gptmed.model.configs.model_config import (
+     get_tiny_config,    # ~2M parameters - for testing
+     get_small_config,   # ~10M parameters - recommended
+     get_medium_config   # ~50M parameters - higher quality
+ )
+ ```
+
+ ### Training Configuration
+
+ ```python
+ from gptmed.configs.train_config import TrainingConfig
+
+ config = TrainingConfig(
+     batch_size=16,
+     learning_rate=3e-4,
+     num_epochs=10,
+     warmup_steps=100,
+     grad_clip=1.0
+ )
+ ```
+
+ ## Project Structure
+
+ ```
+ gptmed/
+ ├── model/
+ │   ├── architecture/          # GPT transformer implementation
+ │   └── configs/               # Model configurations
+ ├── inference/
+ │   ├── generator.py           # Text generation
+ │   └── sampling.py            # Sampling strategies
+ ├── training/
+ │   ├── train.py               # Training script
+ │   ├── trainer.py             # Training loop
+ │   └── dataset.py             # Data loading
+ ├── tokenizer/
+ │   └── train_tokenizer.py     # SentencePiece tokenizer
+ ├── configs/
+ │   └── train_config.py        # Training configurations
+ └── utils/
+     ├── checkpoints.py         # Model checkpointing
+     └── logging.py             # Training logging
+ ```
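The `tokenizer/` module wraps SentencePiece; its exact wrapper API is not shown here, so this sketch uses the `sentencepiece` library directly to produce the `tokenizer.model` file that `TextGenerator` expects. The corpus path and vocabulary size are illustrative assumptions, not package defaults:

```python
import sentencepiece as spm

# Train a BPE tokenizer on a plain-text corpus, one sample per line.
spm.SentencePieceTrainer.train(
    input='data/corpus.txt',    # hypothetical corpus path
    model_prefix='tokenizer',   # writes tokenizer.model and tokenizer.vocab
    vocab_size=8000,            # assumed size; tune to your dataset
    model_type='bpe',
)

sp = spm.SentencePieceProcessor(model_file='tokenizer.model')
print(sp.encode('What is hypertension?', out_type=str))
```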
+
+ ## Requirements
+
+ - Python >= 3.8
+ - PyTorch >= 2.0.0
+ - sentencepiece >= 0.1.99
+ - numpy >= 1.24.0
+ - tqdm >= 4.65.0
+
+ ## Documentation
+
+ For detailed documentation, visit the [GitHub repository](https://github.com/sigdelsanjog/gptmed).
+
+ ### Key Guides
+
+ - [Training Guide](docs/training.md)
+ - [Inference Guide](docs/inference.md)
+ - [Model Architecture](docs/architecture.md)
+ - [API Reference](docs/api.md)
+
+ ## Performance
+
+ | Model Size | Parameters | Training Time | Inference Speed |
+ | ---------- | ---------- | ------------- | --------------- |
+ | Tiny       | ~2M        | 2 hours       | ~100 tokens/sec |
+ | Small      | ~10M       | 8 hours       | ~80 tokens/sec  |
+ | Medium     | ~50M       | 24 hours      | ~50 tokens/sec  |
+
+ _Tested on GTX 1080 8GB_
+
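A rough sketch of how such throughput figures can be reproduced with the `generator` from the Quick Start. The timing loop is ours, not the package's benchmark, and word splitting only approximates the model's token count:

```python
import time

prompt = "What is hypertension?"
start = time.perf_counter()
answer = generator.generate(prompt=prompt, max_length=100, temperature=0.7)
elapsed = time.perf_counter() - start

# Approximate rate: len(answer.split()) counts words, not model tokens.
print(f"~{len(answer.split()) / elapsed:.0f} words/sec")
```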
+ ## Examples
+
+ ### Medical Question Answering
+
+ ```python
+ # Example 1: Symptoms inquiry
+ question = "What are the early signs of Alzheimer's disease?"
+ answer = generator.generate(question, temperature=0.7)
+
+ # Example 2: Treatment information
+ question = "How is Type 2 diabetes treated?"
+ answer = generator.generate(question, temperature=0.6)
+
+ # Example 3: Medical definitions
+ question = "What is hypertension?"
+ answer = generator.generate(question, temperature=0.5)
+ ```
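The falling temperatures above (0.7 down to 0.5) trade diversity for determinism. A sketch of the standard temperature-scaled sampling step, shown for illustration and not necessarily the exact code in the package's `sampling.py`:

```python
import torch

def sample_next_token(logits: torch.Tensor, temperature: float) -> int:
    # Dividing logits by temperature < 1 sharpens the distribution (more
    # deterministic); temperature > 1 flattens it (more diverse).
    probs = torch.softmax(logits / temperature, dim=-1)
    return int(torch.multinomial(probs, num_samples=1))

logits = torch.randn(8000)  # hypothetical vocabulary-sized logit vector
print(sample_next_token(logits, temperature=0.7))
```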
+
+ ## Contributing
+
+ Contributions are welcome! Please feel free to submit a Pull Request.
+
+ 1. Fork the repository
+ 2. Create your feature branch (`git checkout -b feature/AmazingFeature`)
+ 3. Commit your changes (`git commit -m 'Add some AmazingFeature'`)
+ 4. Push to the branch (`git push origin feature/AmazingFeature`)
+ 5. Open a Pull Request
+
+ ## Citation
+
+ If you use this model in your research, please cite:
+
+ ```bibtex
+ @software{llm_med_2026,
+   author = {Sanjog Sigdel},
+   title  = {GptMed: A custom general-purpose GPT transformer for causal question answering},
+   year   = {2026},
+   url    = {https://github.com/sigdelsanjog/gptmed}
+ }
+ ```
+
+ ## License
+
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
+
+ ## Acknowledgments
+
+ - MedQuAD dataset creators
+ - PyTorch team
+
+ ## Disclaimer
+
+ ⚠️ **Medical Disclaimer**: This model is for research and educational purposes only. It should NOT be used for actual medical diagnosis or treatment decisions. Always consult qualified healthcare professionals for medical advice.
+
+ ## Support
+
+ - 📫 Issues: [GitHub Issues](https://github.com/sigdelsanjog/gptmed/issues)
+ - 💬 Discussions: [GitHub Discussions](https://github.com/sigdelsanjog/gptmed/discussions)
+ - 📧 Email: sanjog.sigdel@ku.edu.np
+
+ ## Changelog
+
+ See [CHANGELOG.md](CHANGELOG.md) for version history.
+
+ ---
+
+ Made with ❤️ for learning purposes
gptmed-0.0.1.dist-info/RECORD
@@ -0,0 +1,38 @@
+ gptmed/__init__.py,sha256=Hj1lpVY8kVBnVZMpz2Dk_9bQM-pgSpWaim4aYPG4i08,1130
+ gptmed/configs/__init__.py,sha256=yRa-zgPQ-OCzu8fvCrfWMG-CjF3dru3PZzknzm0oUaQ,23
+ gptmed/configs/train_config.py,sha256=KqfNBh9hdTTd_6gEAlrClU8sVFSlVDmZJOrf3cPwFe8,4657
+ gptmed/data/__init__.py,sha256=iAHeakB5pBAd7MkmarPPY0UKS9bTaO_winLZ23Y2O90,54
+ gptmed/data/parsers/__init__.py,sha256=BgVzXuZgeE5DUCC4SzN7vflL40wQ4Q4_4DmJ1Y43_nw,211
+ gptmed/data/parsers/medquad_parser.py,sha256=g3QCRiVBdcq8RdyuYH_qKFrHgU5KkHY59WfWxUwspP0,7974
+ gptmed/data/parsers/text_formatter.py,sha256=tVmnDBT54BbxX9BPKMXSPzzLmM39frDxKRKuz_HoRag,4072
+ gptmed/inference/__init__.py,sha256=NDPViXhOgpItC8n13T9axX4UH1E7mrjt6kJ5OfIwvMs,25
+ gptmed/inference/decoding_utils.py,sha256=zTDZYdl2jcGwSrcINXMw-5uoYuF4A9TSushhPxJi1o0,5041
+ gptmed/inference/generation_config.py,sha256=hpPyZUk1K6qGSBAoQx3Jm0_ZrrYld77ACxbIlCCCcVU,2813
+ gptmed/inference/generator.py,sha256=ZVd4sPSr6l4ov3AhNJib8YDNBERrs39-zdFZumluVnI,7889
+ gptmed/inference/sampling.py,sha256=B6fRlJafypuBMKJ0rTbsk6k8KXloXiIvroi7rN6ekBA,7947
+ gptmed/model/__init__.py,sha256=V44r-TSMaZObgHgeXRl2Ec9pkqWUkUVAf37xgtlZQO8,192
+ gptmed/model/architecture/__init__.py,sha256=9MpSAYwwZY-t1vBLIupuRtLD7CaOLJRENMh3zKx3M-4,970
+ gptmed/model/architecture/attention.py,sha256=Qk1eGl9glKWQbhcXJWmFkO5U3VHBq7OrsjVG0tPmgnY,6420
+ gptmed/model/architecture/decoder_block.py,sha256=n-Uo09TDcirKeWTWTNumldGOrx-b2Elb25lbF6cTYwg,3879
+ gptmed/model/architecture/embeddings.py,sha256=GoVXctC21MsNwyoIiOq7NX-v_DzYkbFcQAfvZ2fg66s,4717
+ gptmed/model/architecture/feedforward.py,sha256=uJ5QOlWX0ritKDQLUE7GPmMojelR9-sTI_BaYc4Ehfw,3232
+ gptmed/model/architecture/transformer.py,sha256=H1njPoy0Uam59JbA24C0olEDwPfhh3ev4HsUFRIC_0Y,6626
+ gptmed/model/configs/__init__.py,sha256=0ZfBO0k4yFaqh2yO7eVSQUZjHKp-Esjpdn4m6zwhLig,276
+ gptmed/model/configs/model_config.py,sha256=wI-i2Dw_pTdIKCDe1pqLvP3ky3YedEy7DwZYN5lwmKE,4673
+ gptmed/tokenizer/__init__.py,sha256=Cs6h9mtmh0hbqq1qvawRdggb7-GPKMnISOeNJFEuVqo,158
+ gptmed/tokenizer/tokenize_data.py,sha256=KgMtMfaz_RtOhN_CrvC267k9ujxRdO89rToVJ6nzdwg,9139
+ gptmed/tokenizer/train_tokenizer.py,sha256=f0Hucyft9e8LU2RtpTqg8h_0SpOC_oMABl0_me-wfL8,7068
+ gptmed/training/__init__.py,sha256=6G0_gdlwBnQBG8wZlTm2NtgkXZJcXRfLMDQ2iu6O3U4,24
+ gptmed/training/dataset.py,sha256=QbNVTN4Og5gqMAV2ckjRX8W_k9aUc9IZJDcu0u9U8t0,5347
+ gptmed/training/train.py,sha256=d--RS5v8ZAWlqux74YHnX-HAmJB1WveK38VxAZ8x2Bo,8157
+ gptmed/training/trainer.py,sha256=qOOn5oUVvqQMAbO5KWddngk1QzxdHdyTXEwXrL8uS40,10732
+ gptmed/training/utils.py,sha256=pJxCwneNr2STITIYwIDCxRzIICDFOxOMzK8DT7ck2oQ,5651
+ gptmed/utils/__init__.py,sha256=XuMhIqOXF7mjnog_6Iky-hSbwvFb0iK42B4iDUpgi0U,44
+ gptmed/utils/checkpoints.py,sha256=L4q1-_4GbHCoD7QuEKYeQ-xXDTF-6sqZOxKQ_LT8YmQ,7112
+ gptmed/utils/logging.py,sha256=7dJc1tayMxCBjFSDXe4r9ACUTpoPTTGsJ0UZMTqZIDY,5303
+ gptmed-0.0.1.dist-info/licenses/LICENSE,sha256=v2spsd7N1pKFFh2G8wGP_45iwe5S0DYiJzG4im8Rupc,1066
+ gptmed-0.0.1.dist-info/METADATA,sha256=tVtGIXe76Iq0IKrHfS0FsFVuJ1_wlLcrejQzg2N6qyA,10196
+ gptmed-0.0.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ gptmed-0.0.1.dist-info/entry_points.txt,sha256=ATqOzTtPVdUiFX5ZSeo3n9JkUCqocUxEXTgy1CfNRZE,110
+ gptmed-0.0.1.dist-info/top_level.txt,sha256=mhyEq3rG33t21ziJz5w3TPgx0RjPf4zXMNUx2JTiNmE,7
+ gptmed-0.0.1.dist-info/RECORD,,
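Each RECORD entry is `path,sha256=<digest>,<size>`, where the digest is the urlsafe-base64-encoded SHA-256 of the file with `=` padding stripped (per PEP 376/427). A short sketch that recomputes one digest from an installed file:

```python
import base64
import hashlib

def record_digest(path: str) -> str:
    # urlsafe base64 of the raw SHA-256 digest, '=' padding removed.
    with open(path, 'rb') as f:
        digest = hashlib.sha256(f.read()).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b'=').decode('ascii')

# For the 1130-byte gptmed/__init__.py listed above, this should print
# Hj1lpVY8kVBnVZMpz2Dk_9bQM-pgSpWaim4aYPG4i08
print(record_digest('gptmed/__init__.py'))
```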
gptmed-0.0.1.dist-info/WHEEL
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.9.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
gptmed-0.0.1.dist-info/entry_points.txt
@@ -0,0 +1,3 @@
+ [console_scripts]
+ gptmed-generate = gptmed.inference.generator:main
+ gptmed-train = gptmed.training.train:main
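Each console script maps to a module-level `main()`. A hedged way to invoke the same entry point without the wrapper script, assuming `main()` parses `sys.argv` (the flags are taken from the README's CLI examples):

```python
import sys

from gptmed.inference.generator import main

# Equivalent to: gptmed-generate --prompt "What is hypertension?" --max-length 100
sys.argv = ['gptmed-generate', '--prompt', 'What is hypertension?', '--max-length', '100']
main()
```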
gptmed-0.0.1.dist-info/licenses/LICENSE
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2026 Sanjog Sigdel
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
gptmed-0.0.1.dist-info/top_level.txt
@@ -0,0 +1 @@
+ gptmed