eole 0.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (133) hide show
  1. eole-0.5.0/LICENSE.md +21 -0
  2. eole-0.5.0/PKG-INFO +227 -0
  3. eole-0.5.0/README.md +192 -0
  4. eole-0.5.0/eole/__init__.py +11 -0
  5. eole-0.5.0/eole/adapters/__init__.py +14 -0
  6. eole-0.5.0/eole/adapters/adapters.py +351 -0
  7. eole-0.5.0/eole/bin/__init__.py +57 -0
  8. eole-0.5.0/eole/bin/convert/HF_mappings.py +627 -0
  9. eole-0.5.0/eole/bin/convert/__init__.py +0 -0
  10. eole-0.5.0/eole/bin/convert/convert_HF.py +986 -0
  11. eole-0.5.0/eole/bin/convert/convert_T5.py +303 -0
  12. eole-0.5.0/eole/bin/convert/convert_onmt_config.py +133 -0
  13. eole-0.5.0/eole/bin/main.py +38 -0
  14. eole-0.5.0/eole/bin/run/__init__.py +47 -0
  15. eole-0.5.0/eole/bin/run/build_vocab.py +260 -0
  16. eole-0.5.0/eole/bin/run/predict.py +60 -0
  17. eole-0.5.0/eole/bin/run/serve.py +734 -0
  18. eole-0.5.0/eole/bin/run/train.py +210 -0
  19. eole-0.5.0/eole/bin/tools/LM_scoring.py +140 -0
  20. eole-0.5.0/eole/bin/tools/__init__.py +0 -0
  21. eole-0.5.0/eole/bin/tools/embeddings_to_torch.py +143 -0
  22. eole-0.5.0/eole/bin/tools/hellaswag.py +132 -0
  23. eole-0.5.0/eole/bin/tools/load_test.py +371 -0
  24. eole-0.5.0/eole/bin/tools/mbr_bleu.py +52 -0
  25. eole-0.5.0/eole/bin/tools/oracle_bleu.py +64 -0
  26. eole-0.5.0/eole/bin/tools/oracle_comet.py +101 -0
  27. eole-0.5.0/eole/bin/tools/run_mmlu.py +255 -0
  28. eole-0.5.0/eole/bin/tools/spm_to_vocab.py +33 -0
  29. eole-0.5.0/eole/config/__init__.py +108 -0
  30. eole-0.5.0/eole/config/cli.py +74 -0
  31. eole-0.5.0/eole/config/common.py +145 -0
  32. eole-0.5.0/eole/config/config.py +26 -0
  33. eole-0.5.0/eole/config/data.py +284 -0
  34. eole-0.5.0/eole/config/inference.py +150 -0
  35. eole-0.5.0/eole/config/models.py +897 -0
  36. eole-0.5.0/eole/config/run.py +236 -0
  37. eole-0.5.0/eole/config/training.py +320 -0
  38. eole-0.5.0/eole/constants.py +205 -0
  39. eole-0.5.0/eole/decoders/__init__.py +13 -0
  40. eole-0.5.0/eole/decoders/cnn_decoder.py +158 -0
  41. eole-0.5.0/eole/decoders/decoder.py +99 -0
  42. eole-0.5.0/eole/decoders/ensemble.py +217 -0
  43. eole-0.5.0/eole/decoders/rnn_decoder.py +323 -0
  44. eole-0.5.0/eole/decoders/transformer.py +605 -0
  45. eole-0.5.0/eole/encoders/__init__.py +17 -0
  46. eole-0.5.0/eole/encoders/cnn_encoder.py +74 -0
  47. eole-0.5.0/eole/encoders/deepseek_sam.py +540 -0
  48. eole-0.5.0/eole/encoders/encoder.py +52 -0
  49. eole-0.5.0/eole/encoders/mean_encoder.py +83 -0
  50. eole-0.5.0/eole/encoders/rnn_encoder.py +144 -0
  51. eole-0.5.0/eole/encoders/transformer.py +148 -0
  52. eole-0.5.0/eole/encoders/vision.py +400 -0
  53. eole-0.5.0/eole/inference_engine.py +622 -0
  54. eole-0.5.0/eole/inputters/__init__.py +0 -0
  55. eole-0.5.0/eole/inputters/dynamic_iterator.py +537 -0
  56. eole-0.5.0/eole/inputters/image_utils.py +318 -0
  57. eole-0.5.0/eole/inputters/inputter.py +148 -0
  58. eole-0.5.0/eole/inputters/text_corpus.py +485 -0
  59. eole-0.5.0/eole/inputters/text_utils.py +476 -0
  60. eole-0.5.0/eole/models/__init__.py +0 -0
  61. eole-0.5.0/eole/models/model.py +1127 -0
  62. eole-0.5.0/eole/models/model_saver.py +329 -0
  63. eole-0.5.0/eole/ops/__init__.py +329 -0
  64. eole-0.5.0/eole/predict/__init__.py +72 -0
  65. eole-0.5.0/eole/predict/beam_search.py +477 -0
  66. eole-0.5.0/eole/predict/decode_strategy.py +328 -0
  67. eole-0.5.0/eole/predict/encoder.py +149 -0
  68. eole-0.5.0/eole/predict/generator.py +264 -0
  69. eole-0.5.0/eole/predict/greedy_search.py +313 -0
  70. eole-0.5.0/eole/predict/inference.py +800 -0
  71. eole-0.5.0/eole/predict/penalties.py +98 -0
  72. eole-0.5.0/eole/predict/prediction.py +219 -0
  73. eole-0.5.0/eole/predict/translator.py +295 -0
  74. eole-0.5.0/eole/scorers/__init__.py +45 -0
  75. eole-0.5.0/eole/scorers/bleu.py +19 -0
  76. eole-0.5.0/eole/scorers/scorer.py +21 -0
  77. eole-0.5.0/eole/scorers/ter.py +19 -0
  78. eole-0.5.0/eole/tests/__init__.py +0 -0
  79. eole-0.5.0/eole/tests/test_attention.py +33 -0
  80. eole-0.5.0/eole/tests/test_beam_search.py +739 -0
  81. eole-0.5.0/eole/tests/test_data_prepare.py +121 -0
  82. eole-0.5.0/eole/tests/test_embeddings.py +115 -0
  83. eole-0.5.0/eole/tests/test_events.py +49 -0
  84. eole-0.5.0/eole/tests/test_greedy_search.py +494 -0
  85. eole-0.5.0/eole/tests/test_inference_engines.py +102 -0
  86. eole-0.5.0/eole/tests/test_models.py +346 -0
  87. eole-0.5.0/eole/tests/test_recipes.py +35 -0
  88. eole-0.5.0/eole/tests/test_simple.py +6 -0
  89. eole-0.5.0/eole/tests/test_subword_marker.py +489 -0
  90. eole-0.5.0/eole/tests/test_transform.py +787 -0
  91. eole-0.5.0/eole/tests/test_translator.py +34 -0
  92. eole-0.5.0/eole/tests/utils_for_tests.py +8 -0
  93. eole-0.5.0/eole/train_single.py +284 -0
  94. eole-0.5.0/eole/trainer.py +600 -0
  95. eole-0.5.0/eole/transforms/__init__.py +58 -0
  96. eole-0.5.0/eole/transforms/bart.py +415 -0
  97. eole-0.5.0/eole/transforms/clean.py +186 -0
  98. eole-0.5.0/eole/transforms/docify.py +133 -0
  99. eole-0.5.0/eole/transforms/fuzzymatch.py +174 -0
  100. eole-0.5.0/eole/transforms/inlinetags.py +307 -0
  101. eole-0.5.0/eole/transforms/insert_mask_before_placeholder.py +46 -0
  102. eole-0.5.0/eole/transforms/misc.py +304 -0
  103. eole-0.5.0/eole/transforms/normalize.py +315 -0
  104. eole-0.5.0/eole/transforms/sampling.py +242 -0
  105. eole-0.5.0/eole/transforms/terminology.py +275 -0
  106. eole-0.5.0/eole/transforms/tokenize.py +545 -0
  107. eole-0.5.0/eole/transforms/tokenize_id.py +121 -0
  108. eole-0.5.0/eole/transforms/transform.py +339 -0
  109. eole-0.5.0/eole/transforms/uppercase.py +47 -0
  110. eole-0.5.0/eole/utils/__init__.py +31 -0
  111. eole-0.5.0/eole/utils/alignment.py +208 -0
  112. eole-0.5.0/eole/utils/attention_entropy.py +176 -0
  113. eole-0.5.0/eole/utils/cnn_factory.py +102 -0
  114. eole-0.5.0/eole/utils/distributed.py +179 -0
  115. eole-0.5.0/eole/utils/distributed_workers.py +325 -0
  116. eole-0.5.0/eole/utils/earlystopping.py +207 -0
  117. eole-0.5.0/eole/utils/logging.py +33 -0
  118. eole-0.5.0/eole/utils/loss.py +399 -0
  119. eole-0.5.0/eole/utils/misc.py +171 -0
  120. eole-0.5.0/eole/utils/optimizers.py +485 -0
  121. eole-0.5.0/eole/utils/report_manager.py +147 -0
  122. eole-0.5.0/eole/utils/scoring_utils.py +126 -0
  123. eole-0.5.0/eole/utils/sentencepiece_model_pb2.py +46 -0
  124. eole-0.5.0/eole/utils/statistics.py +200 -0
  125. eole-0.5.0/eole.egg-info/PKG-INFO +227 -0
  126. eole-0.5.0/eole.egg-info/SOURCES.txt +131 -0
  127. eole-0.5.0/eole.egg-info/dependency_links.txt +1 -0
  128. eole-0.5.0/eole.egg-info/entry_points.txt +2 -0
  129. eole-0.5.0/eole.egg-info/requires.txt +26 -0
  130. eole-0.5.0/eole.egg-info/top_level.txt +1 -0
  131. eole-0.5.0/pyproject.toml +17 -0
  132. eole-0.5.0/setup.cfg +4 -0
  133. eole-0.5.0/setup.py +113 -0
eole-0.5.0/LICENSE.md ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024-Present EOLE
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
eole-0.5.0/PKG-INFO ADDED
@@ -0,0 +1,227 @@
1
+ Metadata-Version: 2.1
2
+ Name: eole
3
+ Version: 0.5.0
4
+ Summary: Open language modeling toolkit based on PyTorch
5
+ Project-URL: Source, https://github.com/eole-nlp/eole/
6
+ Requires-Python: >=3.10
7
+ Description-Content-Type: text/markdown
8
+ License-File: LICENSE.md
9
+ Requires-Dist: configargparse
10
+ Requires-Dist: ctranslate2<5,>=4
11
+ Requires-Dist: fastapi
12
+ Requires-Dist: fasttext-wheel
13
+ Requires-Dist: huggingface_hub
14
+ Requires-Dist: datasets
15
+ Requires-Dist: numpy>=2.0
16
+ Requires-Dist: pandas
17
+ Requires-Dist: protobuf==3.20.1
18
+ Requires-Dist: pyahocorasick
19
+ Requires-Dist: pyonmttok<2,>=1.38.1
20
+ Requires-Dist: pyyaml
21
+ Requires-Dist: rapidfuzz
22
+ Requires-Dist: rich
23
+ Requires-Dist: sacrebleu
24
+ Requires-Dist: safetensors
25
+ Requires-Dist: sentencepiece<0.1.98,>=0.1.94
26
+ Requires-Dist: six
27
+ Requires-Dist: spacy
28
+ Requires-Dist: subword-nmt>=0.3.7
29
+ Requires-Dist: tensorboard>=2.18.0
30
+ Requires-Dist: torch<2.11,>=2.8
31
+ Requires-Dist: torch-optimi
32
+ Requires-Dist: uvicorn
33
+ Requires-Dist: waitress
34
+ Requires-Dist: pydantic
35
+
36
+ # EOLE
37
+
38
+ [![Documentation](https://img.shields.io/badge/docs-latest-blue.svg)](https://eole-nlp.github.io/eole)
39
+
40
+ Open language modeling toolkit based on [PyTorch](https://pytorch.org) initially spun-off of OpenNMT-py
41
+
42
+
43
+ - New !!!! - Added Torch compile and Cudagraphs - as fast as vLLM / faster than CT2 on GPU. see [results](https://github.com/eole-nlp/eole/blob/main/benchmarks/genai/README.md)
44
+
45
+ Just reproduce with your own hardware:
46
+ ```
47
+ git clone https://github.com/eole-nlp/eole
48
+ cd eole
49
+ pip install -e .
50
+ cd benchmarks/genai
51
+ EOLE_TORCH_COMPILE="1" EOLE_COMPILE_MODE="0" python generate-eole.py
52
+ ```
53
+ First run will take 60-80 seconds to compile
54
+ Run it a second time and see the blast.
55
+
56
+
57
+ - New January 2026: Almost full refactor of the code: Encoders, Decoders, Adapters, Model classes, Trainer, Distributed training / Inference.
58
+
59
+ We aim to maintain the **research-friendly** approach of the original project while including latest architectures (LLMs) and various other techniques.
60
+ Our goal is to provide a comprehensive yet compact and modular codebase for experimenting with various types of language models (encoder, decoder, seq2seq).
61
+
62
+ ## HF Models supported
63
+
64
+ **tencent/HunyuanOCR** End-to-End OCR model by Tencent. Uses more image tokens than DeepSeek but a smaller LM. Results are impressive. (see [recipe](https://github.com/eole-nlp/eole/tree/main/recipes/hunyuanocr))
65
+ **deepseek-ai/DeepSeek-OCR** For now takes any image and rescales to 1024x1024 before processing (Gundam mode not implemented yet) - pdf_ocr to mmd replicated - check recipes
66
+ - **tencent/Hunyuan-MT-7B** SOTA NMT at WMT25, better than Towerplus-9B and EuroLLM-9B
67
+ - **Qwen/Qwen2/3** Non VL family. Includes Qwen3-30B-A3B
68
+ - **google/gemma-3-27b-it** All Gemma3 family - supports text and image input
69
+ - **Mistral-3.1-24B-instruct** supports all Mistral AI models (text and image input) - includes Ministral 3, Mixtral, Mathstral
70
+ - **meta-llama/Llama-3.X** models
71
+ - **microsoft/Phi-2/3** models
72
+
73
+ Of course you can train your own architecture (Decoder only, Encoder Only, or EncoderDecoder Model)
74
+
75
+ ## Latest developments
76
+
77
+ - **high inference speed** using Flash Attention (decoding with in-place KVCache), Vllm RMSNorm kernel, fused MLP Gate / activation, fused KVQ Linear.
78
+ - **prefixLM + split prompt/answer in src/tgt** optional method to feed your data
79
+ - **Pure-BF16 Training** thanks to [Kahan Summation](https://arxiv.org/pdf/2010.06192) implemented [here](https://optimi.benjaminwarner.dev/kahan_summation/)
80
+ - **Web-based (Google translator-like) interface** featuring the latest Hunyuan-MT-7B or EuroLLM-8B-Instruct LLM
81
+ - **Estimator layer** which enables to rescore multiple beams in the same model. Read article [here](https://medium.com/p/05b00b271a47) and [here](https://medium.com/p/7dccfe167814)
82
+ **Support Hugging Face Tokenizers** for better compatibility
83
+ - **Replicate CometKiwi(XL/XXL)** Encoder+Estimator models
84
+
85
+ ---
86
+
87
+ ## Key Features
88
+
89
+ - **Versatile Training and Inference**: Train from scratch, finetune, and infer models of various architectures including Transformer Encoder/Decoder/EncoderDecoder and RNN EncoderDecoder.
90
+ - **Dynamic Data Transforms**: Apply on-the-fly transformations in the dataloading logic for both training and inference.
91
+ - **Comprehensive LLM Support**: Includes converters for Llama, Mistral, Phi, Gemma ...
92
+ - **Advanced Quantization**: Support for 8-bit and 4-bit quantization, along with LoRA adapters, with or without checkpointing, as well as mixed precision (FP16).
93
+ - **Efficient Finetuning**: Finetune 7B and 13B models on a single RTX 24GB GPU using 4-bit quantization.
94
+ - **Flexible Inference**: Perform inference in 4-bit or 8-bit using the same layer quantization methods as in finetuning.
95
+ - **Tensor Parallelism**: Enable tensor parallelism for both training and inference when models exceed the memory capacity of a single GPU.
96
+
97
+ ## Work completed
98
+
99
+ We have made significant progress in several areas:
100
+
101
+ - **Configuration Management**: Streamlined through [pydantic](https://docs.pydantic.dev) models.
102
+ - **Command Line Entry Points**: Improved using structured subparsers for better organization.
103
+ - **Reproducible Recipes**: Provided for widely used models and tasks, ensuring consistency and reliability.
104
+ - **Core API Simplification**: Refined around the new configuration objects for ease of use.
105
+ - **Revamped Fast API based server**: see above example with EuroLLM-9B-Instruct
106
+
107
+ ---
108
+
109
+ ### Future Directions
110
+
111
+ There are still several exciting avenues to explore:
112
+
113
+ - **Documentation**: Enhance and expand the documentation for better user guidance.
114
+ - **Test Coverage**: Improve testing to ensure code reliability and performance.
115
+ - **Logging Enhancements**: Implement more sophisticated logging mechanisms.
116
+ - **Broader Model Support**: Extend support to include a wider range of open models, potentially multi-modal.
117
+
118
+ ## Setup
119
+
120
+ ### Using Docker
121
+
122
+ To facilitate setup and reproducibility, we provide Docker images via the GitHub Container Registry: [EOLE Docker Images](https://github.com/eole-nlp/eole/pkgs/container/eole).
123
+
124
+ You can customize the workflow and build your own images based on specific needs using `build.sh` and `Dockerfile` in the `docker` directory of the repository.
125
+
126
+
127
+ To pull the Docker image:
128
+ ```bash
129
+ docker pull ghcr.io/eole-nlp/eole:0.4.0-torch2.9.1-ubuntu22.04-cuda12.8
130
+ ```
131
+
132
+ Example one-liner to run a container and open a bash shell within it:
133
+ ```bash
134
+ docker run --rm -it --runtime=nvidia ghcr.io/eole-nlp/eole:0.4.0-torch2.9.1-ubuntu22.04-cuda12.8
135
+ ```
136
+
137
+ > **Note**: Ensure you have the [Nvidia Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) (formerly nvidia-docker) installed to take advantage of CUDA/GPU features.
138
+
139
+ Depending on your needs, you can add various flags:
140
+ - `-p 5000:5000`: Forward an exposed port from your container to your host.
141
+ - `-v /some/local/directory:/some/container/directory`: Mount a local directory to a container directory.
142
+ - `--entrypoint some_command`: Run a specific command as the container entry point (instead of the default bash shell).
143
+
144
+ ### Installing Locally
145
+
146
+ #### Requirements
147
+
148
+ - Python >= 3.10
149
+ - PyTorch >= 2.8 < 2.10
150
+
151
+ #### Installation from Source
152
+
153
+ To install from source:
154
+ ```bash
155
+ git clone https://github.com/eole-nlp/eole
156
+ cd eole
157
+ pip install -e .
158
+ ```
159
+
160
+ #### Installation from PyPI
161
+
162
+ Installation from PyPI will be available soon.
163
+
164
+ #### Notes
165
+
166
+ If you encounter a `MemoryError` during installation, try using `pip` with the `--no-cache-dir` option.
167
+
168
+ (Optional) Some advanced features (e.g., pretrained models or specific transforms) require extra packages. Install them with:
169
+ ```bash
170
+ pip install -r requirements.opt.txt
171
+ ```
172
+
173
+ ### Manual Installation of Some Dependencies
174
+
175
+ #### Flash Attention
176
+
177
+ To use [Flash Attention](https://github.com/Dao-AILab/flash-attention#installation-and-features), install it manually:
178
+ ```bash
179
+ pip install flash-attn --no-build-isolation
180
+ ```
181
+
182
+ #### AWQ
183
+
184
+ For inference or quantizing an AWQ model, AutoAWQ is required. Install it with:
185
+ ```bash
186
+ pip install autoawq
187
+ ```
188
+
189
+ For more details, refer to [AutoAWQ](https://github.com/casper-hansen/AutoAWQ).
190
+
191
+
192
+ ## Notes on Mixed-precision or Low precision Training
193
+
194
+ Until Feb 25, we used torch optimizers with or without AMP (mixed precision) or "fusedadam" which was an old implementation of Apex/Nvidia using FP16 with dynamic loss scaling and without FP32 master weights.
195
+ As of 0.2 "fusedadam" is deprecated and we implemented pure-BF16 training.
196
+
197
+ As a result, config flags are now:
198
+
199
+ For FP16-amp or BF16-amp training (using pytorch optimizers and amp implementation)
200
+ ```
201
+ compute_dtype: fp16 or bf16
202
+ use_amp: true
203
+ optim: adam or adamw
204
+ ```
205
+ Special note: even though it may not be logical, we still use the torch GradScaler in BF16-AMP. Even if the BF16 range is similar to FP32, scaling prevents underflow.
206
+ We tested BF16-AMP without the GradScaler and it does not give good results.
207
+
208
+
209
+ For pure-bf16 training (using torch-optimi and kahan summation)
210
+ ```
211
+ compute_dtype: bf16
212
+ use_amp: false
213
+ optim: adam or adamw
214
+ ```
215
+ Pure-BF16 training is faster than AMP and the memory footprint is reduced (master weights are kept in BF16 vs FP32). However, Kahan Summation is not magical: results are good but not as good as AMP.
216
+ Use this feature mainly when memory footprint is an issue with LLMs.
217
+
218
+
219
+ ---
220
+
221
+ ## Contributing
222
+
223
+ We love contributions! Please look at issues marked with the [contributions welcome](https://github.com/eole-nlp/eole/issues?q=is%3Aissue+is%3Aopen+label%3A%22contributions+welcome%22) tag.
224
+
225
+ Before raising an issue, make sure you read the requirements and the [Full Documentation](https://eole-nlp.github.io/eole). You can also check if a [Recipe](https://github.com/eole-nlp/eole/tree/main/recipes) fits your use case.
226
+
227
+ Unless there is a bug, please use the [Discussions](https://github.com/eole-nlp/eole/discussions) tab to ask questions or propose new topics/features.
eole-0.5.0/README.md ADDED
@@ -0,0 +1,192 @@
1
+ # EOLE
2
+
3
+ [![Documentation](https://img.shields.io/badge/docs-latest-blue.svg)](https://eole-nlp.github.io/eole)
4
+
5
+ Open language modeling toolkit based on [PyTorch](https://pytorch.org) initially spun-off of OpenNMT-py
6
+
7
+
8
+ - New !!!! - Added Torch compile and Cudagraphs - as fast as vLLM / faster than CT2 on GPU. see [results](https://github.com/eole-nlp/eole/blob/main/benchmarks/genai/README.md)
9
+
10
+ Just reproduce with your own hardware:
11
+ ```
12
+ git clone https://github.com/eole-nlp/eole
13
+ cd eole
14
+ pip install -e .
15
+ cd benchmarks/genai
16
+ EOLE_TORCH_COMPILE="1" EOLE_COMPILE_MODE="0" python generate-eole.py
17
+ ```
18
+ First run will take 60-80 seconds to compile
19
+ Run it a second time and see the blast.
20
+
21
+
22
+ - New January 2026: Almost full refactor of the code: Encoders, Decoders, Adapters, Model classes, Trainer, Distributed training / Inference.
23
+
24
+ We aim to maintain the **research-friendly** approach of the original project while including latest architectures (LLMs) and various other techniques.
25
+ Our goal is to provide a comprehensive yet compact and modular codebase for experimenting with various types of language models (encoder, decoder, seq2seq).
26
+
27
+ ## HF Models supported
28
+
29
+ **tencent/HunyuanOCR** End-to-End OCR model by Tencent. Uses more image tokens than DeepSeek but a smaller LM. Results are impressive. (see [recipe](https://github.com/eole-nlp/eole/tree/main/recipes/hunyuanocr))
30
+ **deepseek-ai/DeepSeek-OCR** For now takes any image and rescales to 1024x1024 before processing (Gundam mode not implemented yet) - pdf_ocr to mmd replicated - check recipes
31
+ - **tencent/Hunyuan-MT-7B** SOTA NMT at WMT25, better than Towerplus-9B and EuroLLM-9B
32
+ - **Qwen/Qwen2/3** Non VL family. Includes Qwen3-30B-A3B
33
+ - **google/gemma-3-27b-it** All Gemma3 family - supports text and image input
34
+ - **Mistral-3.1-24B-instruct** supports all Mistral AI models (text and image input) - includes Ministral 3, Mixtral, Mathstral
35
+ - **meta-llama/Llama-3.X** models
36
+ - **microsoft/Phi-2/3** models
37
+
38
+ Of course you can train your own architecture (Decoder only, Encoder Only, or EncoderDecoder Model)
39
+
40
+ ## Latest developments
41
+
42
+ - **high inference speed** using Flash Attention (decoding with in-place KVCache), Vllm RMSNorm kernel, fused MLP Gate / activation, fused KVQ Linear.
43
+ - **prefixLM + split prompt/answer in src/tgt** optional method to feed your data
44
+ - **Pure-BF16 Training** thanks to [Kahan Summation](https://arxiv.org/pdf/2010.06192) implemented [here](https://optimi.benjaminwarner.dev/kahan_summation/)
45
+ - **Web-based (Google translator-like) interface** featuring the latest Hunyuan-MT-7B or EuroLLM-8B-Instruct LLM
46
+ - **Estimator layer** which enables to rescore multiple beams in the same model. Read article [here](https://medium.com/p/05b00b271a47) and [here](https://medium.com/p/7dccfe167814)
47
+ **Support Hugging Face Tokenizers** for better compatibility
48
+ - **Replicate CometKiwi(XL/XXL)** Encoder+Estimator models
49
+
50
+ ---
51
+
52
+ ## Key Features
53
+
54
+ - **Versatile Training and Inference**: Train from scratch, finetune, and infer models of various architectures including Transformer Encoder/Decoder/EncoderDecoder and RNN EncoderDecoder.
55
+ - **Dynamic Data Transforms**: Apply on-the-fly transformations in the dataloading logic for both training and inference.
56
+ - **Comprehensive LLM Support**: Includes converters for Llama, Mistral, Phi, Gemma ...
57
+ - **Advanced Quantization**: Support for 8-bit and 4-bit quantization, along with LoRA adapters, with or without checkpointing, as well as mixed precision (FP16).
58
+ - **Efficient Finetuning**: Finetune 7B and 13B models on a single RTX 24GB GPU using 4-bit quantization.
59
+ - **Flexible Inference**: Perform inference in 4-bit or 8-bit using the same layer quantization methods as in finetuning.
60
+ - **Tensor Parallelism**: Enable tensor parallelism for both training and inference when models exceed the memory capacity of a single GPU.
61
+
62
+ ## Work completed
63
+
64
+ We have made significant progress in several areas:
65
+
66
+ - **Configuration Management**: Streamlined through [pydantic](https://docs.pydantic.dev) models.
67
+ - **Command Line Entry Points**: Improved using structured subparsers for better organization.
68
+ - **Reproducible Recipes**: Provided for widely used models and tasks, ensuring consistency and reliability.
69
+ - **Core API Simplification**: Refined around the new configuration objects for ease of use.
70
+ - **Revamped Fast API based server**: see above example with EuroLLM-9B-Instruct
71
+
72
+ ---
73
+
74
+ ### Future Directions
75
+
76
+ There are still several exciting avenues to explore:
77
+
78
+ - **Documentation**: Enhance and expand the documentation for better user guidance.
79
+ - **Test Coverage**: Improve testing to ensure code reliability and performance.
80
+ - **Logging Enhancements**: Implement more sophisticated logging mechanisms.
81
+ - **Broader Model Support**: Extend support to include a wider range of open models, potentially multi-modal.
82
+
83
+ ## Setup
84
+
85
+ ### Using Docker
86
+
87
+ To facilitate setup and reproducibility, we provide Docker images via the GitHub Container Registry: [EOLE Docker Images](https://github.com/eole-nlp/eole/pkgs/container/eole).
88
+
89
+ You can customize the workflow and build your own images based on specific needs using `build.sh` and `Dockerfile` in the `docker` directory of the repository.
90
+
91
+
92
+ To pull the Docker image:
93
+ ```bash
94
+ docker pull ghcr.io/eole-nlp/eole:0.4.0-torch2.9.1-ubuntu22.04-cuda12.8
95
+ ```
96
+
97
+ Example one-liner to run a container and open a bash shell within it:
98
+ ```bash
99
+ docker run --rm -it --runtime=nvidia ghcr.io/eole-nlp/eole:0.4.0-torch2.9.1-ubuntu22.04-cuda12.8
100
+ ```
101
+
102
+ > **Note**: Ensure you have the [Nvidia Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html) (formerly nvidia-docker) installed to take advantage of CUDA/GPU features.
103
+
104
+ Depending on your needs, you can add various flags:
105
+ - `-p 5000:5000`: Forward an exposed port from your container to your host.
106
+ - `-v /some/local/directory:/some/container/directory`: Mount a local directory to a container directory.
107
+ - `--entrypoint some_command`: Run a specific command as the container entry point (instead of the default bash shell).
108
+
109
+ ### Installing Locally
110
+
111
+ #### Requirements
112
+
113
+ - Python >= 3.10
114
+ - PyTorch >= 2.8 < 2.10
115
+
116
+ #### Installation from Source
117
+
118
+ To install from source:
119
+ ```bash
120
+ git clone https://github.com/eole-nlp/eole
121
+ cd eole
122
+ pip install -e .
123
+ ```
124
+
125
+ #### Installation from PyPI
126
+
127
+ Installation from PyPI will be available soon.
128
+
129
+ #### Notes
130
+
131
+ If you encounter a `MemoryError` during installation, try using `pip` with the `--no-cache-dir` option.
132
+
133
+ (Optional) Some advanced features (e.g., pretrained models or specific transforms) require extra packages. Install them with:
134
+ ```bash
135
+ pip install -r requirements.opt.txt
136
+ ```
137
+
138
+ ### Manual Installation of Some Dependencies
139
+
140
+ #### Flash Attention
141
+
142
+ To use [Flash Attention](https://github.com/Dao-AILab/flash-attention#installation-and-features), install it manually:
143
+ ```bash
144
+ pip install flash-attn --no-build-isolation
145
+ ```
146
+
147
+ #### AWQ
148
+
149
+ For inference or quantizing an AWQ model, AutoAWQ is required. Install it with:
150
+ ```bash
151
+ pip install autoawq
152
+ ```
153
+
154
+ For more details, refer to [AutoAWQ](https://github.com/casper-hansen/AutoAWQ).
155
+
156
+
157
+ ## Notes on Mixed-precision or Low precision Training
158
+
159
+ Until Feb 25, we used torch optimizers with or without AMP (mixed precision) or "fusedadam" which was an old implementation of Apex/Nvidia using FP16 with dynamic loss scaling and without FP32 master weights.
160
+ As of 0.2 "fusedadam" is deprecated and we implemented pure-BF16 training.
161
+
162
+ As a result, config flags are now:
163
+
164
+ For FP16-amp or BF16-amp training (using pytorch optimizers and amp implementation)
165
+ ```
166
+ compute_dtype: fp16 or bf16
167
+ use_amp: true
168
+ optim: adam or adamw
169
+ ```
170
+ Special note: even though it may not be logical, we still use the torch GradScaler in BF16-AMP. Even if the BF16 range is similar to FP32, scaling prevents underflow.
171
+ We tested BF16-AMP without the GradScaler and it does not give good results.
172
+
173
+
174
+ For pure-bf16 training (using torch-optimi and kahan summation)
175
+ ```
176
+ compute_dtype: bf16
177
+ use_amp: false
178
+ optim: adam or adamw
179
+ ```
180
+ Pure-BF16 training is faster than AMP and the memory footprint is reduced (master weights are kept in BF16 vs FP32). However, Kahan Summation is not magical: results are good but not as good as AMP.
181
+ Use this feature mainly when memory footprint is an issue with LLMs.
182
+
183
+
184
+ ---
185
+
186
+ ## Contributing
187
+
188
+ We love contributions! Please look at issues marked with the [contributions welcome](https://github.com/eole-nlp/eole/issues?q=is%3Aissue+is%3Aopen+label%3A%22contributions+welcome%22) tag.
189
+
190
+ Before raising an issue, make sure you read the requirements and the [Full Documentation](https://eole-nlp.github.io/eole). You can also check if a [Recipe](https://github.com/eole-nlp/eole/tree/main/recipes) fits your use case.
191
+
192
+ Unless there is a bug, please use the [Discussions](https://github.com/eole-nlp/eole/discussions) tab to ask questions or propose new topics/features.
@@ -0,0 +1,11 @@
1
+ import os
2
+
3
+ __version__ = "0.5.0"
4
+ ROOT_DIR = os.path.abspath(os.path.dirname(__file__))
5
+ EOLE_TORCH_COMPILE = os.environ.get("EOLE_TORCH_COMPILE", "0") == "1"
6
+
7
+ EOLE_COMPILE_MODE = os.environ.get("EOLE_COMPILE_MODE", "2")
8
+ # Mode = 0 : Decoder Level - cudagraphs True
9
+ # Mode = 1 : Decoder Level - cudagraphs False
10
+ # Mode = 2 : Decoder Layer Level - cudagraphs True
11
+ # Mode = 3 : Decoder Layer Level - cudagraphs False
@@ -0,0 +1,14 @@
1
+ """Module defining adapters / mm projectors."""
2
+
3
+ from eole.adapters.adapters import VisionLanguageAdapter
4
+ from eole.adapters.adapters import Gemma3MultiModalProjector
5
+ from eole.adapters.adapters import DeepSeekOCRProjector
6
+ from eole.adapters.adapters import HunYuanVisionPatchMerger
7
+
8
+
9
+ str2adapter = {
10
+ "llava": VisionLanguageAdapter,
11
+ "gemma3": Gemma3MultiModalProjector,
12
+ "deepseekocr": DeepSeekOCRProjector,
13
+ "hunyuanocr": HunYuanVisionPatchMerger,
14
+ }