llmflowstack 1.2.6__tar.gz → 1.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41)
  1. {llmflowstack-1.2.6 → llmflowstack-1.3.0}/PKG-INFO +33 -132
  2. {llmflowstack-1.2.6 → llmflowstack-1.3.0}/README.md +27 -120
  3. llmflowstack-1.3.0/llmflowstack/__init__.py +27 -0
  4. llmflowstack-1.3.0/llmflowstack/callbacks/force_json.py +428 -0
  5. llmflowstack-1.3.0/llmflowstack/collators/multimodal_causal.py +122 -0
  6. llmflowstack-1.3.0/llmflowstack/decoders/__init__.py +18 -0
  7. llmflowstack-1.3.0/llmflowstack/decoders/base_decoder.py +694 -0
  8. llmflowstack-1.3.0/llmflowstack/decoders/gemma_3.py +143 -0
  9. llmflowstack-1.3.0/llmflowstack/decoders/gpt_2.py +106 -0
  10. llmflowstack-1.3.0/llmflowstack/decoders/gpt_oss.py +174 -0
  11. llmflowstack-1.3.0/llmflowstack/decoders/llama_3.py +123 -0
  12. llmflowstack-1.3.0/llmflowstack/decoders/llama_4.py +134 -0
  13. llmflowstack-1.3.0/llmflowstack/decoders/medgemma.py +169 -0
  14. llmflowstack-1.3.0/llmflowstack/decoders/qwen_3.py +194 -0
  15. {llmflowstack-1.2.6 → llmflowstack-1.3.0}/llmflowstack/rag/VectorDatabase.py +48 -14
  16. {llmflowstack-1.2.6 → llmflowstack-1.3.0}/llmflowstack/rag/__init__.py +0 -4
  17. llmflowstack-1.3.0/llmflowstack/schemas/__init__.py +6 -0
  18. llmflowstack-1.3.0/llmflowstack/schemas/params.py +106 -0
  19. llmflowstack-1.3.0/pyproject.toml +37 -0
  20. llmflowstack-1.2.6/llmflowstack/__init__.py +0 -23
  21. llmflowstack-1.2.6/llmflowstack/callbacks/stop_on_token.py +0 -16
  22. llmflowstack-1.2.6/llmflowstack/decoders/BaseDecoder.py +0 -487
  23. llmflowstack-1.2.6/llmflowstack/decoders/GPT_OSS.py +0 -300
  24. llmflowstack-1.2.6/llmflowstack/decoders/Gemma.py +0 -327
  25. llmflowstack-1.2.6/llmflowstack/decoders/LLaMA3.py +0 -244
  26. llmflowstack-1.2.6/llmflowstack/decoders/LLaMA4.py +0 -324
  27. llmflowstack-1.2.6/llmflowstack/decoders/MedGemma.py +0 -275
  28. llmflowstack-1.2.6/llmflowstack/decoders/__init__.py +0 -13
  29. llmflowstack-1.2.6/llmflowstack/schemas/__init__.py +0 -9
  30. llmflowstack-1.2.6/llmflowstack/schemas/params.py +0 -40
  31. llmflowstack-1.2.6/llmflowstack/utils/generation_utils.py +0 -30
  32. llmflowstack-1.2.6/pyproject.toml +0 -43
  33. {llmflowstack-1.2.6 → llmflowstack-1.3.0}/.github/workflows/python-publish.yml +0 -0
  34. {llmflowstack-1.2.6 → llmflowstack-1.3.0}/.gitignore +0 -0
  35. {llmflowstack-1.2.6 → llmflowstack-1.3.0}/LICENSE +0 -0
  36. {llmflowstack-1.2.6 → llmflowstack-1.3.0}/llmflowstack/callbacks/__init__.py +0 -0
  37. {llmflowstack-1.2.6 → llmflowstack-1.3.0}/llmflowstack/callbacks/log_collector.py +0 -0
  38. {llmflowstack-1.2.6 → llmflowstack-1.3.0}/llmflowstack/utils/__init__.py +0 -0
  39. {llmflowstack-1.2.6 → llmflowstack-1.3.0}/llmflowstack/utils/evaluation_methods.py +0 -0
  40. {llmflowstack-1.2.6 → llmflowstack-1.3.0}/llmflowstack/utils/exceptions.py +0 -0
  41. {llmflowstack-1.2.6 → llmflowstack-1.3.0}/llmflowstack/utils/logging.py +0 -0
{llmflowstack-1.2.6 → llmflowstack-1.3.0}/PKG-INFO

@@ -1,35 +1,29 @@
  Metadata-Version: 2.4
  Name: llmflowstack
- Version: 1.2.6
- Summary: LLMFlowStack is a framework for training and using LLMs (LLaMA, GPT-OSS, Gemma, ...). Supports DAPT, fine-tuning, and distributed inference. Public fork without institution-specific components.
+ Version: 1.3.0
+ Summary: LLMFlowStack is a framework for training and using LLMs (LLaMA, GPT-OSS, Gemma, ...). Supports DAPT, fine-tuning, and distributed inference.
  Author-email: Gustavo Henrique Ferreira Cruz <gustavohferreiracruz@gmail.com>
  License: MIT
  License-File: LICENSE
  Requires-Python: >=3.12
  Requires-Dist: accelerate
  Requires-Dist: bert-score
- Requires-Dist: bitsandbytes
  Requires-Dist: chromadb
  Requires-Dist: datasets
  Requires-Dist: evaluate
- Requires-Dist: huggingface-hub
+ Requires-Dist: fbgemm-gpu-genai
  Requires-Dist: kernels
  Requires-Dist: langchain-chroma
  Requires-Dist: langchain-community
  Requires-Dist: nltk
- Requires-Dist: numpy
- Requires-Dist: openai-harmony
- Requires-Dist: pandas
  Requires-Dist: peft
+ Requires-Dist: pillow
  Requires-Dist: rouge-score
  Requires-Dist: safetensors
- Requires-Dist: scikit-learn
- Requires-Dist: scipy
  Requires-Dist: sentence-transformers
  Requires-Dist: torch
- Requires-Dist: torchvision
- Requires-Dist: tqdm
- Requires-Dist: transformers
+ Requires-Dist: torchao
+ Requires-Dist: transformers==4.57.6
  Requires-Dist: triton
  Requires-Dist: trl
  Description-Content-Type: text/markdown
@@ -53,32 +47,23 @@ The goal is to make experimentation with LLMs more accessible, without the need
  This framework is designed to provide flexibility when working with different open-source and commercial LLMs. Currently, the following models are supported:

  - **GPT-OSS**
-
    - [`GPT-OSS 20B`](https://huggingface.co/openai/gpt-oss-20b)
    - [`GPT-OSS 120B`](https://huggingface.co/openai/gpt-oss-120b)
-   > Fine-Tuning, DAPT and Inference Available

  - **LLaMA 3**
-
    - [`LLaMA 3.1 8B - Instruct`](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct)
    - [`LLaMA 3.1 70B - Instruct`](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct)
    - [`LLaMA 3.3 70B - Instruct`](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct)
    - [`LLaMA 3.3 405B - Instruct`](https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct)
-   > Fine-Tuning, DAPT and Inference Available

  - **LLaMA 4**
-
    - [`LLaMA 4 Scout - Instruct`](https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct)
-   > DAPT and Inference Available

  - **Gemma**
-
    - [`Gemma 3 27B - Instruct`](https://huggingface.co/google/gemma-3-27b-it)
-   > DAPT and Inference Available

  - **MedGemma**
-   - [`MedGemma 27B Text - Instruct`](https://huggingface.co/google/medgemma-27b-text-it)
-   > Fine-Tuning, DAPT and Inference Available
+   - [`MedGemma 27B - Instruct`](https://huggingface.co/google/medgemma-27b-it)

  > Other architectures based on those **may** function correctly.

@@ -101,22 +86,22 @@ This section presents a bit of what you can do with the framework.
  You can load as many models as your hardware allows (H100 GPU recommended)...

  ```python
- from llmflowstack import GPT_OSS, LLaMA3
+ from llmflowstack import GptOss, Llama3

- # Loading a LLaMA model
- first_model = LLaMA3()
+ # Loading a Llama model
+ first_model = Llama3()
  first_model.load_checkpoint(
      checkpoint="/llama-3.1-8b-Instruct",
  )

- # Loading a quantized LLaMA model
- second_model = LLaMA3(
+ # Loading a quantized Llama model
+ second_model = Llama3(
      checkpoint="/llama-3.3-70b-Instruct",
      quantization="4bit"
  )

  # Loading a GPT-OSS, quantized and with seed
- thrid_model = GPT_OSS(
+ thrid_model = GptOss(
      checkpoint="/gpt-oss-20b",
      quantization=True,
      seed=1234
@@ -126,32 +111,31 @@ thrid_model = GPT_OSS(
  ### Inference Examples

  ```python
- > from llmflowstack import GPT_OSS, GenerationParams, GenerationSampleParams
+ > from llmflowstack import GptOss, GenerationParams

- > gpt_oss_model = GPT_OSS(checkpoint="/gpt-oss-120b")
+ > gpt_oss_model = GptOss(checkpoint="/gpt-oss-120b")

  > gpt_oss_model.generate("Tell me a joke!")
  'Why did the scarecrow become a successful motivational speaker? Because he was outstanding **in** his field! 🌾😄'

  # Exclusive for GPT-OSS
- > gpt_oss_model.set_reasoning_level("High")
+ > gpt_oss_model.set_reasoning_level("High") # Low, Medium, High, Off

  > custom_input = gpt_oss_model.build_input(
      input_text="Tell me another joke!",
      developer_message="You are a clown and after every joke, you should say 'HONK HONK'"
  )
  > gpt_oss_model.generate(
-     input=custom_input,
+     data=custom_input,
      params=GenerationParams(
+         mode="sample", # greedy, sample or beam
          max_new_tokens=1024,
-         sample=GenerationSampleParams(
-             temperature=0.3
-         )
+         temperature=0.3
      )
  )
  'Why did the scarecrow win an award? Because he was outstanding in his field! \n\nHONK HONK'

- > llama_model = LLaMA3(checkpoint="/llama-3.3-70B-Instruct", quantization="4bit")
+ > llama_model = Llama3(checkpoint="/llama-3.3-70B-Instruct", quantization="4bit")
  > llama_model.generate("Why is the sky blue?")
  'The sky appears blue because of a phenomenon called Rayleigh scattering, which is the scattering of light'

@@ -162,7 +146,7 @@ thrid_model = GPT_OSS(
  You can also generate tokens using a streamer, that is, receiving one token at a time by using the iterator version of the generate function:

  ```python
- llama_4 = LLaMA4(
+ llama_4 = Llama4(
      checkpoint="llama-4-scout-17b-16e-instruct"
  )

@@ -175,10 +159,10 @@ for text in it:
  ### Training Examples (DAPT & Fine-tune)

  ```python
- from llmflowstack import LLaMA3
+ from llmflowstack import Llama3
  from llmflowstack.schemas import TrainParams

- model = LLaMA3(
+ model = Llama3(
      checkpoint="llama-3.1-8b-Instruct"
  )

@@ -186,28 +170,29 @@ model = LLaMA3(
  dataset = []
  dataset.append(model.build_input(
      input_text="Chico is a cat, which color he is?",
-     expected_answer="Black!"
+     output_text="Black!"
  ))

  dataset.append(model.build_input(
      input_text="Fred is a dog, which color he is?",
-     expected_answer="White!"
+     output_text="White!"
  ))

  # Does the DAPT in the full model
- model.dapt(
-     train_dataset=dataset,
+ model.train(
+     train_data=dataset,
      params=TrainParams(
          batch_size=1,
          epochs=3,
          gradient_accumulation=1,
          lr=2e-5
-     )
+     ),
+     mode="DAPT"
  )

  # Does the fine-tune this time
- model.fine_tune(
-     train_dataset=dataset,
+ model.train(
+     train_data=dataset,
      params=TrainParams(
          batch_size=1,
          gradient_accumulation=1,
@@ -216,7 +201,8 @@ model.fine_tune(
      ),
      save_at_end=True,
      # It will save the model
-     save_path="./output"
+     save_path="./output",
+     mode="FT"
  )

  # Saving the final result
@@ -224,88 +210,3 @@ model.save_checkpoint(
      path="./model-output"
  )
  ```
-
- ### RAG Pipeline
-
- A prototype of a RAG pipeline is also available. You can instantiate and use it as follows:
-
- ```python
- from llmflowstack import VectorDatabase
-
- vector_db = VectorDatabase(
-     checkpoint="jina-embeddings-v4",
-     chunk_size=1000,
-     chunk_overlap=200
- )
-
- # Create or load an existing collection
- vector_db.get_collection(
-     collection_name="memory_rag",
-     persist_directory="./memory"
- )
-
- vector_db.get_collection(
-     collection_name="files_rag",
-     persist_directory="./files"
- )
-
- # You may also omit the persist directory; in this case, the RAG data will be stored in memory
- vector_db.get_collection(
-     collection_name="files_rag"
- )
-
- # To create a new document in a collection
- vector_db.create(
-     collection_name="memory_rag",
-     information="User loves Pizza!", # Main information to be indexed in the vector database
-     other_info={"category": "food"},
-     can_split=False, # Indicates whether the information can be split into chunks
-     should_index=True # Defaults to True — defines whether the document should be indexed or only returned as a Document instance
- )
-
- # After adding documents, you can query the database
- query_result = vector_db.query(
-     collection_name="memory_rag",
-     query="pizza",
-     filter={"category": "food"},
-     k=3 # Number of chunks to retrieve
- )
-
- print(query_result)
- # > "User loves Pizza!"
- ```
-
- ### NLP Evaluation
-
- > **Disclaimer**
- > These evaluation functions are designed for batch processing. Models and encoders are loaded internally on each call, which may be inefficient for per-sample or streaming evaluation.
-
- ```python
- > from llmflowstack import text_evaluation
- > from llmflowstack.utils import (bert_score_evaluation, bleu_score_evaluation, cosine_similarity_evaluation, rouge_evaluation)
-
- # Predictions from some model
- > predictions = ["Chico is a dog, and he is orange!", "Fred is a cat, and he is white!"]
- # References text (ground truth)
- > references = ["Chico is a cat, and he is black!", "Fred is a dog, and he is white!"]
-
- # BERT Score Evaluation
- > bert_score_evaluation(predictions, references)
- {'bertscore_precision': 0.9773, 'bertscore_recall': 0.9773, 'bertscore_f1': 0.9773}
-
- # Bleu Score Evaluation
- > bleu_score_evaluation(predictions, references)
- {'bleu_score': 0.3656}
-
- # Cosine Similarity Evaluation
- > cosine_similarity_evaluation(predictions, references)
- {'cosine_similarity': 0.7443}
-
- # Rouge Score Evaluation
- > rouge_evaluation(predictions, references)
- {'rouge1': 0.8125, 'rouge2': 0.6429, 'rougeL': 0.8125}
-
- # All-in-one function
- > text_evaluation(predictions, references)
- {'bertscore_precision': 0.9773, 'bertscore_recall': 0.9773, 'bertscore_f1': 0.9773, 'bleu_score': 0.3656, 'cosine_similarity': 0.7443, 'rouge1': 0.8125, 'rouge2': 0.6429, 'rougeL': 0.8125}
- ```
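Read together, the PKG-INFO hunks above show the 1.3.0 API reshuffle: classes move to CamelCase names (`GPT_OSS` → `GptOss`, `LLaMA3` → `Llama3`), `generate` takes `data=` instead of `input=`, and `GenerationSampleParams` is folded into `GenerationParams`, which now carries `mode`, `max_new_tokens`, and `temperature` directly. A minimal before/after sketch of that call-site migration, using only names that appear in the diff (the checkpoint path and prompt text are illustrative):

```python
from llmflowstack import GptOss, GenerationParams

model = GptOss(checkpoint="/gpt-oss-20b")  # illustrative checkpoint path

# 1.2.6 shape (removed by this release):
#   model.generate(input=..., params=GenerationParams(
#       max_new_tokens=1024, sample=GenerationSampleParams(temperature=0.3)))
# 1.3.0 shape (added): sampling fields sit directly on GenerationParams
answer = model.generate(
    data=model.build_input(input_text="Tell me a joke!"),
    params=GenerationParams(
        mode="sample",        # greedy, sample or beam, per the diff comment
        max_new_tokens=1024,
        temperature=0.3,
    ),
)
print(answer)
```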
{llmflowstack-1.2.6 → llmflowstack-1.3.0}/README.md

@@ -17,32 +17,23 @@ The goal is to make experimentation with LLMs more accessible, without the need
  This framework is designed to provide flexibility when working with different open-source and commercial LLMs. Currently, the following models are supported:

  - **GPT-OSS**
-
    - [`GPT-OSS 20B`](https://huggingface.co/openai/gpt-oss-20b)
    - [`GPT-OSS 120B`](https://huggingface.co/openai/gpt-oss-120b)
-   > Fine-Tuning, DAPT and Inference Available

  - **LLaMA 3**
-
    - [`LLaMA 3.1 8B - Instruct`](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct)
    - [`LLaMA 3.1 70B - Instruct`](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct)
    - [`LLaMA 3.3 70B - Instruct`](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct)
    - [`LLaMA 3.3 405B - Instruct`](https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct)
-   > Fine-Tuning, DAPT and Inference Available

  - **LLaMA 4**
-
    - [`LLaMA 4 Scout - Instruct`](https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct)
-   > DAPT and Inference Available

  - **Gemma**
-
    - [`Gemma 3 27B - Instruct`](https://huggingface.co/google/gemma-3-27b-it)
-   > DAPT and Inference Available

  - **MedGemma**
-   - [`MedGemma 27B Text - Instruct`](https://huggingface.co/google/medgemma-27b-text-it)
-   > Fine-Tuning, DAPT and Inference Available
+   - [`MedGemma 27B - Instruct`](https://huggingface.co/google/medgemma-27b-it)

  > Other architectures based on those **may** function correctly.

@@ -65,22 +56,22 @@ This section presents a bit of what you can do with the framework.
  You can load as many models as your hardware allows (H100 GPU recommended)...

  ```python
- from llmflowstack import GPT_OSS, LLaMA3
+ from llmflowstack import GptOss, Llama3

- # Loading a LLaMA model
- first_model = LLaMA3()
+ # Loading a Llama model
+ first_model = Llama3()
  first_model.load_checkpoint(
      checkpoint="/llama-3.1-8b-Instruct",
  )

- # Loading a quantized LLaMA model
- second_model = LLaMA3(
+ # Loading a quantized Llama model
+ second_model = Llama3(
      checkpoint="/llama-3.3-70b-Instruct",
      quantization="4bit"
  )

  # Loading a GPT-OSS, quantized and with seed
- thrid_model = GPT_OSS(
+ thrid_model = GptOss(
      checkpoint="/gpt-oss-20b",
      quantization=True,
      seed=1234
@@ -90,32 +81,31 @@ thrid_model = GPT_OSS(
  ### Inference Examples

  ```python
- > from llmflowstack import GPT_OSS, GenerationParams, GenerationSampleParams
+ > from llmflowstack import GptOss, GenerationParams

- > gpt_oss_model = GPT_OSS(checkpoint="/gpt-oss-120b")
+ > gpt_oss_model = GptOss(checkpoint="/gpt-oss-120b")

  > gpt_oss_model.generate("Tell me a joke!")
  'Why did the scarecrow become a successful motivational speaker? Because he was outstanding **in** his field! 🌾😄'

  # Exclusive for GPT-OSS
- > gpt_oss_model.set_reasoning_level("High")
+ > gpt_oss_model.set_reasoning_level("High") # Low, Medium, High, Off

  > custom_input = gpt_oss_model.build_input(
      input_text="Tell me another joke!",
      developer_message="You are a clown and after every joke, you should say 'HONK HONK'"
  )
  > gpt_oss_model.generate(
-     input=custom_input,
+     data=custom_input,
      params=GenerationParams(
+         mode="sample", # greedy, sample or beam
          max_new_tokens=1024,
-         sample=GenerationSampleParams(
-             temperature=0.3
-         )
+         temperature=0.3
      )
  )
  'Why did the scarecrow win an award? Because he was outstanding in his field! \n\nHONK HONK'

- > llama_model = LLaMA3(checkpoint="/llama-3.3-70B-Instruct", quantization="4bit")
+ > llama_model = Llama3(checkpoint="/llama-3.3-70B-Instruct", quantization="4bit")
  > llama_model.generate("Why is the sky blue?")
  'The sky appears blue because of a phenomenon called Rayleigh scattering, which is the scattering of light'

@@ -126,7 +116,7 @@ thrid_model = GPT_OSS(
  You can also generate tokens using a streamer, that is, receiving one token at a time by using the iterator version of the generate function:

  ```python
- llama_4 = LLaMA4(
+ llama_4 = Llama4(
      checkpoint="llama-4-scout-17b-16e-instruct"
  )

@@ -139,10 +129,10 @@ for text in it:
  ### Training Examples (DAPT & Fine-tune)

  ```python
- from llmflowstack import LLaMA3
+ from llmflowstack import Llama3
  from llmflowstack.schemas import TrainParams

- model = LLaMA3(
+ model = Llama3(
      checkpoint="llama-3.1-8b-Instruct"
  )

@@ -150,28 +140,29 @@ model = LLaMA3(
  dataset = []
  dataset.append(model.build_input(
      input_text="Chico is a cat, which color he is?",
-     expected_answer="Black!"
+     output_text="Black!"
  ))

  dataset.append(model.build_input(
      input_text="Fred is a dog, which color he is?",
-     expected_answer="White!"
+     output_text="White!"
  ))

  # Does the DAPT in the full model
- model.dapt(
-     train_dataset=dataset,
+ model.train(
+     train_data=dataset,
      params=TrainParams(
          batch_size=1,
          epochs=3,
          gradient_accumulation=1,
          lr=2e-5
-     )
+     ),
+     mode="DAPT"
  )

  # Does the fine-tune this time
- model.fine_tune(
-     train_dataset=dataset,
+ model.train(
+     train_data=dataset,
      params=TrainParams(
          batch_size=1,
          gradient_accumulation=1,
@@ -180,7 +171,8 @@ model.fine_tune(
      ),
      save_at_end=True,
      # It will save the model
-     save_path="./output"
+     save_path="./output",
+     mode="FT"
  )

  # Saving the final result
@@ -188,88 +180,3 @@ model.save_checkpoint(
      path="./model-output"
  )
  ```
-
- ### RAG Pipeline
-
- A prototype of a RAG pipeline is also available. You can instantiate and use it as follows:
-
- ```python
- from llmflowstack import VectorDatabase
-
- vector_db = VectorDatabase(
-     checkpoint="jina-embeddings-v4",
-     chunk_size=1000,
-     chunk_overlap=200
- )
-
- # Create or load an existing collection
- vector_db.get_collection(
-     collection_name="memory_rag",
-     persist_directory="./memory"
- )
-
- vector_db.get_collection(
-     collection_name="files_rag",
-     persist_directory="./files"
- )
-
- # You may also omit the persist directory; in this case, the RAG data will be stored in memory
- vector_db.get_collection(
-     collection_name="files_rag"
- )
-
- # To create a new document in a collection
- vector_db.create(
-     collection_name="memory_rag",
-     information="User loves Pizza!", # Main information to be indexed in the vector database
-     other_info={"category": "food"},
-     can_split=False, # Indicates whether the information can be split into chunks
-     should_index=True # Defaults to True — defines whether the document should be indexed or only returned as a Document instance
- )
-
- # After adding documents, you can query the database
- query_result = vector_db.query(
-     collection_name="memory_rag",
-     query="pizza",
-     filter={"category": "food"},
-     k=3 # Number of chunks to retrieve
- )
-
- print(query_result)
- # > "User loves Pizza!"
- ```
-
- ### NLP Evaluation
-
- > **Disclaimer**
- > These evaluation functions are designed for batch processing. Models and encoders are loaded internally on each call, which may be inefficient for per-sample or streaming evaluation.
-
- ```python
- > from llmflowstack import text_evaluation
- > from llmflowstack.utils import (bert_score_evaluation, bleu_score_evaluation, cosine_similarity_evaluation, rouge_evaluation)
-
- # Predictions from some model
- > predictions = ["Chico is a dog, and he is orange!", "Fred is a cat, and he is white!"]
- # References text (ground truth)
- > references = ["Chico is a cat, and he is black!", "Fred is a dog, and he is white!"]
-
- # BERT Score Evaluation
- > bert_score_evaluation(predictions, references)
- {'bertscore_precision': 0.9773, 'bertscore_recall': 0.9773, 'bertscore_f1': 0.9773}
-
- # Bleu Score Evaluation
- > bleu_score_evaluation(predictions, references)
- {'bleu_score': 0.3656}
-
- # Cosine Similarity Evaluation
- > cosine_similarity_evaluation(predictions, references)
- {'cosine_similarity': 0.7443}
-
- # Rouge Score Evaluation
- > rouge_evaluation(predictions, references)
- {'rouge1': 0.8125, 'rouge2': 0.6429, 'rougeL': 0.8125}
-
- # All-in-one function
- > text_evaluation(predictions, references)
- {'bertscore_precision': 0.9773, 'bertscore_recall': 0.9773, 'bertscore_f1': 0.9773, 'bleu_score': 0.3656, 'cosine_similarity': 0.7443, 'rouge1': 0.8125, 'rouge2': 0.6429, 'rougeL': 0.8125}
- ```
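The README training hunks make the same point for training: the separate `dapt()` and `fine_tune()` methods are replaced by a single `train()` selected via `mode`, the dataset keyword becomes `train_data`, and `build_input` takes `output_text` rather than `expected_answer`. A short sketch of the new call shape, assuming nothing beyond what the README diff shows (checkpoint path and sample texts are placeholders):

```python
from llmflowstack import Llama3
from llmflowstack.schemas import TrainParams

model = Llama3(checkpoint="llama-3.1-8b-Instruct")  # placeholder checkpoint

dataset = [
    model.build_input(
        input_text="Chico is a cat, which color he is?",
        output_text="Black!",  # 1.2.6 used expected_answer= here
    ),
]

# 1.2.6: model.dapt(train_dataset=...) and model.fine_tune(train_dataset=...)
# 1.3.0: one entry point, with the training regime chosen via mode
model.train(
    train_data=dataset,
    params=TrainParams(batch_size=1, epochs=3, gradient_accumulation=1, lr=2e-5),
    mode="DAPT",  # "FT" for fine-tuning, per the README diff
)
```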
llmflowstack-1.3.0/llmflowstack/__init__.py

@@ -0,0 +1,27 @@
+ from .decoders.gemma_3 import Gemma3
+ from .decoders.gpt_2 import Gpt2
+ from .decoders.gpt_oss import GptOss
+ from .decoders.llama_3 import Llama3
+ from .decoders.llama_4 import Llama4
+ from .decoders.medgemma import MedGemma
+ #from .decoders.qwen_3 import Qwen3
+ from .rag.VectorDatabase import VectorDatabase
+ from .schemas.params import GenerationParams, TrainParams
+ from .utils.evaluation_methods import text_evaluation
+
+ __all__ = [
+     "Gemma3",
+     "Gpt2",
+     "GptOss",
+     "Llama3",
+     "Llama4",
+     "MedGemma",
+     # "Qwen3",
+
+     "VectorDatabase",
+
+     "GenerationParams",
+     "TrainParams",
+
+     "text_evaluation"
+ ]
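The new top-level `__init__.py` defines the 1.3.0 import surface: CamelCase decoder classes (including the new `Gpt2`, with `Qwen3` still commented out), the `VectorDatabase` RAG store, the two param schemas, and `text_evaluation`. A quick import-level sanity check against exactly those exports; code written against the 1.2.6 names (`GPT_OSS`, `LLaMA3`, `LLaMA4`, `GenerationSampleParams`) will likely need updating when upgrading:

```python
# Imports limited to what llmflowstack/__init__.py exports in 1.3.0 (see above).
from llmflowstack import (
    Gemma3, Gpt2, GptOss, Llama3, Llama4, MedGemma,  # decoders (Qwen3 not exported yet)
    VectorDatabase,                                   # RAG vector store
    GenerationParams, TrainParams,                    # generation / training schemas
    text_evaluation,                                  # batch NLP evaluation helper
)

# Upgrading from 1.2.6 mostly means renaming: GPT_OSS -> GptOss, LLaMA3 -> Llama3, etc.
print([cls.__name__ for cls in (Gemma3, GptOss, Llama3, Llama4, MedGemma)])
```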