llmflowstack 1.2.6__tar.gz → 1.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. {llmflowstack-1.2.6 → llmflowstack-1.3.1}/PKG-INFO +34 -132
  2. {llmflowstack-1.2.6 → llmflowstack-1.3.1}/README.md +27 -120
  3. llmflowstack-1.3.1/llmflowstack/__init__.py +27 -0
  4. llmflowstack-1.3.1/llmflowstack/callbacks/force_json.py +428 -0
  5. llmflowstack-1.3.1/llmflowstack/collators/multimodal_causal.py +122 -0
  6. llmflowstack-1.3.1/llmflowstack/decoders/__init__.py +18 -0
  7. llmflowstack-1.3.1/llmflowstack/decoders/base_decoder.py +694 -0
  8. llmflowstack-1.3.1/llmflowstack/decoders/gemma_3.py +143 -0
  9. llmflowstack-1.3.1/llmflowstack/decoders/gpt_2.py +106 -0
  10. llmflowstack-1.3.1/llmflowstack/decoders/gpt_oss.py +174 -0
  11. llmflowstack-1.3.1/llmflowstack/decoders/llama_3.py +123 -0
  12. llmflowstack-1.3.1/llmflowstack/decoders/llama_4.py +134 -0
  13. llmflowstack-1.3.1/llmflowstack/decoders/medgemma.py +169 -0
  14. llmflowstack-1.3.1/llmflowstack/decoders/qwen_3.py +194 -0
  15. {llmflowstack-1.2.6 → llmflowstack-1.3.1}/llmflowstack/rag/VectorDatabase.py +48 -14
  16. {llmflowstack-1.2.6 → llmflowstack-1.3.1}/llmflowstack/rag/__init__.py +0 -4
  17. llmflowstack-1.3.1/llmflowstack/schemas/__init__.py +6 -0
  18. llmflowstack-1.3.1/llmflowstack/schemas/params.py +106 -0
  19. llmflowstack-1.3.1/pyproject.toml +38 -0
  20. llmflowstack-1.2.6/llmflowstack/__init__.py +0 -23
  21. llmflowstack-1.2.6/llmflowstack/callbacks/stop_on_token.py +0 -16
  22. llmflowstack-1.2.6/llmflowstack/decoders/BaseDecoder.py +0 -487
  23. llmflowstack-1.2.6/llmflowstack/decoders/GPT_OSS.py +0 -300
  24. llmflowstack-1.2.6/llmflowstack/decoders/Gemma.py +0 -327
  25. llmflowstack-1.2.6/llmflowstack/decoders/LLaMA3.py +0 -244
  26. llmflowstack-1.2.6/llmflowstack/decoders/LLaMA4.py +0 -324
  27. llmflowstack-1.2.6/llmflowstack/decoders/MedGemma.py +0 -275
  28. llmflowstack-1.2.6/llmflowstack/decoders/__init__.py +0 -13
  29. llmflowstack-1.2.6/llmflowstack/schemas/__init__.py +0 -9
  30. llmflowstack-1.2.6/llmflowstack/schemas/params.py +0 -40
  31. llmflowstack-1.2.6/llmflowstack/utils/generation_utils.py +0 -30
  32. llmflowstack-1.2.6/pyproject.toml +0 -43
  33. {llmflowstack-1.2.6 → llmflowstack-1.3.1}/.github/workflows/python-publish.yml +0 -0
  34. {llmflowstack-1.2.6 → llmflowstack-1.3.1}/.gitignore +0 -0
  35. {llmflowstack-1.2.6 → llmflowstack-1.3.1}/LICENSE +0 -0
  36. {llmflowstack-1.2.6 → llmflowstack-1.3.1}/llmflowstack/callbacks/__init__.py +0 -0
  37. {llmflowstack-1.2.6 → llmflowstack-1.3.1}/llmflowstack/callbacks/log_collector.py +0 -0
  38. {llmflowstack-1.2.6 → llmflowstack-1.3.1}/llmflowstack/utils/__init__.py +0 -0
  39. {llmflowstack-1.2.6 → llmflowstack-1.3.1}/llmflowstack/utils/evaluation_methods.py +0 -0
  40. {llmflowstack-1.2.6 → llmflowstack-1.3.1}/llmflowstack/utils/exceptions.py +0 -0
  41. {llmflowstack-1.2.6 → llmflowstack-1.3.1}/llmflowstack/utils/logging.py +0 -0
@@ -1,35 +1,30 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: llmflowstack
3
- Version: 1.2.6
4
- Summary: LLMFlowStack is a framework for training and using LLMs (LLaMA, GPT-OSS, Gemma, ...). Supports DAPT, fine-tuning, and distributed inference. Public fork without institution-specific components.
3
+ Version: 1.3.1
4
+ Summary: LLMFlowStack is a framework for training and using LLMs (LLaMA, GPT-OSS, Gemma, ...). Supports DAPT, fine-tuning, and distributed inference.
5
5
  Author-email: Gustavo Henrique Ferreira Cruz <gustavohferreiracruz@gmail.com>
6
6
  License: MIT
7
7
  License-File: LICENSE
8
8
  Requires-Python: >=3.12
9
9
  Requires-Dist: accelerate
10
10
  Requires-Dist: bert-score
11
- Requires-Dist: bitsandbytes
12
11
  Requires-Dist: chromadb
13
12
  Requires-Dist: datasets
14
13
  Requires-Dist: evaluate
15
- Requires-Dist: huggingface-hub
14
+ Requires-Dist: fbgemm-gpu-genai
16
15
  Requires-Dist: kernels
17
16
  Requires-Dist: langchain-chroma
18
17
  Requires-Dist: langchain-community
18
+ Requires-Dist: mslk-cuda==1.0.0
19
19
  Requires-Dist: nltk
20
- Requires-Dist: numpy
21
- Requires-Dist: openai-harmony
22
- Requires-Dist: pandas
23
20
  Requires-Dist: peft
21
+ Requires-Dist: pillow
24
22
  Requires-Dist: rouge-score
25
23
  Requires-Dist: safetensors
26
- Requires-Dist: scikit-learn
27
- Requires-Dist: scipy
28
24
  Requires-Dist: sentence-transformers
29
25
  Requires-Dist: torch
30
- Requires-Dist: torchvision
31
- Requires-Dist: tqdm
32
- Requires-Dist: transformers
26
+ Requires-Dist: torchao==0.16.0
27
+ Requires-Dist: transformers==4.57.6
33
28
  Requires-Dist: triton
34
29
  Requires-Dist: trl
35
30
  Description-Content-Type: text/markdown
@@ -53,32 +48,23 @@ The goal is to make experimentation with LLMs more accessible, without the need
53
48
  This framework is designed to provide flexibility when working with different open-source and commercial LLMs. Currently, the following models are supported:
54
49
 
55
50
  - **GPT-OSS**
56
-
57
51
  - [`GPT-OSS 20B`](https://huggingface.co/openai/gpt-oss-20b)
58
52
  - [`GPT-OSS 120B`](https://huggingface.co/openai/gpt-oss-120b)
59
- > Fine-Tuning, DAPT and Inference Available
60
53
 
61
54
  - **LLaMA 3**
62
-
63
55
  - [`LLaMA 3.1 8B - Instruct`](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct)
64
56
  - [`LLaMA 3.1 70B - Instruct`](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct)
65
57
  - [`LLaMA 3.3 70B - Instruct`](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct)
66
58
  - [`LLaMA 3.1 405B - Instruct`](https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct)
67
- > Fine-Tuning, DAPT and Inference Available
68
59
 
69
60
  - **LLaMA 4**
70
-
71
61
  - [`LLaMA 4 Scout - Instruct`](https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct)
72
- > DAPT and Inference Available
73
62
 
74
63
  - **Gemma**
75
-
76
64
  - [`Gemma 3 27B - Instruct`](https://huggingface.co/google/gemma-3-27b-it)
77
- > DAPT and Inference Available
78
65
 
79
66
  - **MedGemma**
80
- - [`MedGemma 27B Text - Instruct`](https://huggingface.co/google/medgemma-27b-text-it)
81
- > Fine-Tuning, DAPT and Inference Available
67
+ - [`MedGemma 27B - Instruct`](https://huggingface.co/google/medgemma-27b-it)
82
68
 
83
69
  > Other architectures based on those **may** function correctly.
84
70
 
@@ -101,22 +87,22 @@ This section presents a bit of what you can do with the framework.
101
87
  You can load as many models as your hardware allows (H100 GPU recommended)...
102
88
 
103
89
  ```python
104
- from llmflowstack import GPT_OSS, LLaMA3
90
+ from llmflowstack import GptOss, Llama3
105
91
 
106
- # Loading a LLaMA model
107
- first_model = LLaMA3()
92
+ # Loading a Llama model
93
+ first_model = Llama3()
108
94
  first_model.load_checkpoint(
109
95
  checkpoint="/llama-3.1-8b-Instruct",
110
96
  )
111
97
 
112
- # Loading a quantized LLaMA model
113
- second_model = LLaMA3(
98
+ # Loading a quantized Llama model
99
+ second_model = Llama3(
114
100
  checkpoint="/llama-3.3-70b-Instruct",
115
101
  quantization="4bit"
116
102
  )
117
103
 
118
104
  # Loading a GPT-OSS, quantized and with seed
119
- thrid_model = GPT_OSS(
105
+ third_model = GptOss(
120
106
  checkpoint="/gpt-oss-20b",
121
107
  quantization=True,
122
108
  seed=1234
@@ -126,32 +112,31 @@ thrid_model = GPT_OSS(
126
112
  ### Inference Examples
127
113
 
128
114
  ```python
129
- > from llmflowstack import GPT_OSS, GenerationParams, GenerationSampleParams
115
+ > from llmflowstack import GptOss, GenerationParams
130
116
 
131
- > gpt_oss_model = GPT_OSS(checkpoint="/gpt-oss-120b")
117
+ > gpt_oss_model = GptOss(checkpoint="/gpt-oss-120b")
132
118
 
133
119
  > gpt_oss_model.generate("Tell me a joke!")
134
120
  'Why did the scarecrow become a successful motivational speaker? Because he was outstanding **in** his field! 🌾😄'
135
121
 
136
122
  # Exclusive for GPT-OSS
137
- > gpt_oss_model.set_reasoning_level("High")
123
+ > gpt_oss_model.set_reasoning_level("High") # Low, Medium, High, Off
138
124
 
139
125
  > custom_input = gpt_oss_model.build_input(
140
126
  input_text="Tell me another joke!",
141
127
  developer_message="You are a clown and after every joke, you should say 'HONK HONK'"
142
128
  )
143
129
  > gpt_oss_model.generate(
144
- input=custom_input,
130
+ data=custom_input,
145
131
  params=GenerationParams(
132
+ mode="sample", # greedy, sample or beam
146
133
  max_new_tokens=1024,
147
- sample=GenerationSampleParams(
148
- temperature=0.3
149
- )
134
+ temperature=0.3
150
135
  )
151
136
  )
152
137
  'Why did the scarecrow win an award? Because he was outstanding in his field! \n\nHONK HONK'
153
138
 
154
- > llama_model = LLaMA3(checkpoint="/llama-3.3-70B-Instruct", quantization="4bit")
139
+ > llama_model = Llama3(checkpoint="/llama-3.3-70B-Instruct", quantization="4bit")
155
140
  > llama_model.generate("Why is the sky blue?")
156
141
  'The sky appears blue because of a phenomenon called Rayleigh scattering, which is the scattering of light'
157
142
 
@@ -162,7 +147,7 @@ thrid_model = GPT_OSS(
162
147
  You can also generate tokens using a streamer, that is, receiving one token at a time by using the iterator version of the generate function:
163
148
 
164
149
  ```python
165
- llama_4 = LLaMA4(
150
+ llama_4 = Llama4(
166
151
  checkpoint="llama-4-scout-17b-16e-instruct"
167
152
  )
168
153
 
@@ -175,10 +160,10 @@ for text in it:
175
160
  ### Training Examples (DAPT & Fine-tune)
176
161
 
177
162
  ```python
178
- from llmflowstack import LLaMA3
163
+ from llmflowstack import Llama3
179
164
  from llmflowstack.schemas import TrainParams
180
165
 
181
- model = LLaMA3(
166
+ model = Llama3(
182
167
  checkpoint="llama-3.1-8b-Instruct"
183
168
  )
184
169
 
@@ -186,28 +171,29 @@ model = LLaMA3(
186
171
  dataset = []
187
172
  dataset.append(model.build_input(
188
173
  input_text="Chico is a cat, which color he is?",
189
- expected_answer="Black!"
174
+ output_text="Black!"
190
175
  ))
191
176
 
192
177
  dataset.append(model.build_input(
193
178
  input_text="Fred is a dog, which color he is?",
194
- expected_answer="White!"
179
+ output_text="White!"
195
180
  ))
196
181
 
197
182
  # Does the DAPT in the full model
198
- model.dapt(
199
- train_dataset=dataset,
183
+ model.train(
184
+ train_data=dataset,
200
185
  params=TrainParams(
201
186
  batch_size=1,
202
187
  epochs=3,
203
188
  gradient_accumulation=1,
204
189
  lr=2e-5
205
- )
190
+ ),
191
+ mode="DAPT"
206
192
  )
207
193
 
208
194
  # Does the fine-tune this time
209
- model.fine_tune(
210
- train_dataset=dataset,
195
+ model.train(
196
+ train_data=dataset,
211
197
  params=TrainParams(
212
198
  batch_size=1,
213
199
  gradient_accumulation=1,
@@ -216,7 +202,8 @@ model.fine_tune(
216
202
  ),
217
203
  save_at_end=True,
218
204
  # It will save the model
219
- save_path="./output"
205
+ save_path="./output",
206
+ mode="FT"
220
207
  )
221
208
 
222
209
  # Saving the final result
@@ -224,88 +211,3 @@ model.save_checkpoint(
224
211
  path="./model-output"
225
212
  )
226
213
  ```
227
-
228
- ### RAG Pipeline
229
-
230
- A prototype of a RAG pipeline is also available. You can instantiate and use it as follows:
231
-
232
- ```python
233
- from llmflowstack import VectorDatabase
234
-
235
- vector_db = VectorDatabase(
236
- checkpoint="jina-embeddings-v4",
237
- chunk_size=1000,
238
- chunk_overlap=200
239
- )
240
-
241
- # Create or load an existing collection
242
- vector_db.get_collection(
243
- collection_name="memory_rag",
244
- persist_directory="./memory"
245
- )
246
-
247
- vector_db.get_collection(
248
- collection_name="files_rag",
249
- persist_directory="./files"
250
- )
251
-
252
- # You may also omit the persist directory; in this case, the RAG data will be stored in memory
253
- vector_db.get_collection(
254
- collection_name="files_rag"
255
- )
256
-
257
- # To create a new document in a collection
258
- vector_db.create(
259
- collection_name="memory_rag",
260
- information="User loves Pizza!", # Main information to be indexed in the vector database
261
- other_info={"category": "food"},
262
- can_split=False, # Indicates whether the information can be split into chunks
263
- should_index=True # Defaults to True — defines whether the document should be indexed or only returned as a Document instance
264
- )
265
-
266
- # After adding documents, you can query the database
267
- query_result = vector_db.query(
268
- collection_name="memory_rag",
269
- query="pizza",
270
- filter={"category": "food"},
271
- k=3 # Number of chunks to retrieve
272
- )
273
-
274
- print(query_result)
275
- # > "User loves Pizza!"
276
- ```
277
-
278
- ### NLP Evaluation
279
-
280
- > **Disclaimer**
281
- > These evaluation functions are designed for batch processing. Models and encoders are loaded internally on each call, which may be inefficient for per-sample or streaming evaluation.
282
-
283
- ```python
284
- > from llmflowstack import text_evaluation
285
- > from llmflowstack.utils import (bert_score_evaluation, bleu_score_evaluation, cosine_similarity_evaluation, rouge_evaluation)
286
-
287
- # Predictions from some model
288
- > predictions = ["Chico is a dog, and he is orange!", "Fred is a cat, and he is white!"]
289
- # References text (ground truth)
290
- > references = ["Chico is a cat, and he is black!", "Fred is a dog, and he is white!"]
291
-
292
- # BERT Score Evaluation
293
- > bert_score_evaluation(predictions, references)
294
- {'bertscore_precision': 0.9773, 'bertscore_recall': 0.9773, 'bertscore_f1': 0.9773}
295
-
296
- # Bleu Score Evaluation
297
- > bleu_score_evaluation(predictions, references)
298
- {'bleu_score': 0.3656}
299
-
300
- # Cosine Similarity Evaluation
301
- > cosine_similarity_evaluation(predictions, references)
302
- {'cosine_similarity': 0.7443}
303
-
304
- # Rouge Score Evaluation
305
- > rouge_evaluation(predictions, references)
306
- {'rouge1': 0.8125, 'rouge2': 0.6429, 'rougeL': 0.8125}
307
-
308
- # All-in-one function
309
- > text_evaluation(predictions, references)
310
- {'bertscore_precision': 0.9773, 'bertscore_recall': 0.9773, 'bertscore_f1': 0.9773, 'bleu_score': 0.3656, 'cosine_similarity': 0.7443, 'rouge1': 0.8125, 'rouge2': 0.6429, 'rougeL': 0.8125}
311
- ```
@@ -17,32 +17,23 @@ The goal is to make experimentation with LLMs more accessible, without the need
17
17
  This framework is designed to provide flexibility when working with different open-source and commercial LLMs. Currently, the following models are supported:
18
18
 
19
19
  - **GPT-OSS**
20
-
21
20
  - [`GPT-OSS 20B`](https://huggingface.co/openai/gpt-oss-20b)
22
21
  - [`GPT-OSS 120B`](https://huggingface.co/openai/gpt-oss-120b)
23
- > Fine-Tuning, DAPT and Inference Available
24
22
 
25
23
  - **LLaMA 3**
26
-
27
24
  - [`LLaMA 3.1 8B - Instruct`](https://huggingface.co/meta-llama/Llama-3.1-8B-Instruct)
28
25
  - [`LLaMA 3.1 70B - Instruct`](https://huggingface.co/meta-llama/Llama-3.1-70B-Instruct)
29
26
  - [`LLaMA 3.3 70B - Instruct`](https://huggingface.co/meta-llama/Llama-3.3-70B-Instruct)
30
27
  - [`LLaMA 3.1 405B - Instruct`](https://huggingface.co/meta-llama/Llama-3.1-405B-Instruct)
31
- > Fine-Tuning, DAPT and Inference Available
32
28
 
33
29
  - **LLaMA 4**
34
-
35
30
  - [`LLaMA 4 Scout - Instruct`](https://huggingface.co/meta-llama/Llama-4-Scout-17B-16E-Instruct)
36
- > DAPT and Inference Available
37
31
 
38
32
  - **Gemma**
39
-
40
33
  - [`Gemma 3 27B - Instruct`](https://huggingface.co/google/gemma-3-27b-it)
41
- > DAPT and Inference Available
42
34
 
43
35
  - **MedGemma**
44
- - [`MedGemma 27B Text - Instruct`](https://huggingface.co/google/medgemma-27b-text-it)
45
- > Fine-Tuning, DAPT and Inference Available
36
+ - [`MedGemma 27B - Instruct`](https://huggingface.co/google/medgemma-27b-it)
46
37
 
47
38
  > Other architectures based on those **may** function correctly.
48
39
 
@@ -65,22 +56,22 @@ This section presents a bit of what you can do with the framework.
65
56
  You can load as many models as your hardware allows (H100 GPU recommended)...
66
57
 
67
58
  ```python
68
- from llmflowstack import GPT_OSS, LLaMA3
59
+ from llmflowstack import GptOss, Llama3
69
60
 
70
- # Loading a LLaMA model
71
- first_model = LLaMA3()
61
+ # Loading a Llama model
62
+ first_model = Llama3()
72
63
  first_model.load_checkpoint(
73
64
  checkpoint="/llama-3.1-8b-Instruct",
74
65
  )
75
66
 
76
- # Loading a quantized LLaMA model
77
- second_model = LLaMA3(
67
+ # Loading a quantized Llama model
68
+ second_model = Llama3(
78
69
  checkpoint="/llama-3.3-70b-Instruct",
79
70
  quantization="4bit"
80
71
  )
81
72
 
82
73
  # Loading a GPT-OSS, quantized and with seed
83
- thrid_model = GPT_OSS(
74
+ third_model = GptOss(
84
75
  checkpoint="/gpt-oss-20b",
85
76
  quantization=True,
86
77
  seed=1234
@@ -90,32 +81,31 @@ thrid_model = GPT_OSS(
90
81
  ### Inference Examples
91
82
 
92
83
  ```python
93
- > from llmflowstack import GPT_OSS, GenerationParams, GenerationSampleParams
84
+ > from llmflowstack import GptOss, GenerationParams
94
85
 
95
- > gpt_oss_model = GPT_OSS(checkpoint="/gpt-oss-120b")
86
+ > gpt_oss_model = GptOss(checkpoint="/gpt-oss-120b")
96
87
 
97
88
  > gpt_oss_model.generate("Tell me a joke!")
98
89
  'Why did the scarecrow become a successful motivational speaker? Because he was outstanding **in** his field! 🌾😄'
99
90
 
100
91
  # Exclusive for GPT-OSS
101
- > gpt_oss_model.set_reasoning_level("High")
92
+ > gpt_oss_model.set_reasoning_level("High") # Low, Medium, High, Off
102
93
 
103
94
  > custom_input = gpt_oss_model.build_input(
104
95
  input_text="Tell me another joke!",
105
96
  developer_message="You are a clown and after every joke, you should say 'HONK HONK'"
106
97
  )
107
98
  > gpt_oss_model.generate(
108
- input=custom_input,
99
+ data=custom_input,
109
100
  params=GenerationParams(
101
+ mode="sample", # greedy, sample or beam
110
102
  max_new_tokens=1024,
111
- sample=GenerationSampleParams(
112
- temperature=0.3
113
- )
103
+ temperature=0.3
114
104
  )
115
105
  )
116
106
  'Why did the scarecrow win an award? Because he was outstanding in his field! \n\nHONK HONK'
117
107
 
118
- > llama_model = LLaMA3(checkpoint="/llama-3.3-70B-Instruct", quantization="4bit")
108
+ > llama_model = Llama3(checkpoint="/llama-3.3-70B-Instruct", quantization="4bit")
119
109
  > llama_model.generate("Why is the sky blue?")
120
110
  'The sky appears blue because of a phenomenon called Rayleigh scattering, which is the scattering of light'
121
111
 
@@ -126,7 +116,7 @@ thrid_model = GPT_OSS(
126
116
  You can also generate tokens using a streamer, that is, receiving one token at a time by using the iterator version of the generate function:
127
117
 
128
118
  ```python
129
- llama_4 = LLaMA4(
119
+ llama_4 = Llama4(
130
120
  checkpoint="llama-4-scout-17b-16e-instruct"
131
121
  )
132
122
 
@@ -139,10 +129,10 @@ for text in it:
139
129
  ### Training Examples (DAPT & Fine-tune)
140
130
 
141
131
  ```python
142
- from llmflowstack import LLaMA3
132
+ from llmflowstack import Llama3
143
133
  from llmflowstack.schemas import TrainParams
144
134
 
145
- model = LLaMA3(
135
+ model = Llama3(
146
136
  checkpoint="llama-3.1-8b-Instruct"
147
137
  )
148
138
 
@@ -150,28 +140,29 @@ model = LLaMA3(
150
140
  dataset = []
151
141
  dataset.append(model.build_input(
152
142
  input_text="Chico is a cat, which color he is?",
153
- expected_answer="Black!"
143
+ output_text="Black!"
154
144
  ))
155
145
 
156
146
  dataset.append(model.build_input(
157
147
  input_text="Fred is a dog, which color he is?",
158
- expected_answer="White!"
148
+ output_text="White!"
159
149
  ))
160
150
 
161
151
  # Does the DAPT in the full model
162
- model.dapt(
163
- train_dataset=dataset,
152
+ model.train(
153
+ train_data=dataset,
164
154
  params=TrainParams(
165
155
  batch_size=1,
166
156
  epochs=3,
167
157
  gradient_accumulation=1,
168
158
  lr=2e-5
169
- )
159
+ ),
160
+ mode="DAPT"
170
161
  )
171
162
 
172
163
  # Does the fine-tune this time
173
- model.fine_tune(
174
- train_dataset=dataset,
164
+ model.train(
165
+ train_data=dataset,
175
166
  params=TrainParams(
176
167
  batch_size=1,
177
168
  gradient_accumulation=1,
@@ -180,7 +171,8 @@ model.fine_tune(
180
171
  ),
181
172
  save_at_end=True,
182
173
  # It will save the model
183
- save_path="./output"
174
+ save_path="./output",
175
+ mode="FT"
184
176
  )
185
177
 
186
178
  # Saving the final result
@@ -188,88 +180,3 @@ model.save_checkpoint(
188
180
  path="./model-output"
189
181
  )
190
182
  ```
191
-
192
- ### RAG Pipeline
193
-
194
- A prototype of a RAG pipeline is also available. You can instantiate and use it as follows:
195
-
196
- ```python
197
- from llmflowstack import VectorDatabase
198
-
199
- vector_db = VectorDatabase(
200
- checkpoint="jina-embeddings-v4",
201
- chunk_size=1000,
202
- chunk_overlap=200
203
- )
204
-
205
- # Create or load an existing collection
206
- vector_db.get_collection(
207
- collection_name="memory_rag",
208
- persist_directory="./memory"
209
- )
210
-
211
- vector_db.get_collection(
212
- collection_name="files_rag",
213
- persist_directory="./files"
214
- )
215
-
216
- # You may also omit the persist directory; in this case, the RAG data will be stored in memory
217
- vector_db.get_collection(
218
- collection_name="files_rag"
219
- )
220
-
221
- # To create a new document in a collection
222
- vector_db.create(
223
- collection_name="memory_rag",
224
- information="User loves Pizza!", # Main information to be indexed in the vector database
225
- other_info={"category": "food"},
226
- can_split=False, # Indicates whether the information can be split into chunks
227
- should_index=True # Defaults to True — defines whether the document should be indexed or only returned as a Document instance
228
- )
229
-
230
- # After adding documents, you can query the database
231
- query_result = vector_db.query(
232
- collection_name="memory_rag",
233
- query="pizza",
234
- filter={"category": "food"},
235
- k=3 # Number of chunks to retrieve
236
- )
237
-
238
- print(query_result)
239
- # > "User loves Pizza!"
240
- ```
241
-
242
- ### NLP Evaluation
243
-
244
- > **Disclaimer**
245
- > These evaluation functions are designed for batch processing. Models and encoders are loaded internally on each call, which may be inefficient for per-sample or streaming evaluation.
246
-
247
- ```python
248
- > from llmflowstack import text_evaluation
249
- > from llmflowstack.utils import (bert_score_evaluation, bleu_score_evaluation, cosine_similarity_evaluation, rouge_evaluation)
250
-
251
- # Predictions from some model
252
- > predictions = ["Chico is a dog, and he is orange!", "Fred is a cat, and he is white!"]
253
- # References text (ground truth)
254
- > references = ["Chico is a cat, and he is black!", "Fred is a dog, and he is white!"]
255
-
256
- # BERT Score Evaluation
257
- > bert_score_evaluation(predictions, references)
258
- {'bertscore_precision': 0.9773, 'bertscore_recall': 0.9773, 'bertscore_f1': 0.9773}
259
-
260
- # Bleu Score Evaluation
261
- > bleu_score_evaluation(predictions, references)
262
- {'bleu_score': 0.3656}
263
-
264
- # Cosine Similarity Evaluation
265
- > cosine_similarity_evaluation(predictions, references)
266
- {'cosine_similarity': 0.7443}
267
-
268
- # Rouge Score Evaluation
269
- > rouge_evaluation(predictions, references)
270
- {'rouge1': 0.8125, 'rouge2': 0.6429, 'rougeL': 0.8125}
271
-
272
- # All-in-one function
273
- > text_evaluation(predictions, references)
274
- {'bertscore_precision': 0.9773, 'bertscore_recall': 0.9773, 'bertscore_f1': 0.9773, 'bleu_score': 0.3656, 'cosine_similarity': 0.7443, 'rouge1': 0.8125, 'rouge2': 0.6429, 'rougeL': 0.8125}
275
- ```
@@ -0,0 +1,27 @@
1
+ from .decoders.gemma_3 import Gemma3
2
+ from .decoders.gpt_2 import Gpt2
3
+ from .decoders.gpt_oss import GptOss
4
+ from .decoders.llama_3 import Llama3
5
+ from .decoders.llama_4 import Llama4
6
+ from .decoders.medgemma import MedGemma
7
+ #from .decoders.qwen_3 import Qwen3
8
+ from .rag.VectorDatabase import VectorDatabase
9
+ from .schemas.params import GenerationParams, TrainParams
10
+ from .utils.evaluation_methods import text_evaluation
11
+
12
+ __all__ = [
13
+ "Gemma3",
14
+ "Gpt2",
15
+ "GptOss",
16
+ "Llama3",
17
+ "Llama4",
18
+ "MedGemma",
19
+ # "Qwen3",
20
+
21
+ "VectorDatabase",
22
+
23
+ "GenerationParams",
24
+ "TrainParams",
25
+
26
+ "text_evaluation"
27
+ ]