clean-web-scraper 4.1.3 → 4.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
@@ -0,0 +1,35 @@
+!rm -r /content/.ipynb_checkpoints
+!rm -r /content/data
+!rm -r /content/.config
+!rm -r /content/sample_data/
+!rm -r /content/lora_model/
+!rm -r /content/llama.cpp/
+!rm -r /content/outputs/
+!rm -r /content/model/
+!rm -r /content/huggingface_tokenizers_cache/
+
+%cd /content/
+%rm -rf LLaMA-Factory
+!git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
+%cd LLaMA-Factory
+%ls
+!pip install -e .[torch,bitsandbytes]
+
+# Use this to resolve package conflicts.
+# pip install --no-deps -e .
+
+# dataset_info.json
+# "dataset_name": {
+#   "file_name": "data.json",
+#   "columns": {
+#     "prompt": "text"
+#   }
+# }
+# [
+#   {"text": "document"},
+#   {"text": "document"}
+# ]
+
+# llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
+# llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
+# llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
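For reference: the commented fragment above sketches how a custom plain-text corpus is registered with LLaMA-Factory. Assembled into valid JSON, and keeping the placeholder names `dataset_name` and `data.json` exactly as the comments use them (purely illustrative, not part of the package diff), the `dataset_info.json` entry would look like:

```json
{
  "dataset_name": {
    "file_name": "data.json",
    "columns": {
      "prompt": "text"
    }
  }
}
```

and the referenced `data.json` would be a JSON array with one `"text"` field per scraped document:

```json
[
  { "text": "document" },
  { "text": "document" }
]
```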
@@ -0,0 +1,192 @@
+# !rm -r /content/.ipynb_checkpoints
+# !rm -r /content/data
+# !rm -r /content/.config
+# !rm -r /content/sample_data/
+# !rm -r /content/lora_model/
+# !rm -r /content/llama.cpp/
+# !rm -r /content/outputs/
+# !rm -r /content/model/
+# !rm -r /content/huggingface_tokenizers_cache/
+
+
+# Commented out IPython magic to ensure Python compatibility.
+# %%capture
+
+
+# !pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
+
+
+# Disconnect and delete the runtime
+# !pip uninstall unsloth -y
+# !pip install --force-reinstall --no-cache-dir --upgrade "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
+
+
+# Disconnect and delete the runtime
+# !pip uninstall unsloth -y
+# !pip install unsloth
+
+
+%%capture
+import sys; modules = list(sys.modules.keys())
+for x in modules: sys.modules.pop(x) if "PIL" in x or "google" in x else None
+
+!pip install unsloth vllm
+!pip install --upgrade pillow
+!pip install git+https://github.com/huggingface/trl.git@e95f9fb74a3c3647b86f251b7e230ec51c64b72b
+
+
+from google.colab import drive
+drive.mount('/content/drive')
+
+
+from unsloth import FastLanguageModel
+import torch
+max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally! 2048 is also default in ollama
+dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
+load_in_4bit = True # Use 4bit quantization to reduce memory usage (also less accuracy). Can be False.
+
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name = "unsloth/Llama-3.2-3B-Instruct-bnb-4bit",
+    max_seq_length = max_seq_length,
+    dtype = dtype,
+    load_in_4bit = load_in_4bit,
+    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
+)
+
+model = FastLanguageModel.get_peft_model(
+    model,
+    r = 128, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+    # Higher: Better accuracy on hard tasks but increases memory and risk of overfitting.
+    # Lower: Faster, memory-efficient but may reduce accuracy.
+
+    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
+                      "gate_proj", "up_proj", "down_proj", "lm_head", "embed_tokens"],
+    lora_alpha = 64, # 32, 16
+    # Higher: Learns more but may overfit.
+    # Lower: Slower to learn, more generalizable
+
+    lora_dropout = 0, # Supports any, but = 0 is optimized
+    bias = "none", # Supports any, but = "none" is optimized
+    # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
+    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
+    random_state = 3407,
+    use_rslora = True, # We support rank stabilized LoRA
+    loftq_config = None, # And LoftQ
+)
+
+from datasets import load_dataset
+dataset = load_dataset(
+    "json",
+    data_files = "/content/drive/MyDrive/train.jsonl",
+    split = "train",
+)
+print(dataset.column_names)
+print(dataset[0])
+
+EOS_TOKEN = tokenizer.eos_token
+def formatting_prompts_func(examples):
+    return { "text" : [example + EOS_TOKEN for example in examples["text"]] }
+dataset = dataset.map(formatting_prompts_func, batched = True,)
+
+print(dataset.column_names)
+print(dataset[0])
+
+from trl import SFTTrainer
+from transformers import TrainingArguments
+from unsloth import is_bfloat16_supported
+from unsloth import UnslothTrainer, UnslothTrainingArguments
+
+trainer = UnslothTrainer(
+    model = model,
+    tokenizer = tokenizer,
+    train_dataset = dataset,
+    dataset_text_field = "text",
+    max_seq_length = max_seq_length,
+    dataset_num_proc = 8, # 2
+
+    args = UnslothTrainingArguments(
+        per_device_train_batch_size = 2,
+        gradient_accumulation_steps = 8, # 4
+
+        warmup_ratio = 0.1,
+        num_train_epochs = 3, # 1, 2, 3, 4
+        # max_steps = 60,
+
+        learning_rate = 5e-5,
+        embedding_learning_rate = 5e-6,
+
+        fp16 = not is_bfloat16_supported(),
+        bf16 = is_bfloat16_supported(),
+        logging_steps = 1,
+        optim = "adamw_8bit",
+        weight_decay = 0.00,
+        lr_scheduler_type = "cosine",
+        seed = 3407,
+        output_dir = "outputs",
+        report_to = "none", # Use this for WandB etc
+    ),
+)
+
+trainer_stats = trainer.train()
+
+"""
+### Saving, loading finetuned models
+To save the final model as LoRA adapters, either use Huggingface's `push_to_hub` for an online save or `save_pretrained` for a local save.
+
+**[NOTE]** This ONLY saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!
+"""
+
+model.save_pretrained("lora_model") # Local saving
+tokenizer.save_pretrained("lora_model")
+# model.push_to_hub("your_name/lora_model", token = "...") # Online saving
+# tokenizer.push_to_hub("your_name/lora_model", token = "...") # Online saving
+
+"""
+### Ollama Support
+
+[Unsloth](https://github.com/unslothai/unsloth) now allows you to automatically finetune and create a [Modelfile](https://github.com/ollama/ollama/blob/main/docs/modelfile.md), and export to [Ollama](https://ollama.com/)! This makes finetuning much easier and provides a seamless workflow from `Unsloth` to `Ollama`!
+
+Let's first install `Ollama`!
+"""
+
+# Save to 8bit Q8_0
+if False: model.save_pretrained_gguf("model", tokenizer,)
+# Remember to go to https://huggingface.co/settings/tokens for a token!
+# And change hf to your username!
+if False: model.push_to_hub_gguf("mlibre/model", tokenizer, token = "token")
+
+# Save to 16bit GGUF
+if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "f16")
+if False: model.push_to_hub_gguf("mlibre/model", tokenizer, quantization_method = "f16", token = "token")
+
+# Save to q4_k_m GGUF
+if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")
+if False: model.push_to_hub_gguf("mlibre/model", tokenizer, quantization_method = "q4_k_m", token = "token")
+
+# Save to multiple GGUF options - much faster if you want multiple!
+if True:
+    model.push_to_hub_gguf(
+        "mlibre/model", # Change mlibre to your username!
+        tokenizer,
+        quantization_method = ["q4_k_m", "q8_0", "q5_k_m",],
+        token = "token", # Get a token at https://huggingface.co/settings/tokens
+    )
+
+"""We use `subprocess` to start `Ollama` up in a non blocking fashion! In your own desktop, you can simply open up a new `terminal` and type `ollama serve`, but in Colab, we have to use this hack!"""
+
+print(tokenizer._ollama_modelfile)
+
+"""We now will create an `Ollama` model called `unsloth_model` using the `Modelfile` which we auto generated!"""
+
+!curl -fsSL https://ollama.com/install.sh | sh
+!ollama create unsloth_model -f ./model/Modelfile
+
+# In colab terminal type: ollama run unsloth_model
+# in local ollama:
+!curl http://localhost:11434/api/chat -d '{ \
+  "model": "unsloth_model", \
+  "messages": [ \
+    {"role": "user", \
+     "content": "Their passenger class is 3.\nTheir age is 22.0.\nThey paid $107.25 for the trip."} \
+  ] \
+}'