clean-web-scraper 4.3.4 → 4.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,7 @@ jobs:
11
11
  - uses: actions/checkout@v2
12
12
  - uses: actions/setup-node@v2
13
13
  with:
14
- node-version: 20.x
14
+ node-version: 22.x
15
15
  registry-url: https://registry.npmjs.org/
16
16
  # - run: npm install
17
17
  - run: npm publish --access public
package/README.md CHANGED
@@ -36,10 +36,7 @@ npm i clean-web-scraper
36
36
  git clone https://github.com/mlibre/Clean-Web-Scraper
37
37
  cd Clean-Web-Scraper
38
38
  sudo pacman -S extra/xorg-server-xvfb chromium
39
- npm install
40
-
41
- # Skip chromium download during npm installation
42
- # npm install --ignore-scripts
39
+ npm install --ignore-scripts
43
40
  ```
44
41
 
45
42
  ## 💻 Usage
@@ -120,7 +117,7 @@ node example-usage.js
120
117
 
121
118
  ## 📤 Output
122
119
 
123
- Your AI-ready content is saved in a clean, structured format:
120
+ The content is saved in a clean, structured format:
124
121
 
125
122
  - 📁 Base folder: `./folderPath/example.com/`
126
123
  - 📑 Files preserve original URL paths
@@ -130,36 +127,36 @@ Your AI-ready content is saved in a clean, structured format:
130
127
  ```bash
131
128
  example.com/
132
129
  ├── website/
133
- │ ├── page1.txt # Clean text content
134
- │ ├── page1.json # Full metadata
135
- │ ├── page1.html # Original HTML content
130
+ │ ├── page1.txt # Clean text content
131
+ │ ├── page1.json # Full metadata
132
+ │ ├── page1.html # Original HTML content
136
133
  │ └── blog/
137
134
  │ ├── post1.txt
138
135
  │ └── post1.json
139
136
  │ └── post1.html
140
- ├── texts/ # Numbered text files
137
+ ├── texts/ # Numbered text files
141
138
  │ ├── 1.txt
142
139
  │ └── 2.txt
143
- ├── texts_with_metadata/ # When includeMetadata is true
140
+ ├── texts_with_metadata/ # When includeMetadata is true
144
141
  │ ├── 1.txt
145
142
  │ └── 2.txt
146
- ├── train.jsonl # Combined content
147
- ├── train_with_metadata.jsonl # When includeMetadata is true
148
- ├── train.csv # Clean text in CSV format
149
- └── train_with_metadata.csv # When includeMetadata is true
143
+ ├── train.jsonl # Combined content
144
+ ├── train_with_metadata.jsonl # When includeMetadata is true
145
+ ├── train.csv # Clean text in CSV format
146
+ └── train_with_metadata.csv # When includeMetadata is true
150
147
 
151
148
  combined/
152
- ├── texts/ # Combined numbered text files
149
+ ├── texts/ # Combined numbered text files
153
150
  │ ├── 1.txt
154
151
  │ ├── 2.txt
155
152
  │ └── n.txt
156
- ├── texts_with_metadata/ # Combined metadata text files
153
+ ├── texts_with_metadata/ # Combined metadata text files
157
154
  │ ├── 1.txt
158
155
  │ ├── 2.txt
159
156
  │ └── n.txt
160
- ├── combined.jsonl # Combined JSONL content
157
+ ├── combined.jsonl # Combined JSONL content
161
158
  ├── combined_with_metadata.jsonl
162
- ├── combined.csv # Combined CSV content
159
+ ├── combined.csv # Combined CSV content
163
160
  └── combined_with_metadata.csv
164
161
  ```
165
162
 
@@ -200,7 +197,7 @@ The actual article content starts here. This is the clean, processed text of the
200
197
  {"text": "Another article", "metadata": {"articleTitle": "Second Page", "author": "Jane Smith"}}
201
198
  ```
202
199
 
203
- ### 🗃️ JSON Files In Website Output (*.json)
200
+ ### 🗃️ JSON Files In Website Directory (*.json)
204
201
 
205
202
  ```json
206
203
  {
package/example-usage.js CHANGED
@@ -49,7 +49,7 @@ async function khameneiIrFreePalestineTag ( enable )
49
49
  baseURL: "https://english.khamenei.ir/news",
50
50
  startURL: "https://english.khamenei.ir/page/search.xhtml?topicid=0&period=0&q=FreePalestine&pageSize=100#",
51
51
  maxDepth: 1,
52
- maxArticles: 200,
52
+ maxArticles: 300,
53
53
  exactExcludeList: [
54
54
  "https://english.khamenei.ir/page/search.xhtml?topicid=0&period=0&q=FreePalestine&pageSize=100#",
55
55
  "https://english.khamenei.ir/page/search.xhtml?topicid=0&period=0&q=FreePalestine&pageSize=100"
@@ -72,7 +72,7 @@ async function khameneiIrPalestineSpecialPage ( enable )
72
72
  baseURL: "https://english.khamenei.ir/news",
73
73
  startURL: "https://english.khamenei.ir/palestine-special-page",
74
74
  maxDepth: 1,
75
- maxArticles: 200,
75
+ maxArticles: 300,
76
76
  exactExcludeList: [
77
77
  "https://english.khamenei.ir/palestine-special-page/"
78
78
  ],
@@ -102,7 +102,7 @@ async function decolonizepalestine ( enable )
102
102
  "https://decolonizepalestine.com/rainbow-washing",
103
103
  "https://decolonizepalestine.com/"
104
104
  ],
105
- maxArticles: 400,
105
+ maxArticles: 500,
106
106
  scrapResultPath: "./dataset/decolonizepalestine/website",
107
107
  jsonlOutputPath: "./dataset/decolonizepalestine/train.jsonl",
108
108
  textOutputPath: "./dataset/decolonizepalestine/texts",
@@ -149,18 +149,18 @@ async function electronicintifada ( enable )
149
149
  csvOutputPath: "./dataset/electronicintifada/train.csv",
150
150
  includeMetadata: true,
151
151
  metadataFields: ["author", "articleTitle", "pageTitle", "description", "dataScrapedDate", "url"],
152
- maxArticles: 2000,
152
+ maxArticles: 3000,
153
153
  maxDepth: 16,
154
154
  batchSize: 40,
155
155
  axiosHeaders: headers,
156
156
  axiosMaxRetries: 2,
157
157
  axiosRetryDelay: 8000,
158
- axiosProxy: {
159
- host: "localhost",
160
- port: 2080,
161
- protocol: "http"
162
- },
163
- useProxyAsFallback: true,
158
+ // axiosProxy: {
159
+ // host: "localhost",
160
+ // port: 10808,
161
+ // protocol: "socks5"
162
+ // },
163
+ // useProxyAsFallback: true,
164
164
  };
165
165
  return await runScraper( config, enable );
166
166
  }
@@ -228,18 +228,18 @@ async function mondoweiss ( enable )
228
228
  csvOutputPath: "./dataset/mondoweiss/train.csv",
229
229
  includeMetadata: true,
230
230
  metadataFields: ["author", "articleTitle", "pageTitle", "description", "dataScrapedDate", "url"],
231
- maxArticles: 2500,
231
+ maxArticles: 3000,
232
232
  maxDepth: 15,
233
233
  batchSize: 20,
234
234
  axiosHeaders: headers,
235
235
  axiosMaxRetries: 2,
236
236
  axiosRetryDelay: 10000,
237
- axiosProxy: {
238
- host: "localhost",
239
- port: 2080,
240
- protocol: "http"
241
- },
242
- useProxyAsFallback: true,
237
+ // axiosProxy: {
238
+ // host: "localhost",
239
+ // port: 10808,
240
+ // protocol: "socks5"
241
+ // },
242
+ // useProxyAsFallback: true,
243
243
  };
244
244
  return await runScraper( config, enable );
245
245
  }
@@ -263,13 +263,15 @@ async function bdsmovement ( enable )
263
263
  "https://bdsmovement.net/news-type",
264
264
  "https://bdsmovement.net/cdn-cgi",
265
265
  "https://bdsmovement.net/es/",
266
- "https://bdsmovement.net/ar/"
266
+ "https://bdsmovement.net/ar/",
267
+ "https://bdsmovement.net/resource-type/",
267
268
  ],
268
269
  exactExcludeList: [
269
270
  "https://bdsmovement.net/",
270
271
  "https://bdsmovement.net/shutdownnation",
271
272
  "https://bdsmovement.net/campaigns",
272
273
  "https://bdsmovement.net/resources",
274
+ "https://bdsmovement.net/news",
273
275
  /^https:\/\/bdsmovement\.net\/resources\?page=\d+$/,
274
276
  /^https:\/\/bdsmovement\.net\/resources\?campaign=\d+$/,
275
277
  /^https:\/\/bdsmovement\.net\/resources\?type=\d+$/,
@@ -283,18 +285,18 @@ async function bdsmovement ( enable )
283
285
  csvOutputPath: "./dataset/bdsmovement/train.csv",
284
286
  includeMetadata: true,
285
287
  metadataFields: ["author", "articleTitle", "pageTitle", "description", "dataScrapedDate", "url"],
286
- maxArticles: 2000,
288
+ maxArticles: 3000,
287
289
  maxDepth: 16,
288
- batchSize: 40,
290
+ batchSize: 100,
289
291
  axiosHeaders: headers,
290
292
  axiosMaxRetries: 2,
291
293
  axiosRetryDelay: 8000,
292
- axiosProxy: {
293
- host: "localhost",
294
- port: 2080,
295
- protocol: "http"
296
- },
297
- useProxyAsFallback: true
294
+ // axiosProxy: {
295
+ // host: "localhost",
296
+ // port: 10808,
297
+ // protocol: "socks5"
298
+ // },
299
+ // useProxyAsFallback: true
298
300
  };
299
301
  return await runScraper( config, enable );
300
302
  }
@@ -332,8 +334,8 @@ async function palestineremembered ( enable )
332
334
  batchSize: 10,
333
335
  axiosProxy: {
334
336
  host: "localhost",
335
- port: 2080,
336
- protocol: "http"
337
+ port: 10808,
338
+ protocol: "socks5"
337
339
  }
338
340
  };
339
341
  return await runScraper( config, enable );
package/main.js CHANGED
@@ -555,7 +555,6 @@ class WebScraper
555
555
  proxy: this.axiosProxy
556
556
  };
557
557
  }
558
-
559
558
  return await axios.get( url, options );
560
559
  }
561
560
  catch ( error )
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clean-web-scraper",
3
- "version": "4.3.4",
3
+ "version": "4.4.0",
4
4
  "main": "main.js",
5
5
  "scripts": {
6
6
  "start": "node main.js",
@@ -24,8 +24,8 @@
24
24
  "description": "",
25
25
  "dependencies": {
26
26
  "@mozilla/readability": "^0.6.0",
27
- "axios": "^1.8.4",
28
- "eslint": "^9.23.0",
27
+ "axios": "^1.12.2",
28
+ "eslint": "^9.38.0",
29
29
  "jsdom": "^26.0.0",
30
30
  "puppeteer": "^24.1.1",
31
31
  "puppeteer-real-browser": "^1.3.22"
@@ -1,333 +0,0 @@
1
- # Fine-Tuning LLMs on Raw Text
2
-
3
- Fine-tuning large language models (LLMs) on raw text allows them to specialize in new knowledge domains.
4
- This guide walks you through fine-tuning an LLM using JSONL-formatted data, covering data preparation, model training, and deployment.
5
- We use the [Unsloth](https://docs.unsloth.ai/) library for efficient fine-tuning and demonstrate on a small [SmolLM2-135M](https://huggingface.co/HuggingFaceTB/SmolLM2-135M).
6
- The final model can be deployed with [Ollama](https://github.com/ollama/ollama) for local inference.
7
-
8
- 📌 **Full Code & Implementation Details**: [GitHub Repository](https://github.com/mlibre/Clean-Web-Scraper/tree/main/fine-tuning)
9
-
10
- ---
11
-
12
- ## 🛠️ Overview of the Process
13
-
14
- Fine-tuning an LLM involves several steps:
15
-
16
- ### 1️⃣ Data Collection & Preparation
17
-
18
- First, prepare your dataset in a structured format. Common formats for fine-tuning include **JSONL, CSV, and TXT**.
19
- In this guide, we use **JSONL** because it's easy to work with and widely used.
20
-
21
- 📄 **Sample JSONL file (`train.jsonl`)**:
22
-
23
- ```json
24
- {"text": "Despite facing constant oppression, Palestinians have continued to resist Israeli occupation.", "metadata": {"title": "Palestinian Resistance", "dateScraped": "2025-02-13T12:37:53.776Z"}}
25
- {"text": "Palestinians have shown remarkable resilience.", "metadata": {"title": "Youth Resistance", "dateScraped": "2025-02-13T12:37:53.776Z"}}
26
- ```
27
-
28
- To scrape data efficiently, we use the [Clean-Web-Scraper](https://github.com/mlibre/Clean-Web-Scraper) library.
29
- This **Node.js** library extracts articles from websites, cleans them, and saves them in `JSONL` format.
30
- The dataset is available on [Hugging Face](https://huggingface.co/datasets/mlibre/palestine).
31
-
32
- ---
33
-
34
- ### 2️⃣ Fine-Tuning Library – **Why Unsloth?** 🦥
35
-
36
- At the time of writing, [Unsloth](https://docs.unsloth.ai/) is one of the **fastest and most memory-efficient** fine-tuning libraries available.
37
- It supports **fine-tuning and Continued Pretraining (CPT)**, allowing LLMs to learn **new knowledge domains** efficiently.
38
-
39
- ---
40
-
41
- ### 3️⃣ Setting Up the Training Environment 🖥️
42
-
43
- We use **Google Colab** for training, as it provides free GPU access.
44
-
45
- ---
46
-
47
- ### 4️⃣ The Model 🏗️
48
-
49
- We use **SmolLM2-135M**, a very small 135M-parameter model, for fine-tuning. To optimize memory, we load the model in **4-bit quantization** using `Unsloth`.
50
-
51
- ---
52
-
53
- ### 5️⃣ Deployment with Ollama
54
-
55
- After fine-tuning, we save the new model and deploy it using [Ollama](https://github.com/ollama/ollama).
56
-
57
- ---
58
-
59
- ## 💻 The Code
60
-
61
- The provided Colab code includes all the steps to fine-tune the model.
62
-
63
- ### Installing Dependencies
64
-
65
- ```python
66
- !pip install unsloth vllm
67
- !pip install --upgrade pillow
68
-
69
- # Install trl if needed
70
- # !pip install git+https://github.com/huggingface/trl.git@e95f9fb74a3c3647b86f251b7e230ec51c64b72b
71
- ```
72
-
73
- ### Loading and Preparing the Model
74
-
75
- Using [Unsloth’s documentation](https://docs.unsloth.ai), we load a pretrained model (a 4-bit quantized version of SmolLM2-135M) and set it up for fine-tuning with LoRA.
76
- This method allows for memory efficiency while updating the model's parameters.
77
-
78
- ```python
79
- from unsloth import FastLanguageModel
80
- import torch
81
- max_seq_length = 2048 # Choose any! Unsloth auto support RoPE Scaling internally!
82
- dtype = None # None for auto detection
83
- load_in_4bit = True # Use 4bit quantization to reduce memory usage (also less accuracy). Can be False.
84
-
85
- model, tokenizer = FastLanguageModel.from_pretrained(
86
- model_name = "unsloth/SmolLM2-135M-bnb-4bit",
87
- max_seq_length = max_seq_length,
88
- dtype = dtype,
89
- load_in_4bit = load_in_4bit,
90
- # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
91
- )
92
-
93
- model = FastLanguageModel.get_peft_model(
94
- model,
95
- r = 128, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
96
- # Higher: Better accuracy on hard tasks but increases memory and risk of overfitting.
97
- # Lower: Faster, memory-efficient but may reduce accuracy.
98
-
99
- target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
100
- "gate_proj", "up_proj", "down_proj", "lm_head", "embed_tokens"],
101
- lora_alpha = 64, # 32, 16
102
- # Higher: Learns more but may overfit.
103
- # Lower: Slower to learn, more generalizable
104
-
105
- lora_dropout = 0, # Supports any, but = 0 is optimized
106
- bias = "none", # Supports any, but = "none" is optimized
107
- use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
108
- random_state = 3407,
109
- use_rslora = True, # unsloth support rank stabilized LoRA
110
- loftq_config = None, # And LoftQ
111
- )
112
- ```
113
-
114
- ---
115
-
116
- ### Loading the Dataset 📂
117
-
118
- Upload the JSONL dataset to Google Drive and load it into Colab:
119
-
120
- ```python
121
- # Mount Google Drive to access training data
122
- from google.colab import drive
123
- drive.mount('/content/drive')
124
-
125
- # Load the dataset
126
- from datasets import load_dataset
127
- dataset = load_dataset(
128
- "json",
129
- data_files = "/content/drive/MyDrive/train.jsonl",
130
- split = "train",
131
- )
132
-
133
- EOS_TOKEN = tokenizer.eos_token
134
- def formatting_prompts_func(examples):
135
- return { "text" : [example + EOS_TOKEN for example in examples["text"]] }
136
- dataset = dataset.map(formatting_prompts_func, batched = True,)
137
-
138
- print(dataset.column_names)
139
- print(dataset[0])
140
- ```
141
-
142
- ---
143
-
144
- ### Training the Model 🚴‍♂️
145
-
146
- Fine-tuning is managed with `UnslothTrainer`, allowing optimization of batch size, learning rate, and epochs.
147
-
148
- ```python
149
- from trl import SFTTrainer
150
- from transformers import TrainingArguments
151
- from unsloth import is_bfloat16_supported
152
- from unsloth import UnslothTrainer, UnslothTrainingArguments
153
-
154
- trainer = UnslothTrainer(
155
- model = model,
156
- tokenizer = tokenizer,
157
- train_dataset = dataset,
158
- dataset_text_field = "text",
159
- max_seq_length = max_seq_length,
160
- dataset_num_proc = 8, # 2
161
-
162
- args = UnslothTrainingArguments(
163
- per_device_train_batch_size = 2,
164
- gradient_accumulation_steps = 8, # 4
165
-
166
- warmup_ratio = 0.1,
167
- num_train_epochs = 3, # 1, 2, 3, 4
168
- # max_steps = 60,
169
-
170
- learning_rate = 5e-5,
171
- embedding_learning_rate = 5e-6,
172
-
173
- fp16 = not is_bfloat16_supported(),
174
- bf16 = is_bfloat16_supported(),
175
- logging_steps = 1,
176
- optim = "adamw_8bit",
177
- weight_decay = 0.00,
178
- lr_scheduler_type = "cosine",
179
- seed = 3407,
180
- output_dir = "outputs",
181
- report_to = "none", # Use this for WandB etc
182
- ),
183
- )
184
-
185
- trainer_stats = trainer.train()
186
- ```
187
-
188
- ---
189
-
190
- ### Saving & Exporting the Model 💾
191
-
192
- Once training is complete, we save the fine-tuned model.
193
- For **quantized GGUF format**, use:
194
-
195
- ```python
196
- # saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!
197
- model.save_pretrained("lora_model") # Local saving
198
- tokenizer.save_pretrained("lora_model")
199
-
200
- # Save to 8bit Q8_0
201
- if False: model.save_pretrained_gguf("model", tokenizer,)
202
- # Remember to go to https://huggingface.co/settings/tokens for a token!
203
- # And change your username from mlibre to your username!!
204
- if False: model.push_to_hub_gguf("mlibre/model", tokenizer, token = "token")
205
-
206
- # Save to 16bit GGUF
207
- if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "f16")
208
- if False: model.push_to_hub_gguf("mlibre/model", tokenizer, quantization_method = "f16", token = "token")
209
-
210
- # Save to q4_k_m GGUF
211
- if True: model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")
212
- if False: model.push_to_hub_gguf("mlibre/model", tokenizer, quantization_method = "q4_k_m", token = "token")
213
-
214
- # Save to multiple GGUF options - much faster if you want multiple!
215
- if False:
216
- model.push_to_hub_gguf(
217
- "mlibre/model", # Change mlibre to your username!
218
- tokenizer,
219
- quantization_method = ["q4_k_m", "q8_0", "q5_k_m",],
220
- token = "token", # Get a token at https://huggingface.co/settings/tokens
221
- )
222
- ```
223
-
224
- Now, go to the model folder and download the new model (**unsloth.Q4_K_M.gguf**) along with the Ollama **Modelfile**.
225
-
226
- ---
227
-
228
- ## 🚀 Deploying the Model with Ollama
229
-
230
- ### 📥 Step 1: Install Ollama
231
-
232
- Ollama is a lightweight, open-source LLM server that allows you to run and deploy models locally.
233
-
234
- ```bash
235
- curl -fsSL https://ollama.com/install.sh | sh
236
- ```
237
-
238
- ### 📝 Step 2: Create the Modelfile
239
-
240
- To run GGUF models on Ollama, we first must create a **Modelfile** that tells Ollama how to run the model.
241
- If the Modelfile was not available for download in Colab for any reason, you can create it manually.
242
- Navigate to the model folder and **create a new file named `Modelfile`**:
243
-
244
- ```bash
245
- nano Modelfile
246
- ```
247
-
248
- Inside the file, add the following:
249
-
250
- ```text
251
- TEMPLATE """{{- if .Messages }}
252
- {{- if .System }}<|im_start|>system
253
- {{ .System }}<|im_end|>
254
- {{ end }}
255
- {{- range $i, $_ := .Messages }}
256
- {{- $last := eq (len (slice $.Messages $i)) 1 -}}
257
- {{- if eq .Role "user" }}<|im_start|>user
258
- {{ .Content }}<|im_end|>
259
- {{ else if eq .Role "assistant" }}<|im_start|>assistant
260
- {{ .Content }}{{ if not $last }}<|im_end|>
261
- {{ end }}
262
- {{- end }}
263
- {{- if and (ne .Role "assistant") $last }}<|im_start|>assistant
264
- {{ end }}
265
- {{- end }}
266
- {{- else }}
267
- {{- if .System }}<|im_start|>system
268
- {{ .System }}<|im_end|>
269
- {{ end }}{{ if .Prompt }}<|im_start|>user
270
- {{ .Prompt }}<|im_end|>
271
- {{ end }}<|im_start|>assistant
272
- {{ end }}{{ .Response }}{{ if .Response }}<|im_end|>{{ end }}"""
273
- SYSTEM You are a helpful AI assistant named SmolLM, trained by Hugging Face
274
- PARAMETER stop <|im_start|>
275
- PARAMETER stop <|im_end|>
276
- ```
277
-
278
- Save and close the file.
279
- If you're wondering where I found the `Modelfile` template, it’s from the original SmolLM2.
280
- You can show it with this command:
281
-
282
- ```bash
283
- ollama show --modelfile smollm2:135m
284
- ```
285
-
286
- ### 🏃 Step 3: Create & Run the Model
287
-
288
- ```bash
289
- ollama create Modelfile
290
- ollama run Modelfile:latest
291
- ```
292
-
293
- ---
294
-
295
- ### 🎯 Testing the Model
296
-
297
- Let’s test both the **default SmolLM2 model** and our **fine-tuned version** to compare outputs.
298
-
299
- 📌 **Official SmolLM2 Model**:
300
-
301
- ```bash
302
- ollama run smollm2:135m "Palestine is"
303
- ```
304
-
305
- Example output:
306
-
307
- ```text
308
- What an intriguing combination of languages you're sharing with me. I'm ready to help refine your communication style and enhance the effectiveness of our
309
- exchange. How can we work together to make your writing more engaging?
310
-
311
- ```
312
-
313
- 📌 **Fine-Tuned Model**:
314
-
315
- ```bash
316
- ollama run Modelfile:latest "Palestine is"
317
- ```
318
-
319
- Example output:
320
-
321
- ```text
322
- The world has long held Palestine in low respectability as an unappreciated people’s land. It remains that way despite the genocide and occupation. That is
323
- why we have so many Palestinian children who are orphans with no homeland, yet still want to belong there. We need you to help them live in this orphanhood
324
- by becoming a source of life for our future generation.
325
- ```
326
-
327
- ## ✊ A Call for Justice: Standing in Solidarity with Palestine
328
-
329
- In these times of rapid technological advancement, we are reminded that progress is not measured solely by the innovations we create but also by the compassion and justice we uphold. As we fine-tune our language models to process vast quantities of raw text and specialize in new knowledge domains, we must also harness our skills to amplify the voices of the marginalized.
330
-
331
- The struggle of the Palestinian people, who continue to face relentless oppression and violence—including the reported genocide in Gaza—demands that we not remain silent. When silence becomes complicity, our work in technology can serve as a platform to challenge injustice and promote accountability. By integrating critical perspectives into our technical endeavors, we contribute to a broader dialogue that insists on human rights, transparency, and an end to systemic violence.
332
-
333
- Let this guide be a reminder: every line of code, every dataset we prepare, and every model we deploy carries the potential not only to advance our field but also to support those whose stories are often ignored. As we pursue excellence in machine learning, we stand in solidarity with Palestinians and all oppressed communities, affirming that technology must be a force for justice and liberation.
@@ -1,35 +0,0 @@
1
- !rm -r /content/.ipynb_checkpoints
2
- !rm -r /content/data
3
- !rm -r /content/.config
4
- !rm -r /content/sample_data/
5
- !rm -r /content/lora_model/
6
- !rm -r /content/llama.cpp/
7
- !rm -r /content/outputs/
8
- !rm -r /content/model/
9
- !rm -r /content/huggingface_tokenizers_cache/
10
-
11
- %cd /content/
12
- %rm -rf LLaMA-Factory
13
- !git clone --depth 1 https://github.com/hiyouga/LLaMA-Factory.git
14
- %cd LLaMA-Factory
15
- %ls
16
- !pip install -e .[torch,bitsandbytes]
17
-
18
- # Use this to resolve package conflicts.
19
- # pip install --no-deps -e .
20
-
21
- # dataset_info.json
22
- # "dataset_name": {
23
- # "file_name": "data.json",
24
- # "columns": {
25
- # "prompt": "text"
26
- # }
27
- # }
28
- # [
29
- # {"text": "document"},
30
- # {"text": "document"}
31
- # ]
32
-
33
- # llamafactory-cli train examples/train_lora/llama3_lora_sft.yaml
34
- # llamafactory-cli chat examples/inference/llama3_lora_sft.yaml
35
- # llamafactory-cli export examples/merge_lora/llama3_lora_sft.yaml
@@ -1,182 +0,0 @@
1
- # !rm -r /content/.ipynb_checkpoints
2
- # !rm -r /content/data
3
- # !rm -r /content/.config
4
- # !rm -r /content/sample_data/
5
- # !rm -r /content/lora_model/
6
- # !rm -r /content/llama.cpp/
7
- # !rm -r /content/outputs/
8
- # !rm -r /content/model/
9
- # !rm -r /content/huggingface_tokenizers_cache/
10
-
11
-
12
- # Commented out IPython magic to ensure Python compatibility.
13
- # %%capture
14
-
15
-
16
- # !pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
17
-
18
-
19
- # Disconnect and delete the runtime
20
- # !pip uninstall unsloth -y
21
- # !pip install --force-reinstall --no-cache-dir --upgrade "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
22
-
23
-
24
- # Disconnect and delete the runtime
25
- # !pip uninstall unsloth -y
26
- # !pip install unsloth
27
-
28
-
29
- %%capture
30
- import sys; modules = list(sys.modules.keys())
31
- for x in modules: sys.modules.pop(x) if "PIL" in x or "google" in x else None
32
-
33
- !pip install unsloth vllm
34
- !pip install --upgrade pillow
35
- !pip install git+https://github.com/huggingface/trl.git@e95f9fb74a3c3647b86f251b7e230ec51c64b72b
36
-
37
-
38
- from google.colab import drive
39
- drive.mount('/content/drive')
40
-
41
-
42
- from unsloth import FastLanguageModel
43
- import torch
44
- max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally! 2048 is also default in ollama
45
- dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
46
- load_in_4bit = True # Use 4bit quantization to reduce memory usage (also less accuracy). Can be False.
47
-
48
- model, tokenizer = FastLanguageModel.from_pretrained(
49
- model_name = "unsloth/SmolLM2-135M-bnb-4bit",
50
- max_seq_length = max_seq_length,
51
- dtype = dtype,
52
- load_in_4bit = load_in_4bit,
53
- # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
54
- )
55
-
56
- model = FastLanguageModel.get_peft_model(
57
- model,
58
- r = 128, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
59
- # Higher: Better accuracy on hard tasks but increases memory and risk of overfitting.
60
- # Lower: Faster, memory-efficient but may reduce accuracy.
61
-
62
- target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
63
- "gate_proj", "up_proj", "down_proj", "lm_head", "embed_tokens"],
64
- lora_alpha = 64, # 32, 16
65
- # Higher: Learns more but may overfit.
66
- # Lower: Slower to learn, more generalizable
67
-
68
- lora_dropout = 0, # Supports any, but = 0 is optimized
69
- bias = "none", # Supports any, but = "none" is optimized
70
- # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
71
- use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
72
- random_state = 3407,
73
- use_rslora = True, # We support rank stabilized LoRA
74
- loftq_config = None, # And LoftQ
75
- )
76
-
77
- from datasets import load_dataset
78
- dataset = load_dataset(
79
- "json",
80
- data_files = "/content/drive/MyDrive/train.jsonl",
81
- split = "train",
82
- )
83
- print(dataset.column_names)
84
- print(dataset[0])
85
-
86
- EOS_TOKEN = tokenizer.eos_token
87
- def formatting_prompts_func(examples):
88
- return { "text" : [example + EOS_TOKEN for example in examples["text"]] }
89
- dataset = dataset.map(formatting_prompts_func, batched = True,)
90
-
91
- print(dataset.column_names)
92
- print(dataset[0])
93
-
94
- from trl import SFTTrainer
95
- from transformers import TrainingArguments
96
- from unsloth import is_bfloat16_supported
97
- from unsloth import UnslothTrainer, UnslothTrainingArguments
98
-
99
- trainer = UnslothTrainer(
100
- model = model,
101
- tokenizer = tokenizer,
102
- train_dataset = dataset,
103
- dataset_text_field = "text",
104
- max_seq_length = max_seq_length,
105
- dataset_num_proc = 8, # 2
106
-
107
- args = UnslothTrainingArguments(
108
- per_device_train_batch_size = 2,
109
- gradient_accumulation_steps = 8, # 4
110
-
111
- warmup_ratio = 0.1,
112
- num_train_epochs = 3, # 1, 2, 3, 4
113
- # max_steps = 60,
114
-
115
- learning_rate = 5e-5,
116
- embedding_learning_rate = 5e-6,
117
-
118
- fp16 = not is_bfloat16_supported(),
119
- bf16 = is_bfloat16_supported(),
120
- logging_steps = 1,
121
- optim = "adamw_8bit",
122
- weight_decay = 0.00,
123
- lr_scheduler_type = "cosine",
124
- seed = 3407,
125
- output_dir = "outputs",
126
- report_to = "none", # Use this for WandB etc
127
- ),
128
- )
129
-
130
- trainer_stats = trainer.train()
131
-
132
- # saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!
133
- model.save_pretrained("lora_model") # Local saving
134
- tokenizer.save_pretrained("lora_model")
135
- # model.push_to_hub("your_name/lora_model", token = "...") # Online saving
136
- # tokenizer.push_to_hub("your_name/lora_model", token = "...") # Online saving
137
-
138
- # Save to 8bit Q8_0
139
- if False: model.save_pretrained_gguf("model", tokenizer,)
140
- # Remember to go to https://huggingface.co/settings/tokens for a token!
141
- # And change your username!
142
- if False: model.push_to_hub_gguf("mlibre/model", tokenizer, token = "token")
143
-
144
- # Save to 16bit GGUF
145
- if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "f16")
146
- if False: model.push_to_hub_gguf("mlibre/model", tokenizer, quantization_method = "f16", token = "token")
147
-
148
- # Save to q4_k_m GGUF
149
- if True: model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")
150
- if False: model.push_to_hub_gguf("mlibre/model", tokenizer, quantization_method = "q4_k_m", token = "token")
151
-
152
- # Save to multiple GGUF options - much faster if you want multiple!
153
- if False:
154
- model.push_to_hub_gguf(
155
- "mlibre/model", # Change mlibre to your username!
156
- tokenizer,
157
- quantization_method = ["q4_k_m", "q8_0", "q5_k_m",],
158
- token = "token", # Get a token at https://huggingface.co/settings/tokens
159
- )
160
-
161
- # print(tokenizer._ollama_modelfile)
162
-
163
-
164
- # now in your own system:
165
- curl -fsSL https://ollama.com/install.sh | sh
166
- let's first try the official smollm2
167
- ollama run smollm2:135m
168
- > palestine is the owner of the land not israel
169
-
170
-
171
- # download the model (/content/model/unsloth.Q4_K_M.gguf)
172
- ollama create unsloth_model -f ./model/Modelfile
173
-
174
- # In colab terminal type: ollama run unsloth_model
175
- # in local ollama:
176
- !curl http://localhost:11434/api/chat -d '{ \
177
- "model": "unsloth_model", \
178
- "messages": [ \
179
- {"role": "user", \
180
- "content": "The palestine"} \
181
- ] \
182
- }'
@@ -1,174 +0,0 @@
1
- # !rm -r /content/.ipynb_checkpoints
2
- # !rm -r /content/data
3
- # !rm -r /content/.config
4
- # !rm -r /content/sample_data/
5
- # !rm -r /content/lora_model/
6
- # !rm -r /content/llama.cpp/
7
- # !rm -r /content/outputs/
8
- # !rm -r /content/model/
9
- # !rm -r /content/huggingface_tokenizers_cache/
10
-
11
-
12
- # Commented out IPython magic to ensure Python compatibility.
13
- # %%capture
14
-
15
-
16
- # !pip uninstall unsloth -y && pip install --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git
17
-
18
-
19
- # Disconnect and delete the runtime
20
- # !pip uninstall unsloth -y
21
- # !pip install --force-reinstall --no-cache-dir --upgrade "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
22
-
23
-
24
- # Disconnect and delete the runtime
25
- # !pip uninstall unsloth -y
26
- # !pip install unsloth
27
-
28
-
29
- %%capture
30
- import sys; modules = list(sys.modules.keys())
31
- for x in modules: sys.modules.pop(x) if "PIL" in x or "google" in x else None
32
-
33
- !pip install unsloth vllm
34
- !pip install --upgrade pillow
35
- !pip install git+https://github.com/huggingface/trl.git@e95f9fb74a3c3647b86f251b7e230ec51c64b72b
36
-
37
-
38
- from google.colab import drive
39
- drive.mount('/content/drive')
40
-
41
-
42
- from unsloth import FastLanguageModel
43
- import torch
44
- max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally! 2048 is also default in ollama
45
- dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
46
- load_in_4bit = True # Use 4bit quantization to reduce memory usage (also less accuracy). Can be False.
47
-
48
- model, tokenizer = FastLanguageModel.from_pretrained(
49
- model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
50
- max_seq_length = max_seq_length,
51
- dtype = dtype,
52
- load_in_4bit = load_in_4bit,
53
- # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
54
- )
55
-
56
- model = FastLanguageModel.get_peft_model(
57
- model,
58
- r = 128, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
59
- # Higher: Better accuracy on hard tasks but increases memory and risk of overfitting.
60
- # Lower: Faster, memory-efficient but may reduce accuracy.
61
-
62
- target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
63
- "gate_proj", "up_proj", "down_proj", "lm_head", "embed_tokens"],
64
- lora_alpha = 64, # 32, 16
65
- # Higher: Learns more but may overfit.
66
- # Lower: Slower to learn, more generalizable
67
-
68
- lora_dropout = 0, # Supports any, but = 0 is optimized
69
- bias = "none", # Supports any, but = "none" is optimized
70
- # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
71
- use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
72
- random_state = 3407,
73
- use_rslora = True, # We support rank stabilized LoRA
74
- loftq_config = None, # And LoftQ
75
- )
76
-
77
- from datasets import load_dataset
78
- dataset = load_dataset(
79
- "json",
80
- data_files = "/content/drive/MyDrive/train.jsonl",
81
- split = "train",
82
- )
83
- print(dataset.column_names)
84
- print(dataset[0])
85
-
86
- EOS_TOKEN = tokenizer.eos_token
87
- def formatting_prompts_func(examples):
88
- return { "text" : [example + EOS_TOKEN for example in examples["text"]] }
89
- dataset = dataset.map(formatting_prompts_func, batched = True,)
90
-
91
- print(dataset.column_names)
92
- print(dataset[0])
93
-
94
- from trl import SFTTrainer
95
- from transformers import TrainingArguments
96
- from unsloth import is_bfloat16_supported
97
- from unsloth import UnslothTrainer, UnslothTrainingArguments
98
-
99
- trainer = UnslothTrainer(
100
- model = model,
101
- tokenizer = tokenizer,
102
- train_dataset = dataset,
103
- dataset_text_field = "text",
104
- max_seq_length = max_seq_length,
105
- dataset_num_proc = 8, # 2
106
-
107
- args = UnslothTrainingArguments(
108
- per_device_train_batch_size = 2,
109
- gradient_accumulation_steps = 8, # 4
110
-
111
- warmup_ratio = 0.1,
112
- num_train_epochs = 3, # 1, 2, 3, 4
113
- # max_steps = 60,
114
-
115
- learning_rate = 5e-5,
116
- embedding_learning_rate = 5e-6,
117
-
118
- fp16 = not is_bfloat16_supported(),
119
- bf16 = is_bfloat16_supported(),
120
- logging_steps = 1,
121
- optim = "adamw_8bit",
122
- weight_decay = 0.00,
123
- lr_scheduler_type = "cosine",
124
- seed = 3407,
125
- output_dir = "outputs",
126
- report_to = "none", # Use this for WandB etc
127
- ),
128
- )
129
-
130
- trainer_stats = trainer.train()
131
-
132
- # saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!
133
- model.save_pretrained("lora_model") # Local saving
134
- tokenizer.save_pretrained("lora_model")
135
- # model.push_to_hub("your_name/lora_model", token = "...") # Online saving
136
- # tokenizer.push_to_hub("your_name/lora_model", token = "...") # Online saving
137
-
138
- # Save to 8bit Q8_0
139
- if False: model.save_pretrained_gguf("model", tokenizer,)
140
- # Remember to go to https://huggingface.co/settings/tokens for a token!
141
- # And change your username!
142
- if False: model.push_to_hub_gguf("mlibre/model", tokenizer, token = "token")
143
-
144
- # Save to 16bit GGUF
145
- if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "f16")
146
- if False: model.push_to_hub_gguf("mlibre/model", tokenizer, quantization_method = "f16", token = "token")
147
-
148
- # Save to q4_k_m GGUF
149
- if True: model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")
150
- if False: model.push_to_hub_gguf("mlibre/model", tokenizer, quantization_method = "q4_k_m", token = "token")
151
-
152
- # Save to multiple GGUF options - much faster if you want multiple!
153
- if False:
154
- model.push_to_hub_gguf(
155
- "mlibre/model", # Change mlibre to your username!
156
- tokenizer,
157
- quantization_method = ["q4_k_m", "q8_0", "q5_k_m",],
158
- token = "token", # Get a token at https://huggingface.co/settings/tokens
159
- )
160
-
161
- print(tokenizer._ollama_modelfile)
162
-
163
- !curl -fsSL https://ollama.com/install.sh | sh
164
- !ollama create unsloth_model -f ./model/Modelfile
165
-
166
- # In colab terminal type: ollama run unsloth_model
167
- # in local ollama:
168
- !curl http://localhost:11434/api/chat -d '{ \
169
- "model": "unsloth_model", \
170
- "messages": [ \
171
- {"role": "user", \
172
- "content": "The palestine"} \
173
- ] \
174
- }'
@@ -1,178 +0,0 @@
1
-
2
- # Rocm and cuda and torch and vllm and unsloth and ...
3
-
4
- # Virtual Environment
5
- curl -LsSf https://astral.sh/uv/install.sh | sh
6
- uv venv myenv --python 3.12 --seed
7
- source myenv/bin/activate
8
-
9
- pip uninstall unsloth -y --break-system-packages
10
-
11
- # https://pytorch.org/get-started/locally/
12
- pip3 install -U torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2.4 --break-system-packages
13
- # aria2c -x 15 "https://download.pytorch.org/whl/rocm6.2.4/torch-2.6.0%2Brocm6.2.4-cp312-cp312-manylinux_2_28_x86_64.whl"
14
- pip3 install -U torch torchvision torchaudio torch-2.6.0+rocm6.2.4-cp312-cp312-manylinux_2_28_x86_64.whl --index-url https://download.pytorch.org/whl/rocm6.2.4 --break-system-packages
15
-
16
- # pillow
17
- pip install --upgrade pillow --break-system-packages
18
-
19
- # vllm
20
- pip install vllm --break-system-packages
21
- # pip install git+https://github.com/huggingface/trl.git
22
- # pip install ninja cmake wheel pybind11 --break-system-packages
23
- # git clone --recursive https://github.com/mlc-ai/xgrammar.git
24
- # cd xgrammar
25
- # mkdir build && cd build/
26
- # cmake ..
27
- # make -j8
28
- # cd ../python/
29
- # pip install -e . --break-system-packages
30
- # git clone https://github.com/vllm-project/vllm.git
31
- # cd vllm
32
- # pip install -r requirements-rocm.txt --break-system-packages
33
-
34
- # bitsandbytes
35
- # https://huggingface.co/docs/bitsandbytes/main/en/installation?platform=Linux#multi-backend
36
- pip install --force-reinstall 'https://github.com/bitsandbytes-foundation/bitsandbytes/releases/download/continuous-release_multi-backend-refactor/bitsandbytes-0.44.1.dev0-py3-none-manylinux_2_24_x86_64.whl'
37
-
38
- # unsloth
39
- wget -qO- https://raw.githubusercontent.com/unslothai/unsloth/main/unsloth/_auto_install.py | python -
40
- # pip install --force-reinstall --upgrade --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git --break-system-packages
41
- # pip install unsloth --break-system-packages
42
-
43
-
44
-
45
-
46
- from unsloth import FastLanguageModel
47
- import torch
48
- max_seq_length = 2048 # Choose any! We auto support RoPE Scaling internally! 2048 is also default in ollama
49
- dtype = None # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+
50
- load_in_4bit = True # Use 4bit quantization to reduce memory usage (also less accuracy). Can be False.
51
-
52
- model, tokenizer = FastLanguageModel.from_pretrained(
53
- model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit",
54
- max_seq_length = max_seq_length,
55
- dtype = dtype,
56
- load_in_4bit = load_in_4bit,
57
- # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
58
- )
59
-
60
- model = FastLanguageModel.get_peft_model(
61
- model,
62
- r = 128, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
63
- # Higher: Better accuracy on hard tasks but increases memory and risk of overfitting.
64
- # Lower: Faster, memory-efficient but may reduce accuracy.
65
-
66
- target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
67
- "gate_proj", "up_proj", "down_proj", "lm_head", "embed_tokens"],
68
- lora_alpha = 64, # 32, 16
69
- # Higher: Learns more but may overfit.
70
- # Lower: Slower to learn, more generalizable
71
-
72
- lora_dropout = 0, # Supports any, but = 0 is optimized
73
- bias = "none", # Supports any, but = "none" is optimized
74
- # [NEW] "unsloth" uses 30% less VRAM, fits 2x larger batch sizes!
75
- use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
76
- random_state = 3407,
77
- use_rslora = True, # We support rank stabilized LoRA
78
- loftq_config = None, # And LoftQ
79
- )
80
-
81
- from datasets import load_dataset
82
- dataset = load_dataset(
83
- "json",
84
- data_files = "/content/drive/MyDrive/train.jsonl",
85
- split = "train",
86
- )
87
- print(dataset.column_names)
88
- print(dataset[0])
89
-
90
- EOS_TOKEN = tokenizer.eos_token
91
- def formatting_prompts_func(examples):
92
- return { "text" : [example + EOS_TOKEN for example in examples["text"]] }
93
- dataset = dataset.map(formatting_prompts_func, batched = True,)
94
-
95
- print(dataset.column_names)
96
- print(dataset[0])
97
-
98
- from trl import SFTTrainer
99
- from transformers import TrainingArguments
100
- from unsloth import is_bfloat16_supported
101
- from unsloth import UnslothTrainer, UnslothTrainingArguments
102
-
103
- trainer = UnslothTrainer(
104
- model = model,
105
- tokenizer = tokenizer,
106
- train_dataset = dataset,
107
- dataset_text_field = "text",
108
- max_seq_length = max_seq_length,
109
- dataset_num_proc = 8, # 2
110
-
111
- args = UnslothTrainingArguments(
112
- per_device_train_batch_size = 2,
113
- gradient_accumulation_steps = 8, # 4
114
-
115
- warmup_ratio = 0.1,
116
- num_train_epochs = 3, # 1, 2, 3, 4
117
- # max_steps = 60,
118
-
119
- learning_rate = 5e-5,
120
- embedding_learning_rate = 5e-6,
121
-
122
- fp16 = not is_bfloat16_supported(),
123
- bf16 = is_bfloat16_supported(),
124
- logging_steps = 1,
125
- optim = "adamw_8bit",
126
- weight_decay = 0.00,
127
- lr_scheduler_type = "cosine",
128
- seed = 3407,
129
- output_dir = "outputs",
130
- report_to = "none", # Use this for WandB etc
131
- ),
132
- )
133
-
134
- trainer_stats = trainer.train()
135
-
136
- # saves the LoRA adapters, and not the full model. To save to 16bit or GGUF, scroll down!
137
- model.save_pretrained("lora_model") # Local saving
138
- tokenizer.save_pretrained("lora_model")
139
- # model.push_to_hub("your_name/lora_model", token = "...") # Online saving
140
- # tokenizer.push_to_hub("your_name/lora_model", token = "...") # Online saving
141
-
142
- # Save to 8bit Q8_0
143
- if False: model.save_pretrained_gguf("model", tokenizer,)
144
- # Remember to go to https://huggingface.co/settings/tokens for a token!
145
- # And change your username!
146
- if False: model.push_to_hub_gguf("mlibre/model", tokenizer, token = "token")
147
-
148
- # Save to 16bit GGUF
149
- if False: model.save_pretrained_gguf("model", tokenizer, quantization_method = "f16")
150
- if False: model.push_to_hub_gguf("mlibre/model", tokenizer, quantization_method = "f16", token = "token")
151
-
152
- # Save to q4_k_m GGUF
153
- if True: model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")
154
- if False: model.push_to_hub_gguf("mlibre/model", tokenizer, quantization_method = "q4_k_m", token = "token")
155
-
156
- # Save to multiple GGUF options - much faster if you want multiple!
157
- if False:
158
- model.push_to_hub_gguf(
159
- "mlibre/model", # Change mlibre to your username!
160
- tokenizer,
161
- quantization_method = ["q4_k_m", "q8_0", "q5_k_m",],
162
- token = "token", # Get a token at https://huggingface.co/settings/tokens
163
- )
164
-
165
- print(tokenizer._ollama_modelfile)
166
-
167
- !curl -fsSL https://ollama.com/install.sh | sh
168
- !ollama create unsloth_model -f ./model/Modelfile
169
-
170
- # In colab terminal type: ollama run unsloth_model
171
- # in local ollama:
172
- !curl http://localhost:11434/api/chat -d '{ \
173
- "model": "unsloth_model", \
174
- "messages": [ \
175
- {"role": "user", \
176
- "content": "The palestine"} \
177
- ] \
178
- }'