clarity-ai 6.3.3 → 6.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -2,6 +2,25 @@
2
2
 
3
3
  ---
4
4
 
5
+ ## 6.4.1 (2026-06-06)
6
+
7
+ ### Clarity Flash 14B Model
8
+ - Added `Clarity Flash 14B` to `/model` picker — 128K context, HuggingFace Inference API
9
+ - New `huggingface` provider: calls `https://api-inference.huggingface.co/models/{model}/v1/chat/completions`
10
+ - `getKey()` falls back to `HF_TOKEN` env var for seamless auth
11
+ - Model weights at `Universal-618/Clarity-flash-weights` on HF Hub
12
+
13
+ ### Fixed-Height Engine (v6.3.x carried forward)
14
+ - Fixed-height viewport with `sliceToViewport()` + `buildLineArray()`
15
+ - Line-by-line streaming via `LineRenderer` (14 line types)
16
+ - Composer: multiline input with auto-grow (max 3 rows), Shift+Enter newline
17
+
18
+ ### Training Notebooks
19
+ - `clarity_flash_14b.ipynb`: Colab T4 — DeepSeek-R1-Distill-Qwen-14B, 4-bit QLoRA, 500 steps, 20 datasets
20
+ - `clarity_heavy_20b_moe.ipynb`: Kaggle T4 — GPT-OSS-20B MoE, MXFP4 native, LoRA, 20 datasets
21
+
22
+ ---
23
+
5
24
  ## 3.1.0 (2026-06-05)
6
25
 
7
26
  ### UI Rewrite — OpenCode Style
package/README.md CHANGED
@@ -92,16 +92,26 @@ clarity /bash ls -la
92
92
  | `/help [command]` | Command help |
93
93
  | `/exit` | Exit CLARITY |
94
94
 
95
+ ## Available Models
96
+
97
+ | Model | Provider | Context |
98
+ |---|---|---|
99
+ | **Clarity Flash 14B** | HuggingFace Inference | 128K |
100
+ | Llama 3.3 70B Versatile | Groq | 32K |
101
+ | Llama 3.1 8B Instant | Groq | 8K (fast) |
102
+ | DeepSeek R1 Distill 70B | Groq | 32K (reasoning) |
103
+ | Gemini 2.0 Flash | Google | 32K (fast) |
104
+ | DeepSeek R1 Free | OpenRouter | 128K |
105
+
95
106
  ## Provider Comparison
96
107
 
97
108
  | Provider | Free Tier | Streaming | Priority |
98
109
  |---|---|---|---|
99
110
  | Groq | ✓ | ✓ | 1 (fastest) |
100
111
  | Google Gemini | ✓ | ✓ | 2 |
101
- | DeepSeek | Cheap | ✓ | 3 |
102
- | OpenRouter | | ✓ | 4 |
103
- | OpenAI | Paid | ✓ | 5 |
104
- | Anthropic | Paid | ✓ | 6 |
112
+ | HuggingFace (Clarity Flash) | Needs HF_TOKEN | ✓ | 3 |
113
+ | DeepSeek | Cheap | ✓ | 4 |
114
+ | OpenRouter | Some free models (e.g. DeepSeek R1 Free) | ✓ | 5 |
105
115
 
106
116
  ## License
107
117
 
@@ -2,232 +2,213 @@
2
2
  "cells": [
3
3
  {
4
4
  "cell_type": "markdown",
5
- "metadata": {},
6
5
  "source": [
7
- "# CLARITY Flash 14B — TPU Fine-Tuning\n",
8
- "Trains a 14B parameter model on agent CoT + tool-calling data.\n",
9
- "Target: Google Colab TPU v2-8 (free tier)\n",
10
- "HF token: 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"
11
- ]
6
+ "# CLARITY Flash 14B — Trained! Push to Universal-618/Clarity-flash-weights\n",
7
+ "Model: deepseek-ai/DeepSeek-R1-Distill-Qwen-14B\n",
8
+ "4-bit QLoRA + grad ckpt + max_len=256\n",
9
+ "Weights pushed to HF model repo for inference on 6 Clarity Spaces\n"
10
+ ],
11
+ "metadata": {}
12
12
  },
13
13
  {
14
14
  "cell_type": "code",
15
- "metadata": {},
16
15
  "source": [
17
- "# === Install ===\n",
18
- "!pip install -q torch torch-xla torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu\n",
19
- "!pip install -q transformers datasets accelerate peft bitsandbytes sentencepiece huggingface_hub"
16
+ "import os,gc,torch\n",
17
+ "from huggingface_hub import create_repo\n",
18
+ "from google.colab import userdata\n",
19
+ "HF_TOKEN = os.environ.get('HF_TOKEN') or userdata.get('HF_TOKEN')\n",
20
+ "assert HF_TOKEN and len(HF_TOKEN)>10, 'Set HF_TOKEN in Colab secrets'\n"
20
21
  ],
22
+ "metadata": {},
21
23
  "execution_count": null,
22
24
  "outputs": []
23
25
  },
24
26
  {
25
27
  "cell_type": "code",
26
- "metadata": {},
27
28
  "source": [
28
- "# === HF Auth ===\n",
29
- "from huggingface_hub import login, HfApi, create_repo\n",
30
- "HF_TOKEN = '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'\n",
31
- "login(token=HF_TOKEN, add_to_git_credential=True)\n",
32
- "api = HfApi(token=HF_TOKEN)"
29
+ "!pip install -q transformers datasets accelerate peft bitsandbytes sentencepiece huggingface_hub\n"
33
30
  ],
31
+ "metadata": {},
34
32
  "execution_count": null,
35
33
  "outputs": []
36
34
  },
37
35
  {
38
36
  "cell_type": "code",
39
- "metadata": {},
40
37
  "source": [
41
- "# === TPU Setup ===\n",
42
- "import torch\n",
43
- "import torch_xla\n",
44
- "import torch_xla.core.xla_model as xm\n",
45
- "device = xm.xla_device()\n",
46
- "print('Device:', device)\n",
47
- "print('TPU cores:', torch_xla._XLAC._xla_get_num_devices())"
38
+ "assert torch.cuda.is_available()\n",
39
+ "print('GPU:', torch.cuda.get_device_name(0))\n",
40
+ "gc.collect(); torch.cuda.empty_cache()\n"
48
41
  ],
42
+ "metadata": {},
49
43
  "execution_count": null,
50
44
  "outputs": []
51
45
  },
52
46
  {
53
47
  "cell_type": "code",
54
- "metadata": {},
55
48
  "source": [
56
- "# === Data Loading ===\n",
57
- "import requests\n",
58
- "import json\n",
59
- "from datasets import Dataset\n",
60
- "\n",
61
- "DATA_URLS = [\n",
62
- " 'https://huggingface.co/spaces/Universal-618/Clarity-main/main-data',\n",
63
- " 'https://huggingface.co/spaces/Universal-618/Clarity-2/main-data',\n",
64
- " 'https://huggingface.co/spaces/Universal-618/Clarity-3/main-data',\n",
65
- "]\n",
49
+ "import requests,sys\n",
50
+ "from datasets import Dataset, load_dataset\n",
51
+ "import random\n",
66
52
  "\n",
67
53
  "all_samples = []\n",
68
- "for url in DATA_URLS:\n",
69
- " try:\n",
70
- " r = requests.get(url, headers={'Authorization': f'Bearer {HF_TOKEN}'}, timeout=60)\n",
71
- " if r.status_code == 200:\n",
72
- " data = r.json()\n",
73
- " samples = data if isinstance(data, list) else data.get('data', [])\n",
74
- " all_samples.extend(samples)\n",
75
- " print(f'Loaded {len(samples)} from {url}')\n",
76
- " except Exception as e:\n",
77
- " print(f'Skipped {url}: {e}')\n",
78
- "\n",
79
- "# Fallback: synthetic CoT samples if no data\n",
80
- "if len(all_samples) < 10:\n",
81
- " print('No remote data found — using synthetic samples')\n",
82
- " all_samples = [\n",
83
- " {'instruction': 'List files in current directory', 'response': 'I will run the ls command.\\n<tool>bash</tool><cmd>ls -la</cmd>', 'tools': 'bash'},\n",
84
- " {'instruction': 'Read the file config.json', 'response': 'Let me read that file.\\n<tool>read_file</tool><path>config.json</path>', 'tools': 'read_file'},\n",
85
- " {'instruction': 'Write hello world script', 'response': 'I will create the file.\\n<tool>write_file</tool><path>hello.py</path><content>print(\"hello\")</content>', 'tools': 'write_file'},\n",
86
- " ]\n",
87
- "\n",
88
- "print(f'Total training samples: {len(all_samples)}')"
54
+ "def add(i,r): all_samples.append(dict(instruction=str(i or ''), response=str(r or '')))\n",
55
+ "\n",
56
+ "def load_std(n,s,f,l,**kw):\n",
57
+ " try:\n",
58
+ " for i,row in enumerate(load_dataset(n,split=s,streaming=True,token=HF_TOKEN,**kw)):\n",
59
+ " if i>=l: break\n",
60
+ " add(row.get(f['instruction'],''),row.get(f['response'],''))\n",
61
+ " except Exception as e: print(f' skip {n}: {e}', file=sys.stderr)\n",
62
+ "\n",
63
+ "def load_msgs(n,s,l,**kw):\n",
64
+ " try:\n",
65
+ " for i,row in enumerate(load_dataset(n,split=s,streaming=True,token=HF_TOKEN,**kw)):\n",
66
+ " if i>=l: break\n",
67
+ " msgs=row.get('messages',[])\n",
68
+ " if len(msgs)>=2: add(msgs[0].get('content',''),msgs[-1].get('content',''))\n",
69
+ " except Exception as e: print(f' skip {n}: {e}', file=sys.stderr)\n",
70
+ "\n",
71
+ "for sfx in ['main','2','3','4','5','6']:\n",
72
+ " try:\n",
73
+ " r=requests.get(f'https://huggingface.co/spaces/Universal-618/Clarity-{sfx}/main-data',headers={'Authorization':f'Bearer {HF_TOKEN}'},timeout=30)\n",
74
+ " if r.status_code==200:\n",
75
+ " d=r.json()\n",
76
+ " for x in (d if isinstance(d,list) else d.get('data',[])):\n",
77
+ " add(x.get('instruction',x.get('prompt',x.get('problem',''))),x.get('response',x.get('completion',x.get('output',x.get('solution','')))))\n",
78
+ " except: pass\n",
79
+ "\n",
80
+ "load_std('Open-Orca/OpenOrca','train',dict(instruction='question',response='response'),800)\n",
81
+ "load_msgs('HuggingFaceH4/no_robots','train',500)\n",
82
+ "load_msgs('HuggingFaceH4/ultrachat_200k','train_sft',500)\n",
83
+ "load_std('tatsu-lab/alpaca','train',dict(instruction='instruction',response='output'),500)\n",
84
+ "load_std('TIGER-Lab/MathInstruct','train',dict(instruction='instruction',response='output'),800)\n",
85
+ "load_std('AI-MO/NuminaMath-CoT','train',dict(instruction='problem',response='solution'),600)\n",
86
+ "load_std('meta-math/MetaMathQA','train',dict(instruction='query',response='response'),600)\n",
87
+ "load_std('microsoft/orca-math-word-problems-200k','train',dict(instruction='question',response='answer'),500)\n",
88
+ "load_std('GAIR/Reasoning-Intensive','train',dict(instruction='question',response='answer'),500)\n",
89
+ "load_std('BAAI/AgentInstruct','train',dict(instruction='instruction',response='output'),500)\n",
90
+ "load_std('bigcode/commitpackft','train',dict(instruction='instruction',response='response'),500)\n",
91
+ "load_std('sahil2801/CodeAlpaca-20k','train',dict(instruction='instruction',response='output'),500)\n",
92
+ "load_std('jondurbin/airoboros-3.2','train',dict(instruction='instruction',response='response'),500)\n",
93
+ "load_std('cognitivecomputations/dolphin','train',dict(instruction='instruction',response='response'),500)\n",
94
+ "load_std('databricks/databricks-dolly-15k','train',dict(instruction='instruction',response='response'),500)\n",
95
+ "load_std('WizardLM/WizardLM_evol_instruct_V2_196k','train',dict(instruction='instruction',response='output'),500)\n",
96
+ "load_std('Intel/orca_dpo_pairs','train',dict(instruction='question',response='chosen'),500)\n",
97
+ "load_std('nvidia/HelpSteer','train',dict(instruction='instruction',response='response'),500)\n",
98
+ "load_std('Dahoas/full-hh-rlhf','train',dict(instruction='instruction',response='response'),500)\n",
99
+ "load_std('BAAI/Infinity-Instruct','0625',dict(instruction='instruction',response='output'),500)\n",
100
+ "\n",
101
+ "print(f'Total: {len(all_samples)}')\n",
102
+ "random.shuffle(all_samples); gc.collect()\n"
89
103
  ],
104
+ "metadata": {},
90
105
  "execution_count": null,
91
106
  "outputs": []
92
107
  },
93
108
  {
94
109
  "cell_type": "code",
95
- "metadata": {},
96
110
  "source": [
97
- "# === Format for Training ===\n",
98
- "def format_chat(sample):\n",
99
- " inst = sample.get('instruction', sample.get('prompt', sample.get('input', '')))\n",
100
- " resp = sample.get('response', sample.get('completion', sample.get('output', '')))\n",
101
- " return {\n",
102
- " 'text': f'<|im_start|>user\\n{inst}<|im_end|>\\n<|im_start|>assistant\\n{resp}<|im_end|>'\n",
103
- " }\n",
111
+ "from transformers import AutoTokenizer\n",
112
+ "\n",
113
+ "MODEL_ID='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B'\n",
114
+ "tokz=AutoTokenizer.from_pretrained(MODEL_ID,token=HF_TOKEN,trust_remote_code=True,use_fast=True)\n",
115
+ "tokz.pad_token=tokz.eos_token\n",
104
116
  "\n",
105
- "dataset = Dataset.from_list([format_chat(s) for s in all_samples])\n",
106
- "dataset = dataset.train_test_split(test_size=0.05, seed=42)\n",
107
- "print(f'Train: {len(dataset[\"train\"])}, Test: {len(dataset[\"test\"])}')"
117
+ "def fmt(s): return tokz.apply_chat_template([{'role':'user','content':s.get('instruction','')},{'role':'assistant','content':s.get('response','')}],tokenize=False)\n",
118
+ "\n",
119
+ "ds=Dataset.from_list([{'text':fmt(s)} for s in all_samples])\n",
120
+ "sp=ds.train_test_split(test_size=0.01,seed=42)\n",
121
+ "del all_samples,ds; gc.collect()\n",
122
+ "print(f'Train: {len(sp[\"train\"])} Test: {len(sp[\"test\"])}')\n"
108
123
  ],
124
+ "metadata": {},
109
125
  "execution_count": null,
110
126
  "outputs": []
111
127
  },
112
128
  {
113
129
  "cell_type": "code",
114
- "metadata": {},
115
130
  "source": [
116
- "# === Load Model ===\n",
117
- "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n",
118
- "import torch\n",
131
+ "from transformers import AutoModelForCausalLM,BitsAndBytesConfig\n",
132
+ "gc.collect(); torch.cuda.empty_cache()\n",
119
133
  "\n",
120
- "MODEL_ID = 'Qwen/Qwen2.5-14B-Instruct'\n",
134
+ "bnb=BitsAndBytesConfig(load_in_4bit=True,bnb_4bit_use_double_quant=True,bnb_4bit_quant_type='nf4',bnb_4bit_compute_dtype=torch.float16)\n",
121
135
  "\n",
122
- "bnb = BitsAndBytesConfig(\n",
123
- " load_in_4bit=True,\n",
124
- " bnb_4bit_use_double_quant=True,\n",
125
- " bnb_4bit_quant_type='nf4',\n",
126
- " bnb_4bit_compute_dtype=torch.bfloat16,\n",
127
- ")\n",
128
- "\n",
129
- "tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN, trust_remote_code=True)\n",
130
- "tokenizer.pad_token = tokenizer.eos_token\n",
131
- "\n",
132
- "model = AutoModelForCausalLM.from_pretrained(\n",
133
- " MODEL_ID,\n",
134
- " quantization_config=bnb,\n",
135
- " device_map='auto',\n",
136
- " torch_dtype=torch.bfloat16,\n",
137
- " token=HF_TOKEN,\n",
138
- " trust_remote_code=True,\n",
139
- ")\n",
140
- "print(f'Model loaded: {MODEL_ID}')"
136
+ "model=AutoModelForCausalLM.from_pretrained(MODEL_ID,quantization_config=bnb,device_map='auto',dtype=torch.float16,token=HF_TOKEN,trust_remote_code=True,low_cpu_mem_usage=True)\n",
137
+ "model.gradient_checkpointing_enable()\n",
138
+ "model.config.use_cache=False\n",
139
+ "gc.collect(); torch.cuda.empty_cache()\n",
140
+ "print('Model:',round(model.num_parameters()/1e9,1),'B trainable:',round(model.num_parameters(only_trainable=True)/1e6,1),'M')\n"
141
141
  ],
142
+ "metadata": {},
142
143
  "execution_count": null,
143
144
  "outputs": []
144
145
  },
145
146
  {
146
147
  "cell_type": "code",
147
- "metadata": {},
148
148
  "source": [
149
- "# === LoRA Config ===\n",
150
- "from peft import LoraConfig, get_peft_model, TaskType\n",
151
- "\n",
152
- "lora_config = LoraConfig(\n",
153
- " task_type=TaskType.CAUSAL_LM,\n",
154
- " r=16,\n",
155
- " lora_alpha=32,\n",
156
- " lora_dropout=0.05,\n",
157
- " target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj'],\n",
158
- " bias='none',\n",
159
- ")\n",
160
- "model = get_peft_model(model, lora_config)\n",
161
- "model.print_trainable_parameters()"
149
+ "from peft import LoraConfig,get_peft_model,TaskType\n",
150
+ "lora=LoraConfig(task_type=TaskType.CAUSAL_LM,r=4,lora_alpha=8,lora_dropout=0.05,target_modules=['q_proj','k_proj','v_proj','o_proj','gate_proj','up_proj','down_proj'],bias='none')\n",
151
+ "model=get_peft_model(model,lora)\n",
152
+ "model.print_trainable_parameters()\n",
153
+ "gc.collect(); torch.cuda.empty_cache()\n"
162
154
  ],
155
+ "metadata": {},
163
156
  "execution_count": null,
164
157
  "outputs": []
165
158
  },
166
159
  {
167
160
  "cell_type": "code",
168
- "metadata": {},
169
161
  "source": [
170
- "# === Training ===\n",
171
- "from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq\n",
172
- "import numpy as np\n",
173
- "\n",
174
- "def tokenize_fn(examples):\n",
175
- " tok = tokenizer(examples['text'], truncation=True, max_length=2048, padding=False)\n",
176
- " tok['labels'] = tok['input_ids'].copy()\n",
177
- " return tok\n",
178
- "\n",
179
- "tokenized = dataset.map(tokenize_fn, remove_columns=['text'], batched=True)\n",
180
- "\n",
181
- "args = TrainingArguments(\n",
182
- " output_dir='./clarity-flash-14b',\n",
183
- " per_device_train_batch_size=1,\n",
184
- " gradient_accumulation_steps=16,\n",
185
- " num_train_epochs=3,\n",
186
- " learning_rate=2e-4,\n",
187
- " bf16=True,\n",
188
- " logging_steps=10,\n",
189
- " save_steps=200,\n",
190
- " save_total_limit=2,\n",
191
- " optim='adamw_8bit',\n",
192
- " report_to='none',\n",
193
- " dataloader_drop_last=False,\n",
194
- ")\n",
195
- "\n",
196
- "trainer = Trainer(\n",
197
- " model=model,\n",
198
- " args=args,\n",
199
- " train_dataset=tokenized['train'],\n",
200
- " eval_dataset=tokenized['test'],\n",
201
- " data_collator=DataCollatorForSeq2Seq(tokenizer, padding=True),\n",
162
+ "from transformers import TrainingArguments,Trainer,DataCollatorForSeq2Seq\n",
163
+ "\n",
164
+ "def tok_fn(ex):\n",
165
+ " t=tokz(ex['text'],truncation=True,max_length=256,padding=False)\n",
166
+ " t['labels']=t['input_ids'].copy()\n",
167
+ " return t\n",
168
+ "\n",
169
+ "tok=sp.map(tok_fn,remove_columns=['text'],batched=True,num_proc=2)\n",
170
+ "del sp; gc.collect()\n",
171
+ "\n",
172
+ "args=TrainingArguments(\n",
173
+ " output_dir='./clarity-flash',\n",
174
+ " per_device_train_batch_size=1,\n",
175
+ " gradient_accumulation_steps=4,\n",
176
+ " max_steps=500,\n",
177
+ " learning_rate=3e-4,\n",
178
+ " fp16=True,\n",
179
+ " logging_steps=10,\n",
180
+ " save_strategy='no',\n",
181
+ " optim='adamw_8bit',\n",
182
+ " report_to='none',\n",
183
+ " dataloader_num_workers=0,\n",
184
+ " lr_scheduler_type='cosine',\n",
185
+ " warmup_steps=25,\n",
202
186
  ")\n",
203
187
  "\n",
204
- "trainer.train()"
188
+ "trainer=Trainer(model=model,args=args,train_dataset=tok['train'],data_collator=DataCollatorForSeq2Seq(tokz,padding=True,pad_to_multiple_of=8))\n",
189
+ "gc.collect(); torch.cuda.empty_cache()\n",
190
+ "trainer.train()\n"
205
191
  ],
192
+ "metadata": {},
206
193
  "execution_count": null,
207
194
  "outputs": []
208
195
  },
209
196
  {
210
197
  "cell_type": "code",
211
- "metadata": {},
212
198
  "source": [
213
- "# === Push Weights to HF ===\n",
214
- "WEIGHTS_REPO = 'Universal-618/Clarity-flash-weights'\n",
215
- "try:\n",
216
- " create_repo(WEIGHTS_REPO, repo_type='dataset', exist_ok=True, token=HF_TOKEN)\n",
217
- " print(f'Repo {WEIGHTS_REPO} ready')\n",
218
- "except Exception as e:\n",
219
- " print(f'Repo exists or error: {e}')\n",
220
- "\n",
221
- "model.push_to_hub(WEIGHTS_REPO, token=HF_TOKEN, use_temp_dir=True)\n",
222
- "tokenizer.push_to_hub(WEIGHTS_REPO, token=HF_TOKEN)\n",
223
- "print(f'Weights pushed to {WEIGHTS_REPO}')"
199
+ "WEIGHTS_REPO='Universal-618/Clarity-flash-weights'\n",
200
+ "create_repo(WEIGHTS_REPO,repo_type='model',exist_ok=True,token=HF_TOKEN)\n",
201
+ "model.push_to_hub(WEIGHTS_REPO,token=HF_TOKEN,use_temp_dir=True)\n",
202
+ "tokz.push_to_hub(WEIGHTS_REPO,token=HF_TOKEN)\n",
203
+ "import sys; print('done',file=sys.stderr)\n"
224
204
  ],
205
+ "metadata": {},
225
206
  "execution_count": null,
226
207
  "outputs": []
227
208
  }
228
209
  ],
229
210
  "metadata": {
230
- "accelerator": "TPU",
211
+ "accelerator": "GPU",
231
212
  "colab": {
232
213
  "provenance": []
233
214
  },
@@ -242,4 +223,4 @@
242
223
  },
243
224
  "nbformat": 4,
244
225
  "nbformat_minor": 4
245
- }
226
+ }
@@ -2,249 +2,211 @@
2
2
  "cells": [
3
3
  {
4
4
  "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "# CLARITY Heavy 20B MoE — Multi-GPU Fine-Tuning\n",
8
- "Trains a 20B Mixture-of-Experts model on deep CoT + recursive tool execution data.\n",
9
- "Target: Kaggle dual T4 (2x 16GB) with 4-bit quantization + FSDP.\n",
10
- "HF token: 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"
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "metadata": {},
16
5
  "source": [
17
- "# === Install ===\n",
18
- "!pip install -q torch transformers datasets accelerate peft bitsandbytes\n",
19
- "!pip install -q deepspeed sentencepiece huggingface_hub"
6
+ "# CLARITY Heavy 20B MoE — Kaggle Single T4\n",
7
+ "Model: openai/gpt-oss-20b (21B MoE, 3.6B active, native MXFP4)\n",
8
+ "LoRA + grad ckpt + max_len=256 + no eval + no checkpoints\n"
20
9
  ],
21
- "execution_count": null,
22
- "outputs": []
10
+ "metadata": {}
23
11
  },
24
12
  {
25
13
  "cell_type": "code",
26
- "metadata": {},
27
14
  "source": [
28
- "# === Check GPUs ===\n",
29
- "import torch\n",
30
- "n_gpus = torch.cuda.device_count()\n",
31
- "for i in range(n_gpus):\n",
32
- " print(f'GPU {i}: {torch.cuda.get_device_name(i)} — {torch.cuda.get_device_properties(i).total_memory / 1e9:.1f} GB')\n",
33
- "assert n_gpus >= 2, 'Need at least 2 GPUs'"
15
+ "import os,gc,torch\n",
16
+ "from huggingface_hub import create_repo\n",
17
+ "HF_TOKEN=os.environ.get('HF_TOKEN')\n",
18
+ "if not HF_TOKEN:\n",
19
+ " from kaggle_secrets import UserSecretsClient\n",
20
+ " HF_TOKEN=UserSecretsClient().get_secret('HF_TOKEN')\n",
21
+ "assert HF_TOKEN and len(HF_TOKEN)>10, 'Set HF_TOKEN as Kaggle secret'\n"
34
22
  ],
23
+ "metadata": {},
35
24
  "execution_count": null,
36
25
  "outputs": []
37
26
  },
38
27
  {
39
28
  "cell_type": "code",
40
- "metadata": {},
41
29
  "source": [
42
- "# === HF Auth ===\n",
43
- "from huggingface_hub import login, HfApi, create_repo\n",
44
- "HF_TOKEN = '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'\n",
45
- "login(token=HF_TOKEN, add_to_git_credential=True)\n",
46
- "api = HfApi(token=HF_TOKEN)"
30
+ "!pip install -q transformers datasets accelerate peft bitsandbytes sentencepiece huggingface_hub\n"
47
31
  ],
32
+ "metadata": {},
48
33
  "execution_count": null,
49
34
  "outputs": []
50
35
  },
51
36
  {
52
37
  "cell_type": "code",
53
- "metadata": {},
54
38
  "source": [
55
- "# === Data Loading ===\n",
56
- "import requests\n",
57
- "import json\n",
58
- "from datasets import Dataset, concatenate_datasets\n",
59
- "\n",
60
- "DATA_URLS = [\n",
61
- " 'https://huggingface.co/spaces/Universal-618/Clarity-4/main-data',\n",
62
- " 'https://huggingface.co/spaces/Universal-618/Clarity-5/main-data',\n",
63
- " 'https://huggingface.co/spaces/Universal-618/Clarity-6/main-data',\n",
64
- " 'https://huggingface.co/spaces/Universal-618/Clarity-main/main-data',\n",
65
- "]\n",
66
- "\n",
67
- "all_samples = []\n",
68
- "for url in DATA_URLS:\n",
69
- " try:\n",
70
- " r = requests.get(url, headers={'Authorization': f'Bearer {HF_TOKEN}'}, timeout=120)\n",
71
- " if r.status_code == 200:\n",
72
- " data = r.json()\n",
73
- " samples = data if isinstance(data, list) else data.get('data', [])\n",
74
- " all_samples.extend(samples)\n",
75
- " print(f'Loaded {len(samples)} from {url}')\n",
76
- " except Exception as e:\n",
77
- " print(f'Skipped {url}: {e}')\n",
78
- "\n",
79
- "if len(all_samples) < 10:\n",
80
- " print('No remote data — generating synthetic deep CoT samples')\n",
81
- " import random\n",
82
- " code_snippets = [\n",
83
- " 'def fib(n): return n if n < 2 else fib(n-1) + fib(n-2)',\n",
84
- " 'for i in range(10): print(i**2)',\n",
85
- " 'with open(\"data.txt\") as f: content = f.read()',\n",
86
- " ]\n",
87
- " for _ in range(50):\n",
88
- " cs = random.choice(code_snippets)\n",
89
- " all_samples.append({\n",
90
- " 'instruction': f'Write and test a function',\n",
91
- " 'thinking': f'I need to think step by step. First, I will analyze what the user wants. Then I will write the code. Let me reason through this carefully.',\n",
92
- " 'response': f'I will write the code now.\\n<tool>bash</tool><cmd>cat > /tmp/test.py << \\'EOF\\'\\n{cs}\\nEOF\\npython3 /tmp/test.py</cmd>',\n",
93
- " 'tools': 'bash,write_file',\n",
94
- " })\n",
95
- "\n",
96
- "print(f'Total training samples: {len(all_samples)}')"
39
+ "assert torch.cuda.is_available()\n",
40
+ "print('GPU:',torch.cuda.get_device_name(0))\n",
41
+ "gc.collect(); torch.cuda.empty_cache()\n"
97
42
  ],
43
+ "metadata": {},
98
44
  "execution_count": null,
99
45
  "outputs": []
100
46
  },
101
47
  {
102
48
  "cell_type": "code",
103
- "metadata": {},
104
49
  "source": [
105
- "# === Format ===\n",
106
- "def format_deep_cot(sample):\n",
107
- " inst = sample.get('instruction', sample.get('prompt', ''))\n",
108
- " thinking = sample.get('thinking', '')\n",
109
- " resp = sample.get('response', sample.get('completion', ''))\n",
110
- " thinking_block = f'<|thinking_start|>{thinking}<|thinking_end|>' if thinking else ''\n",
111
- " return {\n",
112
- " 'text': f'<|im_start|>user\\n{inst}<|im_end|>\\n<|im_start|>assistant\\n{thinking_block}{resp}<|im_end|>'\n",
113
- " }\n",
114
- "\n",
115
- "dataset = Dataset.from_list([format_deep_cot(s) for s in all_samples])\n",
116
- "split = dataset.train_test_split(test_size=0.05, seed=42)\n",
117
- "print(f'Train: {len(split[\"train\"])}, Test: {len(split[\"test\"])}')"
50
+ "import requests,sys\n",
51
+ "from datasets import Dataset, load_dataset\n",
52
+ "import random\n",
53
+ "\n",
54
+ "all_samples=[]\n",
55
+ "def add(i,r): all_samples.append(dict(instruction=str(i or ''), response=str(r or '')))\n",
56
+ "\n",
57
+ "def load_std(n,s,f,l,**kw):\n",
58
+ " try:\n",
59
+ " for i,row in enumerate(load_dataset(n,split=s,streaming=True,token=HF_TOKEN,**kw)):\n",
60
+ " if i>=l: break\n",
61
+ " add(row.get(f['instruction'],''),row.get(f['response'],''))\n",
62
+ " except Exception as e: print(f' skip {n}: {e}', file=sys.stderr)\n",
63
+ "\n",
64
+ "def load_msgs(n,s,l,**kw):\n",
65
+ " try:\n",
66
+ " for i,row in enumerate(load_dataset(n,split=s,streaming=True,token=HF_TOKEN,**kw)):\n",
67
+ " if i>=l: break\n",
68
+ " msgs=row.get('messages',[])\n",
69
+ " if len(msgs)>=2: add(msgs[0].get('content',''),msgs[-1].get('content',''))\n",
70
+ " except Exception as e: print(f' skip {n}: {e}', file=sys.stderr)\n",
71
+ "\n",
72
+ "for sfx in ['main','2','3','4','5','6']:\n",
73
+ " try:\n",
74
+ " r=requests.get(f'https://huggingface.co/spaces/Universal-618/Clarity-{sfx}/main-data',headers={'Authorization':f'Bearer {HF_TOKEN}'},timeout=30)\n",
75
+ " if r.status_code==200:\n",
76
+ " d=r.json()\n",
77
+ " for x in (d if isinstance(d,list) else d.get('data',[])):\n",
78
+ " add(x.get('instruction',x.get('prompt',x.get('problem',''))),x.get('response',x.get('completion',x.get('output',x.get('solution','')))))\n",
79
+ " except: pass\n",
80
+ "\n",
81
+ "load_std('Open-Orca/OpenOrca','train',dict(instruction='question',response='response'),800)\n",
82
+ "load_msgs('HuggingFaceH4/no_robots','train',500)\n",
83
+ "load_msgs('HuggingFaceH4/ultrachat_200k','train_sft',500)\n",
84
+ "load_std('tatsu-lab/alpaca','train',dict(instruction='instruction',response='output'),500)\n",
85
+ "load_std('TIGER-Lab/MathInstruct','train',dict(instruction='instruction',response='output'),800)\n",
86
+ "load_std('AI-MO/NuminaMath-CoT','train',dict(instruction='problem',response='solution'),600)\n",
87
+ "load_std('meta-math/MetaMathQA','train',dict(instruction='query',response='response'),600)\n",
88
+ "load_std('microsoft/orca-math-word-problems-200k','train',dict(instruction='question',response='answer'),500)\n",
89
+ "load_std('GAIR/Reasoning-Intensive','train',dict(instruction='question',response='answer'),500)\n",
90
+ "load_std('BAAI/AgentInstruct','train',dict(instruction='instruction',response='output'),500)\n",
91
+ "load_std('bigcode/commitpackft','train',dict(instruction='instruction',response='response'),500)\n",
92
+ "load_std('sahil2801/CodeAlpaca-20k','train',dict(instruction='instruction',response='output'),500)\n",
93
+ "load_std('jondurbin/airoboros-3.2','train',dict(instruction='instruction',response='response'),500)\n",
94
+ "load_std('cognitivecomputations/dolphin','train',dict(instruction='instruction',response='response'),500)\n",
95
+ "load_std('databricks/databricks-dolly-15k','train',dict(instruction='instruction',response='response'),500)\n",
96
+ "load_std('WizardLM/WizardLM_evol_instruct_V2_196k','train',dict(instruction='instruction',response='output'),500)\n",
97
+ "load_std('Intel/orca_dpo_pairs','train',dict(instruction='question',response='chosen'),500)\n",
98
+ "load_std('nvidia/HelpSteer','train',dict(instruction='instruction',response='response'),500)\n",
99
+ "load_std('Dahoas/full-hh-rlhf','train',dict(instruction='instruction',response='response'),500)\n",
100
+ "load_std('BAAI/Infinity-Instruct','0625',dict(instruction='instruction',response='output'),500)\n",
101
+ "\n",
102
+ "print(f'Total: {len(all_samples)}')\n",
103
+ "random.shuffle(all_samples); gc.collect()\n"
118
104
  ],
105
+ "metadata": {},
119
106
  "execution_count": null,
120
107
  "outputs": []
121
108
  },
122
109
  {
123
110
  "cell_type": "code",
124
- "metadata": {},
125
111
  "source": [
126
- "# === Load MoE Model (4-bit) ===\n",
127
- "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig\n",
128
- "\n",
129
- "MODEL_ID = 'deepseek-ai/DeepSeek-MoE-16B-Chat'\n",
112
+ "from transformers import AutoTokenizer\n",
130
113
  "\n",
131
- "bnb = BitsAndBytesConfig(\n",
132
- " load_in_4bit=True,\n",
133
- " bnb_4bit_use_double_quant=True,\n",
134
- " bnb_4bit_quant_type='nf4',\n",
135
- " bnb_4bit_compute_dtype=torch.bfloat16,\n",
136
- ")\n",
114
+ "MODEL_ID='openai/gpt-oss-20b'\n",
115
+ "tokz=AutoTokenizer.from_pretrained(MODEL_ID,token=HF_TOKEN,trust_remote_code=True)\n",
116
+ "tokz.pad_token=tokz.eos_token\n",
137
117
  "\n",
138
- "tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN, trust_remote_code=True)\n",
139
- "tokenizer.pad_token = tokenizer.eos_token\n",
118
+ "def fmt(s): return tokz.apply_chat_template([{'role':'system','content':'Reasoning: high'},{'role':'user','content':s.get('instruction','')},{'role':'assistant','content':s.get('response','')}],tokenize=False)\n",
140
119
  "\n",
141
- "model = AutoModelForCausalLM.from_pretrained(\n",
142
- " MODEL_ID,\n",
143
- " quantization_config=bnb,\n",
144
- " device_map='auto',\n",
145
- " torch_dtype=torch.bfloat16,\n",
146
- " token=HF_TOKEN,\n",
147
- " trust_remote_code=True,\n",
148
- ")\n",
149
- "print(f'MoE model loaded: {MODEL_ID}')\n",
150
- "print(f'Params: {model.num_parameters():,.0f}')"
120
+ "ds=Dataset.from_list([{'text':fmt(s)} for s in all_samples])\n",
121
+ "sp=ds.train_test_split(test_size=0.01,seed=42)\n",
122
+ "del all_samples,ds; gc.collect()\n",
123
+ "print(f'Train: {len(sp[\"train\"])} Test: {len(sp[\"test\"])}')\n"
151
124
  ],
125
+ "metadata": {},
152
126
  "execution_count": null,
153
127
  "outputs": []
154
128
  },
155
129
  {
156
130
  "cell_type": "code",
157
- "metadata": {},
158
131
  "source": [
159
- "# === LoRA for MoE ===\n",
160
- "from peft import LoraConfig, get_peft_model, TaskType\n",
161
- "\n",
162
- "lora_config = LoraConfig(\n",
163
- " task_type=TaskType.CAUSAL_LM,\n",
164
- " r=8,\n",
165
- " lora_alpha=16,\n",
166
- " lora_dropout=0.1,\n",
167
- " target_modules=['q_proj', 'k_proj', 'v_proj', 'o_proj', 'gate_proj', 'up_proj', 'down_proj', 'gate'],\n",
168
- " bias='none',\n",
169
- ")\n",
170
- "model = get_peft_model(model, lora_config)\n",
171
- "model.print_trainable_parameters()"
132
+ "from transformers import AutoModelForCausalLM\n",
133
+ "import requests as req\n",
134
+ "gc.collect(); torch.cuda.empty_cache()\n",
135
+ "\n",
136
+ "r=req.get(f'https://huggingface.co/{MODEL_ID}/raw/main/config.json',headers={'Authorization':f'Bearer {HF_TOKEN}'})\n",
137
+ "cd=r.json()\n",
138
+ "cd.pop('_attn_implementation',None); cd.pop('attn_implementation',None)\n",
139
+ "\n",
140
+ "model=AutoModelForCausalLM.from_pretrained(MODEL_ID,config=cd,device_map='auto',token=HF_TOKEN,trust_remote_code=True,low_cpu_mem_usage=True)\n",
141
+ "model.gradient_checkpointing_enable()\n",
142
+ "model.config.use_cache=False\n",
143
+ "gc.collect(); torch.cuda.empty_cache()\n",
144
+ "print('Model:',round(model.num_parameters()/1e9,1),'B')\n"
172
145
  ],
146
+ "metadata": {},
173
147
  "execution_count": null,
174
148
  "outputs": []
175
149
  },
176
150
  {
177
151
  "cell_type": "code",
178
- "metadata": {},
179
152
  "source": [
180
- "# === Gradient Checkpointing (prevents OOM) ===\n",
181
- "model.gradient_checkpointing_enable(gradient_checkpointing_kwargs={'use_reentrant': False})\n",
182
- "model.config.use_cache = False\n",
183
- "print('Gradient checkpointing enabled')"
153
+ "from peft import LoraConfig,get_peft_model,TaskType\n",
154
+ "lora=LoraConfig(task_type=TaskType.CAUSAL_LM,r=4,lora_alpha=8,lora_dropout=0.1,target_modules=['q_proj','k_proj','v_proj','o_proj','gate_proj','up_proj','down_proj'],bias='none')\n",
155
+ "model=get_peft_model(model,lora)\n",
156
+ "model.print_trainable_parameters()\n",
157
+ "gc.collect(); torch.cuda.empty_cache()\n"
184
158
  ],
159
+ "metadata": {},
185
160
  "execution_count": null,
186
161
  "outputs": []
187
162
  },
188
163
  {
189
164
  "cell_type": "code",
190
- "metadata": {},
191
165
  "source": [
192
- "# === Training ===\n",
193
- "from transformers import TrainingArguments, Trainer, DataCollatorForSeq2Seq\n",
194
- "\n",
195
- "def tokenize_fn(examples):\n",
196
- " tok = tokenizer(examples['text'], truncation=True, max_length=2048, padding=False)\n",
197
- " tok['labels'] = tok['input_ids'].copy()\n",
198
- " return tok\n",
199
- "\n",
200
- "tokenized = split.map(tokenize_fn, remove_columns=['text'], batched=True)\n",
201
- "\n",
202
- "args = TrainingArguments(\n",
203
- " output_dir='./clarity-heavy-20b-moe',\n",
204
- " per_device_train_batch_size=1,\n",
205
- " per_device_eval_batch_size=1,\n",
206
- " gradient_accumulation_steps=8,\n",
207
- " num_train_epochs=3,\n",
208
- " learning_rate=1e-4,\n",
209
- " bf16=True,\n",
210
- " logging_steps=10,\n",
211
- " save_steps=200,\n",
212
- " save_total_limit=2,\n",
213
- " optim='adamw_8bit',\n",
214
- " gradient_checkpointing=True,\n",
215
- " report_to='none',\n",
216
- " ddp_find_unused_parameters=False,\n",
217
- ")\n",
218
- "\n",
219
- "trainer = Trainer(\n",
220
- " model=model,\n",
221
- " args=args,\n",
222
- " train_dataset=tokenized['train'],\n",
223
- " eval_dataset=tokenized['test'],\n",
224
- " data_collator=DataCollatorForSeq2Seq(tokenizer, padding=True, pad_to_multiple_of=8),\n",
166
+ "from transformers import TrainingArguments,Trainer,DataCollatorForSeq2Seq\n",
167
+ "\n",
168
+ "def tok_fn(ex):\n",
169
+ " t=tokz(ex['text'],truncation=True,max_length=256,padding=False)\n",
170
+ " t['labels']=t['input_ids'].copy()\n",
171
+ " return t\n",
172
+ "\n",
173
+ "tok=sp.map(tok_fn,remove_columns=['text'],batched=True,num_proc=2)\n",
174
+ "del sp; gc.collect()\n",
175
+ "\n",
176
+ "args=TrainingArguments(\n",
177
+ " output_dir='./clarity-heavy',\n",
178
+ " per_device_train_batch_size=1,\n",
179
+ " gradient_accumulation_steps=16,\n",
180
+ " num_train_epochs=1,\n",
181
+ " learning_rate=2e-4,\n",
182
+ " fp16=True,\n",
183
+ " logging_steps=10,\n",
184
+ " save_strategy='no',\n",
185
+ " optim='adamw_8bit',\n",
186
+ " report_to='none',\n",
187
+ " dataloader_num_workers=0,\n",
188
+ " lr_scheduler_type='cosine',\n",
189
+ " warmup_steps=25,\n",
225
190
  ")\n",
226
191
  "\n",
227
- "trainer.train()"
192
+ "trainer=Trainer(model=model,args=args,train_dataset=tok['train'],data_collator=DataCollatorForSeq2Seq(tokz,padding=True,pad_to_multiple_of=8))\n",
193
+ "gc.collect(); torch.cuda.empty_cache()\n",
194
+ "trainer.train()\n"
228
195
  ],
196
+ "metadata": {},
229
197
  "execution_count": null,
230
198
  "outputs": []
231
199
  },
232
200
  {
233
201
  "cell_type": "code",
234
- "metadata": {},
235
202
  "source": [
236
- "# === Push to HF ===\n",
237
- "WEIGHTS_REPO = 'Universal-618/Clarity-heavy-weights'\n",
238
- "try:\n",
239
- " create_repo(WEIGHTS_REPO, repo_type='dataset', exist_ok=True, token=HF_TOKEN)\n",
240
- " print(f'Repo {WEIGHTS_REPO} ready')\n",
241
- "except Exception as e:\n",
242
- " print(f'Repo notice: {e}')\n",
243
- "\n",
244
- "model.push_to_hub(WEIGHTS_REPO, token=HF_TOKEN, use_temp_dir=True)\n",
245
- "tokenizer.push_to_hub(WEIGHTS_REPO, token=HF_TOKEN)\n",
246
- "print(f'Weights pushed to https://huggingface.co/datasets/{WEIGHTS_REPO}')"
203
+ "WEIGHTS_REPO='Universal-618/Clarity-heavy-weights'\n",
204
+ "create_repo(WEIGHTS_REPO,repo_type='model',exist_ok=True,token=HF_TOKEN)\n",
205
+ "model.push_to_hub(WEIGHTS_REPO,token=HF_TOKEN,use_temp_dir=True)\n",
206
+ "tokz.push_to_hub(WEIGHTS_REPO,token=HF_TOKEN)\n",
207
+ "import sys; print('done',file=sys.stderr)\n"
247
208
  ],
209
+ "metadata": {},
248
210
  "execution_count": null,
249
211
  "outputs": []
250
212
  }
@@ -267,4 +229,4 @@
267
229
  },
268
230
  "nbformat": 4,
269
231
  "nbformat_minor": 4
270
- }
232
+ }
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "clarity-ai",
3
- "version": "6.3.3",
4
- "description": "Premium terminal AI agent — fixed-height viewport, box-drawing UI, TrueColor theme, streaming with abort",
3
+ "version": "6.4.1",
4
+ "description": "Premium terminal AI agent — Clarity Flash 14B model, HF Inference API, fixed-height viewport, TrueColor theme",
5
5
  "type": "module",
6
6
  "bin": {
7
7
  "clarity": "bin/clarity.js"
@@ -1,6 +1,6 @@
1
1
  import React, { useMemo } from 'react';
2
2
  import { Box, Text } from 'ink';
3
- import { hex, usym } from '../config/theme.js';
3
+ import { hex, sym } from '../config/theme.js';
4
4
  import { getLayout } from '../config/layout.js';
5
5
  const { createElement: h } = React;
6
6
 
@@ -40,7 +40,7 @@ export function CodeBlock({ code, language }) {
40
40
  ),
41
41
  lines.length > maxLines
42
42
  ? h(Text, { color: hex.textMuted, backgroundColor: hex.codeBg },
43
- ' ' + usym.ellipsis + ' ' + (lines.length - maxLines) + ' more lines')
43
+ ' ' + sym.ellipsis + ' ' + (lines.length - maxLines) + ' more lines')
44
44
  : null
45
45
  )
46
46
  );
@@ -1,6 +1,6 @@
1
1
  import React, { useState } from 'react';
2
2
  import { Box, Text } from 'ink';
3
- import { hex, usym, u } from '../config/theme.js';
3
+ import { hex, sym } from '../config/theme.js';
4
4
  import { getLayout } from '../config/layout.js';
5
5
  const { createElement: h } = React;
6
6
 
@@ -12,7 +12,7 @@ export function ThinkingBlock({ toolResults, duration }) {
12
12
  ? (duration < 1000 ? duration + 'ms' : (duration / 1000).toFixed(1) + 's')
13
13
  : '';
14
14
 
15
- const headerText = usym.triR2 + ' ' + (collapsed ? usym.triR2 : usym.triD2) + ' Thought' + (durStr ? ' (' + durStr + ')' : '');
15
+ const headerText = sym.triR + ' ' + (collapsed ? sym.triR : sym.triD) + ' Thought' + (durStr ? ' (' + durStr + ')' : '');
16
16
 
17
17
  const rows = 1;
18
18
  const totalRows = collapsed ? 1 : 1 + items.length;
@@ -26,9 +26,9 @@ export function ThinkingBlock({ toolResults, duration }) {
26
26
  : h(Box, { flexDirection: 'column', backgroundColor: hex.surfaceAlt },
27
27
  items.map((tr, i) => {
28
28
  const isLast = i === items.length - 1;
29
- const prefix = isLast ? usym.treeTip + u.h : usym.treeFork + u.h;
30
- const conn = isLast ? ' ' : usym.treeCon;
31
- const icon = tr.status === 'failed' ? usym.cross : usym.circle;
29
+ const prefix = isLast ? sym.treeTip + sym.u.h : sym.treeFork + sym.u.h;
30
+ const conn = isLast ? ' ' : sym.treeCon;
31
+ const icon = tr.status === 'failed' ? sym.cross : sym.circle;
32
32
  const col = tr.status === 'failed' ? hex.red : hex.green;
33
33
  const td = tr.duration ? ' ' + tr.duration + 'ms' : '';
34
34
  const line = ' ' + prefix + ' ' + icon + ' ' + tr.name + td;
@@ -23,9 +23,9 @@ export function setKey(provider, key) {
23
23
  export function getKey(provider) {
24
24
  try {
25
25
  const keys = JSON.parse(readFileSync(KEYS_PATH, 'utf-8'));
26
- return keys[provider] || process.env[provider.toUpperCase() + '_API_KEY'] || null;
26
+ return keys[provider] || process.env[provider.toUpperCase() + '_API_KEY'] || process.env.HF_TOKEN || null;
27
27
  } catch {
28
- return process.env[provider.toUpperCase() + '_API_KEY'] || null;
28
+ return process.env[provider.toUpperCase() + '_API_KEY'] || process.env.HF_TOKEN || null;
29
29
  }
30
30
  }
31
31
 
@@ -1,4 +1,5 @@
1
1
  export const ALL_MODELS = [
2
+ { id: 'huggingface/Universal-618/Clarity-flash-weights', provider: 'huggingface', label: 'Clarity Flash 14B', badge: '128K' },
2
3
  { id: 'groq/llama-3.3-70b-versatile', provider: 'groq', label: 'Llama 3.3 70B Versatile', badge: null },
3
4
  { id: 'groq/llama-3.1-8b-instant', provider: 'groq', label: 'Llama 3.1 8B Instant', badge: 'Fast' },
4
5
  { id: 'groq/llama-4-scout-17b-16e-instruct', provider: 'groq', label: 'Llama 4 Scout 17B', badge: null },
@@ -67,6 +67,9 @@ export const sym = {
67
67
  treeJ: '\u2514',
68
68
  treeT: '\u251C',
69
69
  treeCon: '\u2502',
70
+ triR2: '\u25B8',
71
+ triD2: '\u25BE',
72
+ u: { h: '\u2500' },
70
73
  treeTip: '\u2570',
71
74
  treeFork: '\u256D',
72
75
  star: '\u2726',
@@ -3,6 +3,10 @@ import { streamResponse } from './streaming.js';
3
3
  import { parseErrorResponse } from './errors.js';
4
4
 
5
5
  const PROVIDERS = {
6
+ huggingface: {
7
+ endpoint: 'https://api-inference.huggingface.co/models',
8
+ name: 'huggingface',
9
+ },
6
10
  groq: {
7
11
  endpoint: 'https://api.groq.com/openai/v1/chat/completions',
8
12
  name: 'groq',
@@ -18,7 +22,9 @@ export async function* callAI(providerName, model, messages, options = {}) {
18
22
  if (!provider) throw { type: 'config_error', message: 'Unknown provider: ' + providerName };
19
23
 
20
24
  const key = getKey(providerName);
21
- if (!key) throw { type: 'auth_error', provider: providerName, message: 'No API key set for ' + providerName, hint: '/keys ' + providerName + ' <your-key>' };
25
+ if (!key && providerName !== 'huggingface') {
26
+ throw { type: 'auth_error', provider: providerName, message: 'No API key set for ' + providerName, hint: '/keys ' + providerName + ' <your-key>' };
27
+ }
22
28
 
23
29
  const modelName = model.replace(/^[^/]+\//, '');
24
30
 
@@ -34,10 +40,17 @@ export async function* callAI(providerName, model, messages, options = {}) {
34
40
  body.tool_choice = 'auto';
35
41
  }
36
42
 
37
- const endpoint = provider.endpoint;
38
- const extraHeaders = providerName === 'openrouter' ? { 'HTTP-Referer': 'https://clarity-ai.local', 'X-Title': 'CLARITY AI' } : {};
43
+ let endpoint = provider.endpoint;
44
+ const extraHeaders = {};
45
+ if (providerName === 'huggingface') {
46
+ endpoint = provider.endpoint + '/' + modelName + '/v1/chat/completions';
47
+ if (key) extraHeaders['Authorization'] = 'Bearer ' + key;
48
+ } else if (providerName === 'openrouter') {
49
+ extraHeaders['HTTP-Referer'] = 'https://clarity-ai.local';
50
+ extraHeaders['X-Title'] = 'CLARITY AI';
51
+ }
39
52
 
40
- const stream = streamResponse(endpoint, body, key, extraHeaders, options.signal);
53
+ const stream = streamResponse(endpoint, body, key || 'none', extraHeaders, options.signal);
41
54
  for await (const event of stream) {
42
55
  yield event;
43
56
  }