npcpy 1.3.16-py3-none-any.whl → 1.3.18-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
npcpy/serve.py CHANGED
@@ -63,7 +63,7 @@ from npcpy.llm_funcs import gen_image, breathe
  from sqlalchemy import create_engine, text
  from sqlalchemy.orm import sessionmaker

- from npcpy.npc_sysenv import get_locally_available_models
+ from npcpy.npc_sysenv import get_locally_available_models, get_data_dir, get_models_dir, get_cache_dir
  from npcpy.memory.command_history import (
      CommandHistory,
      save_conversation_message,
@@ -447,30 +447,46 @@ def get_db_session():

  def resolve_mcp_server_path(current_path=None, explicit_path=None, force_global=False):
      """
-     Resolve an MCP server path using npcsh.corca's helper when available.
-     Falls back to ~/.npcsh/npc_team/mcp_server.py.
+     Resolve an MCP server path.
+     1. Use explicit_path if provided and exists
+     2. Check if ~/.npcsh/npc_team/mcp_server.py exists
+     3. If not, find mcp_server.py in npcsh package, copy it, and return the path
      """
+     import shutil
+
+     # 1. Check explicit path first
      if explicit_path:
          abs_path = os.path.abspath(os.path.expanduser(explicit_path))
          if os.path.exists(abs_path):
              return abs_path
+
+     # 2. Check if global mcp_server.py already exists
+     global_mcp_path = os.path.expanduser("~/.npcsh/npc_team/mcp_server.py")
+     if os.path.exists(global_mcp_path):
+         return global_mcp_path
+
+     # 3. Find mcp_server.py in npcsh package and copy it
      try:
-         from npcsh.corca import _resolve_and_copy_mcp_server_path
-         resolved = _resolve_and_copy_mcp_server_path(
-             explicit_path=explicit_path,
-             current_path=current_path,
-             team_ctx_mcp_servers=None,
-             interactive=False,
-             auto_copy_bypass=True,
-             force_global=force_global,
-         )
-         if resolved:
-             return os.path.abspath(resolved)
+         import npcsh
+         npcsh_package_dir = os.path.dirname(npcsh.__file__)
+         package_mcp_server = os.path.join(npcsh_package_dir, "mcp_server.py")
+
+         if os.path.exists(package_mcp_server):
+             # Ensure the target directory exists
+             target_dir = os.path.dirname(global_mcp_path)
+             os.makedirs(target_dir, exist_ok=True)
+
+             # Copy the mcp_server.py to the global location
+             shutil.copy2(package_mcp_server, global_mcp_path)
+             print(f"[MCP] Copied mcp_server.py from {package_mcp_server} to {global_mcp_path}")
+             return global_mcp_path
+         else:
+             print(f"[MCP] mcp_server.py not found in npcsh package at {package_mcp_server}")
      except Exception as e:
-         print(f"resolve_mcp_server_path: fallback path due to error: {e}")
-
-     fallback = os.path.expanduser("~/.npcsh/npc_team/mcp_server.py")
-     return fallback
+         print(f"[MCP] Error finding/copying mcp_server.py from npcsh package: {e}")
+
+     # Return the global path anyway (caller will handle if it doesn't exist)
+     return global_mcp_path

  extension_map = {
      "PNG": "images",
@@ -1512,6 +1528,9 @@ def get_models():
          if m.endswith(('.gguf', '.ggml')):
              # For local GGUF/GGML files, show just the filename
              display_model = os.path.basename(m)
+         elif p == 'lora':
+             # For LoRA adapters, show just the folder name
+             display_model = os.path.basename(m.rstrip('/'))

          display_name = f"{display_model} | {p} {text_only}".strip()

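The rstrip('/') matters here because os.path.basename returns an empty string for a path ending in a slash; for example (illustrative path):

    os.path.basename('/home/u/.npcsh/models/my_adapter/')              # ''
    os.path.basename('/home/u/.npcsh/models/my_adapter/'.rstrip('/'))  # 'my_adapter'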
@@ -1991,9 +2010,672 @@ def finetune_status(job_id):
          'start_time': job.get('start_time')
      })

+
+ # Instruction fine-tuning jobs storage
+ instruction_finetune_jobs = {}
+
+
+ @app.route('/api/finetune_instruction', methods=['POST'])
+ def finetune_instruction():
+     """
+     Fine-tune an LLM on instruction/conversation data.
+
+     Request body:
+     {
+         "trainingData": [
+             {"input": "user prompt", "output": "assistant response"},
+             // For DPO: include "reward" or "quality" score (0-1)
+             // For memory_classifier: include "status" as "approved"/"rejected"
+             ...
+         ],
+         "outputName": "my_instruction_model",
+         "baseModel": "google/gemma-3-270m-it",
+         "strategy": "sft", // "sft", "usft", "dpo", or "memory_classifier"
+         "epochs": 20,
+         "learningRate": 3e-5,
+         "batchSize": 2,
+         "loraR": 8,
+         "loraAlpha": 16,
+         "outputPath": "~/.npcsh/models",
+         "systemPrompt": "optional system prompt to prepend",
+         "npc": "optional npc name",
+         "formatStyle": "gemma" // "gemma", "llama", or "default"
+     }
+
+     Strategies:
+     - sft: Supervised Fine-Tuning with input/output pairs
+     - usft: Unsupervised Fine-Tuning on raw text (domain adaptation)
+     - dpo: Direct Preference Optimization using quality/reward scores
+     - memory_classifier: Train memory approval classifier
+     """
+     from npcpy.ft.sft import run_sft, SFTConfig
+     from npcpy.ft.usft import run_usft, USFTConfig
+     from npcpy.ft.rl import train_with_dpo, RLConfig
+
+     data = request.json
+     training_data = data.get('trainingData', [])
+     output_name = data.get('outputName', 'my_instruction_model')
+     base_model = data.get('baseModel', 'google/gemma-3-270m-it')
+     strategy = data.get('strategy', 'sft') # sft, usft, dpo, memory_classifier
+     num_epochs = data.get('epochs', 20)
+     learning_rate = data.get('learningRate', 3e-5)
+     batch_size = data.get('batchSize', 2)
+     lora_r = data.get('loraR', 8)
+     lora_alpha = data.get('loraAlpha', 16)
+     output_path = data.get('outputPath', '~/.npcsh/models')
+     system_prompt = data.get('systemPrompt', '')
+     format_style = data.get('formatStyle', 'gemma')
+     npc_name = data.get('npc', None)
+
+     print(f"🎓 Instruction Fine-tune Request Received!")
+     print(f" Training examples: {len(training_data)}")
+     print(f" Strategy: {strategy}")
+     print(f" Base model: {base_model}")
+     print(f" Output name: {output_name}")
+     print(f" Epochs: {num_epochs}, LR: {learning_rate}, Batch: {batch_size}")
+
+     if not training_data:
+         print("🎓 Error: No training data provided.")
+         return jsonify({'error': 'No training data provided'}), 400
+
+     min_examples = 10 if strategy == 'memory_classifier' else 3
+     if len(training_data) < min_examples:
+         print(f"🎓 Error: Need at least {min_examples} training examples for {strategy}.")
+         return jsonify({'error': f'Need at least {min_examples} training examples for {strategy}'}), 400
+
+     expanded_output_dir = os.path.expanduser(os.path.join(output_path, output_name))
+
+     job_id = f"ift_{int(time.time())}"
+     instruction_finetune_jobs[job_id] = {
+         'status': 'running',
+         'strategy': strategy,
+         'output_dir': expanded_output_dir,
+         'base_model': base_model,
+         'epochs': num_epochs,
+         'current_epoch': 0,
+         'current_step': 0,
+         'total_steps': 0,
+         'current_loss': None,
+         'loss_history': [],
+         'start_time': datetime.datetime.now().isoformat(),
+         'npc': npc_name,
+         'num_examples': len(training_data)
+     }
+     print(f"🎓 Instruction fine-tuning job {job_id} initialized. Output: {expanded_output_dir}")
+
+     def run_training_async():
+         print(f"🎓 Job {job_id}: Starting {strategy.upper()} training thread...")
+         try:
+             if strategy == 'sft':
+                 # Supervised Fine-Tuning with input/output pairs
+                 X = []
+                 y = []
+                 for example in training_data:
+                     inp = example.get('input', example.get('prompt', ''))
+                     out = example.get('output', example.get('response', example.get('completion', '')))
+                     if system_prompt:
+                         inp = f"{system_prompt}\n\n{inp}"
+                     X.append(inp)
+                     y.append(out)
+
+                 config = SFTConfig(
+                     base_model_name=base_model,
+                     output_model_path=expanded_output_dir,
+                     num_train_epochs=num_epochs,
+                     learning_rate=learning_rate,
+                     per_device_train_batch_size=batch_size,
+                     lora_r=lora_r,
+                     lora_alpha=lora_alpha
+                 )
+
+                 print(f"🎓 Job {job_id}: Running SFT with config: {config}")
+                 model_path = run_sft(
+                     X=X,
+                     y=y,
+                     config=config,
+                     format_style=format_style
+                 )
+
+                 instruction_finetune_jobs[job_id]['status'] = 'complete'
+                 instruction_finetune_jobs[job_id]['model_path'] = model_path
+                 instruction_finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
+                 print(f"🎓 Job {job_id}: SFT complete! Model saved to: {model_path}")
+
+             elif strategy == 'usft':
+                 # Unsupervised Fine-Tuning - domain adaptation on raw text
+                 texts = []
+                 for example in training_data:
+                     # Combine input and output as training text, or just use text field
+                     if 'text' in example:
+                         texts.append(example['text'])
+                     else:
+                         inp = example.get('input', example.get('prompt', ''))
+                         out = example.get('output', example.get('response', ''))
+                         if inp and out:
+                             texts.append(f"{inp}\n{out}")
+                         elif inp:
+                             texts.append(inp)
+                         elif out:
+                             texts.append(out)
+
+                 config = USFTConfig(
+                     base_model_name=base_model,
+                     output_model_path=expanded_output_dir,
+                     num_train_epochs=num_epochs,
+                     learning_rate=learning_rate,
+                     per_device_train_batch_size=batch_size,
+                     lora_r=lora_r,
+                     lora_alpha=lora_alpha
+                 )
+
+                 print(f"🎓 Job {job_id}: Running USFT with {len(texts)} texts")
+                 model_path = run_usft(texts=texts, config=config)
+
+                 instruction_finetune_jobs[job_id]['status'] = 'complete'
+                 instruction_finetune_jobs[job_id]['model_path'] = model_path
+                 instruction_finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
+                 print(f"🎓 Job {job_id}: USFT complete! Model saved to: {model_path}")
+
+             elif strategy == 'dpo':
+                 # Direct Preference Optimization - needs quality/reward scores
+                 traces = []
+                 for example in training_data:
+                     traces.append({
+                         'task_prompt': example.get('input', example.get('prompt', '')),
+                         'final_output': example.get('output', example.get('response', '')),
+                         'reward': example.get('reward', example.get('quality', 0.5))
+                     })
+
+                 config = RLConfig(
+                     base_model_name=base_model,
+                     adapter_path=expanded_output_dir,
+                     num_train_epochs=num_epochs,
+                     learning_rate=learning_rate,
+                     per_device_train_batch_size=batch_size,
+                     lora_r=lora_r,
+                     lora_alpha=lora_alpha
+                 )
+
+                 print(f"🎓 Job {job_id}: Running DPO with {len(traces)} traces")
+                 adapter_path = train_with_dpo(traces, config)
+
+                 if adapter_path:
+                     instruction_finetune_jobs[job_id]['status'] = 'complete'
+                     instruction_finetune_jobs[job_id]['model_path'] = adapter_path
+                 else:
+                     instruction_finetune_jobs[job_id]['status'] = 'error'
+                     instruction_finetune_jobs[job_id]['error_msg'] = 'Not enough valid preference pairs for DPO training'
+
+                 instruction_finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
+                 print(f"🎓 Job {job_id}: DPO complete! Adapter saved to: {adapter_path}")
+
+             elif strategy == 'memory_classifier':
+                 # Train memory approval/rejection classifier
+                 from npcpy.ft.memory_trainer import MemoryTrainer
+
+                 approved_memories = []
+                 rejected_memories = []
+
+                 for example in training_data:
+                     status = example.get('status', 'approved')
+                     memory_data = {
+                         'initial_memory': example.get('input', example.get('memory', '')),
+                         'final_memory': example.get('output', example.get('final_memory', '')),
+                         'context': example.get('context', '')
+                     }
+                     if status in ['approved', 'model-approved']:
+                         approved_memories.append(memory_data)
+                     else:
+                         rejected_memories.append(memory_data)
+
+                 if len(approved_memories) < 10 or len(rejected_memories) < 10:
+                     instruction_finetune_jobs[job_id]['status'] = 'error'
+                     instruction_finetune_jobs[job_id]['error_msg'] = 'Need at least 10 approved and 10 rejected memories'
+                     instruction_finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
+                     return
+
+                 trainer = MemoryTrainer(model_name=base_model)
+                 success = trainer.train(
+                     approved_memories=approved_memories,
+                     rejected_memories=rejected_memories,
+                     output_dir=expanded_output_dir,
+                     epochs=num_epochs
+                 )
+
+                 if success:
+                     instruction_finetune_jobs[job_id]['status'] = 'complete'
+                     instruction_finetune_jobs[job_id]['model_path'] = expanded_output_dir
+                 else:
+                     instruction_finetune_jobs[job_id]['status'] = 'error'
+                     instruction_finetune_jobs[job_id]['error_msg'] = 'Memory classifier training failed'
+
+                 instruction_finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
+                 print(f"🎓 Job {job_id}: Memory classifier complete!")
+
+             else:
+                 raise ValueError(f"Unknown strategy: {strategy}. Supported: sft, usft, dpo, memory_classifier")
+
+         except Exception as e:
+             instruction_finetune_jobs[job_id]['status'] = 'error'
+             instruction_finetune_jobs[job_id]['error_msg'] = str(e)
+             instruction_finetune_jobs[job_id]['end_time'] = datetime.datetime.now().isoformat()
+             print(f"🎓 Job {job_id}: ERROR during training: {e}")
+             traceback.print_exc()
+
+         print(f"🎓 Job {job_id}: Training thread finished.")
+
+     thread = threading.Thread(target=run_training_async)
+     thread.daemon = True
+     thread.start()
+
+     print(f"🎓 Job {job_id} launched in background.")
+     return jsonify({
+         'status': 'started',
+         'jobId': job_id,
+         'strategy': strategy,
+         'message': f"Instruction fine-tuning job '{job_id}' started. Check /api/finetune_instruction_status/{job_id} for updates."
+     })
+
+
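A client would start a job and poll it roughly as follows; this is a hedged sketch against the request and response shapes documented above (host, port, and payload are illustrative, not part of the package):

    import time
    import requests

    resp = requests.post("http://localhost:5337/api/finetune_instruction", json={
        "trainingData": [{"input": "What is npcpy?", "output": "An agent framework."}] * 3,
        "outputName": "demo_sft",
        "strategy": "sft",
    })
    job_id = resp.json()["jobId"]

    # Poll until the job leaves the 'running' state
    while True:
        status = requests.get(
            f"http://localhost:5337/api/finetune_instruction_status/{job_id}"
        ).json()
        if status["status"] != "running":
            break
        time.sleep(10)

    print(status.get("outputPath") or status.get("error"))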
+ @app.route('/api/finetune_instruction_status/<job_id>', methods=['GET'])
+ def finetune_instruction_status(job_id):
+     """Get the status of an instruction fine-tuning job."""
+     if job_id not in instruction_finetune_jobs:
+         return jsonify({'error': 'Job not found'}), 404
+
+     job = instruction_finetune_jobs[job_id]
+
+     if job['status'] == 'complete':
+         return jsonify({
+             'status': 'complete',
+             'complete': True,
+             'outputPath': job.get('model_path', job['output_dir']),
+             'strategy': job.get('strategy'),
+             'loss_history': job.get('loss_history', []),
+             'start_time': job.get('start_time'),
+             'end_time': job.get('end_time')
+         })
+     elif job['status'] == 'error':
+         return jsonify({
+             'status': 'error',
+             'error': job.get('error_msg', 'Unknown error'),
+             'start_time': job.get('start_time'),
+             'end_time': job.get('end_time')
+         })
+
+     return jsonify({
+         'status': 'running',
+         'strategy': job.get('strategy'),
+         'epoch': job.get('current_epoch', 0),
+         'total_epochs': job.get('epochs', 0),
+         'step': job.get('current_step', 0),
+         'total_steps': job.get('total_steps', 0),
+         'loss': job.get('current_loss'),
+         'loss_history': job.get('loss_history', []),
+         'start_time': job.get('start_time'),
+         'num_examples': job.get('num_examples', 0)
+     })
+
+
+ @app.route('/api/instruction_models', methods=['GET'])
+ def get_instruction_models():
+     """Get list of available instruction-tuned models."""
+     current_path = request.args.get("currentPath")
+
+     potential_root_paths = [
+         os.path.expanduser('~/.npcsh/models'),
+     ]
+     if current_path:
+         project_models_path = os.path.join(current_path, 'models')
+         potential_root_paths.append(project_models_path)
+
+     instruction_models = []
+
+     print(f"🎓 Searching for instruction models in: {set(potential_root_paths)}")
+
+     for root_path in set(potential_root_paths):
+         if not os.path.exists(root_path) or not os.path.isdir(root_path):
+             continue
+
+         for model_dir_name in os.listdir(root_path):
+             full_model_path = os.path.join(root_path, model_dir_name)
+
+             if not os.path.isdir(full_model_path):
+                 continue
+
+             # Check for adapter_config.json (LoRA models) or config.json (full models)
+             has_adapter_config = os.path.exists(os.path.join(full_model_path, 'adapter_config.json'))
+             has_config = os.path.exists(os.path.join(full_model_path, 'config.json'))
+             has_tokenizer = os.path.exists(os.path.join(full_model_path, 'tokenizer_config.json'))
+
+             if has_adapter_config or (has_config and has_tokenizer):
+                 model_type = 'lora_adapter' if has_adapter_config else 'full_model'
+                 print(f"🎓 Found instruction model: {model_dir_name} ({model_type})")
+                 instruction_models.append({
+                     "value": full_model_path,
+                     "name": model_dir_name,
+                     "type": model_type,
+                     "display_name": f"{model_dir_name} | Instruction Model"
+                 })
+
+     print(f"🎓 Found {len(instruction_models)} instruction models.")
+     return jsonify({"models": instruction_models, "error": None})
+
+
+ # Genetic Evolution jobs storage
+ ge_jobs = {}
+ ge_populations = {} # Store active populations by ID
+
+
+ @app.route('/api/genetic/create_population', methods=['POST'])
+ def create_genetic_population():
+     """
+     Create a new genetic evolution population.
+
+     Request body:
+     {
+         "populationId": "optional_id",
+         "populationType": "prompt" | "npc_config" | "model_ensemble" | "custom",
+         "populationSize": 20,
+         "config": {
+             "mutationRate": 0.15,
+             "crossoverRate": 0.7,
+             "tournamentSize": 3,
+             "elitismCount": 2
+         },
+         "initialPopulation": [...], // Optional initial individuals
+         "fitnessEndpoint": "/api/evaluate_fitness" // Optional custom fitness endpoint
+     }
+     """
+     from npcpy.ft.ge import GeneticEvolver, GAConfig
+
+     data = request.json
+     population_id = data.get('populationId', f"pop_{int(time.time())}")
+     population_type = data.get('populationType', 'prompt')
+     population_size = data.get('populationSize', 20)
+     config_data = data.get('config', {})
+     initial_population = data.get('initialPopulation', [])
+     npc_name = data.get('npc', None)
+
+     config = GAConfig(
+         population_size=population_size,
+         mutation_rate=config_data.get('mutationRate', 0.15),
+         crossover_rate=config_data.get('crossoverRate', 0.7),
+         tournament_size=config_data.get('tournamentSize', 3),
+         elitism_count=config_data.get('elitismCount', 2),
+         generations=config_data.get('generations', 50)
+     )
+
+     print(f"🧬 Creating genetic population {population_id} (type: {population_type})")
+
+     # Define type-specific functions based on population type
+     if population_type == 'prompt':
+         # Evolve prompts for better responses
+         import random
+
+         def initialize_fn():
+             if initial_population:
+                 return random.choice(initial_population)
+             return f"You are a helpful assistant. {random.choice(['Be concise.', 'Be detailed.', 'Be creative.', 'Be precise.'])}"
+
+         def mutate_fn(individual):
+             mutations = [
+                 lambda s: s + " Think step by step.",
+                 lambda s: s + " Be specific.",
+                 lambda s: s.replace("helpful", "expert"),
+                 lambda s: s.replace("assistant", "specialist"),
+                 lambda s: s + " Provide examples.",
+             ]
+             return random.choice(mutations)(individual)
+
+         def crossover_fn(p1, p2):
+             words1 = p1.split()
+             words2 = p2.split()
+             mid = len(words1) // 2
+             return ' '.join(words1[:mid] + words2[mid:])
+
+         def fitness_fn(individual):
+             # Placeholder - should be overridden with actual evaluation
+             return len(individual) / 100.0 # Longer prompts score higher (placeholder)
+
+     elif population_type == 'npc_config':
+         # Evolve NPC configurations
+         import random
+
+         def initialize_fn():
+             if initial_population:
+                 return random.choice(initial_population)
+             return {
+                 'temperature': random.uniform(0.1, 1.0),
+                 'top_p': random.uniform(0.7, 1.0),
+                 'system_prompt_modifier': random.choice(['detailed', 'concise', 'creative']),
+             }
+
+         def mutate_fn(individual):
+             mutated = individual.copy()
+             key = random.choice(list(mutated.keys()))
+             if key == 'temperature':
+                 mutated[key] = max(0.1, min(2.0, mutated[key] + random.gauss(0, 0.1)))
+             elif key == 'top_p':
+                 mutated[key] = max(0.5, min(1.0, mutated[key] + random.gauss(0, 0.05)))
+             return mutated
+
+         def crossover_fn(p1, p2):
+             child = {}
+             for key in p1:
+                 child[key] = random.choice([p1.get(key), p2.get(key)])
+             return child
+
+         def fitness_fn(individual):
+             return 0.5 # Placeholder
+
+     else:
+         # Custom type - use simple string evolution
+         import random
+
+         def initialize_fn():
+             if initial_population:
+                 return random.choice(initial_population)
+             return {"value": random.random()}
+
+         def mutate_fn(individual):
+             if isinstance(individual, dict):
+                 mutated = individual.copy()
+                 mutated['value'] = individual.get('value', 0) + random.gauss(0, 0.1)
+                 return mutated
+             return individual
+
+         def crossover_fn(p1, p2):
+             if isinstance(p1, dict) and isinstance(p2, dict):
+                 return {'value': (p1.get('value', 0) + p2.get('value', 0)) / 2}
+             return p1
+
+         def fitness_fn(individual):
+             if isinstance(individual, dict):
+                 return 1.0 - abs(individual.get('value', 0) - 0.5) # Closer to 0.5 is better
+             return 0.5
+
+     evolver = GeneticEvolver(
+         fitness_fn=fitness_fn,
+         mutate_fn=mutate_fn,
+         crossover_fn=crossover_fn,
+         initialize_fn=initialize_fn,
+         config=config
+     )
+
+     evolver.initialize_population()
+
+     ge_populations[population_id] = {
+         'evolver': evolver,
+         'type': population_type,
+         'config': config,
+         'generation': 0,
+         'history': [],
+         'npc': npc_name,
+         'created_at': datetime.datetime.now().isoformat()
+     }
+
+     return jsonify({
+         'populationId': population_id,
+         'populationType': population_type,
+         'populationSize': population_size,
+         'generation': 0,
+         'message': f"Population '{population_id}' created with {population_size} individuals"
+     })
+
+
+ @app.route('/api/genetic/evolve', methods=['POST'])
+ def evolve_population():
+     """
+     Run evolution for N generations.
+
+     Request body:
+     {
+         "populationId": "pop_123",
+         "generations": 10,
+         "fitnessScores": [...] // Optional: external fitness scores for current population
+     }
+     """
+     data = request.json
+     population_id = data.get('populationId')
+     generations = data.get('generations', 1)
+     fitness_scores = data.get('fitnessScores', None)
+
+     if population_id not in ge_populations:
+         return jsonify({'error': f"Population '{population_id}' not found"}), 404
+
+     pop_data = ge_populations[population_id]
+     evolver = pop_data['evolver']
+
+     print(f"🧬 Evolving population {population_id} for {generations} generations")
+
+     # If external fitness scores provided, inject them
+     if fitness_scores and len(fitness_scores) == len(evolver.population):
+         # Override the fitness function temporarily
+         original_fitness = evolver.fitness_fn
+         score_iter = iter(fitness_scores)
+         evolver.fitness_fn = lambda x: next(score_iter, 0.5)
+
+     results = []
+     for gen in range(generations):
+         gen_stats = evolver.evolve_generation()
+         pop_data['generation'] += 1
+         pop_data['history'].append(gen_stats)
+         results.append({
+             'generation': pop_data['generation'],
+             'bestFitness': gen_stats['best_fitness'],
+             'avgFitness': gen_stats['avg_fitness'],
+             'bestIndividual': gen_stats['best_individual']
+         })
+
+     # Restore original fitness function
+     if fitness_scores:
+         evolver.fitness_fn = original_fitness
+
+     return jsonify({
+         'populationId': population_id,
+         'generationsRun': generations,
+         'currentGeneration': pop_data['generation'],
+         'results': results,
+         'bestIndividual': results[-1]['bestIndividual'] if results else None,
+         'population': evolver.population[:5] # Return top 5 individuals
+     })
+
+
+ @app.route('/api/genetic/population/<population_id>', methods=['GET'])
+ def get_population(population_id):
+     """Get current state of a population."""
+     if population_id not in ge_populations:
+         return jsonify({'error': f"Population '{population_id}' not found"}), 404
+
+     pop_data = ge_populations[population_id]
+     evolver = pop_data['evolver']
+
+     return jsonify({
+         'populationId': population_id,
+         'type': pop_data['type'],
+         'generation': pop_data['generation'],
+         'populationSize': len(evolver.population),
+         'population': evolver.population,
+         'history': pop_data['history'][-50:], # Last 50 generations
+         'createdAt': pop_data['created_at'],
+         'npc': pop_data.get('npc')
+     })
+
+
+ @app.route('/api/genetic/populations', methods=['GET'])
+ def list_populations():
+     """List all active populations."""
+     populations = []
+     for pop_id, pop_data in ge_populations.items():
+         populations.append({
+             'populationId': pop_id,
+             'type': pop_data['type'],
+             'generation': pop_data['generation'],
+             'populationSize': len(pop_data['evolver'].population),
+             'createdAt': pop_data['created_at'],
+             'npc': pop_data.get('npc')
+         })
+
+     return jsonify({'populations': populations})
+
+
+ @app.route('/api/genetic/population/<population_id>', methods=['DELETE'])
+ def delete_population(population_id):
+     """Delete a population."""
+     if population_id not in ge_populations:
+         return jsonify({'error': f"Population '{population_id}' not found"}), 404
+
+     del ge_populations[population_id]
+     print(f"🧬 Deleted population {population_id}")
+
+     return jsonify({'message': f"Population '{population_id}' deleted"})
+
+
+ @app.route('/api/genetic/inject', methods=['POST'])
+ def inject_individuals():
+     """
+     Inject new individuals into a population.
+
+     Request body:
+     {
+         "populationId": "pop_123",
+         "individuals": [...],
+         "replaceWorst": true // Replace worst individuals or append
+     }
+     """
+     data = request.json
+     population_id = data.get('populationId')
+     individuals = data.get('individuals', [])
+     replace_worst = data.get('replaceWorst', True)
+
+     if population_id not in ge_populations:
+         return jsonify({'error': f"Population '{population_id}' not found"}), 404
+
+     pop_data = ge_populations[population_id]
+     evolver = pop_data['evolver']
+
+     if replace_worst:
+         # Evaluate and sort population, replace worst with new individuals
+         fitness_scores = evolver.evaluate_population()
+         sorted_pop = sorted(zip(evolver.population, fitness_scores), key=lambda x: x[1], reverse=True)
+         keep_count = len(sorted_pop) - len(individuals)
+         evolver.population = [ind for ind, _ in sorted_pop[:keep_count]] + individuals
+     else:
+         evolver.population.extend(individuals)
+
+     print(f"🧬 Injected {len(individuals)} individuals into {population_id}")
+
+     return jsonify({
+         'populationId': population_id,
+         'injectedCount': len(individuals),
+         'newPopulationSize': len(evolver.population)
+     })
+
+
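Taken together, the genetic endpoints form a create → evolve → inspect → delete loop. A hedged sketch of a client session (shapes taken from the docstrings above; host and values illustrative):

    import requests

    base = "http://localhost:5337/api/genetic"

    pop = requests.post(f"{base}/create_population", json={
        "populationType": "prompt",
        "populationSize": 10,
        "config": {"mutationRate": 0.2, "elitismCount": 2},
    }).json()
    pop_id = pop["populationId"]

    result = requests.post(f"{base}/evolve", json={
        "populationId": pop_id,
        "generations": 5,
    }).json()
    print(result["bestIndividual"])

    # Inspect the population state, then clean up
    state = requests.get(f"{base}/population/{pop_id}").json()
    requests.delete(f"{base}/population/{pop_id}")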
  @app.route("/api/ml/train", methods=["POST"])
  def train_ml_model():
-     import pickle
+     import joblib
      import numpy as np
      from sklearn.linear_model import LinearRegression, LogisticRegression
      from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
@@ -2001,7 +2683,7 @@ def train_ml_model():
      from sklearn.cluster import KMeans
      from sklearn.model_selection import train_test_split
      from sklearn.metrics import mean_squared_error, r2_score, accuracy_score
-
+
      data = request.json
      model_name = data.get("name")
      model_type = data.get("type")
@@ -2009,13 +2691,13 @@ def train_ml_model():
      features = data.get("features")
      training_data = data.get("data")
      hyperparams = data.get("hyperparameters", {})
-
+
      df = pd.DataFrame(training_data)
      X = df[features].values
-
+
      metrics = {}
      model = None
-
+
      if model_type == "linear_regression":
          y = df[target].values
          X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
@@ -2026,7 +2708,7 @@ def train_ml_model():
              "r2_score": r2_score(y_test, y_pred),
              "rmse": np.sqrt(mean_squared_error(y_test, y_pred))
          }
-
+
      elif model_type == "logistic_regression":
          y = df[target].values
          X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
@@ -2034,7 +2716,7 @@ def train_ml_model():
          model.fit(X_train, y_train)
          y_pred = model.predict(X_test)
          metrics = {"accuracy": accuracy_score(y_test, y_pred)}
-
+
      elif model_type == "random_forest":
          y = df[target].values
          X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
@@ -2045,13 +2727,13 @@ def train_ml_model():
              "r2_score": r2_score(y_test, y_pred),
              "rmse": np.sqrt(mean_squared_error(y_test, y_pred))
          }
-
+
      elif model_type == "clustering":
          n_clusters = hyperparams.get("n_clusters", 3)
          model = KMeans(n_clusters=n_clusters)
          labels = model.fit_predict(X)
          metrics = {"inertia": model.inertia_, "n_clusters": n_clusters}
-
+
      elif model_type == "gradient_boost":
          y = df[target].values
          X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
@@ -2062,19 +2744,18 @@ def train_ml_model():
              "r2_score": r2_score(y_test, y_pred),
              "rmse": np.sqrt(mean_squared_error(y_test, y_pred))
          }
-
+
      model_id = f"{model_name}_{int(time.time())}"
-     model_path = os.path.expanduser(f"~/.npcsh/models/{model_id}.pkl")
+     model_path = os.path.join(get_models_dir(), f"{model_id}.joblib")
      os.makedirs(os.path.dirname(model_path), exist_ok=True)
-
-     with open(model_path, 'wb') as f:
-         pickle.dump({
-             "model": model,
-             "features": features,
-             "target": target,
-             "type": model_type
-         }, f)
-
+
+     joblib.dump({
+         "model": model,
+         "features": features,
+         "target": target,
+         "type": model_type
+     }, model_path)
+
      return jsonify({
          "model_id": model_id,
          "metrics": metrics,
@@ -2084,26 +2765,25 @@ def train_ml_model():

  @app.route("/api/ml/predict", methods=["POST"])
  def ml_predict():
-     import pickle
-
+     import joblib
+
      data = request.json
      model_name = data.get("model_name")
      input_data = data.get("input_data")
-
-     model_dir = os.path.expanduser("~/.npcsh/models/")
+
+     model_dir = get_models_dir()
      model_files = [f for f in os.listdir(model_dir) if f.startswith(model_name)]
-
+
      if not model_files:
          return jsonify({"error": f"Model {model_name} not found"})
-
+
      model_path = os.path.join(model_dir, model_files[0])
-
-     with open(model_path, 'rb') as f:
-         model_data = pickle.load(f)
-
+
+     model_data = joblib.load(model_path)
+
      model = model_data["model"]
      prediction = model.predict([input_data])
-
+
      return jsonify({
          "prediction": prediction.tolist(),
          "error": None
@@ -2969,6 +3649,11 @@ def get_attachment_response():


  IMAGE_MODELS = {
+     "diffusers": [
+         {"value": "runwayml/stable-diffusion-v1-5", "display_name": "Stable Diffusion v1.5"},
+         {"value": "stabilityai/stable-diffusion-xl-base-1.0", "display_name": "SDXL Base 1.0"},
+         {"value": "black-forest-labs/FLUX.1-schnell", "display_name": "FLUX.1 Schnell"},
+     ],
      "openai": [
          {"value": "gpt-image-1.5", "display_name": "GPT-Image-1.5"},
          {"value": "gpt-image-1", "display_name": "GPT-Image-1"},
@@ -2980,9 +3665,69 @@ IMAGE_MODELS = {
          {"value": "gemini-2.5-flash-image-preview", "display_name": "Gemini 2.5 Flash Image"},
          {"value": "imagen-3.0-generate-002", "display_name": "Imagen 3.0 Generate (Preview)"},
      ],
-     "diffusers": [
-         {"value": "runwayml/stable-diffusion-v1-5", "display_name": "Stable Diffusion v1.5"},
+     "stability": [
+         {"value": "stable-diffusion-xl-1024-v1-0", "display_name": "SDXL 1.0"},
+         {"value": "stable-diffusion-v1-6", "display_name": "SD 1.6"},
+         {"value": "stable-image-core", "display_name": "Stable Image Core"},
+         {"value": "stable-image-ultra", "display_name": "Stable Image Ultra"},
+     ],
+     "replicate": [
+         {"value": "stability-ai/sdxl", "display_name": "SDXL (Replicate)"},
+         {"value": "black-forest-labs/flux-schnell", "display_name": "FLUX Schnell"},
+         {"value": "black-forest-labs/flux-dev", "display_name": "FLUX Dev"},
+         {"value": "black-forest-labs/flux-pro", "display_name": "FLUX Pro"},
+     ],
+     "fal": [
+         {"value": "fal-ai/flux/schnell", "display_name": "FLUX Schnell"},
+         {"value": "fal-ai/flux/dev", "display_name": "FLUX Dev"},
+         {"value": "fal-ai/flux-pro", "display_name": "FLUX Pro"},
+         {"value": "fal-ai/stable-diffusion-v3-medium", "display_name": "SD3 Medium"},
+     ],
+     "together": [
+         {"value": "stabilityai/stable-diffusion-xl-base-1.0", "display_name": "SDXL Base"},
+         {"value": "black-forest-labs/FLUX.1-schnell", "display_name": "FLUX.1 Schnell"},
+         {"value": "black-forest-labs/FLUX.1.1-pro", "display_name": "FLUX 1.1 Pro"},
+     ],
+     "fireworks": [
+         {"value": "stable-diffusion-xl-1024-v1-0", "display_name": "SDXL 1.0"},
+         {"value": "playground-v2-1024px-aesthetic", "display_name": "Playground v2"},
+     ],
+     "deepinfra": [
+         {"value": "stability-ai/sdxl", "display_name": "SDXL"},
+         {"value": "black-forest-labs/FLUX-1-schnell", "display_name": "FLUX Schnell"},
+     ],
+     "bfl": [
+         {"value": "flux-pro-1.1", "display_name": "FLUX Pro 1.1"},
+         {"value": "flux-pro", "display_name": "FLUX Pro"},
+         {"value": "flux-dev", "display_name": "FLUX Dev"},
+     ],
+     "bagel": [
+         {"value": "bagel-image-v1", "display_name": "Bagel Image v1"},
      ],
+     "leonardo": [
+         {"value": "leonardo-diffusion-xl", "display_name": "Leonardo Diffusion XL"},
+         {"value": "leonardo-vision-xl", "display_name": "Leonardo Vision XL"},
+     ],
+     "ideogram": [
+         {"value": "ideogram-v2", "display_name": "Ideogram v2"},
+         {"value": "ideogram-v2-turbo", "display_name": "Ideogram v2 Turbo"},
+     ],
+ }
+
+ # Map provider names to their environment variable keys
+ IMAGE_PROVIDER_API_KEYS = {
+     "openai": "OPENAI_API_KEY",
+     "gemini": "GEMINI_API_KEY",
+     "stability": "STABILITY_API_KEY",
+     "replicate": "REPLICATE_API_TOKEN",
+     "fal": "FAL_KEY",
+     "together": "TOGETHER_API_KEY",
+     "fireworks": "FIREWORKS_API_KEY",
+     "deepinfra": "DEEPINFRA_API_KEY",
+     "bfl": "BFL_API_KEY",
+     "bagel": "BAGEL_API_KEY",
+     "leonardo": "LEONARDO_API_KEY",
+     "ideogram": "IDEOGRAM_API_KEY",
  }
  # In npcpy/serve.py, find the @app.route('/api/finetuned_models', methods=['GET'])
  # and replace the entire function with this:
@@ -3058,25 +3803,22 @@ def get_available_image_models(current_path=None):
              "display_name": f"{env_image_model} | {env_image_provider} (Configured)"
          })

-     # Add predefined models (OpenAI, Gemini, and standard Diffusers)
+     # Add predefined models - diffusers always available, others require API keys
      for provider_key, models_list in IMAGE_MODELS.items():
-         if provider_key == "openai":
-             if os.environ.get("OPENAI_API_KEY"):
-                 all_image_models.extend([
-                     {**model, "provider": provider_key, "display_name": f"{model['display_name']} | {provider_key}"}
-                     for model in models_list
-                 ])
-         elif provider_key == "gemini":
-             if os.environ.get("GEMINI_API_KEY"):
-                 all_image_models.extend([
-                     {**model, "provider": provider_key, "display_name": f"{model['display_name']} | {provider_key}"}
-                     for model in models_list
-                 ])
-         elif provider_key == "diffusers": # This entry in IMAGE_MODELS is for standard diffusers
+         if provider_key == "diffusers":
+             # Diffusers (local) is always available
              all_image_models.extend([
                  {**model, "provider": provider_key, "display_name": f"{model['display_name']} | {provider_key}"}
                  for model in models_list
              ])
+         else:
+             # Check if API key is present for this provider
+             api_key_env = IMAGE_PROVIDER_API_KEYS.get(provider_key)
+             if api_key_env and os.environ.get(api_key_env):
+                 all_image_models.extend([
+                     {**model, "provider": provider_key, "display_name": f"{model['display_name']} | {provider_key}"}
+                     for model in models_list
+                 ])

      # <--- CRITICAL FIX: Directly call the internal helper function for fine-tuned models
      try:
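The new loop reduces per-provider availability to a single rule: diffusers is always offered because it runs locally, everything else is gated on its environment key. Equivalent standalone logic, assuming the IMAGE_PROVIDER_API_KEYS mapping above is in scope (a sketch, not code from the package):

    import os

    def provider_available(provider_key: str) -> bool:
        if provider_key == "diffusers":
            return True  # local inference, no API key needed
        env_var = IMAGE_PROVIDER_API_KEYS.get(provider_key)
        return bool(env_var and os.environ.get(env_var))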
@@ -3804,8 +4546,10 @@ def stream():
          return jsonify({"error": "conversationId is required"}), 400
      model = data.get("model", None)
      provider = data.get("provider", None)
+     print(f"🔍 Stream request - model: {model}, provider from request: {provider}")
      if provider is None:
          provider = available_models.get(model)
+         print(f"🔍 Provider looked up from available_models: {provider}")

      npc_name = data.get("npc", None)
      npc_source = data.get("npcSource", "global")
@@ -3824,8 +4568,10 @@ def stream():

      npc_object = None
      team_object = None
-     team = None
+     team = None
      tool_results_for_db = []
+     # Initialize stream_response early to ensure it's always defined for closures
+     stream_response = {"output": "", "messages": []}
      if npc_name:
          if hasattr(app, 'registered_teams'):
              for team_name, team_object in app.registered_teams.items():
@@ -4449,7 +5195,30 @@ def stream():
                  yield f"data: {json.dumps(chunk_data)}\n\n"

              elif isinstance(stream_response, dict):
-                 for response_chunk in stream_response.get('response', stream_response.get('output')):
+                 # Handle LoRA responses - they return the full response at once, not streaming
+                 if provider == 'lora':
+                     lora_text = stream_response.get('response', stream_response.get('output', ''))
+                     if lora_text:
+                         complete_response.append(lora_text)
+                         chunk_data = {
+                             "id": None,
+                             "object": None,
+                             "created": datetime.datetime.now().strftime('YYYY-DD-MM-HHMMSS'),
+                             "model": model,
+                             "choices": [
+                                 {
+                                     "index": 0,
+                                     "delta": {
+                                         "content": lora_text,
+                                         "role": "assistant"
+                                     },
+                                     "finish_reason": "stop"
+                                 }
+                             ]
+                         }
+                         yield f"data: {json.dumps(chunk_data)}\n\n"
+                 else:
+                     for response_chunk in stream_response.get('response', stream_response.get('output')):
                          with cancellation_lock:
                              if cancellation_flags.get(current_stream_id, False):
                                  print(f"Cancellation flag triggered for {current_stream_id}. Breaking loop.")
@@ -5482,12 +6251,14 @@ def scan_gguf_models():
      """Scan for GGUF/GGML model files in specified or default directories."""
      directory = request.args.get('directory')

-     # Default directories to scan
+     # Default directories to scan (using platform-specific paths)
+     models_dir = get_models_dir()
      default_dirs = [
-         os.path.expanduser('~/.npcsh/models/gguf'),
-         os.path.expanduser('~/.npcsh/models'),
+         os.path.join(models_dir, 'gguf'),
+         models_dir,
          os.path.expanduser('~/models'),
-         os.path.expanduser('~/.cache/huggingface/hub'),
+         os.path.join(get_cache_dir(), 'huggingface/hub'),
+         os.path.expanduser('~/.cache/huggingface/hub'), # Fallback
      ]

      # Add env var directory if set
@@ -5529,7 +6300,8 @@ def download_hf_model():
      """Download a GGUF model from HuggingFace."""
      data = request.json
      url = data.get('url', '')
-     target_dir = data.get('target_dir', '~/.npcsh/models/gguf')
+     default_target = os.path.join(get_models_dir(), 'gguf')
+     target_dir = data.get('target_dir', default_target)

      target_dir = os.path.expanduser(target_dir)
      os.makedirs(target_dir, exist_ok=True)
@@ -5695,7 +6467,8 @@ def download_hf_file():
      data = request.json
      repo_id = data.get('repo_id', '')
      filename = data.get('filename', '')
-     target_dir = data.get('target_dir', '~/.npcsh/models/gguf')
+     default_target = os.path.join(get_models_dir(), 'gguf')
+     target_dir = data.get('target_dir', default_target)

      if not repo_id or not filename:
          return jsonify({'error': 'repo_id and filename are required'}), 400